#################################################
#                                               #
#   Higher-Order Histospline S-plus Functions   #
#                                               #
#   Save as text, then source into S-plus       #
#                                               #
#   Copyright 1998, Michael C. Minnotte         #
#                                               #
#   For assistance, or to report bugs, please   #
#   contact:                                    #
#                                               #
#   minnotte@math.usu.edu                       #
#                                               #
#   Last updated: September 15, 1998            #
#                                               #
#   For more information about higher-order     #
#   histosplines, see:                          #
#                                               #
#   Minnotte, M.C. (1998), "Achieving Higher-   #
#   Order Convergence Rates for Density         #
#   Estimation with Binned Data," Journal of    #
#   the American Statistical Association, 93,   #
#   663--672.                                   #
#                                               #
#   Functions include:                          #
#                                               #
#   hohs.binned - h.o.histospline from bin      #
#      counts and edges                         #
#   hohs.unbinned - h.o.h.s.  from raw data,    #
#      bin origin, and bin width                #
#   bspline.set - calculates all B splines up   #
#      to a given order for a given series of   #
#      knots                                    #
#   nicex - default points of evaluation,       #
#      equally spaced points extending 10%      #
#      beyond the range of the data or bin      #
#      edges                                    #
#                                               #
#################################################


bspline.set<-function(x,k,n)

{
# x = points of evaluation (vector)
# k = knots (vector)
# n = highest order of spline
nx<-length(x)
nk<-length(k)
if (n < 0 | n >= nk)
	stop("n must be a positive integer less than the number of knots\n")
out<-array(0,c(n,nk-1,nx))
for (i in 1:(nk-1))
	out[1,i,x>=k[i] & x < k[i+1]]<-1
if (n > 1)
	for (j in 2:n)
		for (i in 1:(nk-j))
			{out[j,i,]<-(x-k[i])/(k[i+j-1]-k[i])*out[j-1,i,]+
	                (k[i+j]-x)/(k[i+j]-k[i+1])*out[j-1,i+1,]}
return(out)}

#################################################

nicex<-function(dat,beta=0.1,nx=401)

{ab<-range(dat)
ab2<-(ab+c(-1,1)*diff(ab)*beta)
return(seq(ab2[1],ab2[2],length=nx))}

#################################################

hohs.binned<-function(counts, edges, x=nicex(edges), p=4)

{# counts = vector of bin counts
# edges = vector of histogram bin edges (1 longer than counts)
# x = points to evaluate histospline
# p = order of histospline (4 = cubic, recommended for most purposes)

if ((ceiling(p/2) != floor(p/2)) | (p<2))
	stop("p must be a positive even integer.")
if (length(unique(signif(diff(edges),5))) > 1)
	stop("edges must be equally spaced.")

if (!exists("hohs.weights"))
	{#  initialize hohs.weights for order p histospline
	cat("Initializing order ",p," histospline weights.\n This may take a few minutes.\n")

	p2<-p/2
	a<-rep(0,p2+1)
	for (i in 0:p2)
		{for (q in 0:i)
			a[i+1]<-a[i+1]+(-1)^q*(2*i+1-2*q)^p*dbinom(q,p+1,1/2)
		}
	a<-a*2/gamma(p+1)
	a<-rev(a)

	C<-matrix(0,641,641)
	C[cbind(1:641,1:641)]<-a[1]
	for (i in 1:p2)
		{C[cbind(1:(641-i),(1+i):641)]<-a[i+1]
		C[cbind((1+i):641,1:(641-i))]<-a[i+1]}
	C2<-solve(C)
	
	hohs.weights<<-list(p=p,w=matrix(C2[321,21:621],nrow=1))

	cat("Initialization complete.\n  Do not delete hohs.weights for faster performance in the future.\n")
	}
else if (length(hohs.weights$p[hohs.weights$p==p])==0)
	{# hohs.weights exists, but not for order p
	cat("Initializing order ",p," histospline weights.\n This may take a few minutes.\n")

	p2<-p/2
	a<-rep(0,p2+1)
	for (i in 0:p2)
		{for (q in 0:i)
			a[i+1]<-a[i+1]+(-1)^q*(2*i+1-2*q)^p*dbinom(q,p+1,1/2)
		}
	a<-a*2/gamma(p+1)
	a<-rev(a)

	C<-matrix(0,641,641)
	C[cbind(1:641,1:641)]<-a[1]
	for (i in 1:p2)
		{C[cbind(1:(641-i),(1+i):641)]<-a[i+1]
		C[cbind((1+i):641,1:(641-i))]<-a[i+1]}
	C2<-solve(C)
	
	hohs.weights$p<<-c(hohs.weights$p,p)
	hohs.weights$w<<-rbind(hohs.weights$w,C2[321,21:621])

	cat("Initialization complete.\n  Do not delete hohs.weights for faster performance in the future.\n")
	}

n<-sum(counts)
nc<-length(counts)
p2<-p/2
w<-hohs.weights$w[(1:length(hohs.weights$p))[hohs.weights$p==p],]
y<-rep(0,length(x))
h<-edges[2]-edges[1]
t0<-(edges[2]+edges[1])/2
bl<-ceiling((t0-min(x))/h)+p
br<-ceiling((max(x)-t0)/h)+p
k<-t0+h*((-bl):br)
nk<-length(k)

extra<-0
bsp<-bspline.set(x,k,p)
if (p2+nk-p-bl > 301)
	{extra<-p2+nk-p-bl-301
	w<-c(rep(0,extra),w)}
if (300-p2+bl+nc+extra > 601)
	w<-c(w,rep(0,bl+nc+extra-p2-301))
for (i in 1:(nk-p))
	y<-y+w[301-p2-i+bl+(1:nc)+extra]%*%counts*bsp[p,i,]
y<-y/n/h
return(x,y)
}

#################################################

hohs.unbinned<-function(data, t0 = mean(range(data)), h = diff(range(data))/10,
	x = nicex(data), p = 4, histplot=F)

{# data = raw (unbinned) data
# t0 = starting histogram bin edge
# h = bin width
# x = points of evaluation of histospline
# p = order of histospline (4 = cubic, recommended for most purposes)
# histplot = if T, plots histogram generating histospline

bl<-ceiling((t0-min(data))/h)
br<-ceiling((max(data)-t0)/h)
edges<-t0+h*((-bl):br)
counts<-hist(data,breaks=edges,plot=F)$counts
if (histplot) hist(data,breaks=edges,probability=T)
out<-hohs.binned(counts,edges,x,p)
return(list(x=out$x,y=out$y,counts=counts,edges=edges))
}