#################################################
# #
# Higher-Order Histospline S-plus Functions #
# #
# Save as text, then source into S-plus #
# #
# Copyright 1998, Michael C. Minnotte #
# #
# For assistance, or to report bugs, please #
# contact: #
# #
# minnotte@math.usu.edu #
# #
# Last updated: September 15, 1998 #
# #
# For more information about higher-order #
# histosplines, see: #
# #
# Minnotte, M.C. (1998), "Achieving Higher- #
# Order Convergence Rates for Density #
# Estimation with Binned Data," Journal of #
# the American Statistical Association, 93, #
# 663--672. #
# #
# Functions include: #
# #
# hohs.binned - h.o.histospline from bin #
# counts and edges #
# hohs.unbinned - h.o.h.s. from raw data, #
# bin origin, and bin width #
# bspline.set - calculates all B splines up #
# to a given order for a given series of #
# knots #
# nicex - default points of evaluation, #
# equally spaced points extending 10% #
# beyond the range of the data or bin #
# edges #
# #
#################################################
bspline.set<-function(x,k,n)
{
# x = points of evaluation (vector)
# k = knots (vector)
# n = highest order of spline
nx<-length(x)
nk<-length(k)
if (n < 0 | n >= nk)
stop("n must be a positive integer less than the number of knots\n")
out<-array(0,c(n,nk-1,nx))
for (i in 1:(nk-1))
out[1,i,x>=k[i] & x < k[i+1]]<-1
if (n > 1)
for (j in 2:n)
for (i in 1:(nk-j))
{out[j,i,]<-(x-k[i])/(k[i+j-1]-k[i])*out[j-1,i,]+
(k[i+j]-x)/(k[i+j]-k[i+1])*out[j-1,i+1,]}
return(out)}
#################################################
nicex<-function(dat,beta=0.1,nx=401)
{ab<-range(dat)
ab2<-(ab+c(-1,1)*diff(ab)*beta)
return(seq(ab2[1],ab2[2],length=nx))}
#################################################
hohs.binned<-function(counts, edges, x=nicex(edges), p=4)
{# counts = vector of bin counts
# edges = vector of histogram bin edges (1 longer than counts)
# x = points to evaluate histospline
# p = order of histospline (4 = cubic, recommended for most purposes)
if ((ceiling(p/2) != floor(p/2)) | (p<2))
stop("p must be a positive even integer.")
if (length(unique(signif(diff(edges),5))) > 1)
stop("edges must be equally spaced.")
if (!exists("hohs.weights"))
{# initialize hohs.weights for order p histospline
cat("Initializing order ",p," histospline weights.\n This may take a few minutes.\n")
p2<-p/2
a<-rep(0,p2+1)
for (i in 0:p2)
{for (q in 0:i)
a[i+1]<-a[i+1]+(-1)^q*(2*i+1-2*q)^p*dbinom(q,p+1,1/2)
}
a<-a*2/gamma(p+1)
a<-rev(a)
C<-matrix(0,641,641)
C[cbind(1:641,1:641)]<-a[1]
for (i in 1:p2)
{C[cbind(1:(641-i),(1+i):641)]<-a[i+1]
C[cbind((1+i):641,1:(641-i))]<-a[i+1]}
C2<-solve(C)
hohs.weights<<-list(p=p,w=matrix(C2[321,21:621],nrow=1))
cat("Initialization complete.\n Do not delete hohs.weights for faster performance in the future.\n")
}
else if (length(hohs.weights$p[hohs.weights$p==p])==0)
{# hohs.weights exists, but not for order p
cat("Initializing order ",p," histospline weights.\n This may take a few minutes.\n")
p2<-p/2
a<-rep(0,p2+1)
for (i in 0:p2)
{for (q in 0:i)
a[i+1]<-a[i+1]+(-1)^q*(2*i+1-2*q)^p*dbinom(q,p+1,1/2)
}
a<-a*2/gamma(p+1)
a<-rev(a)
C<-matrix(0,641,641)
C[cbind(1:641,1:641)]<-a[1]
for (i in 1:p2)
{C[cbind(1:(641-i),(1+i):641)]<-a[i+1]
C[cbind((1+i):641,1:(641-i))]<-a[i+1]}
C2<-solve(C)
hohs.weights$p<<-c(hohs.weights$p,p)
hohs.weights$w<<-rbind(hohs.weights$w,C2[321,21:621])
cat("Initialization complete.\n Do not delete hohs.weights for faster performance in the future.\n")
}
n<-sum(counts)
nc<-length(counts)
p2<-p/2
w<-hohs.weights$w[(1:length(hohs.weights$p))[hohs.weights$p==p],]
y<-rep(0,length(x))
h<-edges[2]-edges[1]
t0<-(edges[2]+edges[1])/2
bl<-ceiling((t0-min(x))/h)+p
br<-ceiling((max(x)-t0)/h)+p
k<-t0+h*((-bl):br)
nk<-length(k)
extra<-0
bsp<-bspline.set(x,k,p)
if (p2+nk-p-bl > 301)
{extra<-p2+nk-p-bl-301
w<-c(rep(0,extra),w)}
if (300-p2+bl+nc+extra > 601)
w<-c(w,rep(0,bl+nc+extra-p2-301))
for (i in 1:(nk-p))
y<-y+w[301-p2-i+bl+(1:nc)+extra]%*%counts*bsp[p,i,]
y<-y/n/h
return(x,y)
}
#################################################
hohs.unbinned<-function(data, t0 = mean(range(data)), h = diff(range(data))/10,
x = nicex(data), p = 4, histplot=F)
{# data = raw (unbinned) data
# t0 = starting histogram bin edge
# h = bin width
# x = points of evaluation of histospline
# p = order of histospline (4 = cubic, recommended for most purposes)
# histplot = if T, plots histogram generating histospline
bl<-ceiling((t0-min(data))/h)
br<-ceiling((max(data)-t0)/h)
edges<-t0+h*((-bl):br)
counts<-hist(data,breaks=edges,plot=F)$counts
if (histplot) hist(data,breaks=edges,probability=T)
out<-hohs.binned(counts,edges,x,p)
return(list(x=out$x,y=out$y,counts=counts,edges=edges))
}