#######################################################################
#                                                                     #
# Mode Forest S-plus Functions                                        #
#                                                                     #
# Save as text, then source into S-plus                               #
#                                                                     #
# Copyright 1996, Michael C. Minnotte                                 #
#                                                                     #
# For assistance, or to report bugs, please contact:                  #
#                                                                     #
# minnotte@math.usu.edu                                               #
#                                                                     #
# Last updated, September 17, 1998                                    #
#                                                                     #
# For more information about mode forests, see:                       #
#                                                                     #
# Minnotte, M.C., Marchette, D.J., and Wegman, E.J., (1998), "The     #
# bumpy road to the mode forest," Journal of Computational and        #
# Graphical Statistics, 7, 239-251.                                   #
#                                                                     #
# Functions include:                                                  #
#                                                                     #
# forest - main mode forest routine.  Handles all versions with       #
#     appropriate options set                                         #
# onetree - shortcut to crude single mode tree                        #
# resamp.forest - shortcut to resampled mode forest                   #
# subsamp.forest - shortcut to random subset mode forest              #
# nnjit.forest - shorcut to nearest neighbor mode forest              #
# ransamp.forest - shortcut to random subset mode forest (random size)#
# forestplot - mode forest plotting function using image, called      #
#     from forest if doplot = T                                       #
# forestplot.poly - mode forest plotting function using polygons; not #
#     recommended if forestplot works                                 #
# forestps - postscript device with shading levels set                #
# normkde - normal kernel density estimate                            #
# nicerange - range of data increased by pct (default 10%) each side  #
#                                                                     #
#######################################################################
normkde<-function(dat,xlist,h) {# normal kernel density estimate # dat - data # xlist - points of evaluation # h - bandwidth (smoothing parameter; standard deviation of normal kernel) m<-length(xlist) fhat<-rep(0,m) n<-length(dat) if(m<n) for (i in 1:m) fhat[i]<-sum(dnorm(dat,xlist[i],h)) else for (i in 1:n) fhat<-fhat+dnorm(xlist,dat[i],h) fhat<-fhat/n return(fhat)} ####################################################################### nicerange<-function(dat,pct=10) {# gives range of data increased by pct % each end # dat - data # pct - % to widen range by at each end datr<-range(dat) datdiff<-diff(datr) datnr<-c(datr[1]-pct*datdiff/100,datr[2]+pct*datdiff/100) return(datnr)} ######################################################################### forestps<-function(filename,hor=T...) {# sets up postscript device for mode forest plots postscript(filename,colors=((0:100)/100),horizontal=hor,...)} ######################################################################### forestplot.poly<-function(forestlist,minc=7,maxc=106,add=F,xlabel="x",sqrtt=F) {# plot a mode forest using the polygon function # slower and much bigger files than forestplot, so not generally # recommended unless forestplot does not work # forestlist - mode forest; output from forest # minc, maxc - color numbers for polygons making up image # add - if T, add to existing plot # xlabel - label for x-axis # sqrtt - transform weights by square root before mapping to shadings count<-forestlist$count x<-forestlist$x h<-forestlist$h if (!add) plot(x[1],h[1],xlim=range(x),ylim=range(h),log='y',xlab=xlabel, ylab="h",type='n') m<-max(count) nh<-nrow(count) if (sqrtt) count2<-as.vector(round(sqrt(count/m)*(maxc-minc+1))) else count2<-as.vector(round((count/m)*(maxc-minc+1))) levs<-sort(unique(count2)) if (levs[1]==0) levs<-levs[-1] levs<-rev(levs) lm<-length(count2) for (i in levs) {levindex<-(1:lm)[count2==i] levx<-floor((levindex-1)/nh)+1 levh<-levindex-(levx-1)*nh xp1<-cbind(x[levx],x[levx],x[levx+1],x[levx+1],NA) yp1<-cbind(h[levh],h[levh+1],h[levh+1],h[levh],NA) xp2<-as.vector(t(xp1)) yp2<-as.vector(t(yp1)) polygon(xp2,yp2,border=F,col=maxc+1-i)} } ######################################################################### forestplot<-function(forestlist,add=F,xlabel="x",sqrtt=F,ps=F) {# plot a mode forest using the image function # forestlist - mode forest; output from forest # add - if T, add to existing plot # xlabel - label for x-axis # sqrtt - transform weights by square root before mapping to shadings # ps - if T, reverse shadings for postscript output count<-forestlist$count x<-forestlist$x h<-forestlist$h y<-log(h) if (ps) {z<-t(count) zlim<-c(1,max(count))} else {z<-t(max(count)-count) zlim<-c(0,max(count)-1)} if (sqrtt) z<-z/rep((h[-1]*h[-length(h)])^.25,rep(length(x)-1,length(h)-1)) if (zlim[2]<zlim[1]) zlim<-c(0,max(count)) image(x,y,z,zlim=zlim,add=add,xlab=xlabel,ylab='h',tck=0,axes=F) xtck<-pretty(x) yt1<-floor(log10(min(h))) yt2<-ceiling(log10(max(h))) ytck<-sort(c(10^(yt1:yt2),5*10^(yt1:(yt2-1)))) axis(1,at=xtck,labels=xtck) axis(2,at=log(ytck),labels=ytck,srt=90) #axis(2,at=log(ytck),labels=ytck) box() } ######################################################################### forest<-function(dat, xr=nicerange(dat), hr=c(diff(nicerange(dat))/4, diff(nicerange(dat))/400), nx=200, nh=50, nsamp=0, xlabel="x",docat=T, doplot=T,update=F,sampsize=length(dat),replace=T,orig=T,what="mode", jit=0,rsampsize=F) {# main function for computing mode forests # dat - data # xr - range of points of evaluation # hr - range of bandwidths # nx - number of points of evaluation # nh - number of bandwidths # nsamp - number of samples to generate # xlabel - label for x axis when plotted # docat - print indication to screen of sample, bandwidth numbers, allows # monitoring of progress # doplot - if T, plot forest # update - if T, doplot T, plot forest after every sample; can be slow. # If F, doplot T, plot only at end of computation # sampsize - size of generated samples # replace - draw samples from data with replacement? # orig - include original sample (as one sample for forest)? # what - one of "mode", "bump", or "density". Most mode forests will use "bump" # jit - if "nn", do nearest neighbor jittering of new samples; if numeric, # jitter new samples by uniform[-jit,jit] random variables. # rsampsize - if T, resample size is random, binomial(length(data),1/2), # each sample dat<-sort(dat) n<-length(dat) xrange<-seq(xr[1],xr[2],length=nx+1) xcent<-(xrange[-1]+xrange[-(nx+1)])/2 loghrange<-seq(log(hr[1]),log(hr[2]),length=nh+1) loghcent<-(loghrange[-1]+loghrange[-(nh+1)])/2 hrange<-exp(loghrange) hcent<-exp(loghcent) nwhat<-charmatch(what,c("mode","bump","density")) if (is.na(nwhat)) return("What not matched. Should be 'mode', 'bump', or 'density'.") count<-matrix(0,nh,nx) if (orig) {for (i in 1:nh) {if (docat) cat("Starting ",i,"\n") fhat<-normkde(dat, xcent, hcent[i]) if (nwhat==1) # count modes {modes<-(2:(nx-1))[fhat[2:(nx-1)] > fhat[1:(nx-2)] & fhat[3:nx] < fhat[2:(nx-1)]] count[i,modes]<-count[i,modes]+1 } else if (nwhat==2) # count bumps {bumps<-(2:(nx-1))[fhat[2:(nx-1)] - fhat[1:(nx-2)] > fhat[3:nx] - fhat[2:(nx-1)]] count[i,bumps]<-count[i,bumps]+1 } else # density {count[i,]<-count[i,]+fhat*hcent[i] } } if (doplot & update) forestplot(list(count=count,x=xrange,h=hrange)) } if (nsamp > 0) {if (jit == "nn") {jitlist<-c((3*dat[1]-dat[2])/2,(dat[-1]+dat[-n])/2,(3*dat[n]-dat[n-1])/2) } for (k in 1:nsamp) {if (rsampsize) sampsize<-rbinom(1,n,1/2) resamp<-sample(n,sampsize,replace=replace) if (jit=="nn") {smooth<-runif(sampsize,jitlist[resamp],jitlist[resamp+1]) } else if (jit > 0) {smooth<-runif(sampsize,dat[resamp]-jit,dat[resamp]+jit) } else {smooth<-dat[resamp] } for (i in 1:nh) {if (docat) cat("Sample ",k,"; h ",i,"\n") fhat<-normkde(smooth, xcent, hcent[i]) if (nwhat==1) # count modes {modes<-(2:(nx-1))[fhat[2:(nx-1)] > fhat[1:(nx-2)] & fhat[3:nx] < fhat[2:(nx-1)]] count[i,modes]<-count[i,modes]+1 } else if (nwhat==2) # count bumps {bumps<-(2:(nx-1))[fhat[2:(nx-1)] - fhat[1:(nx-2)] > fhat[3:nx] - fhat[2:(nx-1)]] count[i,bumps]<-count[i,bumps]+1 } else # density {count[i,]<-count[i,]+fhat*hcent[i] } } if (doplot & update & orig == F & k==1) {forestplot(list(count=count,x=xrange,h=hrange)) } else if (doplot & update) {forestplot(list(count=count,x=xrange,h=hrange),add=T) } } } if (doplot & !update) forestplot(list(count=count,x=xrange,h=hrange)) return(count=count,x=xrange,h=hrange,n=n,nsamp=nsamp, sampsize=sampsize,replace=replace,what=what,orig=orig,jit=jit) } ########################################################################## onetree<-function(dat, xr=nicerange(dat), hr=c(diff(nicerange(dat))/4, diff(nicerange(dat))/400), nx=200, nh=50, xlabel="x",docat=T, doplot=T,what="mode") {# shortcut to generate a single mode tree # parameters are as in forest, with different defaults forest(dat, xr=xr, hr=hr, nx=nx, nh=nh, nsamp=0, xlabel=xlabel,docat=docat, doplot=doplot,what=what)} ################################################################################ resamp.forest<-function(dat, xr=nicerange(dat), hr=c(diff(nicerange(dat))/4, diff(nicerange(dat))/400), nx=200, nh=50, nsamp=49, xlabel="x",docat=T, doplot=T,update=F,orig=T,what="bump") {# shortcut to generate a resampled mode forest # parameters are as in forest, with different defaults forest(dat, xr=xr, hr=hr, nx=nx, nh=nh, nsamp=nsamp, xlabel=xlabel,docat=docat, doplot=doplot,update=update,orig=orig,what=what)} ################################################################################ subsamp.forest<-function(dat, xr=nicerange(dat), hr=c(diff(nicerange(dat))/4, diff(nicerange(dat))/400), nx=200, nh=50, nsamp=49, xlabel="x",docat=T, doplot=T,update=F,sampsize=round(sqrt(length(dat))),replace=F,orig=F, what="bump") {# shortcut to generate a random subset mode forest # parameters are as in forest, with different defaults forest(dat, xr=xr, hr=hr, nx=nx, nh=nh, nsamp=nsamp, xlabel=xlabel,docat=docat, doplot=doplot,update=update,sampsize=sampsize,replace=replace,orig=orig, what=what,jit=0)} ################################################################################ nnjit.forest<-function(dat, xr=nicerange(dat), hr=c(diff(nicerange(dat))/4, diff(nicerange(dat))/400), nx=200, nh=50, nsamp=49, xlabel="x",docat=T, doplot=T,update=F,sampsize=length(dat),replace=F,orig=T,what="bump", jit="nn") {# shortcut to generate a nearest-neighbor jittered mode forest # parameters are as in forest, with different defaults forest(dat, xr=xr, hr=hr, nx=nx, nh=nh, nsamp=nsamp, xlabel=xlabel,docat=docat, doplot=doplot,update=update,sampsize=sampsize,replace=replace,orig=orig, what=what,jit=jit)} ################################################################################ ransamp.forest<-function(dat, xr=nicerange(dat), hr=c(diff(nicerange(dat))/4, diff(nicerange(dat))/400), nx=200, nh=50, nsamp=49, xlabel="x",docat=T, doplot=T,update=F,sampsize=round(sqrt(length(dat))),replace=F,orig=F, what="bump") {# shortcut to generate a random sample size mode forest # parameters are as in forest, with different defaults forest(dat, xr=xr, hr=hr, nx=nx, nh=nh, nsamp=nsamp, xlabel=xlabel,docat=docat, doplot=doplot,update=update,sampsize=sampsize,replace=replace,orig=orig, what=what,jit=0,rsampsize=T)} ################################################################################