# R17 Random Forest Classifiers

# Heart Attack Data ----

# Classification Tree (from R16)
library(rpart)

# stringsAsFactors = TRUE restores the pre-R-4.0 read.table() default this
# script was written against. If the file holds any text column, it must be a
# factor: pairs() rejects character columns and randomForest() is not expected
# to accept them either.
# NOTE(review): the column layout of SAheart.txt is not visible here - confirm.
heart <- read.table("F:/S5600/Data sets/SAheart.txt", stringsAsFactors = TRUE)
head(heart)
pairs(heart, col = heart$chd + 1)

heart.cart <- rpart(chd ~ ., data = heart, method = "class")
heart.cart
plot(heart.cart, margin = 0.2)
text(heart.cart, use.n = FALSE, pretty = 0, cex = 0.8)

# Resubstitution predictions: drop the response by name rather than by
# position so the call does not depend on chd being the 10th column.
heart.cart.pred <- predict(heart.cart, heart[, names(heart) != "chd"],
                           type = "class")
heart.cart.conf <- table(Actual = heart$chd, Classified = heart.cart.pred)
heart.cart.conf
nrow(heart)
# Overall Error Rate, computed from the confusion table instead of
# hand-copied cell counts (off-diagonal total / number of cases)
1 - sum(diag(heart.cart.conf)) / sum(heart.cart.conf)

# Bootstrap sample: draw nrow(heart) row indices with replacement
n.obs <- nrow(heart)
boot.samp <- sample(n.obs, n.obs, replace = TRUE)
head(sort(boot.samp), 50)  # repeated indices show rows drawn more than once
heart.boot <- heart[boot.samp, ]
heart.b.cart <- rpart(chd ~ ., data = heart.boot, method = "class")
plot(heart.b.cart)
text(heart.b.cart, use.n = FALSE, pretty = 0, cex = 0.8)
# Repeat bootstrap tree - note differences

# Random Forests ----
library(randomForest)  # Must be installed

# Response MUST be a factor for classification random forests
heart$chd <- factor(heart$chd, levels = c(0, 1),
                    labels = c("absent", "present"))
heart.rf <- randomForest(chd ~ ., data = heart,
                         importance = TRUE, proximity = TRUE)
heart.rf
# plot cumulative error rates - black (overall),
# red - class 0 (absent), green - class 1 (present)
plot(heart.rf)

# variable importance
importance(heart.rf)
varImpPlot(heart.rf)
pairs(heart[, c(2, 3, 4, 5, 9)],
      pch = unclass(heart$chd), col = unclass(heart$chd))

# MDS using RF proximities
MDSplot(heart.rf, heart$chd, pch = unclass(heart$chd))

# Sample more balanced sets - balance error rates?
# sampsize = c(100, 100) requests 100 cases from each chd class for every
# tree, instead of the default bootstrap of the whole data set.
heart.rf.bal <- randomForest(chd ~ ., data = heart, sampsize = c(100, 100),
                             importance = TRUE, proximity = TRUE)
heart.rf.bal
plot(heart.rf.bal)
importance(heart.rf.bal)
varImpPlot(heart.rf.bal)
MDSplot(heart.rf.bal, heart$chd, pch = unclass(heart$chd))

# Crabs Data ----
library(MASS)  # attached once; also used for the forensic glass data below
data(crabs)
# Keep column 1 (the response, sp) and columns 4 to 8 as predictors.
crabs <- crabs[, c(1, 4:8)]
crabs.rf <- randomForest(sp ~ ., data = crabs,
                         importance = TRUE, proximity = TRUE)
crabs.rf
plot(crabs.rf)
importance(crabs.rf)
varImpPlot(crabs.rf)

# Forensic Glass Data ----
data(fgl)
head(fgl)
fgl.rf <- randomForest(type ~ ., data = fgl,
                       importance = TRUE, proximity = TRUE)
fgl.rf
plot(fgl.rf)
importance(fgl.rf)
varImpPlot(fgl.rf)