R15 Discriminant Analysis library(MASS) C1 <- mvrnorm(30, c(0,-1), matrix(c(1,.8,.8,1),2)) C2 <- mvrnorm(30, c(0,1), matrix(c(1,.8,.8,1),2)) C3 <- rbind(C1,C2) C <- data.frame(X1=C3[,1], X2=C3[,2] ) C.class <- as.factor(rep(c("a","b"),each=30)) plot(X2~X1,pch=unclass(C.class),col=unclass(C.class), data=C) # Linear Discriminant Analysis C.lda <- lda(C, C.class) C.pred <- predict(C.lda, C) plot(X2~X1,pch=unclass(C.class),col=unclass(C.pred$class),data=C) source("F:/S5600/predplot.txt") predplot(C.lda, C, C.class) # Table of classification results table(Actual=C.class, Classified=C.pred$class) C.lda$scaling C.disc <- as.matrix(C) %*% C.lda$scaling stripchart(C.disc~C.class, "jitter") boxplot(C.disc~C.class) # Different Variance Matrices D1 <- mvrnorm(30, c(0,-1), matrix(c(1,.8,.8,1),2)) D2 <- mvrnorm(30, c(-1,2), matrix(c(.25,-.2,-.2,.25),2)) D3 <- rbind(D1,D2) D <- data.frame(X1=D3[,1], X2=D3[,2] ) D.class <- as.factor(rep(c("a","b"),each=30)) plot(X2~X1,pch=unclass(D.class),col=unclass(D.class), data=D) D.lda <- lda(D, D.class) D.pred.l <- predict(D.lda, D) plot(X2~X1,pch=unclass(D.class),col=unclass(D.pred.l$class),data=D) predplot(D.lda, D, D.class) # Better: Quadratic Discriminant Analysis D.qda <- qda(D, D.class) D.pred.q <- predict(D.qda, D) plot(X2~X1,pch=unclass(D.class),col=unclass(D.pred.q$class),data=D) predplot(D.qda, D, D.class) table(Actual=D.class, Classified=D.pred.q$class) # Cushings Syndrome Data # Classes "Adenoma", "Bilateral Hyperplasia", "Carcinoma", "Unknown" data(Cushings) plot(Cushings[,1:2],col=unclass(Cushings[,3]),pch=as.character(Cushings[,3])) Cushings nrow(Cushings) unk <- (1:27)[Cushings[,3]=="u"] cush.ltp <- log(Cushings[-unk,1:2]) cush.class <- factor(Cushings[-unk,3]) plot(cush.ltp,col=unclass(cush.class),pch=as.character(cush.class)) cush.lda <- lda(cush.ltp, cush.class) cush.pred.l <- predict(cush.lda, cush.ltp) plot(cush.ltp,pch=unclass(cush.class),col=unclass(cush.pred.l$class)) predplot(cush.lda, cush.ltp, cush.class) cush.qda <- qda(cush.ltp, cush.class) cush.pred.q <- predict(cush.qda, cush.ltp) plot(cush.ltp,pch=unclass(cush.class),col=unclass(cush.pred.q$class)) predplot(cush.qda, cush.ltp, cush.class) table(Actual=cush.class, Classified=cush.pred.q$class) # Predictions - unknown observations cush.ltp.u <- log(Cushings[unk, 1:2]) cush.pred.u.q <- predict(cush.qda, cush.ltp.u) points(cush.ltp.u, pch="u", col=unclass(cush.pred.u.q$class)) # Leave-one-out (jackknife) cross-validation cush.qda.cv <- qda(cush.ltp, cush.class, CV=T) table(Actual=cush.class, Classified=cush.qda.cv$class) # crabs data - 5 physical measurements on 2 forms (species?), both sexes data(crabs) crabs[sample(200,10),] sp <- crabs$sp sex <- crabs$sex crabs <- crabs[,4:8] pairs(crabs) pairs(crabs, col=c("blue","orange")[unclass(sp)], pch=unclass(sp)) # Principal Components crabs.pca <- prcomp(crabs,scale=T) pairs(crabs.pca$x) plot(crabs.pca$x[,2:3], pch=as.character(sex), col=c("blue","orange")[unclass(sp)]) crabs.pca$rotation # LDA sp.lda <- lda(crabs,sp) sp.pred.l <- predict(sp.lda, crabs) table(Actual = sp, Classified = sp.pred.l$class) sp.lda sp.disc <- as.matrix(crabs) %*% sp.lda$scaling stripchart(sp.disc~sp, "jitter") boxplot(sp.disc~sp) # Leave-one-out (jackknife) cross-validation sp.lda.cv <- lda(crabs, sp, CV=T) table(Actual=sp, Classified=sp.lda.cv$class) # Split into training, calibration sets training <- sample(200,100) crabs.t <- crabs[training,] crabs.c <- crabs[-training,] sp.t <- sp[training] sp.c <- sp[-training] sp.lda.t <- lda(crabs.t, sp.t) sp.pred.lt <- predict(sp.lda.t, crabs.t, prior=c(.5,.5)) table(Actual = sp.t, Classified = sp.pred.lt$class) # Verification sp.pred.lc <- predict(sp.lda.t, crabs.c, prior=c(.5,.5)) table(Actual = sp.c, Classified = sp.pred.lc$class) # Do same for sex classification sex.t <- sex[training] sex.c <- sex[-training] sex.lda.t <- lda(crabs.t, sex.t) sex.pred.lt <- predict(sex.lda.t, crabs.t, prior=c(.5,.5)) table(Actual = sex.t, Classified = sex.pred.lt$class) # Verification sex.pred.lc <- predict(sex.lda.t, crabs.c, prior=c(.5,.5)) table(Actual = sex.c, Classified = sex.pred.lc$class) sex.lda <- lda(crabs,sex) sex.pred.l <- predict(sex.lda, crabs) table(Actual = sex, Classified = sex.pred.l$class) sex.lda sex.disc <- as.matrix(crabs) %*% sex.lda$scaling stripchart(sex.disc~sex, "jitter") boxplot(sex.disc~sex) # All four combined categories spsex <- factor(paste(sp, sex, sep="")) spsex spsex.lda <- lda(crabs,spsex) spsex.pred.l <- predict(spsex.lda, crabs) table(Actual = spsex, Classified = spsex.pred.l$class) spsex.lda spsex.disc <- as.matrix(crabs) %*% spsex.lda$scaling plot(spsex.disc[,1:2], pch=as.character(sex), col=c("cyan","blue","orange","red")[unclass(spsex)]) plot(spsex.disc[,1:2], pch=unclass(spsex), col=c("cyan","blue","orange","red")[unclass(spsex.pred.l$class)]) # Verification spsex.lda.cv <- lda(crabs, spsex, CV=T) table(Actual=spsex, Classified=spsex.lda.cv$class) spsex.t <- spsex[training] spsex.c <- spsex[-training] spsex.lda.t <- lda(crabs.t, spsex.t) spsex.pred.lt <- predict(spsex.lda.t, crabs.t, prior=c(.25,.25,.25,.25)) table(Actual = spsex.t, Classified = spsex.pred.lt$class) spsex.pred.lc <- predict(spsex.lda.t, crabs.c, prior=c(.25,.25,.25,.25)) table(Actual = spsex.c, Classified = spsex.pred.lc$class) # The importance of verification (jackknife or hold-out) # Purely random - no structure A <- matrix(rnorm(200),ncol=10) A.class <- rep(c(1,2), each=10) A.lda <- lda(A, A.class) A.pred.l <- predict(A.lda, A) table(Actual = A.class, Classified = A.pred.l$class) A.lda.cv <- lda(A, A.class, CV=T) table(Actual=A.class, Classified=A.lda.cv$class)