R07 Exploratory Factor Analysis # Psychological Test Results Data (from book) data(Harman74.cor) test.cor <- Harman74.cor$cov[c(6, 7, 9, 10, 12),c(6, 7, 9, 10, 12)] colnames(test.cor) <- c("PARA","SENT","WORD","ADD","COUNT") rownames(test.cor) <- colnames(test.cor) test.cor image(1:5, 1:5, test.cor, zlim=c(-1,1), col=cm.colors(21)) # Plot of correlations - magenta = positive, cyan = negative image(1:5, 1:5, test.cor, zlim=c(-1,1), col=grey(0:20/20) ) # Similar, with white = +1, black = -1. test.pc <- eigen(test.cor) # Principal components analysis test.pc$values plot(test.pc$values,type="o", pch=16) abline(h=1,col="grey") test.pc$vectors[,1:2] test.fa2 <- factanal(covmat = test.cor, factors=2, n.obs=145) test.fa2 test.fa1 <- update(test.fa2, factors=1) # shorthand to make minor changes test.fa1 # to a model # Note p-value for one-factor model # Artificial Data - From R factanal( ) help pages # A little demonstration, v2 is just v1 with noise, # and same for v4 vs. v3 and v6 vs. v5 v1 <- c(1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,4,5,6) v2 <- c(1,2,1,1,1,1,2,1,2,1,3,4,3,3,3,4,6,5) v3 <- c(3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,5,4,6) v4 <- c(3,3,4,3,3,1,1,2,1,1,1,1,2,1,1,5,6,4) v5 <- c(1,1,1,1,1,3,3,3,3,3,1,1,1,1,1,6,4,5) v6 <- c(1,1,1,2,1,3,3,3,4,3,1,1,1,2,1,6,5,4) m1 <- cbind(v1,v2,v3,v4,v5,v6) pairs(m1) pairs(m1+runif(6*18, -.3, .3)) # "jittering" to break ties m1.pc <- prcomp(m1) plot(m1.pc$sdev^2, type="o", pch=16) abline(h=1,col="grey") pairs(m1.pc$x[,1:3]) m1.pc$rotation[,1:3] m1.fa1 <- factanal(m1, factors=1) m1.fa1 m1.fa2 <- factanal(m1, factors=2, rotation="none") m1.fa2 m1.fa3 <- factanal(m1, factors=3, rotation="none") m1.fa3 m1.fa3a <- factanal(m1, factors=3, rotation="varimax", scores="regression") # default rotation m1.fa3a # Note improved interpretation of loadings pairs(m1.fa3a$scores) # Girls physical measurements data data(Harman23.cor) girls.cor <- Harman23.cor$cov # Correlation matrix of 8 physical measurements on 305 # girls between 7 - 17 girls.cor image(1:8, 1:8, girls.cor, zlim=c(-1,1), col=cm.colors(21) ) girls.pc <- eigen(girls.cor) # Principal components analysis girls.pc$values plot(girls.pc$values,type="o", pch=16) abline(h=1,col="grey") rownames(girls.pc$vectors) <- colnames(girls.cor) girls.pc$vectors[,1:2] girls.fa1 <- factanal(covmat=girls.cor, factors=1, n.obs=305) girls.fa1 girls.fa2 <- factanal(covmat=girls.cor, factors=2, n.obs=305, rotation="none") girls.fa2 plot(0,0, xlim=c(-1,1), ylim=c(-1,1), type='n', xlab="Factor 1 Loadings", ylab="Factor 2 Loadings") abline(h=0, col='grey') abline(v=0, col='grey') arrows(0, 0, girls.fa2$loadings[,1], girls.fa2$loadings[,2]) text(girls.fa2$loadings[,1], girls.fa2$loadings[,2], rownames(girls.fa2$loadings)) girls.fa2a <- factanal(covmat=girls.cor, factors=2, n.obs=305) # Varimax rotation girls.fa2a arrows(0, 0, girls.fa2a$loadings[,1], girls.fa2a$loadings[,2], col="red") text(girls.fa2a$loadings[,1], girls.fa2a$loadings[,2], rownames(girls.fa2$loadings), col="red") girls.fa2b <- factanal(covmat=girls.cor, factors=2, n.obs=305, rotation="promax") # Promax rotation girls.fa2b arrows(0, 0, girls.fa2b$loadings[,1], girls.fa2b$loadings[,2], col="blue") text(girls.fa2b$loadings[,1], girls.fa2b$loadings[,2], rownames(girls.fa2$loadings), col="blue") girls.fa3 <- factanal(covmat=girls.cor, factors=3, n.obs=305) girls.fa3 # The third factor contributes far less information than the first two varimax(girls.pc$vectors[,1:2]) # Varimax rotation may be used for PCA loadings too promax(girls.pc$vectors[,1:2]) # As may promax girls2.cor <- girls.cor[1:5,1:5] # Drop weight-related variables except weight itself girls2.fa2 <- factanal(covmat=girls2.cor, factors=2, n.obs=305) girls2.fa2 # Weight has high uniqueness, but not its own factor # Pain Reliever Perceptions Data (from book) pain <- read.table("H:/S5600/Data sets/ASCII/Ch05/PAIN_RELIEF.txt") colnames(pain) <- c("No Upset Stomach", "No Side Effects", "Stops Pain", "Works Quickly", "Keeps Me Awake", "Limited Relief") pain.pc <- prcomp(pain) plot(pain.pc$sdev^2, type="o", pch=16) abline(h=1,col="grey") pain.pc$rotation[,1:2] pain.fa<-factanal(pain, factors=2, scores="reg") pain.fa plot(pain.fa$scores) varimax(pain.pc$rotation[,1:2]) promax(pain.pc$rotation[,1:2]) plot(pain.pc$x %*% varimax(pain.pc$rotation[,1:2])$loadings) # Luxury Car Perceptions Data (from book) car.cor <- read.tri("H:/S5600/Data sets/ASCII/Ch05/LUXURY_CAR.txt") colnames(car.cor) <- c("Luxury", "Style", "Reliability", "Fuel Econ", "Safety", "Maintenance", "Quality", "Durable", "Performance") rownames(car.cor) <- colnames(car.cor) car.pc <- eigen(car.cor) car.pc$values plot(car.pc$values,type="o", pch=16) abline(h=1,col="grey") factanal(covmat=car.cor, factors=2) rownames(car.pc$vectors) <- colnames(car.cor) car.pc$vectors[,1:2] varimax(car.pc$vectors[,1:2]) # Back to full Psychological Test Results data fulltest.cor <- Harman74.cor$cov image(1:24, 1:24, fulltest.cor, zlim=c(-1,1), col=cm.colors(21)) fulltest.pc <- eigen(fulltest.cor) # Principal components analysis fulltest.pc$values plot(fulltest.pc$values,type="o", pch=16) abline(h=1,col="grey") fulltest.pc$vectors[,1:4] varimax(fulltest.pc$vectors[,1:4]) promax(fulltest.pc$vectors[,1:4]) fulltest.fa1 <- factanal(covmat = fulltest.cor, factors=1, n.obs=145) fulltest.fa1 update(fulltest.fa1, factors=2) # Note use of test to choose number of factors update(fulltest.fa1, factors=3) update(fulltest.fa1, factors=4) update(fulltest.fa1, factors=5)