# R11 Distances and k-Means Clustering
#
# Demonstration script:
#   1. Alternative distance measures on a small toy data set.
#   2. Complete-linkage hierarchical clustering of the `swiss` data under
#      Euclidean, Manhattan and maximum (sup-norm) distances, comparing the
#      resulting 4-cluster assignments.
#   3. Dendrograms for simulated bimodal vs. unimodal bivariate normal data.
#   4. Single-linkage clustering on a kth-nearest-neighbour distance.
#
# The script is meant to be stepped through interactively: identify() waits
# for mouse clicks, and each plot() call replaces the previous figure.

library(MASS)  # mvrnorm(), used in the modality section


# Alternative Distance Measures ----

x1 <- c(1, 2, 3, 4, 7, 8, 10)
x2 <- c(8, 2, 3, 1, 11, 8, 10)
X <- cbind(x1, x2)

plot(X, pch = 16)
identify(X, labels = seq_len(nrow(X)))  # interactive: click points to label
X

dist(X)                                  # default - Euclidean (2 norm)
sqrt((1 - 2)^2 + (8 - 2)^2)              # d(1,2)

dist(X, method = "manhattan")            # 1 norm
abs(1 - 2) + abs(8 - 2)                  # d(1,2)

dist(X, method = "maximum")              # sup (infinity) norm
max(abs(1 - 2), abs(8 - 2))              # d(1,2)

dist(X, method = "minkowski", p = 3)     # 3 norm
(abs(1 - 2)^3 + abs(8 - 2)^3)^(1 / 3)    # d(1,2)


# Hierarchical clustering of the swiss data ----

pairs(swiss, pch = 16)

# Standardise once; every distance below is computed on the scaled data.
swiss.z <- scale(swiss)

# Principal component scores (on standardised variables) for plotting.
swiss.pca <- prcomp(swiss, scale. = TRUE)

# Euclidean distance, complete linkage, cut into 4 clusters
swiss.d2 <- dist(swiss.z)
swiss.hc.c2 <- hclust(swiss.d2, method = "complete")
plot(swiss.hc.c2)
swiss.cl.c2 <- cutree(swiss.hc.c2, 4)
pairs(swiss, pch = swiss.cl.c2, col = swiss.cl.c2)

# Manhattan distance, complete linkage, cut into 4 clusters
swiss.d1 <- dist(swiss.z, "manhattan")
swiss.hc.c1 <- hclust(swiss.d1, method = "complete")
plot(swiss.hc.c1)
swiss.cl.c1 <- cutree(swiss.hc.c1, 4)
pairs(swiss, pch = swiss.cl.c1, col = swiss.cl.c1)

# Cluster assignments shown on the first four principal components
pairs(swiss.pca$x[, 1:4], pch = swiss.cl.c2, col = swiss.cl.c2)
pairs(swiss.pca$x[, 1:4], pch = swiss.cl.c1, col = swiss.cl.c1)

# Compare assignments from different distances
table(data.frame(c2 = swiss.cl.c2, c1 = swiss.cl.c1))

# Maximum (sup-norm) distance, complete linkage, cut into 4 clusters.
# The tree must be built from swiss.dsup; clustering swiss.d1 here would
# simply reproduce the Manhattan results.
swiss.dsup <- dist(swiss.z, "maximum")
swiss.hc.csup <- hclust(swiss.dsup, method = "complete")
plot(swiss.hc.csup)
swiss.cl.csup <- cutree(swiss.hc.csup, 4)
pairs(swiss, pch = swiss.cl.csup, col = swiss.cl.csup)
pairs(swiss.pca$x[, 1:4], pch = swiss.cl.csup, col = swiss.cl.csup)

table(data.frame(c2 = swiss.cl.c2, csup = swiss.cl.csup))
table(data.frame(c1 = swiss.cl.c1, csup = swiss.cl.csup))


# Natural Modality: Bimodal vs Unimodal ----

# NOTE: the samples are random; call set.seed() before this section if the
# figures need to be reproducible.

# Bimodal: 25 points from each of two identity-covariance normals
mu1 <- c(3, 3)
mu2 <- c(6, 6)
Sigma1 <- matrix(c(1, 0, 0, 1), nrow = 2)
bimodal <- rbind(
  mvrnorm(25, mu = mu1, Sigma = Sigma1),
  mvrnorm(25, mu = mu2, Sigma = Sigma1)
)
plot(bimodal, pch = 16)

# Unimodal: 50 points from a single correlated normal
mu3 <- c(4.5, 4.5)
Sigma2 <- matrix(c(2.25, 1.5, 1.5, 2.25), nrow = 2)
unimodal <- mvrnorm(50, mu = mu3, Sigma = Sigma2)
plot(unimodal, pch = 16)

# Euclidean distance under three linkage methods.
# "ward.D" is the current name of the method formerly spelled "ward".
bim.d2 <- dist(bimodal)
unim.d2 <- dist(unimodal)
plot(hclust(bim.d2, method = "complete"))
plot(hclust(unim.d2, method = "complete"))
plot(hclust(bim.d2, method = "single"))
plot(hclust(unim.d2, method = "single"))
plot(hclust(bim.d2, method = "ward.D"))
plot(hclust(unim.d2, method = "ward.D"))

# Manhattan distance under the same three linkage methods
bim.d1 <- dist(bimodal, "manhattan")
unim.d1 <- dist(unimodal, "manhattan")
plot(hclust(bim.d1, method = "complete"))
plot(hclust(unim.d1, method = "complete"))
plot(hclust(bim.d1, method = "single"))
plot(hclust(unim.d1, method = "single"))
plot(hclust(bim.d1, method = "ward.D"))
plot(hclust(unim.d1, method = "ward.D"))


# Kth Nearest Neighbor Density Distance ----

# NOTE(review): machine-specific absolute path. The sourced file presumably
# defines knn.dist(data, k) returning an object hclust() accepts - confirm.
source("F:/S5600/knn.dist.txt")

bim.dk.10 <- knn.dist(bimodal, 10)
unim.dk.10 <- knn.dist(unimodal, 10)
plot(hclust(bim.dk.10, method = "single"))
plot(hclust(unim.dk.10, method = "single"))

bim.dk.5 <- knn.dist(bimodal, 5)
unim.dk.5 <- knn.dist(unimodal, 5)
plot(hclust(bim.dk.5, method = "single"))
plot(hclust(unim.dk.5, method = "single"))