Principal components analysis

A very simple, two-variable analysis using Davis' boxes data [boxes.csv]

# boxes.pca -- principal components analysis of Davis boxes data
# NOTE(review): assumes the data frame `boxes` has already been read in
# (presumably from boxes.csv) -- confirm against the preceding setup code.

# two-column matrix built from columns 1 and 4 of boxes, with the columns
# labelled "long" and "diag" (row names are left empty)
boxes.matrix <- data.matrix(cbind(boxes[, 1], boxes[, 4]))
dimnames(boxes.matrix) <- list(NULL, c("long", "diag"))

# scatter plot of the two variables
plot(boxes.matrix)

# PCA based on the correlation matrix; TRUE is spelled out because T is an
# ordinary variable that can be reassigned
boxes.pca <- princomp(boxes.matrix, cor = TRUE)
boxes.pca
summary(boxes.pca)
# cutoff = 0 so that no loading is blanked out in the printed table
print(loadings(boxes.pca), cutoff = 0.0)
biplot(boxes.pca)

# get parameters of component lines (after Everitt & Rabe-Hesketh):
# each component is drawn as a straight line through the point of column
# means, with slope given by the ratio of its two loadings (row 2 / row 1)
# NOTE(review): boxes.pca was fitted on the correlation matrix, so these
# slopes presumably refer to standardized variables -- confirm that drawing
# them on the raw-unit scatter plot is intended.
load <- loadings(boxes.pca)
mn <- apply(boxes.matrix, MARGIN = 2, FUN = mean) # column means
slope <- load[2, ] / load[1, ] # one slope per component
intcpt <- mn[2] - slope * mn[1] # forces each line through the means

# scatter plot with the two new axes added
# par() returns the previous settings, which are kept so that the square
# frame does not leak into every later plot in the session
old.par <- par(pty = "s") # square plotting frame
xlim <- range(boxes.matrix) # overall min, max
plot(boxes.matrix, xlim = xlim, ylim = xlim) # both axes same length
abline(intcpt[1], slope[1], lwd = 2) # first component: solid line
abline(intcpt[2], slope[2], lwd = 2, lty = 2) # second component: dashed line
par(old.par) # restore the previous plotting frame setting

A second example using the large-cities data set [cities.csv]

# cities.pca -- principal components analysis of cities data
# NOTE(review): assumes `cities` has already been read in (presumably from
# cities.csv) -- confirm against the preceding setup code.
# NOTE(review): nothing in this section uses the attached column names;
# attach() is kept only in case later code relies on it -- consider removing.
attach(cities)

# columns 2 to 12 of cities as a matrix for the analysis
cities.matrix <- data.matrix(cities[, 2:12])

# plot the same columns straight from `cities` (presumably a data frame, in
# which case this draws a scatter plot matrix -- confirm)
plot(cities[, 2:12])

# PCA based on the correlation matrix; TRUE is spelled out because T is an
# ordinary variable that can be reassigned
cities.pca <- princomp(cities.matrix, cor = TRUE)
cities.pca
summary(cities.pca)
screeplot(cities.pca) # variances of the components
# the loadings are a matrix, so this should plot the first two columns
# (components 1 and 2) against each other
plot(loadings(cities.pca))
biplot(cities.pca)