# pcanova ######################################################### # Create yet another class of objects pe <- function(x, classes, labels, colors, usecor=T, center=F) { # note 1: classes must be a subset of labels # note 2: assume rows of x have already been centered # function that yields column vectors with all ones ones <- function(rows, cols=1) { matrix(1, rows, cols) } n <- dim(x)[2] # number of samples g <- length(classes) # number of groups p <- dim(x)[1] # number of variables qq <- matrix(0, g, n) # transpose of n x g class indicator matrix for(i in 1:n) { qq[classes==labels[i], i] <- 1 } nn <- qq %*% ones(n) # number of samples in each class xsums <- x %*% t(qq) # matrix of class sums cc <- xsums %*% diag(as.vector(1/nn)) # matrix of class means xm <- x %*% ones(n)/n # vector of mean values zz <- x - (xm %*% t(ones(n))) # center the rows vv <- (cc - (xm %*% t(ones(g)))) # matrix of centered class means ss <- cc %*% qq - xm %*% t(ones(n)) # ss0 <- cc - xm %*% t(ones(g)) rr <- x - (cc %*% qq) # matrix of residuals # first step: do PCA on original data xd <- sample.pc(zz, usecor=usecor, center=F) orig.pca <- xd$scores # second step: form the vectors of class medians into a matrix yd <- sample.pc(ss0, usecor=usecor, center=F) class.pca <- yd$scores mixed.pca <- t(zz) %*% yd$components # third step: compute the residual matrix rd <- sample.pc(rr, usecor=usecor, center=F) resid.pca <- rd$scores # fourth step: compute transformation matrices from one PC to another # note: we only keep a number of PCS equal to the number of classes n <- length(classes) trans.class.orig <- t(t(yd$components) %*% xd$components)[1:n, 1:n] trans.class.resid <- t(t(yd$components) %*% rd$components)[1:n, 1:n] trans.orig.resid <- t(t(xd$components) %*% rd$components)[1:n, 1:n] class2orig <- rep(0, n) class2resid <- rep(0, n) orig2resid <- rep(0, n) for (i in 1:n) { class2orig[i] <- 1-acos((sqrt(sum(trans.class.orig[i, 1:i]^2)) + sqrt(sum(trans.class.orig[1:i,i]^2)))/2) /(pi/2) class2resid[i] <- 1-acos((sqrt(sum(trans.class.resid[i, 1:i]^2)) + sqrt(sum(trans.class.resid[1:i,i]^2)))/2)/(pi/2) orig2resid[i] <- 1-acos((sqrt(sum(trans.orig.resid[i, 1:i]^2)) + sqrt(sum(trans.orig.resid[1:i,i]^2)))/2) /(pi/2) # class2orig[i] <- (sqrt(sum(trans.class.orig[i, 1:i]^2)) + sqrt(sum(trans.class.orig[1:i,i]^2))) / 2 # class2resid[i] <- (sqrt(sum(trans.class.resid[i, 1:i]^2)) + sqrt(sum(trans.class.resid[1:i,i]^2))) / 2 # orig2resid[i] <- (sqrt(sum(trans.orig.resid[i, 1:i]^2)) + sqrt(sum(trans.orig.resid[1:i,i]^2))) / 2 } # finally do some clustering on correlation hc <- hclust((1-cor(cc))/2, method='average') xc <- hclust((1-cor(x))/2, method='average') rc <- hclust((1-cor(rr))/2, method='average') val <- list(orig.pca=orig.pca, class.pca=class.pca, mixed.pca=mixed.pca, resid.pca=resid.pca, xc=xc, hc=hc, rc=rc, n=n, trans.class.orig=trans.class.orig, trans.class.resid=trans.class.resid, trans.orig.resid=trans.orig.resid, class2orig=class2orig, class2resid=class2resid, orig2resid=orig2resid, labels=labels, classes=classes, colors=colors , xd=xd, yd=yd, rd=rd) class(val) <- 'pe' val } .points.lab.col <- function(x, y, labs, cols, ...) { for(i in 1:length(x)) { points(x[i], y[i], pch=labs[i], col=as.numeric(cols[i]), ...) } invisible(x) } .plot.lab.col <- function(x, y, labs, cols, ...) { plot(x, y, type='n', ...) .points.lab.col(x, y, labs, cols) invisible(x) } plot.pe <- function(ob, tag='', mscale = 1, ...) { .plot.lab.col(ob$orig.pca[,1], ob$orig.pca[,2], ob$labels, ob$colors, xlab='comp. 1', ylab='comp. 2', ...) title('PCA by Element') groups <- sort(unique(ob$labels)) reps <- unlist(lapply(groups, function(g, lab) {sort(grep(g, lab))[1]}, ob$classes)) plot(c(ob$mixed.pca[,1]/mscale, ob$class.pca[,1]), c(ob$mixed.pca[,2]/mscale, ob$class.pca[,2]), type='n', xlab='comp. 1', ylab='comp. 2', ...) .points.lab.col(ob$mixed.pca[,1]/mscale, ob$mixed.pca[,2]/mscale, ob$labels, ob$colors, ...) .points.lab.col(ob$class.pca[reps,1], ob$class.pca[reps,2], labs=groups, cols=rep(1, length(groups)), cex=2) title('PCA by Class') .plot.lab.col(ob$resid.pca[,1], ob$resid.pca[,2], ob$labels, ob$colors, xlab='comp. 1', ylab='comp. 2', ...) title('Residual PCA') plot(c(0, ob$n), c(0,2), type='n', ylim=c(0, 1), xlab='Number of Components', ylab='PC Correlation') points(cumsum(ob$class2resid)/(1:ob$n), type='b', col=4, pch=15) points(cumsum(ob$class2orig)/(1:ob$n), type='b', col=8, pch=16) points(cumsum(ob$orig2resid)/(1:ob$n), type='b', col=6, pch=17) abline(h=1/2) legend((ob$n-1)/2, 1, c('d(Class, Residual)', 'd(Total, Class)', 'd(Total, Residual)'), col=c(4, 8, 6), marks=c(15, 16, 17), cex=0.6) title(paste('Cumulative PC-ANOVA', tag)) invisible(ob) } .plot.color.clust <- function(hd, labs, cols, ...) { n <- length(labs) temp <- plclust(hd, plot = F, hang = -1) plclust(hd, hang = -1, labels = rep("", n), ...) for(i in unique(labs)) { sel <- labs == i labclust(temp$x[sel], temp$y[sel], labels = labs[sel], col = as.numeric( cols[sel])[1], ...) } } pltree.pe <- function(ob, ...) { .plot.color.clust(ob$xc, labs=ob$labels, cols=ob$colors, hang=-1, ...) title('Clustering the Samples') .plot.color.clust(ob$rc, labs=ob$labels, cols=ob$colors, hang=-1, ...) title('Clustering the Residuals') plclust(ob$hc, labels=ob$classes) title('Clustering the Classes') invisible(ob) } summary.pe <- function(object, ...) { print('products') print(Re(object$products)) print('sum absolute') print(object$sumabs) invisible(object) } print.pe <- function(object, ...) { print('products') print(Re(object$products)) print('sum absolute') print(object$sumabs) invisible(object) }