# This function does simple clustering in the sample direction
# for an exprSet. It always works with log-transformed data. The
# heuristic for determining if the data has been log-transformed
# is crude: it just checks to see if any value is bigger than 100.
# Expression filtering is automatic; we only keep the genes whose
# mean expression is greater than the 75th percentile. We also
# give no options for distance metric or linkage; all clustering
# is performed with Pearson correlation distance and average linkage.
#
# Missing values are tolerated: gene means and the percentile cutoff
# ignore NAs, and correlations use pairwise-complete observations.
#
# Arguments:
#   es  an object inheriting from class 'exprSet'; its expression
#       matrix is obtained with exprs(es), with genes in rows and
#       samples in columns.
#
# Value:
#   invisible(0). The function is called for its side effect of
#   drawing the sample dendrogram on the current graphics device.
simpleCluster <- function(es) {
  if (!inherits(es, 'exprSet')) {
    stop('This function only works for exprSet objects')
  }
  data <- exprs(es)

  # Crude scale detection: any value above 100 is taken to mean the
  # data are still on the raw scale and need to be log transformed.
  # NOTE(review): non-positive raw values become NaN/-Inf under log2;
  # NaN is then skipped by the NA-aware steps below.
  if (max(data, na.rm = TRUE) > 100) {
    data <- log2(data)
  }

  # Mean expression of each gene, ignoring missing measurements.
  avg <- rowMeans(data, na.rm = TRUE)
  cutoff <- quantile(avg, 0.75, na.rm = TRUE)

  # Only keep the top 25%. which() drops genes whose mean is NA instead
  # of producing all-NA rows, and drop = FALSE keeps the result a matrix
  # even when a single gene survives the filter.
  data <- data[which(avg > cutoff), , drop = FALSE]

  # Distance based on correlation between samples (columns), rescaled
  # from [-1, 1] to [0, 1].
  distmat <- as.dist((1 - cor(data, use = "pairwise.complete.obs")) / 2)

  # plclust() is defunct in current R; the plot method for hclust
  # objects is its replacement.
  plot(hclust(distmat, method = "average"))
  invisible(0)
}