Heatmap - microbiome/microbiome GitHub Wiki

The microbiome package contains handy tools for heatmap visualization and cross-correlating data sets. Tested in Ubuntu with R-2.15.2. Source code can be found at microbiome Github repo. If you use these tools in publications, kindly cite this article.

Load R libraries

Load (and install if needed) the necessary R libraries:

# Microbiome package, see: https://github.com/microbiome/microbiome To install the package, use:
#library(devtools, quietly = TRUE); install_github("microbiome", "microbiome", ref = "master") 
library(microbiome, quietly = TRUE)

Get example data

Let us visualize correlations between intestinal bacteria and host metabolism based on data from Lahti et al. PeerJ 1:e32, 2013:

data(peerj32) # From https://peerj.com/articles/32/
dat1 <- peerj32$lipids # Lipids (44 samples x 389 lipids)
dat2 <- peerj32$microbes # Microbiota (44 samples x 130 bacteria)
meta <- peerj32$meta

Interactive correlation heatmap

For interactive correlation heatmap, see this example.

Two-way matrix heatmap

Visualize deviation of all bacteria from their population mean (smaller: blue; higher: red):

dat <- t(scale(dat2)) # Shift mean to zero for each bacteria
hm <- heatmap(dat) # Find visually appealing order for rows and columns

plot of chunk heatmapexample

tmp <- microbiome::PlotMatrix(dat[hm$rowInd, hm$colInd], type = "twoway", mar = c(5, 12, 1, 1), cex.axis = 0.5)
## Error: 'PlotMatrix' is not an exported object from 'namespace:microbiome'

Cross-correlating data sets

Cross-correlate columns of two data sets. This will return correlations, raw p-values, and q-value estimates (not strictly proper as the comparisons are not independent). Here we use robust biweight midcorrelation ('bicor') from the WGCNA package. Keep only those elements that have at least only one significant correlation (n.signif):

correlations <- microbiome::cross.correlate(dat1, dat2, 
	     				    method = "bicor", mode = "matrix", 
					    n.signif = 1, p.adj.threshold = 0.05, 
					    p.adj.method = "qvalue")
## Error in match.arg(method): 'arg' should be one of "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"

Arrange the results in handy table format:

correlation.table <- microbiome::cmat2table(correlations)
## Error in microbiome::cmat2table(correlations): object 'correlations' not found
head(correlation.table)
## Error in head(correlation.table): error in evaluating the argument 'x' in selecting a method for function 'head': Error: object 'correlation.table' not found

Correlation heatmaps

Rearrange the data and plot the heatmap and mark significant correlations with stars to reproduce microbiota-lipidome heatmap from this article:

p <- microbiome::correlation.heatmap(correlation.table, "X1", "X2", fill = "Correlation", star = "p.adj", p.adj.threshold = 0.05) 
## Error in microbiome::correlation.heatmap(correlation.table, "X1", "X2", : object 'correlation.table' not found
print(p)

plot of chunk heatmap-example-stars3

Heatmaps with ggplot2

The above examples provide handy shortcuts for heatmap visualization. You can also directly modify the ggplot2 routines. This time, let us set q-value threshold also for cell coloring:

# Order the rows and columns with levels argument if needed:
correlation.table$X1 <- factor(correlation.table$X1, levels = unique(as.character(correlation.table$X1)))
## Error in factor(correlation.table$X1, levels = unique(as.character(correlation.table$X1))): object 'correlation.table' not found
correlation.table$X2 <- factor(correlation.table$X2, levels = unique(as.character(correlation.table$X2)))
## Error in factor(correlation.table$X2, levels = unique(as.character(correlation.table$X2))): object 'correlation.table' not found
# Set black-and-white theme
library(ggplot2)
theme_set(theme_bw())

# Pick only the correlations with q<0.05
# Note: this will leave other cells empty
subtable <- subset(correlation.table, p.adj < 0.05)
## Error in subset(correlation.table, p.adj < 0.05): error in evaluating the argument 'x' in selecting a method for function 'subset': Error: object 'correlation.table' not found
# Arrange the figure
p <- ggplot(subtable, aes(x = X1, y = X2, fill = Correlation))
## Error in ggplot(subtable, aes(x = X1, y = X2, fill = Correlation)): object 'subtable' not found
p <- p + geom_tile() 
p <- p + scale_fill_gradientn("Correlation", 
       	 		       breaks = seq(from = -1, to = 1, by = 0.2), 
			       colours = c("darkblue", "blue", "white", "red", "darkred"), 
			       limits = c(-1,1)) 
p <- p + opts(axis.text.x=theme_text(angle = 90)) + xlab("") + ylab("")
## Error: Use 'theme' instead. (Defunct; last used in version 0.9.1)
# Mark the most significant cells with stars
p <- p + geom_text(data = subset(correlation.table, p.adj < 0.02), 
       	 	   aes(x = X1, y = X2, label = "+"), col = "white", size = 5)
## Error in subset(correlation.table, p.adj < 0.02): error in evaluating the argument 'x' in selecting a method for function 'subset': Error: object 'correlation.table' not found
print(p)
## Error in eval(expr, envir, enclos): object 'y' not found

Heatmap with text

For detailed information, might be handy to print the actual values on top of the heatmap:

correlations <- microbiome::cross.correlate(dat1, dat2, 
	     				    method = "bicor", 
					    mode = "table", 
					    n.signif = 1, 
					    p.adj.threshold = 0.01, 
					    p.adj.method = "qvalue")
## Error in match.arg(method): 'arg' should be one of "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"
theme_set(theme_bw(20))
df <- correlations
## Error in eval(expr, envir, enclos): object 'correlations' not found
df$X1 <- factor(df$X1)
## Error in `$<-.data.frame`(`*tmp*`, "X1", value = structure(integer(0), .Label = character(0), class = "factor")): replacement has 0 rows, data has 20
df$X2 <- factor(df$X2)
## Error in `$<-.data.frame`(`*tmp*`, "X2", value = structure(integer(0), .Label = character(0), class = "factor")): replacement has 0 rows, data has 20
p <- ggplot(df, aes(X1, X2, group=X2)) 
p <- p + geom_tile(aes(fill = Correlation)) 
p <- p + geom_text(aes(fill = df$Correlation, label = round(df$Correlation, 1))) 
#p <- p + scale_fill_gradient(low = "white", high = "red") 
p <- p + scale_fill_gradientn("Correlation", breaks = seq(from = -1, to = 1,  by = 0.1), colours = c("blue", "white", "red"), limits = c(-1, 1))
p <- p + theme(axis.text.x = element_text(angle = 60, vjust = 0.5)) + xlab("") + ylab("")
print(p)
## Error in eval(expr, envir, enclos): object 'Correlation' not found

Oligo heatmap

This reproduces the oligo-level heatmap from profiling script. If there are problems, try to tune ppcm, figureratio and fontsize (see help(add.heatmap) for details)

library(microbiome)

# Load Phylogeny
phylogeny.info <- GetPhylogeny("HITChip")

# Load oligo-level data

# Replace data.directory here with your own profiling script output data directory
data.directory <- system.file("extdata", package = "microbiome")

oligodata <- read.profiling(level = "oligo", log10 = FALSE, data.dir = data.directory)

# Produce the plot and save it to the working directory
hc.params <- add.heatmap(log10(oligodata), output.dir = ".", phylogeny.info = phylogeny.info)
## Error in eval(expr, envir, enclos): could not find function "add.heatmap"