Making a heatmap presence absence table - meyermicrobiolab/Meyer_Lab_Resources GitHub Wiki
This tutorial uses data that has been split already into presence/absence tables of the predicted gene (there will be multiple), a single key text file that includes the VFClass and Virulence Factor of the genes, and a single metadata text file. For example one of the presence/absence tables
library(ggplot2)
library(dplyr)
library(reshape2)
key<-read.table("VFDB_key.txt",sep="\t",header=T)
vibrio_meta<-read.table("vibrio_metadata.txt",sep="\t",header=TRUE)
# repeat this pattern for all your files
a<-read.table("VFDB_BAA-450.txt",sep="\t",header=T)
b<-read.table("VFDB_MCA25.txt",sep="\t",header=T)
c<-read.table("VFDB_MCA32.txt",sep="\t",header=T)
d<-read.table("VFDB_McD22-P3.txt",sep="\t",header=T)
e<-read.table("VFDB_OCN008.txt",sep="\t",header=T)
Merge the tables together and then melt the data into a long format before merging in the key data
#Repeat this pattern for all your tables
aa<-merge(a,b,"Gene", all=TRUE)
ab<-merge(aa,c,"Gene", all=TRUE)
ac<-merge(ab,d,"Gene", all=TRUE)
ad<-merge(ac,e,"Gene", all=TRUE)
ae<-merge(ad,f,"Gene", all=TRUE)
#after all tables merged
ak[is.na(ak)] <- 0 #replace NAs with 0s
ak_long<-melt(ak,id.vars="Gene",variable.name="Sample",value.name="Count")
keyed<-merge(key,ak_long,"Gene",all=TRUE)
Group the data to perform your summaries. Then we can merge in our metadata.
keyed_VF<- keyed %>% group_by(Virulence_factor,Sample) %>% summarize(VF_Count=sum(Count))
keyed_VFclass<- keyed %>% group_by(VFclass,Sample) %>% summarize(VFclass_Count=sum(Count))
keyed_VFclass_meta<-merge(keyed_VFclass,vibrio_meta,"Sample",all=TRUE)
We can also factor our samples so that they are ordered how we want them in the graph.
keyed_VFclass_meta$radius <- sqrt(keyed_VFclass_meta$VFclass_Count / pi )
keyed_VFclass_meta$Sample<-factor(keyed_VFclass_meta$Sample, levels=c("OfT6_17","OfT6_21","OfT7_21","MCA25","MCA32","Of7_15","Of14_4","McD22_P3","OCN008","OCN014","P1","BAA_450"))
Plot the summary: this is the first way Julie learned to do a bubble plot, but there are alternate ways (as in first example)
ggplot(keyed_VFclass_meta,aes(Sample,VFclass,fill=host_health))+
scale_y_discrete(limits = rev(levels(keyed_VFclass_meta$VFclass)))+
geom_point(aes(size=radius*2),shape=21)+
scale_size_identity("legend",guide="legend")+
theme_bw()+
theme(axis.text.x=element_text(size=10,angle=90,hjust=1,vjust=0))+
theme(axis.text.y=element_text(size=12))+
scale_fill_brewer()