TaxaPlot.R Tutorial - sciencesharon/MicrobialSeq GitHub Wiki
This tutorial can be run after Merged_Abundance_Processing.R, or it can be used with any relative abundance table that has samples as columns and taxa as rows.
If starting from Merged_Abundance_Processing.R:
If starting from a relative abundance table:
# Read the tab-separated merged abundance table; header = FALSE means the
# first line of the file is read as data rather than as column names.
merged <- read.table(
  file = "/path/to/file/merged_abundance_example.txt",
  header = FALSE,
  sep = "\t"
)
# Directory passed to merged_abundance_processing() as its output location.
output_dir <- "/path/to/file/output"
relabun_list <- merged_abundance_processing(merged, output_dir)
# Load the sample metadata and move its "X" column into the row names.
met <- tibble::column_to_rownames(
  read.csv("/path/to/file/metadata_example.csv"),
  var = "X"
)
- Change the filter below to select whichever samples you want to keep from your own data
# Keep the metadata rows whose SampleID contains "Sample" at a word boundary
# (case-insensitive), then collect the matching sample IDs.
samples <- filter(
  met,
  str_detect(SampleID, regex("\\bSample", ignore_case = TRUE))
)
subset_samples <- samples[["SampleID"]]
3. Subset the merged abundance output using subset_by_colnames_and_save (if working from merged abundance table)
# Directory passed to subset_by_colnames_and_save() as output_dir.
new_dir <- "/path/to/file/output/subset"
# Subset the processed tables to the selected sample columns.
subset_relabun_list <- subset_by_colnames_and_save(
  relabun_list,
  subset_samples,
  prefix = "Subset",
  output_dir = new_dir
)
# NOTE(review): Subset_Genus_RelAbun and Subset_Species_RelAbun are not
# assigned anywhere in this tutorial; presumably subset_by_colnames_and_save()
# creates them using the "Subset" prefix - confirm.
# Genus-level table, N = 15.
Genus_otu_most <- mostAbundant(
  Subset_Genus_RelAbun,
  N = 15,
  items = NULL,
  others = TRUE,
  rescale = TRUE
)
# Species-level table, same settings.
Species_otu_most <- mostAbundant(
  Subset_Species_RelAbun,
  N = 15,
  items = NULL,
  others = TRUE,
  rescale = TRUE
)
# A single group containing every selected sample.
sample_metagroup <- list("All_Samples" = subset_samples)

# Sample IDs for each timepoint, taken from the Timepoint metadata column.
two_samples <- subset(samples, Timepoint == "2_week")$SampleID
five_samples <- subset(samples, Timepoint == "5_month")$SampleID

# All timepoint samples: 2-week IDs first, then 5-month IDs.
time_samples <- c(two_samples, five_samples)

# Named groups used for faceting (5-month listed first, 2-week second).
timepoint_metagroup <- list("5_month" = five_samples, "2_week" = two_samples)
# Genus table restricted to all selected samples, then to the timepoint samples.
subset_genus <- Genus_otu_most[, subset_samples]
timepoint_genus <- Genus_otu_most[, time_samples]

# Same two column selections for the species table.
subset_species <- Species_otu_most[, subset_samples]
timepoint_species <- Species_otu_most[, time_samples]
# Genus: colors from color_taxa() and legend labels from the table's row names.
color_taxa_gen <- color_taxa(Genus_otu_most)
label_fill_gen <- rownames(Genus_otu_most)

# Species: same pair of objects.
color_taxa_spec <- color_taxa(Species_otu_most)
label_fill_spec <- rownames(Species_otu_most)
- change height and width of the pdf as needed
- data: relative abundance table
- label_y: y label
- color: colors for taxa
- label_fill: labels for taxa
- base_size: text size
- nrow & ncol: number of rows and columns for facet_wrap
- metadata_groups: groups by which to facet_wrap
# Open the PDF device; adjust width and height to suit your figure.
pdf(file = "/path/to/file/status.genus.pdf", width = 8, height = 5)
taxa_plot(
  data = timepoint_genus,
  label_y = "Relative Abundance",
  color = color_taxa_gen,
  label_fill = label_fill_gen,
  base_size = 12,
  nrow = 1,
  ncol = 2,
  metadata_groups = timepoint_metagroup
)
# Close the device so the PDF is written to disk.
dev.off()
# Save the table that was plotted.
write.csv(timepoint_genus, file = "/path/to/file/timepoint.genus.csv")
# Extended species plot: rebuild the species table with N = 30, put the taxa
# in a hand-picked order, and plot them with hand-picked colors.
Species_otu_most <- mostAbundant(
  Subset_Species_RelAbun,
  N = 30,
  items = NULL,
  others = TRUE,
  rescale = TRUE
)
# Move the row names into a "Species" column so rows can be matched by name.
Species_otu_most <- Species_otu_most %>% rownames_to_column(var = "Species")

# Desired row order. Edit this list to match the taxa in your own data.
custom_order <- c(
  "Other",
  "Bifidobacterium bifidum",
  "Bifidobacterium breve",
  "Bifidobacterium dentium",
  "Bifidobacterium longum",
  "Bifidobacterium pseudocatenulatum",
  "Klebsiella michiganensis",
  "Klebsiella oxytoca",
  "Klebsiella pneumoniae",
  "Klebsiella variicola",
  "Escherichia coli",
  "Veillonella parvula",
  "Veillonella ratti",
  "Veillonella rogosae",
  "Bacteroides fragilis",
  "Bacteroides ovatus",
  "Citrobacter freundii",
  "Citrobacter sp RHBSTW 00671",
  "Phocaeicola dorei",
  "Phocaeicola vulgatus",
  "Streptococcus salivarius",
  "Parabacteroides distasonis",
  "Parabacteroides merdae",
  "Staphylococcus epidermidis",
  "Erysipelatoclostridium ramosum",
  "Enterococcus faecalis",
  "Clostridium butyricum",
  "Hungatella hathewayi",
  "Megasphaera sp MJR8396C",
  "Ruminococcus gnavus",
  "Ruminococcus torques"
)

# match() returns NA for any listed taxon that is absent from the table. That
# would create an all-NA row and make column_to_rownames() fail with an
# unhelpful "missing values in 'row.names'" error, so fail early instead.
missing_taxa <- setdiff(custom_order, Species_otu_most$Species)
if (length(missing_taxa) > 0) {
  stop(
    "custom_order contains taxa that are not in Species_otu_most: ",
    paste(missing_taxa, collapse = ", "),
    call. = FALSE
  )
}
# Taxa present in the table but not listed in custom_order are dropped by the
# reordering below; make that visible rather than silent.
unlisted_taxa <- setdiff(Species_otu_most$Species, custom_order)
if (length(unlisted_taxa) > 0) {
  warning(
    "Taxa not listed in custom_order will be dropped from the plot: ",
    paste(unlisted_taxa, collapse = ", "),
    call. = FALSE
  )
}

# Reorder the rows, then restore the species names as row names.
Species_otu_most <- Species_otu_most[match(custom_order, Species_otu_most$Species), ]
rownames(Species_otu_most) <- NULL
Species_otu_most <- Species_otu_most %>% column_to_rownames(var = "Species")

# One color per taxon, keyed by taxon name.
color_taxa_spec <- c(
  "Other" = "#bdbcbc",
  "Bifidobacterium bifidum" = "#85A1EF",
  "Bifidobacterium breve" = "#B7D8E8",
  "Bifidobacterium dentium" = "#7EC8E3",
  "Bifidobacterium longum" = "#0F52BA",
  "Bifidobacterium pseudocatenulatum" = "#00FFFF",
  "Klebsiella michiganensis" = "#b2d689",
  "Klebsiella oxytoca" = "#B4C424",
  "Klebsiella pneumoniae" = "#7CFC00",
  "Klebsiella variicola" = "#DFFF00",
  "Escherichia coli" = "#FFEA00",
  "Veillonella parvula" = "#f7766d",
  "Veillonella ratti" = "#b54e4b",
  "Veillonella rogosae" = "#732728",
  "Bacteroides fragilis" = "#196619",
  "Bacteroides ovatus" = "#123D12",
  "Citrobacter freundii" = "#D4FBE7",
  "Citrobacter sp RHBSTW 00671" = "#75D7A2",
  "Phocaeicola dorei" = "#0000FF",
  "Phocaeicola vulgatus" = "#00008B",
  "Streptococcus salivarius" = "#f5bcbc",
  "Parabacteroides distasonis" = "#AB6666",
  "Parabacteroides merdae" = "#F5DEB3",
  "Staphylococcus epidermidis" = "#f59a40",
  "Erysipelatoclostridium ramosum" = "#c8b1d5",
  "Enterococcus faecalis" = "#D22B2B",
  "Clostridium butyricum" = "#FF7518",
  "Hungatella hathewayi" = "#FF10F0",
  "Megasphaera sp MJR8396C" = "#9F2B68",
  "Ruminococcus gnavus" = "#5D3FD3",
  "Ruminococcus torques" = "#800080"
)
# Legend labels follow the reordered row names.
label_fill_spec <- rownames(Species_otu_most)
# Keep only the timepoint sample columns for plotting.
timepoint_species <- Species_otu_most[, time_samples]

# Change the height and width as needed.
pdf("/path/to/file/timepoint.species_extended.pdf", width = 12, height = 5)
taxa_plot(
  timepoint_species,
  label_y = "Relative Abundance",
  color = color_taxa_spec,
  label_fill = label_fill_spec,
  base_size = 12,
  nrow = 1,
  ncol = 2,
  metadata_groups = timepoint_metagroup
)
dev.off()
# Save the table that was plotted.
write.csv(timepoint_species, "/path/to/file/timepoint.species_extended.csv")