8. Graphics - DianaCarolinaVergara/SNPs_pipeline GitHub Wiki

Graphics to see SNPs filtering process

Muricea_dp<- extract.gt(Muricea.vcf_ID_SNPs, element = "DP", as.numeric=TRUE)

After this command, you can create an initial violin plot:

dpf <- melt(Muricea_dp, varnames = c("Index", "Sample"),
            value.name = "Depth", na.rm = TRUE)
dpf <- dpf[ dpf$Depth > 0, ]
p <- ggplot(dpf, aes(x = Sample, y = Depth))
p <- p + geom_violin(fill = "#C0C0C0", adjust = 1.0,
                     scale = "count", trim = TRUE)
p <- p + theme_bw()
p <- p + theme(axis.title.x = element_blank(),
               axis.text.x = element_text(angle = 60, hjust = 1))
p <- p + scale_y_continuous(trans = scales::log2_trans(),
                            breaks = c(1, 10, 100, 800),
                            minor_breaks = c(1:10, 2:10 * 10, 2:8 * 100))
p <- p + theme(panel.grid.major.y = element_line(color = "#A9A9A9", size = 0.6))
p <- p + theme(panel.grid.minor.y = element_line(color = "#C0C0C0", size = 0.2))
p <- p + ylab("Depth (DP)")
p

or a heatmap:

quants <- apply(Muricea_dp, MARGIN=2, quantile, probs=c(0.1, 0.9), na.rm=TRUE)
dp2 <- sweep(Muricea_dp, MARGIN = 2, FUN = "-", quants[1, ])
Muricea_dp[dp2 < 0] <- NA
dp2 <- sweep(Muricea_dp, MARGIN = 2, FUN = "-", quants[2, ])
Muricea_dp[dp2 > 0] <- NA
Muricea_dp[Muricea_dp < 10] <- NA
# Update the vcfR object with our changes.
Muricea.vcf_ID_SNPs@gt[, -1][ is.na(Muricea_dp) == TRUE ] <- NA
Muricea.vcf_ID_SNPs

Muricea.vcf_ID_SNPs_dp<- extract.gt(Muricea.vcf_ID_SNPs, element = "DP", as.numeric=TRUE)

heatmap.bp(Muricea.vcf_ID_SNPs_dp[1:1000,], rlabels=FALSE)

After omitting samples, you can use a barplot for visualization.

Muricea.vcf_ID_SNPs_miss <- apply(Muricea.vcf_ID_SNPs_dp, MARGIN = 2, function(x){ sum(is.na(x)) })
Muricea.vcf_ID_SNPs_miss <- Muricea.vcf_ID_SNPs_miss/nrow(Muricea.vcf_ID_SNPs_dp)*100

##Code for barplot

par(mar = c(7,4,4,2))
barplot(Muricea.vcf_ID_SNPs_miss, las = 2, col = 1:12, axes=TRUE, axisnames = TRUE)
title("Missingness (%)")
par(font.axis = 2)
par(font.lab = 2)
par(mar = c(5,4,4,2))
barplot(Muricea.vcf_ID_SNPs_miss, col = 1:100, axes=TRUE, axisnames = TRUE)
title(main= "Missingness in Muricea", ylab = "Missingness (%)", xlab= "Sample")