006‐ Find marker genes - rezakj/iCellR GitHub Wiki
Find markers
Identifying marker genes for clusters helps define the unique biological characteristics of each cluster. Marker genes are genes whose expression is significantly enriched in a specific cluster compared to others, often showcasing distinct cell populations and functions.
marker.genes <- findMarkers(my.obj,
fold.change = 2,
padjval = 0.1)
Examin the marker genes:
dim(marker.genes)
# [1] 1070 17
head(marker.genes)
# baseMean baseSD AvExpInCluster AvExpInOtherClusters foldChange
#PPBP 0.8257760 12.144694 181.3945 0.1399852 1295.8120969
#GPX1 1.3989591 4.344717 57.4034 1.1862571 48.3903523
#CALM3 0.5469743 1.230942 10.7848 0.5080915 21.2260968
#OAZ1 4.9077851 5.979586 46.7867 4.7487311 9.8524635
#MYL6 3.0806167 3.562124 21.3690 3.0111584 7.0966045
#CD74 8.5523704 13.359205 2.6120 8.5749316 0.3046088
# log2FoldChange pval padj clusters gene cluster_1
#PPBP 10.339641 1.586683e-06 0.014786300 1 PPBP 181.3945
#GPX1 5.596648 1.107541e-07 0.001103775 1 GPX1 57.4034
#CALM3 4.407767 2.098341e-06 0.019415953 1 CALM3 10.7848
#OAZ1 3.300485 7.857814e-07 0.007464137 1 OAZ1 46.7867
#MYL6 2.827129 1.296112e-06 0.012156230 1 MYL6 21.3690
#CD74 -1.714970 9.505749e-06 0.083983296 1 CD74 2.6120
# cluster_2 cluster_3 cluster_4 cluster_5 cluster_6 cluster_7
#PPBP 0.0000000 0.1444327 0.2282912 0.0640625 0.01739706 0.1541084
#GPX1 0.2424969 1.2218772 3.9292720 4.4329583 0.25663235 0.2712831
#CALM3 0.6537205 0.8149415 0.6071034 0.5245625 0.44687500 0.5081867
#OAZ1 3.2077826 12.2072339 8.6080077 10.8738208 2.71288971 3.6402289
#MYL6 4.9660870 5.7945673 4.2813218 4.3046458 2.42854412 3.9030542
#CD74 2.9385839 8.9848538 15.7646245 5.9454250 2.19555882 3.8323072
# cluster_8 cluster_9 cluster_10
#PPBP 0.02478274 0.3668433 0.01026335
#GPX1 0.61210714 0.4635153 0.39311786
#CALM3 0.22591369 0.5210339 0.48856538
#OAZ1 3.67225595 2.3590420 2.53362063
#MYL6 1.72344048 1.6460420 2.59901289
#CD74 36.10877976 1.5638853 1.82587477
# baseMean: average expression in all the cells
# baseSD: Standard Deviation
# AvExpInCluster: average expression in cluster number (see clusters)
# AvExpInOtherClusters: average expression in all the other clusters
# foldChange: AvExpInCluster/AvExpInOtherClusters
# log2FoldChange: log2(AvExpInCluster/AvExpInOtherClusters)
# pval: P value
# padj: Adjusted P value
# clusters: marker for cluster number
# gene: marker gene for the cluster
# the rest are the average expression for each cluster
Heatmap
# find top genes
MyGenes <- top.markers(marker.genes, topde = 10, min.base.mean = 0.2,filt.ambig = F)
MyGenes <- unique(MyGenes)
# main data
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", conds.to.plot = NULL)
# imputed data
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", data.type = "imputed", conds.to.plot = NULL)
# sort cells and plot only one condition
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", data.type = "imputed", cell.sort = TRUE, conds.to.plot = c("WT"))
# Pseudotime stile
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "none", data.type = "imputed", cell.sort = TRUE)
# intractive
# heatmap.gg.plot(my.obj, gene = MyGenes, interactive = T, out.name = "heatmap_gg", cluster.by = "clusters")
Bubble heatmap
png('heatmap_bubble_gg_genes.png', width = 10, height = 20, units = 'in', res = 300)
bubble.gg.plot(my.obj, gene = MyGenes, interactive = F, conds.to.plot = NULL, size = "Percent.Expressed",colour = "Expression")
dev.off()
Multiple plots for markers
Let's say you have these markers:
genelist = c("MS4A1","GNLY","FCGR3A","NKG7","CD14","CD3E","CD8A","CD4","GZMH","CCR7","CD68")
- Make a
functionlike below change the section in between####signs for different plots (e.g. boxplot, bar, ...).
rm(list = ls(pattern="PL_"))
for(i in genelist){
####
MyPlot <- gene.plot(my.obj, gene = i,
interactive = F,
cell.size = 0.1,
plot.data.type = "knetl",
data.type = "main",
scaleValue = T,
min.scale = 0,max.scale = 2.0,
cell.transparency = 1)
####
NameCol=paste("PL",i,sep="_")
eval(call("<-", as.name(NameCol), MyPlot))
}
Write all the plots made using the function above
library(cowplot)
# make a list of all plots
filenames <- ls(pattern="PL_")
# add the cluster plot to the list
B <- cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.1,cell.transparency = 1,anno.clust=T)
filenames <- c("B",filenames)
# write as PNG
png('genes_KNetL.png',width = 15, height = 12, units = 'in', res = 300)
plot_grid(plotlist=mget(filenames))
dev.off()
Heatmap of the marker genes
heatmap.gg.plot(my.obj, gene = genelist, interactive = F, cluster.by = "clusters")