006‐ Find marker genes - rezakj/iCellR GitHub Wiki

Find markers

Identifying marker genes for clusters helps define the unique biological characteristics of each cluster. Marker genes are genes whose expression is significantly enriched in a specific cluster compared to others, often showcasing distinct cell populations and functions.

marker.genes <- findMarkers(my.obj,
	fold.change = 2,
	padjval = 0.1)

Examin the marker genes:

dim(marker.genes)
# [1] 1070   17

head(marker.genes)
#      baseMean    baseSD AvExpInCluster AvExpInOtherClusters   foldChange
#PPBP  0.8257760 12.144694       181.3945            0.1399852 1295.8120969
#GPX1  1.3989591  4.344717        57.4034            1.1862571   48.3903523
#CALM3 0.5469743  1.230942        10.7848            0.5080915   21.2260968
#OAZ1  4.9077851  5.979586        46.7867            4.7487311    9.8524635
#MYL6  3.0806167  3.562124        21.3690            3.0111584    7.0966045
#CD74  8.5523704 13.359205         2.6120            8.5749316    0.3046088
#      log2FoldChange         pval        padj clusters  gene cluster_1
#PPBP       10.339641 1.586683e-06 0.014786300        1  PPBP  181.3945
#GPX1        5.596648 1.107541e-07 0.001103775        1  GPX1   57.4034
#CALM3       4.407767 2.098341e-06 0.019415953        1 CALM3   10.7848
#OAZ1        3.300485 7.857814e-07 0.007464137        1  OAZ1   46.7867
#MYL6        2.827129 1.296112e-06 0.012156230        1  MYL6   21.3690
#CD74       -1.714970 9.505749e-06 0.083983296        1  CD74    2.6120
#      cluster_2  cluster_3  cluster_4  cluster_5  cluster_6 cluster_7
#PPBP  0.0000000  0.1444327  0.2282912  0.0640625 0.01739706 0.1541084
#GPX1  0.2424969  1.2218772  3.9292720  4.4329583 0.25663235 0.2712831
#CALM3 0.6537205  0.8149415  0.6071034  0.5245625 0.44687500 0.5081867
#OAZ1  3.2077826 12.2072339  8.6080077 10.8738208 2.71288971 3.6402289
#MYL6  4.9660870  5.7945673  4.2813218  4.3046458 2.42854412 3.9030542
#CD74  2.9385839  8.9848538 15.7646245  5.9454250 2.19555882 3.8323072
#        cluster_8 cluster_9 cluster_10
#PPBP   0.02478274 0.3668433 0.01026335
#GPX1   0.61210714 0.4635153 0.39311786
#CALM3  0.22591369 0.5210339 0.48856538
#OAZ1   3.67225595 2.3590420 2.53362063
#MYL6   1.72344048 1.6460420 2.59901289
#CD74  36.10877976 1.5638853 1.82587477

# baseMean: average expression in all the cells
# baseSD: Standard Deviation
# AvExpInCluster: average expression in cluster number (see clusters)
# AvExpInOtherClusters: average expression in all the other clusters
# foldChange: AvExpInCluster/AvExpInOtherClusters
# log2FoldChange: log2(AvExpInCluster/AvExpInOtherClusters)
# pval: P value 
# padj: Adjusted P value 
# clusters: marker for cluster number
# gene: marker gene for the cluster
# the rest are the average expression for each cluster

Heatmap

# find top genes
MyGenes <- top.markers(marker.genes, topde = 10, min.base.mean = 0.2,filt.ambig = F)
MyGenes <- unique(MyGenes)

# main data 
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", conds.to.plot = NULL)

# imputed data 
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", data.type = "imputed", conds.to.plot = NULL)

# sort cells and plot only one condition
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters", data.type = "imputed", cell.sort = TRUE, conds.to.plot = c("WT"))

# Pseudotime stile
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "none", data.type = "imputed", cell.sort = TRUE)

# intractive 
# heatmap.gg.plot(my.obj, gene = MyGenes, interactive = T, out.name = "heatmap_gg", cluster.by = "clusters")

Bubble heatmap

png('heatmap_bubble_gg_genes.png', width = 10, height = 20, units = 'in', res = 300)
bubble.gg.plot(my.obj, gene = MyGenes, interactive = F, conds.to.plot = NULL, size = "Percent.Expressed",colour = "Expression")
dev.off()

Multiple plots for markers

Let's say you have these markers:

genelist = c("MS4A1","GNLY","FCGR3A","NKG7","CD14","CD3E","CD8A","CD4","GZMH","CCR7","CD68")
  • Make a function like below change the section in between #### signs for different plots (e.g. boxplot, bar, ...).
rm(list = ls(pattern="PL_"))
for(i in genelist){
####
    MyPlot <- gene.plot(my.obj, gene = i,
        interactive = F,
        cell.size = 0.1,
        plot.data.type = "knetl",
        data.type = "main",
        scaleValue = T,
        min.scale = 0,max.scale = 2.0,
        cell.transparency = 1)
####
    NameCol=paste("PL",i,sep="_")
    eval(call("<-", as.name(NameCol), MyPlot))
}

Write all the plots made using the function above

library(cowplot)

# make a list of all plots
filenames <- ls(pattern="PL_")

# add the cluster plot to the list
B <- cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.1,cell.transparency = 1,anno.clust=T)
filenames <- c("B",filenames)

# write as PNG
png('genes_KNetL.png',width = 15, height = 12, units = 'in', res = 300)
plot_grid(plotlist=mget(filenames))
dev.off()

Heatmap of the marker genes

heatmap.gg.plot(my.obj, gene = genelist, interactive = F, cluster.by = "clusters")