012‐ Batch Sample alignment - rezakj/iCellR GitHub Wiki
Batch correction (sample alignment) methods: 1- CPCA (iCellR)** recommended (faster than CCCA)
2- CCCA (iCellR)* recommended
3- MNN (scran wraper) optional
4- MultiCCA (Seurat wraper) optional
5- CPCA + KNetL based clustering (iCellR)*** recommended for best results!
Run CPCA
For batch alignment, simply run the command below. This will replace your original PCA data with batch-aligned data. However, you will need to re-run processes such as UMAP, KNetL, t-SNE, clustering, and marker detection afterward.
my.obj <- iba(my.obj,dims = 1:30, k = 10,ba.method = "CPCA", method = "gene.model", gene.list = [email protected])
To explore further, here’s an example run using 9 sample datasets:
1- How to perform Combined Principal Component Alignment (CPCA)
Here we analyzed nine PBMC sample datasets provided by the Broad Institute to detect batch differences. These datasets were generated using varying technologies, including 10x Chromium v2 (3 samples), 10x Chromium v3, CEL-Seq2, Drop-seq, inDrop, Seq-Well and SMART-Seq. For more info read: https://www.biorxiv.org/content/10.1101/2020.03.31.019109v1.full
## download an object of 9 PBMC samples
sample.file.url = "https://genome.med.nyu.edu/results/external/iCellR/data/pbmc_data/my.obj.Robj"
# download the file
download.file(url = sample.file.url,
destfile = "my.obj.Robj",
method = "auto")
### load iCellR and the object
library(iCellR)
load("my.obj.Robj")
### run PCA on top 2000 genes
my.obj <- run.pca(my.obj, top.rank = 2000)
### find best genes for second round PCA or batch alignment
my.obj <- find.dim.genes(my.obj, dims = 1:30,top.pos = 20, top.neg = 20)
length([email protected])
########### Batch alignment (CPCA method)
my.obj <- iba(my.obj,dims = 1:30, k = 10,ba.method = "CPCA", method = "gene.model", gene.list = [email protected])
### impute data
my.obj <- run.impute(my.obj,dims = 1:10,data.type = "pca", nn = 10)
### tSNE and UMAP
my.obj <- run.pc.tsne(my.obj, dims = 1:10)
my.obj <- run.umap(my.obj, dims = 1:10)
### save object
save(my.obj, file = "my.obj.Robj")
### plot
library(gridExtra)
A= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.1)
B= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.1)
C= cluster.plot(my.obj,plot.type = "umap",col.by = "conditions",interactive = F,cell.size = 0.1)
D=cluster.plot(my.obj,plot.type = "tsne",col.by = "conditions",interactive = F,cell.size = 0.1)
png('AllClusts.png', width = 12, height = 12, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()
png('AllConds_clusts.png', width = 15, height = 15, units = 'in', res = 300)
cluster.plot(my.obj,
cell.size = 0.5,
plot.type = "umap",
cell.color = "black",
back.col = "white",
cell.transparency = 1,
clust.dim = 2,
interactive = F,cond.facet = T)
dev.off()
genelist = c("PPBP","LYZ","MS4A1","GNLY","FCGR3A","NKG7","CD14","S100A9","CD3E","CD8A","CD4","CD19","IL7R","FOXP3","EPCAM")
for(i in genelist){
MyPlot <- gene.plot(my.obj, gene = i,
interactive = F,
conds.to.plot = NULL,
cell.size = 0.1,
data.type = "main",
plot.data.type = "umap",
scaleValue = T,
min.scale = -2.5,max.scale = 2.0,
cell.transparency = 1)
NameCol=paste("PL",i,sep="_")
eval(call("<-", as.name(NameCol), MyPlot))
}
UMAP = cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.1, anno.size=5)
library(cowplot)
filenames <- ls(pattern="PL_")
filenames <- c("UMAP", filenames)
png('genes.png',width = 18, height = 15, units = 'in', res = 300)
plot_grid(plotlist=mget(filenames))
dev.off()
2- How to perform Combined Coverage Correction Alignment (CCCA)
# same as above only change the option to CCCA
my.obj <- iba(my.obj,dims = 1:30, k = 10,ba.method = "CCCA", method = "gene.model", gene.list = [email protected])
3- How to perform mutual nearest neighbor (MNN) sample alignment
# same as above only use run.mnn function instead of iba.
###### Run MNN
# This would automatically run all the samples in your experiment
library(scran)
my.obj <- run.mnn(my.obj, k=20, d=50, method = "gene.model", gene.list = [email protected])
# detach the scran pacakge after MNN as it masks some of the functions
detach("package:scran", unload=TRUE)
4- How to perform Seurat's MultiCCA sample alignment
# same as above only use run.anchor function instead of iba.
###### Run Anchor
# This would automatically run all the samples in your experiment
library(Seurat)
my.obj <- run.anchor(my.obj,
normalization.method = "SCT",
scale.factor = 10000,
selection.method = "vst",
nfeatures = 2000,
dims = 1:20)
5- How to perform CPCA + KNetL based clustering for sample alignment/integration
## download an object of 9 PBMC samples
sample.file.url = "https://genome.med.nyu.edu/results/external/iCellR/example2/my.obj.Robj"
# download the file
download.file(url = sample.file.url,
destfile = "my.obj.Robj",
method = "auto")
### load iCellR and the object
library(iCellR)
load("my.obj.Robj")
### run PCA on top 2000 genes
my.obj <- run.pca(my.obj, top.rank = 2000)
### find best genes for second round PCA or batch alignment
my.obj <- find.dim.genes(my.obj, dims = 1:30,top.pos = 20, top.neg = 20)
length([email protected])
########### Batch alignment (CPCA method)
my.obj <- iba(my.obj,dims = 1:30, k = 10,ba.method = "CPCA", method = "gene.model", gene.list = [email protected])
### impute data
my.obj <- run.impute(my.obj,dims = 1:10,data.type = "pca", nn = 10)
### tSNE and UMAP
my.obj <- run.pc.tsne(my.obj, dims = 1:10)
my.obj <- run.umap(my.obj, dims = 1:10)
### run KNetL
my.obj <- run.knetl(my.obj, dims = 1:20, k = 400)
### cluster based on KNetL coordinates
# The object is already clustered but here is an example:
# my.obj <- iclust(my.obj, k = 300, data.type = "knetl")
### save object
save(my.obj, file = "my.obj.Robj")
### plot 1
A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=F)
B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=F)
C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=F)
D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1,anno.clust=F)
library(gridExtra)
grid.arrange(A,B,C,D)
### plot 2
cluster.plot(my.obj,
cell.size = 0.5,
plot.type = "knetl",
cell.color = "black",
back.col = "white",
cell.transparency = 1,
clust.dim = 2,
interactive = F,cond.facet = T)
### plot 3
genelist = c("LYZ","MS4A1","GNLY","FCGR3A","NKG7","CD14","S100A9","CD3E","CD8A","CD4","CD19","KLRB1","LTB","IL7R","GZMH","CD68","CCR7","CD68","CD69","CXCR4","IFITM3","IL32","JCHAIN","VCAN","PPBP")
rm(list = ls(pattern="PL_"))
for(i in genelist){
MyPlot <- gene.plot(my.obj, gene = i,
interactive = F,
cell.size = 0.1,
plot.data.type = "knetl",
data.type = "main",
scaleValue = T,
min.scale = -2.5,max.scale = 2.0,
cell.transparency = 1)
NameCol=paste("PL",i,sep="_")
eval(call("<-", as.name(NameCol), MyPlot))
}
library(cowplot)
filenames <- ls(pattern="PL_")
B <- cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.1,cell.transparency = 1,anno.clust=T)
filenames <- c("B",filenames)
plot_grid(plotlist=mget(filenames))