Multi‐Omics and Batch Integration - iffatAGheyas/bioinformatics-tutorial-wiki GitHub Wiki
6.1.8 Multi-Omics & Batch Integration
Modern single-cell experiments often measure multiple modalities (e.g. RNA + surface proteins) or combine data from multiple batches. Properly integrating these datasets increases power and reduces technical artifacts.
A. CITE-seq (RNA + Protein)
Installation (Seurat v4)
# Run inside an R session: bootstrap BiocManager only when it is missing,
# then use it to install Seurat.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("Seurat")
Load 10x CITE-seq output
library(Seurat)

# Directory holding the Cell Ranger "filtered_feature_bc_matrix" output
data_dir <- "outs/filtered_feature_bc_matrix/"

# Read both RNA and ADT (antibody-derived tag) matrices.
# For a multi-modal run Read10X() returns a named list, e.g.
# $`Gene Expression` and $`Antibody Capture`.
cite_data <- Read10X(data.dir = data_dir)

# Create the Seurat object from the RNA counts.
# `[[` extracts the matrix itself; `[` would return a one-element list.
seurat_obj <- CreateSeuratObject(
  counts = cite_data[["Gene Expression"]],
  project = "CITE",
  assay = "RNA"
)

# Attach the ADT counts as a second assay on the same cells
adt_assay <- CreateAssayObject(counts = cite_data[["Antibody Capture"]])
seurat_obj[["ADT"]] <- adt_assay
# Normalize RNA & protein separately
# RNA: SCTransform() writes its results to a new "SCT" assay.
seurat_obj <- SCTransform(seurat_obj, assay = "RNA", verbose = FALSE)

# ADT: centered log-ratio (CLR) normalization. margin = 2 normalizes within
# each cell (the setting used in the Seurat WNN vignette); the default
# margin = 1 would normalize across features instead.
seurat_obj <- NormalizeData(
  seurat_obj,
  assay = "ADT",
  normalization.method = "CLR",
  margin = 2
)
seurat_obj <- ScaleData(seurat_obj, assay = "ADT")
# Joint dimensionality reduction
# RNA-side PCA runs on the "SCT" assay produced by SCTransform(); the raw
# "RNA" assay has no scaled data or variable features to run PCA on.
seurat_obj <- RunPCA(
  seurat_obj,
  assay = "SCT",
  reduction.name = "pca.rna",
  reduction.key = "rnaPC_"
)

# Protein-side PCA: no variable features are set on the ADT assay, so pass
# the full antibody panel explicitly. A distinct key keeps the two PCA
# reductions from colliding on the default "PC_" prefix.
seurat_obj <- RunPCA(
  seurat_obj,
  assay = "ADT",
  features = rownames(seurat_obj[["ADT"]]),
  reduction.name = "pca.adt",
  reduction.key = "adtPC_"
)

# Weighted nearest-neighbor graph across both modalities.
# NOTE(review): dims 1:18 for ADT assumes the panel yields at least 18 PCs;
# lower this for smaller antibody panels.
seurat_obj <- FindMultiModalNeighbors(
  seurat_obj,
  reduction.list = list("pca.rna", "pca.adt"),
  dims.list = list(1:30, 1:18),
  modality.weight.name = "RNA.protein.weight"
)

# UMAP computed from the weighted neighbor object
seurat_obj <- RunUMAP(
  seurat_obj,
  nn.name = "weighted.nn",
  reduction.name = "wnn.umap",
  reduction.key = "wnnUMAP_"
)

# Cluster on the weighted shared-nearest-neighbor graph
seurat_obj <- FindClusters(
  seurat_obj,
  graph.name = "wsnn",
  algorithm = 3, # SLM (Leiden would be algorithm = 4)
  resolution = 0.5
)
# Visualization
# ggtitle() is a ggplot2 function and ggplot2 is not attached by
# library(Seurat), so call it with an explicit namespace.
DimPlot(seurat_obj, reduction = "wnn.umap", group.by = "seurat_clusters") +
  ggplot2::ggtitle("CITE-seq WNN Clustering")
B. Batch Correction
- Harmony (R)
# Install and attach harmony
BiocManager::install("harmony")
library(harmony)

# `seurat_obj` is assumed to carry a `batch` metadata column and a PCA
# reduction named "pca". If PCA was stored under another name (e.g.
# "pca.rna" from the CITE-seq section), pass that name instead.
seurat_obj <- RunHarmony(
  seurat_obj,
  group.by.vars = "batch",
  reduction = "pca",
  assay.use = "SCT"
)

# UMAP computed from the Harmony-corrected embedding
seurat_obj <- RunUMAP(
  seurat_obj,
  assay = "SCT",
  reduction = "harmony",
  dims = 1:30
)

# One panel per batch to inspect mixing
DimPlot(seurat_obj, reduction = "umap", split.by = "batch")
- Seurat IntegrateData (R)
# Split by batch and run SCTransform on each piece.
# lapply() keeps the list names and replaces the element-wise loop.
seurat_list <- SplitObject(seurat_obj, split.by = "batch")
seurat_list <- lapply(seurat_list, SCTransform, verbose = FALSE)

# Select the features used to anchor the datasets to each other
features <- SelectIntegrationFeatures(
  object.list = seurat_list,
  nfeatures = 3000
)

# Prepare the SCT-normalized objects for integration
seurat_list <- PrepSCTIntegration(
  object.list = seurat_list,
  anchor.features = features
)

# Find anchors between batches, then integrate
anchors <- FindIntegrationAnchors(
  object.list = seurat_list,
  normalization.method = "SCT",
  anchor.features = features
)
seurat_int <- IntegrateData(
  anchorset = anchors,
  normalization.method = "SCT"
)

# Run PCA/UMAP on the integrated object and check batch mixing
seurat_int <- RunPCA(seurat_int, verbose = FALSE)
seurat_int <- RunUMAP(seurat_int, dims = 1:30)
DimPlot(seurat_int, group.by = "batch")
C. Multi-ome Integration
- MOFA2 (R)
# Install and attach MOFA2
BiocManager::install("MOFA2")
library(MOFA2)

# Prepare a named list of modalities (e.g. RNA & protein), one
# feature-by-cell matrix each. GetAssayData() is used instead of reaching
# into @assays$...@counts so the code does not depend on the assay class's
# internal slot layout.
# NOTE(review): as.matrix() densifies the counts and can be memory-hungry;
# MOFA2 is usually fed normalized values on a subset of variable features
# rather than raw counts -- confirm what is intended here.
data_list <- list(
  RNA = as.matrix(GetAssayData(seurat_obj, assay = "RNA", slot = "counts")),
  ADT = as.matrix(GetAssayData(seurat_obj, assay = "ADT", slot = "counts"))
)

# Create the MOFA object, fill in default data/model/training options, and
# train. run_mofa() expects a model that has been through prepare_mofa().
mofa_obj <- create_mofa(data_list)
mofa_obj <- prepare_mofa(mofa_obj)
mofa_obj <- run_mofa(mofa_obj)

# Extract the learned factors and plot the first two, colored by batch
factors <- get_factors(mofa_obj, factors = "all")
plot_factors(mofa_obj, factors = 1:2, color_by = seurat_obj$batch)
- Seurat WNN (R) (see CITE-seq section above—WNN is Seurat’s built-in multi-omic integration)
# Run once FindMultiModalNeighbors() has built the weighted graphs.
# Clustering uses "wsnn", the weighted shared-nearest-neighbor graph.
seurat_obj <- FindClusters(
  seurat_obj,
  resolution = 0.5,
  algorithm = 3,
  graph.name = "wsnn"
)

# Labelled clusters on the WNN UMAP
DimPlot(seurat_obj, label = TRUE, reduction = "wnn.umap")
Key points:
- CITE-seq leverages ADT counts to refine clusters beyond RNA alone.
- Harmony and IntegrateData both correct batch effects—Harmony works "in-place" on embeddings, while Seurat's integration creates a new assay.
- MOFA2 uncovers shared latent factors across modalities; Seurat WNN uses a graph-based approach for joint clustering.
- Always inspect batch mixing (e.g. UMAP colored by batch) and modality contributions to ensure successful integration.