Remove Doublets - MattHuff/SingleCellDocumentation_112023 GitHub Wiki
Doublets are errors in an scRNAseq experiment that marke data from two or more cells as coming from a single cell. In this step of the analysis, we will mark and filter out cells found to be doublets.
R Set-up: Loading Required Packages
library(Seurat)
library(dplyr)
library(ggplot2)
library(cowplot)
library(clustree)
library(scDblFinder)
library(SingleCellExperiment)
library(harmony)
NOTE: If you installed the version of R that is compatible with the M1 Mac chip (aka Arm64), reinstall it with a fresh installation of the x86_64 version. One of the dependencies of ScDblFinder, scran, is not able to be installed on the Arm64 version. If you run into issues installing ScDblFinder, try running this instead.
1. Load Seurat Object
If you are still working in the same R session in which you ran the previous Seurat steps, you may skip this step and use the same Seurat object you produced at the end. However, since this pipeline can take a while to run, it's a good idea to save RDS files throughout the pipeline so you can start it from where you left off.
h5_seurat_integrated <- readRDS(file = "katy_samples/03_processed_integrated_norris.rds")
h5_seurat_integrated <- JoinLayers(h5_seurat_integrated)
Convert Seurat Object to Single Cell Experiment (SCE) Object
In the past, you could run as.SingleCellExperiment(h5_seurat_integrated)
and directly run ScDblFinder. However, the way Seurat objects are read in R has changed, and this will give you an error. To correct this, create a copy of your Seurag object that only contains the "RNA" assay, and convert this object to an SCE object.
DefaultAssay(h5_seurat_integrated) <- "RNA"
h5_seurat_rna <- h5_seurat_integrated
h5_seurat_rna['SCT'](/MattHuff/SingleCellDocumentation_112023/wiki/'SCT') = NULL
sce <- as.SingleCellExperiment(h5_seurat_rna)
Run ScDblFinder
sce <- scDblFinder(sce, samples = "sample")
Obtain Class Counts
This command identifies the number of singlets (cells we want to keep) and doublets (cells we want to filter out). The goal is to make sure we won't lose too many cells after filtering out doublets.
class_counts <- table(sce@colData@listData["scDblFinder.class"](/MattHuff/SingleCellDocumentation_112023/wiki/"scDblFinder.class"))
write.csv(class_counts, file = 'katy_samples/singlet-doublet_counts.csv')
print(class_counts)
Assign ScDblFinder Results to Original Seurat Object
This will allow us to keep the SCT assay information that we had to void in the Seurat object copy we made.
h5_seurat_integrated['scDblFinder.class'](/MattHuff/SingleCellDocumentation_112023/wiki/'scDblFinder.class') <- sce@colData@listData["scDblFinder.class"](/MattHuff/SingleCellDocumentation_112023/wiki/"scDblFinder.class")
## Optional: remove extra object
rm(h5_seurat_rna)
Plot Singlets and Doublets
DimPlot(h5_seurat_integrated, group.by = "scDblFinder.class")
ggsave(file = 'katy_samples/viz_cluster/10_DIMPLOT_doublets.pdf')
Filter Out Doublets
h5_seurat_no_dbl <- subset(h5_seurat_integrated, subset = scDblFinder.class == 'singlet')
Rerun Standard Workflow
# Run PCA
DefaultAssay(h5_seurat_no_dbl) <- 'integrated'
h5_seurat_no_dbl <- RunPCA(object = h5_seurat_no_dbl,
features = NULL,
weight.by.var = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
npcs = 30,
reduction.name = "pca")
# Find Neighbors
h5_seurat_no_dbl <- FindNeighbors(object = h5_seurat_no_dbl,
reduction = "pca",
dims = 1:30,
nn.eps = 0.5)
# Find Clusters
umap_resolution <- c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2)
h5_seurat_no_dbl <- FindClusters(object = h5_seurat_no_dbl,
resolution = umap_resolution,
algorithm = 1,
n.iter = 1000)
# Run UMAP
h5_seurat_no_dbl <- RunUMAP(object = h5_seurat_no_dbl,
reduction = "pca",
dims = 1:30)
Filtered Dim Plots
p1 <- DimPlot(object = h5_seurat_no_dbl, reduction = "umap", label = TRUE, pt.size = 0.5) + theme(legend.position="none")
ggsave(file = "katy_samples/viz_cluster/11_DIMPLOT_labels_filt.pdf")
p2 <- DimPlot(object = h5_seurat_no_dbl, reduction = "umap", label = FALSE, pt.size = 0.5, split.by="sample")
ggsave(file = "katy_samples/viz_cluster/12_DIMPLOT_sample_filt.pdf", width = 12)
Save RDS
saveRDS(h5_seurat_no_dbl, "katy_samples/03a_filtered_integrated_norris.rds")