ScDblFinder - MattHuff/scRNASeq_011224 GitHub Wiki
This step identifies doublets, or errors in an scRNASeq experiment in which data from two or more cells are marked as from one cell.
Previous Doublet Documentation
R Set-up: Load Required Packages, Set Working Directory, and Load Seurat Object
If you are continuing directly from the previous step of documentation, then you do not need to load the RDS file. However, I always include in my documentation in case I need to start the analysis from a specific point.
# Load Packages
library(Seurat)
library(dplyr)
library(ggplot2)
library(cowplot)
library(clustree)
library(scDblFinder)
library(SingleCellExperiment)
library(harmony)
# Set Working Directory
setwd("~/Downloads/scRNASeq_011224/5_Seurat/")
# Load Seurat Object (Optional)
h5_seurat_integrated <- readRDS(file = "outputs/4_processed_integrated_norris.rds")
h5_seurat_integrated <- JoinLayers(h5_seurat_integrated)
Convert Seurat Object to Single Cell Experiment (SCE) Object
DefaultAssay(h5_seurat_integrated) <- "SCT"
h5_seurat_sct <- h5_seurat_integrated
h5_seurat_sct['RNA'](/MattHuff/scRNASeq_011224/wiki/'RNA') = NULL
sce <- as.SingleCellExperiment(h5_seurat_sct)
Run ScDblFinder
sce <- scDblFinder(sce, samples = "sample")
Obtain Class Counts
class_counts <- table(sce@colData@listData["scDblFinder.class"](/MattHuff/scRNASeq_011224/wiki/"scDblFinder.class"))
write.csv(class_counts, file = 'outputs//singlet-doublet_counts.csv')
print(class_counts)
Assign ScDblFinder Results to Original Seurat Object
h5_seurat_integrated['scDblFinder.class'](/MattHuff/scRNASeq_011224/wiki/'scDblFinder.class') <- sce@colData@listData["scDblFinder.class"](/MattHuff/scRNASeq_011224/wiki/"scDblFinder.class")
rm(h5_seurat_sct, sce)
Plot Singlets and Doublets
DimPlot(h5_seurat_integrated, group.by = "scDblFinder.class")
ggsave(file = 'outputs/viz_cluster/10_DIMPLOT_doublets.pdf')
Filter Out Doublets
h5_seurat_no_dbl <- subset(h5_seurat_integrated, subset = scDblFinder.class == 'singlet')
Rerun Standard Workflow
## Run PCA
DefaultAssay(h5_seurat_no_dbl) <- 'integrated'
h5_seurat_no_dbl <- RunPCA(object = h5_seurat_no_dbl,
features = NULL,
weight.by.var = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
npcs = 30,
reduction.name = "pca")
## Find Neighbors
h5_seurat_no_dbl <- FindNeighbors(object = h5_seurat_no_dbl,
reduction = "pca",
dims = 1:30,
nn.eps = 0.5)
## Find Clusters
umap_resolution <- 0.6
h5_seurat_no_dbl <- FindClusters(object = h5_seurat_no_dbl,
resolution = umap_resolution,
algorithm = 1,
n.iter = 1000)
## Run UMAP
h5_seurat_no_dbl <- RunUMAP(object = h5_seurat_no_dbl,
reduction = "pca",
dims = 1:30)
Filtered Dim Plots
### Main Plot
p1 <- DimPlot(object = h5_seurat_no_dbl, reduction = "umap", label = TRUE, pt.size = 0.5) + theme(legend.position="none")
ggsave(file = "outputs/viz_cluster/11_DIMPLOT_labels_filt.pdf")
### Per Sample Plots
p2 <- DimPlot(object = h5_seurat_no_dbl, reduction = "umap", label = FALSE, pt.size = 0.5, split.by="sample")
ggsave(file = "outputs/viz_cluster/12_DIMPLOT_sample_filt.pdf", width = 12)
Save RDS
saveRDS(h5_seurat_no_dbl, "outputs/5_filtered_integrated_norris.rds")