ScDblFinder - MattHuff/scRNASeq_011224 GitHub Wiki

This step identifies doublets, or errors in an scRNASeq experiment in which data from two or more cells are marked as from one cell.

Previous Doublet Documentation

R Set-up: Load Required Packages, Set Working Directory, and Load Seurat Object

If you are continuing directly from the previous step of documentation, then you do not need to load the RDS file. However, I always include in my documentation in case I need to start the analysis from a specific point.

# Load Packages
library(Seurat)
library(dplyr)
library(ggplot2)
library(cowplot)
library(clustree)
library(scDblFinder)
library(SingleCellExperiment)
library(harmony)

# Set Working Directory
setwd("~/Downloads/scRNASeq_011224/5_Seurat/")

# Load Seurat Object (Optional)
h5_seurat_integrated <- readRDS(file = "outputs/4_processed_integrated_norris.rds")
h5_seurat_integrated <- JoinLayers(h5_seurat_integrated)

Convert Seurat Object to Single Cell Experiment (SCE) Object

DefaultAssay(h5_seurat_integrated) <- "SCT"
h5_seurat_sct <- h5_seurat_integrated
h5_seurat_sct['RNA'](/MattHuff/scRNASeq_011224/wiki/'RNA') = NULL
sce <- as.SingleCellExperiment(h5_seurat_sct)

Run ScDblFinder

sce <- scDblFinder(sce, samples = "sample")

Obtain Class Counts

class_counts <- table(sce@colData@listData["scDblFinder.class"](/MattHuff/scRNASeq_011224/wiki/"scDblFinder.class"))
write.csv(class_counts, file = 'outputs//singlet-doublet_counts.csv')
print(class_counts)

Assign ScDblFinder Results to Original Seurat Object

h5_seurat_integrated['scDblFinder.class'](/MattHuff/scRNASeq_011224/wiki/'scDblFinder.class') <- sce@colData@listData["scDblFinder.class"](/MattHuff/scRNASeq_011224/wiki/"scDblFinder.class")
rm(h5_seurat_sct, sce)

Plot Singlets and Doublets

DimPlot(h5_seurat_integrated, group.by = "scDblFinder.class")
ggsave(file = 'outputs/viz_cluster/10_DIMPLOT_doublets.pdf')

Filter Out Doublets

h5_seurat_no_dbl <- subset(h5_seurat_integrated, subset = scDblFinder.class == 'singlet')

Rerun Standard Workflow

## Run PCA
DefaultAssay(h5_seurat_no_dbl) <- 'integrated'
h5_seurat_no_dbl <- RunPCA(object = h5_seurat_no_dbl,
                           features = NULL,
                           weight.by.var = TRUE,
                           ndims.print = 1:5,
                           nfeatures.print = 30,
                           npcs = 30,
                           reduction.name = "pca")

## Find Neighbors
h5_seurat_no_dbl <- FindNeighbors(object = h5_seurat_no_dbl,
                                  reduction = "pca",
                                  dims = 1:30,
                                  nn.eps = 0.5)

## Find Clusters
umap_resolution <- 0.6
h5_seurat_no_dbl <- FindClusters(object = h5_seurat_no_dbl,
                                 resolution = umap_resolution, 
                                 algorithm = 1,
                                 n.iter = 1000)

## Run UMAP
h5_seurat_no_dbl <- RunUMAP(object = h5_seurat_no_dbl,
                            reduction = "pca",
                            dims = 1:30)

Filtered Dim Plots

### Main Plot
p1 <- DimPlot(object = h5_seurat_no_dbl, reduction = "umap", label = TRUE, pt.size = 0.5) + theme(legend.position="none")
ggsave(file = "outputs/viz_cluster/11_DIMPLOT_labels_filt.pdf")

### Per Sample Plots
p2 <- DimPlot(object = h5_seurat_no_dbl, reduction = "umap", label = FALSE, pt.size = 0.5, split.by="sample")
ggsave(file = "outputs/viz_cluster/12_DIMPLOT_sample_filt.pdf", width = 12)

Save RDS

saveRDS(h5_seurat_no_dbl, "outputs/5_filtered_integrated_norris.rds")