Seurat - MattHuff/scRNASeq_011224 GitHub Wiki
Previous Seurat Documentation.
# Load Packages
library(Seurat)
library(SeuratData)
library(dplyr)
library(plyr)
library(hdf5r)
library(patchwork)
library(sctransform)
library(ggplot2)
library(cowplot)
library(stringr)
library(clustree)
library(harmony)
# Set Working Directory
setwd("~/Downloads/scRNASeq_011224/5_Seurat/")
dir.create("outputs", recursive = T)
file_names <- list.files(path = ".", pattern = "\\.h5", full.names = TRUE)
h5_read <- lapply(file_names, Read10X_h5)
h5_seurat <- lapply(h5_read, CreateSeuratObject)
h5_seurat <- merge(h5_seurat[[1]], y = h5_seurat[2:length(h5_seurat)], add.cell.ids = c("1", "2", "3", "4"), project = "jordan_011923")
h5_seurat <- JoinLayers(h5_seurat)
# Add sample column to metadata
sample <- str_split_i(colnames(h5_seurat), pattern = "_", 1)
h5_seurat$sample <- sample
h5_seurat[['pMito']] <- PercentageFeatureSet(h5_seurat, pattern = "^[Mm][Tt]-")
head(h5_seurat)
saveRDS(h5_seurat, file="./outputs/1_combined_unfiltered_seurat.rds")
If your analysis is interrupted, you can use an RDS file to save your progress up to this point. Load the RDS file with the following command:
h5_seurat <- readRDS(file="./katy_samples/combined_unfiltered_seurat.rds")
## Create Directory to Store Visualizations
qcviz_path = "outputs/viz_qc"
if(!dir.exists(qcviz_path)){dir.create(qcviz_path, recursive = T)}
## QC Plots - Default
pdf(file = paste0(qcviz_path, '/01_qc_plot_rna.pdf'), width = 15)
VlnPlot(h5_seurat, features = c('nCount_RNA', 'nFeature_RNA', 'pMito'),
group.by = 'sample')
invisible(dev.off())
## QC Plot - Dots Removed
pdf(file = paste0(qcviz_path, '/01a_qc_plot_rna.pdf'), width = 15)
VlnPlot(h5_seurat, features = c('nCount_RNA', 'nFeature_RNA', 'pMito'), pt.size = 0,
group.by = 'sample') + theme(legend.position = 'none')
invisible(dev.off())
## Density Plot
pdf(file = paste0(qcviz_path, '/01b_density_plot_rna.pdf'))
[email protected] %>%
ggplot(aes(color=sample, x=nCount_RNA, fill= sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density")
invisible(dev.off())
## Feature Scatter Plots
### RNA against Genes
pdf(file = paste0(qcviz_path, '/02_qc_plot_umi-gene.pdf'))
FeatureScatter(h5_seurat, "nCount_RNA", "nFeature_RNA", group.by = "sample", pt.size = 0)
invisible(dev.off())
### RNA against % Mt
pdf(file = paste0(qcviz_path, '/03_qc_plot_numi-pmito.pdf'))
FeatureScatter(h5_seurat, "nCount_RNA", "pMito", group.by = "sample", pt.size = 0)
invisible(dev.off())
## Plot correlation of genes detected and RNA counts
scatter <- [email protected] %>%
ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=pMito)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~sample)
ggsave(file = paste0(qcviz_path, "/05_qc_plot_correlation.pdf"))
## Identify Data to be Filtered per Sample
for (samp in unique(h5_seurat$sample)){
tmp <- as.data.frame([email protected]) %>%
filter(sample == samp)
### percent Mito > 20
tmp <- tmp %>% mutate(pmito_out = pMito > 20)
filt <- as.data.frame(table(tmp$pmito_out))
colnames(filt) <- c('Var1', paste0('percent_mito > ', 20, '%'))
### nCount RNA > 10,000
tmp <- tmp %>% mutate(numi_out = tmp$nCount_RNA > 10000)
u_filt <- as.data.frame(table(tmp$numi_out))
colnames(u_filt) <- c('Var1', paste0('nCount_RNA > ', 10000))
filtering <- merge(filt, u_filt, by='Var1')
### Find all cells to be filtered out
tmp <- tmp %>% mutate(all_out = nCount_RNA > 10000 | pMito > 20)
all_filt <- as.data.frame(table(tmp$all_out))
colnames(all_filt) <- c('Var1', 'Total')
filtering <- merge(filtering, all_filt, by='Var1')
filtering$Var1 <- plyr::mapvalues(filtering$Var1,
from = c('FALSE', 'TRUE'),
to = c('Remaining', 'To be filtered out'))
rownames(filtering) <- filtering$Var1
filtering <- filtering[,-1]
write.csv(filtering, file = paste0(qcviz_path, "/pm_numi_all_counts_", samp, ".csv"))
}
## Create Combined Table
### percent mito > 20
[email protected] <- [email protected] %>% mutate(mito_filter_out = pMito > 20)
filt <- as.data.frame(table([email protected]$mito_filter_out))
colnames(filt) <- c('Var1', paste0('percent_mito > ', 20, '%'))
### nUMI > 10,000
[email protected] <- [email protected] %>% mutate(numi_out = nCount_RNA > 10000)
u_filt <- as.data.frame(table([email protected]$numi_out))
colnames(u_filt) <- c('Var1', paste0('nCount_RNA > ', 10000))
filtering <- merge(filt, u_filt, by='Var1')
### All cells to be filtered out
[email protected] <- [email protected] %>%
mutate(all_out = (nCount_RNA > 10000) | (pMito > 20))
all_filt <- as.data.frame(table([email protected]$all_out))
colnames(all_filt) <- c('Var1', 'Total')
filtering <- merge(filtering, all_filt, by="Var1")
filtering$Var1 <- mapvalues(filtering$Var1,
from = c('FALSE', 'TRUE'),
to = c('Remaining', 'To be filtered out'))
rownames(filtering) <- filtering$Var1
filtering <- filtering[,-1]
write.csv(filtering, file = paste0(qcviz_path, "/pm_numi_all_counts.csv"))
nUMI_thresh <- 10000
mito_thresh <- 20
mito_filtered <- LayerData(h5_seurat, assay = "RNA", layer = "counts")[str_subset(rownames(LayerData(h5_seurat, assay = "RNA", layer = "counts")),
pattern = "^[Mm][Tt]",
negate = TRUE),]
h5_seurat_final <- CreateSeuratObject(counts = mito_filtered, project = "katy_integrated")
h5_seurat_final <- AddMetaData(h5_seurat_final, metadata = as.data.frame([email protected]))
h5_seurat_filtered <- subset(h5_seurat_final,
subset = nCount_RNA < nUMI_thresh & pMito < mito_thresh)
rm(h5_seurat, h5_seurat_final, mito_filtered)
df <- [email protected] %>% as.data.frame()
tmp <- table(df$sample) %>% as.data.frame()
ggplot(tmp,aes(x=Var1, y=Freq, fill=Var1)) +
geom_bar(stat="identity") +
geom_text(aes(label=Freq), vjust=-0.3, size=3.5) +
ggtitle("Gene Expression by Sample") +
xlab("Sample") +
ylab("Gene Expression") +
theme(legend.position="none")
ggsave(file = paste0(qcviz_path, "/06_qc_gex_bar_plot.pdf"))
This would be a good place to create another RDS file:
saveRDS(h5_seurat_filtered, file = "outputs//2_filtered_seurat_pre_int.rds")
## Split Seurat objects
h5_seurat_split <- SplitObject(h5_seurat_filtered, split.by = "sample")
## Run SCTransform on each individual object
for (i in 1:length(h5_seurat_split)) {
h5_seurat_split[[i]] <- SCTransform(h5_seurat_split[[i]],
vars.to.regress = c("nCount_RNA", "pMito"),
verbose = T)
}
## Find integration features
integ_features <- SelectIntegrationFeatures(object.list = h5_seurat_split, nFeatures = 4000)
## Run prep SCT integration
h5_seurat_split <- PrepSCTIntegration(object.list = h5_seurat_split,
anchor.features = integ_features)
## Find integration anchors
integ_anchors <- FindIntegrationAnchors(object.list = h5_seurat_split,
normalization.method = "SCT",
anchor.features = integ_features)
## Integrate
h5_seurat_integrated <- IntegrateData(
anchorset = integ_anchors,
new.assay.name = "integrated",
normalization.method = "SCT",
dims = 1:30,
k.weight = 100,
sd.weight = 1,
eps = 0.5,
verbose = T
)
DefaultAssay(h5_seurat_integrated) <- 'integrated'
# Add Condition Information
[email protected] <- [email protected] |>
dplyr::mutate(condition = dplyr::case_when(sample %in% c("1", "3") ~ "Pap",
sample %in% c("2", "4") ~"Septum",
.default = "no_condition"))
# Save Integrated RDS
saveRDS(h5_seurat_integrated, file = "outputs/3_jordan_seurat_integrated.rds")
## Create Directory
clviz_path="outputs/viz_cluster"
if(!dir.exists(clviz_path)){dir.create(clviz_path,recursive = T)}
## Set Map Resolution
umap_resolution <- c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2)
## Set Default Assay
DefaultAssay(h5_seurat_integrated) <- 'integrated'
## Run PCA
h5_seurat_integrated <- RunPCA(h5_seurat_integrated,
features = NULL,
weight.by.var = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
npcs = 50,
reduction.name = "pca")
## Perform Batch Correction
h5_seurat_integrated <- RunHarmony(h5_seurat_integrated, "sample")
## Find Neighbors
h5_seurat_integrated <- FindNeighbors(object = h5_seurat_integrated,
reduction = "harmony",
dims = 1:50,
nn.eps = 0.5)
## Find Clusters using Resolution Object
h5_seurat_integrated <- FindClusters(h5_seurat_integrated,
resolution = umap_resolution,
algorithm = 1,
n.iter = 1000)
## Run Clustree and Plot Clusters
clustree(h5_seurat_integrated, prefix = "integrated_snn_res.", node_colour = "sc3_stability")
ggsave(file = paste0(clviz_path, "/07_clustree.pdf"), height = 12, width = 8)
## Set Resolution - Typically 0.6, but this may vary.
Idents(h5_seurat_integrated) <- "integrated_snn_res.0.6"
## Run UMap
h5_seurat_integrated <- RunUMAP(h5_seurat_integrated, reduction = "harmony", dims = 1:50)
### Plot with Labels
p1 <- DimPlot(object = h5_seurat_integrated, reduction = "umap", label = TRUE, pt.size = 0.5) + theme(legend.position="none")
ggsave(file = paste0(clviz_path, "/08_DIMPLOT_labels.pdf"))
### Plot with Samples
p2 <- DimPlot(object = h5_seurat_integrated, reduction = "umap", label = FALSE, pt.size = 0.5, group.by="sample")
ggsave(file = paste0(clviz_path, "/09_DIMPLOT_sample.pdf"))
saveRDS(h5_seurat_integrated, file="outputs/4_processed_integrated_norris.rds")