Seurat - MattHuff/scRNASeq_011224 GitHub Wiki

Load Packages and Set Working Directory

# Load Packages
library(Seurat)
library(SeuratData)
library(dplyr)
library(plyr)
library(hdf5r)
library(patchwork)
library(sctransform)
library(ggplot2)
library(cowplot)
library(stringr)
library(clustree)
library(harmony)

# Set Working Directory
setwd("~/Downloads/scRNASeq_011224/5_Seurat/")
dir.create("outputs", recursive = T)

Run Seurat

Obtain Seurat Objects for All Samples

file_names <- list.files(path = ".", pattern = "\\.h5", full.names = TRUE)
h5_read <- lapply(file_names, Read10X_h5)
h5_seurat <- lapply(h5_read, CreateSeuratObject)
h5_seurat <- merge(h5_seurat[[1]], y = h5_seurat[2:length(h5_seurat)], add.cell.ids = c("1", "2", "3", "4"), project = "jordan_011923")
h5_seurat <- JoinLayers(h5_seurat)

# Add sample column to metadata
sample <- str_split_i(colnames(h5_seurat), pattern = "_", 1)
h5_seurat$sample <- sample

Detect Mitochondrial Gene Percentages

h5_seurat[['pMito']] <- PercentageFeatureSet(h5_seurat, pattern = "^[Mm][Tt]-")
head(h5_seurat)

Save Unfiltered RDS Object

saveRDS(h5_seurat, file="./outputs/1_combined_unfiltered_seurat.rds")

If your analysis is interrupted, you can use an RDS file to save your progress up to this point. Load the RDS file with the following command:

h5_seurat <- readRDS(file="./katy_samples/combined_unfiltered_seurat.rds")

Data Visualization

## Create Directory to Store Visualizations
qcviz_path = "outputs/viz_qc"
if(!dir.exists(qcviz_path)){dir.create(qcviz_path, recursive = T)}

## QC Plots - Default
pdf(file = paste0(qcviz_path, '/01_qc_plot_rna.pdf'), width = 15)
VlnPlot(h5_seurat, features = c('nCount_RNA', 'nFeature_RNA', 'pMito'),
        group.by = 'sample')
invisible(dev.off())

## QC Plot - Dots Removed
pdf(file = paste0(qcviz_path, '/01a_qc_plot_rna.pdf'), width = 15)
VlnPlot(h5_seurat, features = c('nCount_RNA', 'nFeature_RNA', 'pMito'), pt.size = 0,
        group.by = 'sample') + theme(legend.position = 'none')
invisible(dev.off())

## Density Plot
pdf(file = paste0(qcviz_path, '/01b_density_plot_rna.pdf'))
[email protected] %>%
  ggplot(aes(color=sample, x=nCount_RNA, fill= sample)) +
  geom_density(alpha = 0.2) +
  scale_x_log10() +
  theme_classic() +
  ylab("Cell density")
invisible(dev.off())

## Feature Scatter Plots
### RNA against Genes
pdf(file = paste0(qcviz_path, '/02_qc_plot_umi-gene.pdf'))
FeatureScatter(h5_seurat, "nCount_RNA", "nFeature_RNA", group.by = "sample", pt.size = 0)
invisible(dev.off())

### RNA against % Mt
pdf(file = paste0(qcviz_path, '/03_qc_plot_numi-pmito.pdf'))
FeatureScatter(h5_seurat, "nCount_RNA", "pMito", group.by = "sample", pt.size = 0)
invisible(dev.off())

## Plot correlation of genes detected and RNA counts
scatter <- [email protected] %>% 
  ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=pMito)) + 
  geom_point() + 
  scale_colour_gradient(low = "gray90", high = "black") +
  stat_smooth(method=lm) +
  scale_x_log10() + 
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = 500) +
  geom_hline(yintercept = 250) +
  facet_wrap(~sample)
ggsave(file = paste0(qcviz_path, "/05_qc_plot_correlation.pdf"))

Identify Samples to Filter Out

## Identify Data to be Filtered per Sample
for (samp in unique(h5_seurat$sample)){
  tmp <- as.data.frame([email protected]) %>%
    filter(sample == samp)
  
  ### percent Mito > 20
  tmp <- tmp %>% mutate(pmito_out = pMito > 20)
  filt <- as.data.frame(table(tmp$pmito_out))
  colnames(filt) <- c('Var1', paste0('percent_mito > ', 20, '%'))
  
  ### nCount RNA > 10,000
  tmp <- tmp %>% mutate(numi_out = tmp$nCount_RNA > 10000)
  u_filt <- as.data.frame(table(tmp$numi_out))
  colnames(u_filt) <- c('Var1', paste0('nCount_RNA > ', 10000))
  
  filtering <- merge(filt, u_filt, by='Var1')
  
  ### Find all cells to be filtered out
  tmp <- tmp %>% mutate(all_out = nCount_RNA > 10000 | pMito > 20)
  all_filt <- as.data.frame(table(tmp$all_out))
  colnames(all_filt) <- c('Var1', 'Total')
  
  filtering <- merge(filtering, all_filt, by='Var1')
  
  filtering$Var1 <- plyr::mapvalues(filtering$Var1,
                                    from = c('FALSE', 'TRUE'),
                                    to = c('Remaining', 'To be filtered out'))
  rownames(filtering) <- filtering$Var1
  filtering <- filtering[,-1]
  
  write.csv(filtering, file = paste0(qcviz_path, "/pm_numi_all_counts_", samp, ".csv"))
}

## Create Combined Table
### percent mito > 20
[email protected] <- [email protected] %>% mutate(mito_filter_out = pMito > 20)
filt <- as.data.frame(table([email protected]$mito_filter_out))
colnames(filt) <- c('Var1', paste0('percent_mito > ', 20, '%'))

### nUMI > 10,000
[email protected] <- [email protected] %>% mutate(numi_out = nCount_RNA > 10000)
u_filt <- as.data.frame(table([email protected]$numi_out))
colnames(u_filt) <- c('Var1', paste0('nCount_RNA > ', 10000))

filtering <- merge(filt, u_filt, by='Var1')

### All cells to be filtered out
[email protected] <- [email protected] %>%
  mutate(all_out = (nCount_RNA > 10000) | (pMito > 20))
all_filt <- as.data.frame(table([email protected]$all_out))
colnames(all_filt) <- c('Var1', 'Total')

filtering <- merge(filtering, all_filt, by="Var1")

filtering$Var1 <- mapvalues(filtering$Var1,
                            from = c('FALSE', 'TRUE'),
                            to = c('Remaining', 'To be filtered out'))
rownames(filtering) <- filtering$Var1
filtering <- filtering[,-1]

write.csv(filtering, file = paste0(qcviz_path, "/pm_numi_all_counts.csv"))

Filter Out Data

nUMI_thresh <- 10000
mito_thresh <- 20

mito_filtered <- LayerData(h5_seurat, assay = "RNA", layer = "counts")[str_subset(rownames(LayerData(h5_seurat, assay = "RNA", layer = "counts")),
                                                                                  pattern = "^[Mm][Tt]",
                                                                                  negate = TRUE),]

h5_seurat_final <- CreateSeuratObject(counts = mito_filtered, project = "katy_integrated")

h5_seurat_final <- AddMetaData(h5_seurat_final, metadata = as.data.frame([email protected]))

h5_seurat_filtered <- subset(h5_seurat_final,
                             subset = nCount_RNA < nUMI_thresh & pMito < mito_thresh)

rm(h5_seurat, h5_seurat_final, mito_filtered)

df <- [email protected] %>% as.data.frame()
tmp <- table(df$sample) %>% as.data.frame()

ggplot(tmp,aes(x=Var1, y=Freq, fill=Var1)) + 
  geom_bar(stat="identity") +
  geom_text(aes(label=Freq), vjust=-0.3, size=3.5) +
  ggtitle("Gene Expression by Sample") +
  xlab("Sample") +
  ylab("Gene Expression") +
  theme(legend.position="none")
ggsave(file = paste0(qcviz_path, "/06_qc_gex_bar_plot.pdf"))

This would be a good place to create another RDS file:

saveRDS(h5_seurat_filtered, file = "outputs//2_filtered_seurat_pre_int.rds")

Integrate Seurat Data

## Split Seurat objects
h5_seurat_split <- SplitObject(h5_seurat_filtered, split.by = "sample")

## Run SCTransform on each individual object
for (i in 1:length(h5_seurat_split)) {
  h5_seurat_split[[i]] <- SCTransform(h5_seurat_split[[i]],
                                      vars.to.regress = c("nCount_RNA", "pMito"),
                                      verbose = T)
}

## Find integration features
integ_features <- SelectIntegrationFeatures(object.list = h5_seurat_split, nFeatures = 4000)

## Run prep SCT integration
h5_seurat_split <- PrepSCTIntegration(object.list = h5_seurat_split,
                                      anchor.features = integ_features)

## Find integration anchors
integ_anchors <- FindIntegrationAnchors(object.list = h5_seurat_split,
                                        normalization.method = "SCT",
                                        anchor.features = integ_features)

## Integrate
h5_seurat_integrated <- IntegrateData(
  anchorset = integ_anchors,
  new.assay.name = "integrated",
  normalization.method = "SCT",
  dims = 1:30,
  k.weight = 100,
  sd.weight = 1,
  eps = 0.5,
  verbose = T
)

DefaultAssay(h5_seurat_integrated) <- 'integrated'

Add Condition Information

# Add Condition Information
[email protected] <- [email protected] |>
  dplyr::mutate(condition = dplyr::case_when(sample %in% c("1", "3") ~ "Pap",
                                             sample %in% c("2", "4") ~"Septum",
                                             .default = "no_condition"))

# Save Integrated RDS
saveRDS(h5_seurat_integrated, file = "outputs/3_jordan_seurat_integrated.rds")

Identify Sample Clusters

## Create Directory
clviz_path="outputs/viz_cluster"
if(!dir.exists(clviz_path)){dir.create(clviz_path,recursive = T)}

## Set Map Resolution
umap_resolution <- c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2)

## Set Default Assay
DefaultAssay(h5_seurat_integrated) <- 'integrated'

## Run PCA
h5_seurat_integrated <- RunPCA(h5_seurat_integrated,
                               features = NULL,
                               weight.by.var = TRUE,
                               ndims.print = 1:5,
                               nfeatures.print = 30,
                               npcs = 50,
                               reduction.name = "pca")

## Perform Batch Correction
h5_seurat_integrated <- RunHarmony(h5_seurat_integrated, "sample")

## Find Neighbors
h5_seurat_integrated <- FindNeighbors(object = h5_seurat_integrated, 
                                      reduction = "harmony", 
                                      dims = 1:50, 
                                      nn.eps = 0.5)

## Find Clusters using Resolution Object
h5_seurat_integrated <- FindClusters(h5_seurat_integrated,
                                     resolution = umap_resolution,
                                     algorithm = 1,
                                     n.iter = 1000)

## Run Clustree and Plot Clusters
clustree(h5_seurat_integrated, prefix = "integrated_snn_res.", node_colour = "sc3_stability")
ggsave(file = paste0(clviz_path, "/07_clustree.pdf"), height = 12, width = 8)

## Set Resolution - Typically 0.6, but this may vary.
Idents(h5_seurat_integrated) <- "integrated_snn_res.0.6"

## Run UMap
h5_seurat_integrated <- RunUMAP(h5_seurat_integrated, reduction = "harmony", dims = 1:50)

Visualize Clusters

### Plot with Labels
p1 <- DimPlot(object = h5_seurat_integrated, reduction = "umap", label = TRUE, pt.size = 0.5) + theme(legend.position="none")
ggsave(file = paste0(clviz_path, "/08_DIMPLOT_labels.pdf"))

### Plot with Samples
p2 <- DimPlot(object = h5_seurat_integrated, reduction = "umap", label = FALSE, pt.size = 0.5, group.by="sample")
ggsave(file = paste0(clviz_path, "/09_DIMPLOT_sample.pdf"))

Checkpoint: Save RDS

saveRDS(h5_seurat_integrated, file="outputs/4_processed_integrated_norris.rds")

Seurat - MattHuff/scRNASeq_011224 GitHub Wiki

Load Packages and Set Working Directory

Run Seurat

Obtain Seurat Objects for All Samples

Detect Mitochondrial Gene Percentages

Save Unfiltered RDS Object

Data Visualization

Identify Samples to Filter Out

Filter Out Data

Integrate Seurat Data

Add Condition Information

Identify Sample Clusters

Visualize Clusters

Checkpoint: Save RDS

⚠️ **GitHub.com Fallback** ⚠️

⚠️ GitHub.com Fallback ⚠️