170109 Finishing up - npslindstrom/DE-analysis GitHub Wiki
Today we had a project meeting in the morning where we decided how we wanted our poster to look like and what graphs and comparisons we should include. Since the quality of the STAR-alignment is questionable and neither of us can understand why we decided to use the tophat counts to compare the differential expression packages DESeq, DESeq2 and limma. The STAR-alignment will be compared to the tophat-alignment in some way, potentially by looking at the resulting diff-expression results.
Anyway, I ran the finalized limma script to generate the comparisons to a common project folder.
The script is:
library(limma)
library(edgeR)
data_orig = read.table("/glob/nilsl/lncRNA_proj/tophat_counts/count_table.txt", header = TRUE)
# Comparison 1: SW620 vs SW480
data = data_orig
data = data[,-5:-18]
group <- factor(c("SW620" ,"SW620" ,"SW480" ,"SW480"))
design = model.matrix(~ group)
dge <- DGEList(counts = data)
dge <- calcNormFactors(dge)
logCPM <- cpm(dge, log=TRUE, prior.count=3)
fit <- lmFit(logCPM, design)
fit <- eBayes(fit, trend=TRUE)
res_table = topTable(fit, number = nrow(fit), coef=ncol(design))
write.csv2(res_table, "limma_comp1_SW620_vs_SW480.csv")
# Comparison 2: SW480+ER vs SW480
data = data_orig
data = data[,-11:-18]
data = data[,-5:-8]
data = data[,-1:-2]
group <- factor(c("SW480" ,"SW480" ,"SW480+ER" ,"SW480+ER"))
design = model.matrix(~ group)
dge <- DGEList(counts = data)
dge <- calcNormFactors(dge)
logCPM <- cpm(dge, log=TRUE, prior.count=3)
fit <- lmFit(logCPM, design)
fit <- eBayes(fit, trend=TRUE)
res_table = topTable(fit, number = nrow(fit), coef=ncol(design))
write.csv2(res_table, "limma_comp2_SW480_vs_SW480+ER.csv")
# Comparison 3: SW620+ER+E2 vs. SW620+ER+DMSO
data = data_orig
data = data[,-17:-18]
data = data[,-1:-12]
group <- factor(c("SW620+ER+DMSO" ,"SW620+ER+DMSO" ,"SW620+ER+E2" ,"SW620+ER+E2"))
design = model.matrix(~ group)
dge <- DGEList(counts = data)
dge <- calcNormFactors(dge)
logCPM <- cpm(dge, log=TRUE, prior.count=3)
fit <- lmFit(logCPM, design)
fit <- eBayes(fit, trend=TRUE)
res_table = topTable(fit, number = nrow(fit), coef=ncol(design))
write.csv2(res_table, "limma_comp3_SW620+ER+DMSO_vs_SW620+ER+E2.csv")
# Comparison 4: SW620 vs SW620+ER
data = data_orig
data = data[,-7:-18]
data = data[,-3:-4]
group <- factor(c("SW620" ,"SW620" ,"SW620+ER" ,"SW620+ER"))
design = model.matrix(~ group)
dge <- DGEList(counts = data)
dge <- calcNormFactors(dge)
logCPM <- cpm(dge, log=TRUE, prior.count=3)
fit <- lmFit(logCPM, design)
fit <- eBayes(fit, trend=TRUE)
res_table = topTable(fit, number = nrow(fit), coef=ncol(design))
write.csv2(res_table, "limma_comp4_SW620_vs_SW620+ER.csv")