170109 Finishing up - npslindstrom/DE-analysis GitHub Wiki

Today we had a project meeting in the morning where we decided how we wanted our poster to look like and what graphs and comparisons we should include. Since the quality of the STAR-alignment is questionable and neither of us can understand why we decided to use the tophat counts to compare the differential expression packages DESeq, DESeq2 and limma. The STAR-alignment will be compared to the tophat-alignment in some way, potentially by looking at the resulting diff-expression results.

Anyway, I ran the finalized limma script to generate the comparisons to a common project folder.

The script is:

library(limma) library(edgeR)

data_orig = read.table("/glob/nilsl/lncRNA_proj/tophat_counts/count_table.txt", header = TRUE)

# Comparison 1: SW620 vs SW480

data = data_orig

data = data[,-5:-18]

group <- factor(c("SW620" ,"SW620" ,"SW480" ,"SW480"))

design = model.matrix(~ group)

dge <- DGEList(counts = data)

dge <- calcNormFactors(dge)

logCPM <- cpm(dge, log=TRUE, prior.count=3)

fit <- lmFit(logCPM, design)

fit <- eBayes(fit, trend=TRUE)

res_table = topTable(fit, number = nrow(fit), coef=ncol(design))

write.csv2(res_table, "limma_comp1_SW620_vs_SW480.csv")

# Comparison 2: SW480+ER vs SW480

data = data_orig

data = data[,-11:-18] data = data[,-5:-8] data = data[,-1:-2]

group <- factor(c("SW480" ,"SW480" ,"SW480+ER" ,"SW480+ER"))

design = model.matrix(~ group)

dge <- DGEList(counts = data)

dge <- calcNormFactors(dge)

logCPM <- cpm(dge, log=TRUE, prior.count=3)

fit <- lmFit(logCPM, design)

fit <- eBayes(fit, trend=TRUE)

res_table = topTable(fit, number = nrow(fit), coef=ncol(design))

write.csv2(res_table, "limma_comp2_SW480_vs_SW480+ER.csv")

# Comparison 3: SW620+ER+E2 vs. SW620+ER+DMSO

data = data_orig

data = data[,-17:-18] data = data[,-1:-12]

group <- factor(c("SW620+ER+DMSO" ,"SW620+ER+DMSO" ,"SW620+ER+E2" ,"SW620+ER+E2"))

design = model.matrix(~ group)

dge <- DGEList(counts = data)

dge <- calcNormFactors(dge)

logCPM <- cpm(dge, log=TRUE, prior.count=3)

fit <- lmFit(logCPM, design)

fit <- eBayes(fit, trend=TRUE)

res_table = topTable(fit, number = nrow(fit), coef=ncol(design))

write.csv2(res_table, "limma_comp3_SW620+ER+DMSO_vs_SW620+ER+E2.csv")

# Comparison 4: SW620 vs SW620+ER

data = data_orig

data = data[,-7:-18] data = data[,-3:-4]

group <- factor(c("SW620" ,"SW620" ,"SW620+ER" ,"SW620+ER"))

design = model.matrix(~ group)

dge <- DGEList(counts = data)

dge <- calcNormFactors(dge)

logCPM <- cpm(dge, log=TRUE, prior.count=3)

fit <- lmFit(logCPM, design)

fit <- eBayes(fit, trend=TRUE)

res_table = topTable(fit, number = nrow(fit), coef=ncol(design))

write.csv2(res_table, "limma_comp4_SW620_vs_SW620+ER.csv")