WGCNA - k821209/pipelines GitHub Wiki

  1. ๋ฐ์ดํ„ฐ ํด๋ฆฐ์—… ๋ฐ ์ค€๋น„
#์›Œํ‚น๋””๋ ‰ํ† ๋ฆฌ ์„ธํŒ…
getwd();
workingDir = ".";
setwd(workingDir); 

library(WGCNA);

# ์™ ์ง€ ๋ชจ๋ฅด์ง€๋งŒ ํ•ด์•ผ๋œ๋‹คํ•จ 
options(stringsAsFactors = FALSE);


femData = read.csv("LiverFemale3600.csv");
maleData = read.csv("LiverMale3600.csv");

# Take a quick look at what is in the data sets (caution, longish output):
dim(femData)
names(femData)
dim(maleData)
names(maleData)

# ์ƒ˜ํ”Œ ์„ธํŠธ๊ฐ€ ๋‘๊ฐœ์ผ ๊ฒฝ์šฐ
nSets = 2;

# ๊ฐ ์ƒ˜ํ”Œ์˜ ๋Œ€ํ‘œ ์ด๋ฆ„์„ ์ •ํ•ด์ค€๋‹ค. ์ดํ›„ ํ”Œ๋กฏํŒ…์— ์‚ฌ์šฉ๋  ์˜ˆ์ •
setLabels = c("Female liver", "Male liver")
shortLabels = c("Female", "Male")

# ๋ฐ์ดํ„ฐ์„ธํŠธ ๋งŒ๋“œ๋Š” ๊ณผ์ •, ์ž…๋ ฅ๋ฐ์ดํ„ฐ๋ฅผ ๋ณด๋ฉด  9๋ฒˆ์งธ ์—ด๋ถ€ํ„ฐ ์‹ค์ œ ๋ฐœํ˜„ ๋ฐ์ดํ„ฐ๊ฐ€ ์‹œ์ž‘๋จ 
multiExpr = vector(mode = "list", length = nSets)

multiExpr[1](/k821209/pipelines/wiki/1) = list(data = as.data.frame(t(femData[-c(1:8)]))); # 1:8๊นŒ์ง€๋ฅผ ๋นผ๋ผ๋Š” ์ด์•ผ๊ธฐ์ธ๋“ฏ 
names(multiExpr[1](/k821209/pipelines/wiki/1)$data) = femData$substanceBXH;                # ์—ด ์ด๋ฆ„ ์ •ํ•ด์คŒ 
rownames(multiExpr[1](/k821209/pipelines/wiki/1)$data) = names(femData)[-c(1:8)];          # ํ–‰ ์ด๋ฆ„ ์ •ํ•ด์คŒ 
multiExpr[2](/k821209/pipelines/wiki/2) = list(data = as.data.frame(t(maleData[-c(1:8)])));
names(multiExpr[2](/k821209/pipelines/wiki/2)$data) = maleData$substanceBXH;
rownames(multiExpr[2](/k821209/pipelines/wiki/2)$data) = names(maleData)[-c(1:8)];

# Check that the data has the correct format for many functions operating on multiple sets:
exprSize = checkSets(multiExpr)

# Check that all genes and samples have sufficiently low numbers of missing values.
# ๋ฐ์ดํ„ฐ์„ธํŠธ๊ฐ€ ๊ดœ์ฐฎ์€์ง€ ํ™•์ธํ•˜๊ณ  ๊ตฌ๋ฆฌ๋ฉด ํ€„๋ฆฌํ‹ฐ ์ปจํŠธ๋กค ํ•˜๋Š” ์Šคํฌ๋ฆฝํŠธ์ž„. 
gsg = goodSamplesGenesMS(multiExpr, verbose = 3);
gsg$allOK

if (!gsg$allOK)
{
  # Print information about the removed genes:
  if (sum(!gsg$goodGenes) > 0)
    printFlush(paste("Removing genes:", paste(names(multiExpr[1](/k821209/pipelines/wiki/1)$data)[!gsg$goodGenes], 
                                              collapse = ", ")))
  for (set in 1:exprSize$nSets)
  {
    if (sum(!gsg$goodSamples[set](/k821209/pipelines/wiki/set)))
      printFlush(paste("In set", setLabels[set], "removing samples",
                       paste(rownames(multiExpr[set](/k821209/pipelines/wiki/set)$data)[!gsg$goodSamples[set](/k821209/pipelines/wiki/set)], collapse = ", ")))
    # Remove the offending genes and samples
    multiExpr[set](/k821209/pipelines/wiki/set)$data = multiExpr[set](/k821209/pipelines/wiki/set)$data[gsg$goodSamples[set](/k821209/pipelines/wiki/set), gsg$goodGenes];
  }
  # Update exprSize
  exprSize = checkSets(multiExpr)
}

# ๊ฐ ์„ธํŠธ๋ณ„๋กœ ํด๋Ÿฌ์Šคํ„ฐ๋ง 
sampleTrees = list()
for (set in 1:nSets)
{
  sampleTrees[set](/k821209/pipelines/wiki/set) = hclust(dist(multiExpr[set](/k821209/pipelines/wiki/set)$data), method = "average")
}

# ํด๋Ÿฌ์Šคํ„ฐ๋ง ๊ฒฐ๊ณผ ์‹œ๊ฐํ™” 
# pdf(file = "Plots/SampleClustering.pdf", width = 12, height = 12);
# cannot open file './Plots/SampleClustering.pdf' ์—๋Ÿฌ๋‚œ๋‹ค๋ฉด ๋‹ค์Œ๊ณผ ๊ฐ™์ด. 
pdf(paste('test.pdf',sep = ''), width = 12, height = 12);
par(mfrow=c(2,1))
par(mar = c(0, 4, 2, 0))
for (set in 1:nSets)
  plot(sampleTrees[set](/k821209/pipelines/wiki/set), main = paste("Sample clustering on all genes in", setLabels[set]),
       xlab="", sub="", cex = 0.7);
dev.off();

# ๊ทธ๋ฆผ์„ ๋ณด๊ณ  ํŠธ๋ฆฌ์—์„œ ์ปท์˜คํ”„ ๋†’์ด๋ฅผ ์ •ํ•จ. 
# Choose the "base" cut height for the female data set
baseHeight = 16

# ์ด ๋‘๊ฐœ์˜ ์ƒ˜ํ”Œ์— ๋Œ€ํ•ด์„œ baseHeight๋ฅผ ์ •ํ•ด์ค€๋‹ค. 
# Adjust the cut height for the male data set for the number of samples
cutHeights = c(16, 16*exprSize$nSamples[2]/exprSize$nSamples[1]);

# ์ƒˆ๋กœ๊ทธ๋ฆผ. ์ปท ๋ผ์ธ์„ ๋„ฃ๋Š”๋‹ค. ๋‘๋ฒˆ์งธ ์ƒ˜ํ”Œ์€ ๋‚˜์˜ค์ง€ ์•Š๋Š”๋‹ค. ๋‘๋ฒˆ์งธ์ƒ˜ํ”Œ์— ์„ ์ด ๋‚˜์˜ค๊ฒŒ ํ•˜๋ ค๋ฉด cutHeights์˜ ๊ฐ’์„ ๋‚ด๋ ค์ค˜์•ผํ•จ. 
# Re-plot the dendrograms including the cut lines
pdf(file = "Plots/SampleClustering.pdf", width = 12, height = 12);
par(mfrow=c(2,1))
par(mar = c(0, 4, 2, 0))
for (set in 1:nSets)
{
  plot(sampleTrees[set](/k821209/pipelines/wiki/set), main = paste("Sample clustering on all genes in", setLabels[set]),
       xlab="", sub="", cex = 0.7);
  abline(h=cutHeights[set], col = "red");
}
dev.off();


# 
for (set in 1:nSets)
{
  # Find clusters cut by the line
  # ์œ„์—์„œ ์ •ํ•ด์ค€ cutHeight ๊ฐ’์œผ๋กœ ์ž๋ฅธ๋’ค ํฐ ํด๋Ÿฌ์Šคํ„ฐ๋งŒ ๋‚จ๊ธด๋‹ค. ํฐ ํด๋Ÿฌ์Šคํ„ฐ๊ฐ€ 1๋กœ label ๋˜๋‚˜๋ด„
  labels = cutreeStatic(sampleTrees[set](/k821209/pipelines/wiki/set), cutHeight = cutHeights[set])
  # Keep the largest one (labeled by the number 1)
  keep = (labels==1)
  multiExpr[set](/k821209/pipelines/wiki/set)$data = multiExpr[set](/k821209/pipelines/wiki/set)$data[keep, ]
}
collectGarbage();

# Check the size of the leftover data
exprSize = checkSets(multiExpr)
exprSize

# ๋ฐ์ดํ„ฐ ์ธํ’‹ ์ž‘์„ฑ์™„๋ฃŒ
save(multiExpr, Traits, nGenes, nSamples, setLabels, shortLabels, exprSize, 
     file = "Consensus-dataInput.RData");

  1. Network construction and consensus module detection
# ์›Œํ‚น๋””๋ ‰ํ† ๋ฆฌ ๋“ฑ๋ก
getwd();
workingDir = ".";
setwd(workingDir); 

library(WGCNA)

# The following setting is important, do not omit.
# ์‹œํ‚ค๋Š”๋Œ€๋กœ ํ•˜์ž. 
options(stringsAsFactors = FALSE);

# Allow multi-threading within WGCNA. 
# Caution: skip this line if you run RStudio or other third-party R environments.
# See note above.
# ์‹œํ‚ค๋Š”๋Œ€๋กœ ํ•˜์ž. 
enableWGCNAThreads()

# 1๋ฒˆ์—์„œ ์ž‘์„ฑํ–ˆ๋˜ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค์ž. 
lnames = load(file = "Consensus-dataInput.RData");

# ์›ƒ๊ธด๊ฑด ๋ฐ์ดํ„ฐ๊ฐ€ lnames์— ๋“ค์–ด๊ฐ€๋Š”๊ฒŒ ์•„๋‹ˆ๋ผ ๋ณ€์ˆ˜๋ช…๋“ค์ด ๋ถˆ๋Ÿฌ์™€์ง€๊ณ , ๋ณ€์ˆ˜๋ช… ๋ชฉ๋ก์ด lnames์— ๋“ค์–ด๊ฐ. 
lnames

# ์ค€๋น„ํ–‡๋˜ ์„ธํŠธ ์ˆ˜๋ฅผ ๋ถˆ๋Ÿฌ์˜ด. 
nSets = checkSets(multiExpr)$nSets


## ๋ณธ๊ฒฉ ๋„คํŠธ์›Œํฌ ์ž‘์„ฑ 
# power ๊ฐ’์„ ์ •ํ•˜๋Š” ๊ณผ์ •์ž„. ์ฐ”๋Ÿฌ๋ณผ power๊ฐ’ list๋ฅผ ์ •ํ•จ. 
powers = c(seq(4,10,by=1), seq(12,20, by=2));


# powerTable ์ค€๋น„ R์€ ๊ธฐ๋ณธ์ ์œผ๋กœ ๋นˆํ†ต์„ ๋งŒ๋“ค๊ณ  ์ฑ„์›Œ๋„ฃ๋Š” ์‹์ž„. ๋นˆํ†ต๋ถ€ํ„ฐ ๋งŒ๋“ค์–ด์•ผํ•จ 
powerTables = vector(mode = "list", length = nSets);

# Call the network topology analysis function for each set in turn
# pickSoftThreshold ๊ธฐ๋Šฅ์„ ์ด์šฉํ•ด์„œ ์ค€๋น„ํ•œ ์ฐ”๋Ÿฌ๋ณผ powers์™€ expression ๋ฐ์ดํ„ฐ๋ฅผ ์ง‘์–ด๋„ฃ๊ณ  powerTable์„ ์ฑ„์›Œ ๋„ฃ๋Š”๋‹ค. 
# ์š” ๊ธฐ๋Šฅ์„ ์ด์šฉํ•˜๋ฉด scale free topology ๋ถ„์„์ด ๋˜๋Š”๋“ฏํ•จ. ๋…ผ๋ฌธ์—์„œ๋„ ์„ ํ–‰์—ฐ๊ตฌ๋ฅผ ์–ธ๊ธ‰ํ•œ๊ฒƒ์œผ๋กœ ๋ณด์•„ topology๋ถ„์„์— ๋Œ€ํ•œ ๋‹ค๋ฅธ ์—ฐ๊ตฌ๊ฐ€ ์กด์žฌ 
for (set in 1:nSets)
  powerTables[set](/k821209/pipelines/wiki/set) = list(data = pickSoftThreshold(multiExpr[set](/k821209/pipelines/wiki/set)$data, powerVector=powers,
                                                     verbose = 2)[2](/k821209/pipelines/wiki/2));
collectGarbage();

# ์‹œ๊ฐํ™” 
colors = c("black", "red")

# Will plot these columns of the returned scale free analysis tables
plotCols = c(2,5,6,7)
colNames = c("Scale Free Topology Model Fit", "Mean connectivity", "Median connectivity",
"Max connectivity");
# Get the minima and maxima of the plotted points
ylim = matrix(NA, nrow = 2, ncol = 4);
for (set in 1:nSets)
{
  for (col in 1:length(plotCols))
  {
    ylim[1, col] = min(ylim[1, col], powerTables[set](/k821209/pipelines/wiki/set)$data[, plotCols[col]], na.rm = TRUE);
    ylim[2, col] = max(ylim[2, col], powerTables[set](/k821209/pipelines/wiki/set)$data[, plotCols[col]], na.rm = TRUE);
  }
}


# Plot the quantities in the chosen columns vs. the soft thresholding power
sizeGrWindow(8, 6)
pdf(paste("scaleFreeAnalysis.pdf",sep=''), wi = 8, he = 6)
par(mfcol = c(2,2));
par(mar = c(4.2, 4.2 , 2.2, 0.5))
cex1 = 0.7;
for (col in 1:length(plotCols)) for (set in 1:nSets)
{
  if (set==1)
  {
    plot(powerTables[set](/k821209/pipelines/wiki/set)$data[,1], -sign(powerTables[set](/k821209/pipelines/wiki/set)$data[,3])*powerTables[set](/k821209/pipelines/wiki/set)$data[,2],
         xlab="Soft Threshold (power)",ylab=colNames[col],type="n", ylim = ylim[, col],
         main = colNames[col]);
    addGrid();
  }
  if (col==1)
  {
    text(powerTables[set](/k821209/pipelines/wiki/set)$data[,1], -sign(powerTables[set](/k821209/pipelines/wiki/set)$data[,3])*powerTables[set](/k821209/pipelines/wiki/set)$data[,2],
         labels=powers,cex=cex1,col=colors[set]);
  } else
    text(powerTables[set](/k821209/pipelines/wiki/set)$data[,1], powerTables[set](/k821209/pipelines/wiki/set)$data[,plotCols[col]],
         labels=powers,cex=cex1,col=colors[set]);
  if (col==1)
  {
    legend("bottomright", legend = setLabels, col = colors, pch = 20) ;
  } else
    legend("topright", legend = setLabels, col = colors, pch = 20) ;
}
dev.off();

# ์œ—๊ทธ๋ฆผ์—์„œ power ๋ฅผ 6์œผ๋กœ ์ •ํ•˜๊ณ  ๊ทธ๋ฆผ์„ ๊ทธ๋ฆผ. 
net = blockwiseConsensusModules(
        multiExpr, power = 6, minModuleSize = 30, deepSplit = 2,
        pamRespectsDendro = FALSE, 
        mergeCutHeight = 0.25, numericLabels = TRUE,
        minKMEtoStay = 0,
        saveTOMs = TRUE, verbose = 5)

# ๋„คํŠธ์›Œํฌ ์‹œ๊ฐํ™”, ๊ฐ ๋ชจ๋“ˆ์„ ํŠธ๋ฆฌ์™€ ์ƒ‰์„ ํ†ตํ•ด ๋ณด์—ฌ์คŒ. 
consMEs = net$multiMEs;
moduleLabels = net$colors;
# Convert the numeric labels to color labels
moduleColors = labels2colors(moduleLabels)
consTree = net$dendrograms[1](/k821209/pipelines/wiki/1); 

sizeGrWindow(8,6);
pdf(paste("ConsensusDendrogram-auto.pdf",sep=''), wi = 8, he = 6)
plotDendroAndColors(consTree, moduleColors,
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = "Consensus gene dendrogram and module colors")

dev.off()

# ์ž‘์„ฑ๋œ ๋„คํŠธ์›Œํฌ ๋ฉ”ํŠธ๋ฆญ์Šค ์ €์žฅ 
save(consMEs, moduleLabels, moduleColors, consTree, file = "Consensus-NetworkConstruction-auto.RData")
  1. sample group specific module ๊ฐ€์ ธ์˜ค๊ธฐ
getwd();
workingDir = ".";
setwd(workingDir); 
library(WGCNA)

# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);

# Load the data saved in the first part
lnames = load(file = "Consensus-dataInput.RData");

#The variable lnames contains the names of loaded variables.
lnames
# Load the results of network analysis, tutorial part 2.a
lnames = load(file = "Consensus-NetworkConstruction-auto.RData");
lnames