- 데이터 클린업 및 준비
# Working-directory setup.
getwd()
workingDir <- "."
setwd(workingDir)
library(WGCNA)
# Not sure why, but the tutorial says this option has to be set.
options(stringsAsFactors = FALSE)
# Read the female and male liver expression tables.
femData <- read.csv("LiverFemale3600.csv")
maleData <- read.csv("LiverMale3600.csv")
# Quick inspection of both data sets (caution: the output is fairly long).
dim(femData)
names(femData)
dim(maleData)
names(maleData)
# Number of sample sets (two in this analysis).
nSets <- 2
# Human-readable names for each set; they are used later when plotting.
setLabels <- c("Female liver", "Male liver")
shortLabels <- c("Female", "Male")
# Build the multi-set expression structure. In the input tables the actual
# expression values start at the 9th column, so columns 1:8 are dropped before
# transposing.
multiExpr <- vector(mode = "list", length = nSets)
multiExpr[[1]] <- list(data = as.data.frame(t(femData[-c(1:8)])))
# Column names come from the substanceBXH column of the input table.
names(multiExpr[[1]]$data) <- femData$substanceBXH
# Row names come from the names of the retained input columns.
rownames(multiExpr[[1]]$data) <- names(femData)[-c(1:8)]
multiExpr[[2]] <- list(data = as.data.frame(t(maleData[-c(1:8)])))
names(multiExpr[[2]]$data) <- maleData$substanceBXH
rownames(multiExpr[[2]]$data) <- names(maleData)[-c(1:8)]
# Check that the data has the correct format for many functions operating on
# multiple sets:
exprSize <- checkSets(multiExpr)
# Check that all genes and samples have sufficiently low numbers of missing
# values. This verifies the data sets and, if anything is flagged, performs the
# quality-control cleanup below.
gsg <- goodSamplesGenesMS(multiExpr, verbose = 3)
gsg$allOK
if (!gsg$allOK) {
  # Print information about the removed genes:
  if (sum(!gsg$goodGenes) > 0) {
    printFlush(paste(
      "Removing genes:",
      paste(names(multiExpr[[1]]$data)[!gsg$goodGenes], collapse = ", ")
    ))
  }
  for (set in seq_len(exprSize$nSets)) {
    if (sum(!gsg$goodSamples[[set]]) > 0) {
      printFlush(paste(
        "In set", setLabels[set], "removing samples",
        paste(rownames(multiExpr[[set]]$data)[!gsg$goodSamples[[set]]],
              collapse = ", ")
      ))
    }
    # Remove the offending genes and samples
    multiExpr[[set]]$data <-
      multiExpr[[set]]$data[gsg$goodSamples[[set]], gsg$goodGenes]
  }
  # Update exprSize
  exprSize <- checkSets(multiExpr)
}
# Cluster the samples of each set separately (average-linkage hierarchical
# clustering on the distances between rows of the expression data).
sampleTrees <- list()
for (set in seq_len(nSets)) {
  sampleTrees[[set]] <- hclust(dist(multiExpr[[set]]$data), method = "average")
}
# Visualize the clustering result.
# pdf(file = "Plots/SampleClustering.pdf", width = 12, height = 12);
# If the line above fails with
# "cannot open file './Plots/SampleClustering.pdf'", write to the current
# directory instead, as done here.
pdf("test.pdf", width = 12, height = 12)
par(mfrow = c(2, 1))
par(mar = c(0, 4, 2, 0))
for (set in seq_len(nSets)) {
  plot(sampleTrees[[set]],
       main = paste("Sample clustering on all genes in", setLabels[set]),
       xlab = "", sub = "", cex = 0.7)
}
dev.off()
# Look at the plot and pick the cut-off height on the tree.
# Choose the "base" cut height for the female data set
baseHeight <- 16
# Derive the cut heights of both sets from baseHeight.
# Adjust the cut height for the male data set for the number of samples
cutHeights <- c(baseHeight,
                baseHeight * exprSize$nSamples[2] / exprSize$nSamples[1])
# Redraw the dendrograms with the cut lines added. The line for the second set
# does not show up; to make it visible the cutHeights value has to be lowered.
# Re-plot the dendrograms including the cut lines
# The output directory is created first: pdf() cannot open the file when
# "Plots" does not exist.
if (!dir.exists("Plots")) {
  dir.create("Plots")
}
pdf(file = "Plots/SampleClustering.pdf", width = 12, height = 12)
par(mfrow = c(2, 1))
par(mar = c(0, 4, 2, 0))
for (set in seq_len(nSets)) {
  plot(sampleTrees[[set]],
       main = paste("Sample clustering on all genes in", setLabels[set]),
       xlab = "", sub = "", cex = 0.7)
  abline(h = cutHeights[set], col = "red")
}
dev.off()
# Remove outlier samples from each set.
for (set in seq_len(nSets)) {
  # Find clusters cut by the line
  # Cut the tree at the cutHeight chosen above and keep only the big cluster;
  # the largest cluster appears to be the one labeled 1.
  labels <- cutreeStatic(sampleTrees[[set]], cutHeight = cutHeights[set])
  # Keep the largest one (labeled by the number 1)
  keep <- (labels == 1)
  multiExpr[[set]]$data <- multiExpr[[set]]$data[keep, ]
}
collectGarbage()
# Check the size of the leftover data
exprSize <- checkSets(multiExpr)
exprSize
# nGenes and nSamples were passed to save() without ever being assigned; take
# them from the size summary returned by checkSets().
nGenes <- exprSize$nGenes
nSamples <- exprSize$nSamples
# Data setup is complete: store everything for the next part.
# This script never builds Traits, so only objects that actually exist are
# saved and the missing ones are reported, instead of save() failing outright.
saveNames <- c("multiExpr", "Traits", "nGenes", "nSamples", "setLabels",
               "shortLabels", "exprSize")
isDefined <- vapply(saveNames, exists, logical(1))
if (!all(isDefined)) {
  warning("Not saved because undefined: ",
          paste(saveNames[!isDefined], collapse = ", "), call. = FALSE)
}
save(list = saveNames[isDefined], file = "Consensus-dataInput.RData")
- Network construction and consensus module detection
# Register the working directory.
getwd()
workingDir <- "."
setwd(workingDir)
library(WGCNA)
# The following setting is important, do not omit.
# (Just do as instructed.)
options(stringsAsFactors = FALSE)
# Allow multi-threading within WGCNA.
# Caution: skip this line if you run RStudio or other third-party R environments.
# See note above.
# (Just do as instructed.)
enableWGCNAThreads()
# Load the data written in part 1.
lnames <- load(file = "Consensus-dataInput.RData")
# Note: the data itself does not end up in lnames. load() restores the saved
# variables under their own names, and lnames only receives the list of those
# variable names.
lnames
# Retrieve the number of prepared sets.
nSets <- checkSets(multiExpr)$nSets
## Actual network construction
# Choosing the soft-thresholding power: first define the list of candidate
# powers to scan.
powers <- c(seq(4, 10, by = 1), seq(12, 20, by = 2))
# Prepare powerTables. The usual R pattern is to create an empty container and
# then fill it, so the empty list is created first.
powerTables <- vector(mode = "list", length = nSets)
# Call the network topology analysis function for each set in turn
# pickSoftThreshold receives the candidate powers and the expression data of a
# set; the second element of its result is stored in powerTables.
# This appears to perform the scale-free topology analysis. The paper cites
# earlier work, so other studies on this topology analysis seem to exist.
for (set in seq_len(nSets)) {
  powerTables[[set]] <- list(
    data = pickSoftThreshold(multiExpr[[set]]$data, powerVector = powers,
                             verbose = 2)[[2]]
  )
}
collectGarbage()
# Visualization
colors <- c("black", "red")
# Will plot these columns of the returned scale free analysis tables
plotCols <- c(2, 5, 6, 7)
colNames <- c("Scale Free Topology Model Fit", "Mean connectivity",
              "Median connectivity", "Max connectivity")
# Get the minima and maxima of the plotted points
ylim <- matrix(NA, nrow = 2, ncol = 4)
for (set in seq_len(nSets)) {
  for (col in seq_along(plotCols)) {
    ylim[1, col] <- min(ylim[1, col],
                        powerTables[[set]]$data[, plotCols[col]], na.rm = TRUE)
    ylim[2, col] <- max(ylim[2, col],
                        powerTables[[set]]$data[, plotCols[col]], na.rm = TRUE)
  }
}
# Plot the quantities in the chosen columns vs. the soft thresholding power
sizeGrWindow(8, 6)
pdf("scaleFreeAnalysis.pdf", width = 8, height = 6)
par(mfcol = c(2, 2))
par(mar = c(4.2, 4.2, 2.2, 0.5))
cex1 <- 0.7
for (col in seq_along(plotCols)) {
  for (set in seq_len(nSets)) {
    fitTable <- powerTables[[set]]$data
    # Column 2 with the sign of column 3 flipped; presumably the fit R^2 signed
    # by the slope -- confirm against the pickSoftThreshold documentation.
    signedFit <- -sign(fitTable[, 3]) * fitTable[, 2]
    if (set == 1) {
      # The first set only opens an empty panel (type = "n") with shared limits.
      plot(fitTable[, 1], signedFit,
           xlab = "Soft Threshold (power)", ylab = colNames[col], type = "n",
           ylim = ylim[, col], main = colNames[col])
      addGrid()
    }
    if (col == 1) {
      text(fitTable[, 1], signedFit,
           labels = powers, cex = cex1, col = colors[set])
      legend("bottomright", legend = setLabels, col = colors, pch = 20)
    } else {
      text(fitTable[, 1], fitTable[, plotCols[col]],
           labels = powers, cex = cex1, col = colors[set])
      legend("topright", legend = setLabels, col = colors, pch = 20)
    }
  }
}
dev.off()
# Based on the plot above, power 6 is chosen and the network is built.
net <- blockwiseConsensusModules(
  multiExpr, power = 6, minModuleSize = 30, deepSplit = 2,
  pamRespectsDendro = FALSE,
  mergeCutHeight = 0.25, numericLabels = TRUE,
  minKMEtoStay = 0,
  saveTOMs = TRUE, verbose = 5
)
# Network visualization: show each module through the tree and its color.
consMEs <- net$multiMEs
moduleLabels <- net$colors
# Convert the numeric labels to color labels
moduleColors <- labels2colors(moduleLabels)
# Only the first element of net$dendrograms is plotted.
consTree <- net$dendrograms[[1]]
sizeGrWindow(8, 6)
pdf("ConsensusDendrogram-auto.pdf", width = 8, height = 6)
plotDendroAndColors(consTree, moduleColors,
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = "Consensus gene dendrogram and module colors")
dev.off()
# Save the network results produced above.
save(consMEs, moduleLabels, moduleColors, consTree,
     file = "Consensus-NetworkConstruction-auto.RData")
- sample group specific module 가져오기
# Working-directory setup.
getwd()
workingDir <- "."
setwd(workingDir)
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
# Restore the objects saved in the first part; load() returns their names.
lnames <- load(file = "Consensus-dataInput.RData")
# Show which variables were restored.
lnames
# Restore the results of network analysis, tutorial part 2.a
lnames <- load(file = "Consensus-NetworkConstruction-auto.RData")
lnames