GWAS - QuantGen/HPCC GitHub Wiki
GWAS using predefined traits
Prerequisites:
- Check if your trait of interest corresponds to one of the subdirectories in
/mnt/research/quantgen/projects/UKB/PIPELINE500/output/adjusted_phenotypes
and set thetrait
variable accordinglytrait <- "height"
Steps:
- Load libraries
library(BGData)
- Define paths
inputPath <- "/mnt/research/quantgen/projects/UKB/PIPELINE500/output" outputPath <- "..."
- Load cohort of unrelated whites
cohortPath <- paste0(inputPath, "/cohorts/whites_unrelated_0.05/calls/cohort.txt") samples <- scan(cohortPath, what = character(), quiet = TRUE)
- Load genotypes (as BGData object)
load.BGData(paste0(inputPath, "/BGData/whites/calls/BGData.RData")) # loads 'DATA'
- Load phenotypes
phenotypesPath <- paste0(inputPath, "/adjusted_phenotypes/", trait, "/whites_unrelated_0.05/calls/trait.tsv") adjustedPhenotypes <- read.table(phenotypesPath, header = TRUE, sep = "\t", stringsAsFactors = FALSE) adjustedPhenotypes["FID"](/QuantGen/HPCC/wiki/"FID") <- as.character(adjustedPhenotypes["FID"](/QuantGen/HPCC/wiki/"FID")) adjustedPhenotypes["IID"](/QuantGen/HPCC/wiki/"IID") <- as.character(adjustedPhenotypes["IID"](/QuantGen/HPCC/wiki/"IID"))
- Merge adjusted phenotypes into BGData object
DATA@pheno <- orderedMerge(DATA@pheno, adjustedPhenotypes)
- Perform GWAS
coeffs <- GWAS( formula = as.formula(paste0("adjusted_", trait, " ~ 1")), data = DATA, method = "rayOLS", i = samples, chunkSize = 2500, nCores = 1, verbose = TRUE ) # will take ~1.5 hours save(coeffs, file = paste0(outputPath, "/GWAS.RData"))
GWAS from scratch
- Load libraries
library(BGData)
- Define paths
inputPath <- "/mnt/research/quantgen/projects/UKB/PIPELINE500/output" outputPath <- "..."
- Load cohort of unrelated whites
cohortPath <- paste0(inputPath, "/cohorts/whites_unrelated_0.05/calls/cohort.txt") samples <- scan(cohortPath, what = character(), quiet = TRUE)
- Load phenotypes
phenotypesPath <- paste0(inputPath, "/phenotypes/whites/calls/phenotypes.RData") load(phenotypesPath) # loads 'phenotypes' -- this will take a while ...
- Adjust trait by sex, age, center, and first five PCs after filtering for outliers
batchesPath <- paste0(inputPath, "/batches/whites/calls/batches.RData") load(batchesPath) # loads 'batches' pcPath <- paste0(inputPath, "/PCs/whites/calls/SVD_5000.RData") load(pcPath) # loads 'SVD' adjustedPhenotypes <- data.frame( FID = phenotypes[samples, "eid"], IID = phenotypes[samples, "eid"], sex = phenotypes[samples, "Sex (0.0)"], age = phenotypes[samples, "Age when attended assessment centre (0.0)"], batch = batches[samples, "batch"], center = phenotypes[samples, "UK Biobank assessment centre (0.0)"], pc1 = SVD$u[samples, 1], pc2 = SVD$u[samples, 2], pc3 = SVD$u[samples, 3], pc4 = SVD$u[samples, 4], pc5 = SVD$u[samples, 5], height = phenotypes[samples, "Standing height (0.0)"], stringsAsFactors = FALSE ) outlierFilter <- adjustedPhenotypes$height < 147 & adjustedPhenotypes$height > 210 adjustedPhenotypes$height[outlierFilter] <- NA phenotypeAdjustment <- lm( formula = height ~ factor(sex) + age + factor(batch) + factor(center) + pc1 + pc2 + pc3 + pc4 + pc5, data = adjustedPhenotypes, na.action = na.exclude ) adjustedPhenotypes$adjusted_height = residuals(phenotypeAdjustment)
- Load genotypes
bedPath <- paste0(inputPath, "/BED/whites/calls/chromosomes") bedFiles <- list.files(bedPath, pattern = "*.bed", full.names = TRUE) X <- do.call(ColumnLinkedMatrix, lapply(bedFiles, function(bedFile) { BEDMatrix(bedFile, simple_names = TRUE) }))
- Perform GWAS
DATA <- as.BGData(X) DATA@pheno <- orderedMerge(DATA@pheno, adjustedPhenotypes) coeffs <- GWAS( formula = adjusted_height ~ 1, data = DATA, method = "rayOLS", i = samples, chunkSize = 2500, nCores = 1, verbose = TRUE ) # will take ~1.5 hours save(coeffs, file = paste0(outputPath, "/GWAS.RData"))