GWAS - QuantGen/HPCC GitHub Wiki

GWAS using predefined traits

Prerequisites:

Check if your trait of interest corresponds to one of the subdirectories in /mnt/research/quantgen/projects/UKB/PIPELINE500/output/adjusted_phenotypes and set the trait variable accordingly
```
trait <- "height"
```

Steps:

Load libraries
```
library(BGData)
```

Define paths

inputPath <- "/mnt/research/quantgen/projects/UKB/PIPELINE500/output"
outputPath <- "..."

Load cohort of unrelated whites

cohortPath <- paste0(inputPath, "/cohorts/whites_unrelated_0.05/calls/cohort.txt")
samples <- scan(cohortPath, what = character(), quiet = TRUE)

Load genotypes (as BGData object)

load.BGData(paste0(inputPath, "/BGData/whites/calls/BGData.RData")) # loads 'DATA'

Load phenotypes

phenotypesPath <- paste0(inputPath, "/adjusted_phenotypes/", trait, "/whites_unrelated_0.05/calls/trait.tsv")
adjustedPhenotypes <- read.table(phenotypesPath, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
adjustedPhenotypes["FID"](/QuantGen/HPCC/wiki/"FID") <- as.character(adjustedPhenotypes["FID"](/QuantGen/HPCC/wiki/"FID"))
adjustedPhenotypes["IID"](/QuantGen/HPCC/wiki/"IID") <- as.character(adjustedPhenotypes["IID"](/QuantGen/HPCC/wiki/"IID"))

Merge adjusted phenotypes into BGData object

DATA@pheno <- orderedMerge(DATA@pheno, adjustedPhenotypes)

Perform GWAS

coeffs <- GWAS(
    formula = as.formula(paste0("adjusted_", trait, " ~ 1")),
    data = DATA,
    method = "rayOLS",
    i = samples,
    chunkSize = 2500,
    nCores = 1,
    verbose = TRUE
) # will take ~1.5 hours
save(coeffs, file = paste0(outputPath, "/GWAS.RData"))

GWAS from scratch

Load libraries
```
library(BGData)
```

Define paths

inputPath <- "/mnt/research/quantgen/projects/UKB/PIPELINE500/output"
outputPath <- "..."

Load cohort of unrelated whites

cohortPath <- paste0(inputPath, "/cohorts/whites_unrelated_0.05/calls/cohort.txt")
samples <- scan(cohortPath, what = character(), quiet = TRUE)

Load phenotypes

phenotypesPath <- paste0(inputPath, "/phenotypes/whites/calls/phenotypes.RData")
load(phenotypesPath) # loads 'phenotypes' -- this will take a while ...

Adjust trait by sex, age, center, and first five PCs after filtering for outliers

batchesPath <- paste0(inputPath, "/batches/whites/calls/batches.RData")
load(batchesPath) # loads 'batches'

pcPath <- paste0(inputPath, "/PCs/whites/calls/SVD_5000.RData")
load(pcPath) # loads 'SVD'

adjustedPhenotypes <- data.frame(
    FID = phenotypes[samples, "eid"],
    IID = phenotypes[samples, "eid"],
    sex = phenotypes[samples, "Sex (0.0)"],
    age = phenotypes[samples, "Age when attended assessment centre (0.0)"],
    batch = batches[samples, "batch"],
    center = phenotypes[samples, "UK Biobank assessment centre (0.0)"],
    pc1 = SVD$u[samples, 1],
    pc2 = SVD$u[samples, 2],
    pc3 = SVD$u[samples, 3],
    pc4 = SVD$u[samples, 4],
    pc5 = SVD$u[samples, 5],
    height = phenotypes[samples, "Standing height (0.0)"],
    stringsAsFactors = FALSE
)

outlierFilter <- adjustedPhenotypes$height < 147 & adjustedPhenotypes$height > 210
adjustedPhenotypes$height[outlierFilter] <- NA

phenotypeAdjustment <- lm(
    formula = height ~ factor(sex) + age + factor(batch) + factor(center) +
                       pc1 + pc2 + pc3 + pc4 + pc5,
    data = adjustedPhenotypes,
    na.action = na.exclude
)
adjustedPhenotypes$adjusted_height = residuals(phenotypeAdjustment)

Load genotypes

bedPath <- paste0(inputPath, "/BED/whites/calls/chromosomes")
bedFiles <- list.files(bedPath, pattern = "*.bed", full.names = TRUE)
X <- do.call(ColumnLinkedMatrix, lapply(bedFiles, function(bedFile) {
    BEDMatrix(bedFile, simple_names = TRUE)
}))

Perform GWAS

DATA <- as.BGData(X)
DATA@pheno <- orderedMerge(DATA@pheno, adjustedPhenotypes)
coeffs <- GWAS(
    formula = adjusted_height ~ 1,
    data = DATA,
    method = "rayOLS",
    i = samples,
    chunkSize = 2500,
    nCores = 1,
    verbose = TRUE
) # will take ~1.5 hours
save(coeffs, file = paste0(outputPath, "/GWAS.RData"))