GWAS - QuantGen/HPCC GitHub Wiki

GWAS using predefined traits

Prerequisites:

  • Check if your trait of interest corresponds to one of the subdirectories in /mnt/research/quantgen/projects/UKB/PIPELINE500/output/adjusted_phenotypes and set the trait variable accordingly
    trait <- "height"
    

Steps:

  1. Load libraries
    library(BGData)
    
  2. Define paths
    # Root of the UKB pipeline output; the input paths in later steps are built from it.
    inputPath <- "/mnt/research/quantgen/projects/UKB/PIPELINE500/output"
    # Placeholder: replace with the directory that should receive GWAS.RData.
    outputPath <- "..."
    
  3. Load cohort of unrelated whites
    # Read the cohort's sample IDs as character strings
    cohortPath <- paste0(inputPath, "/cohorts/whites_unrelated_0.05/calls/cohort.txt")
    samples <- scan(file = cohortPath, what = "", quiet = TRUE)
    
  4. Load genotypes (as BGData object)
    # Restore the saved BGData object holding the genotypes into the workspace
    load.BGData(paste0(inputPath, "/BGData/whites/calls/BGData.RData")) # loads 'DATA'
    
  5. Load phenotypes
    # Adjusted phenotypes are stored per trait as a tab-separated file with a header row
    phenotypesPath <- paste0(inputPath, "/adjusted_phenotypes/", trait, "/whites_unrelated_0.05/calls/trait.tsv")
    adjustedPhenotypes <- read.table(phenotypesPath, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
    # read.table may parse numeric-looking IDs as numbers; store them as character
    # (presumably to match the ID type in DATA@pheno for the merge -- confirm)
    adjustedPhenotypes[["FID"]] <- as.character(adjustedPhenotypes[["FID"]])
    adjustedPhenotypes[["IID"]] <- as.character(adjustedPhenotypes[["IID"]])
    
  6. Merge adjusted phenotypes into BGData object
    # Attach the adjusted phenotype columns to the BGData object's phenotype table.
    # NOTE(review): orderedMerge presumably preserves the row order of DATA@pheno so
    # phenotypes stay aligned with the genotype rows -- confirm in the BGData docs.
    DATA@pheno <- orderedMerge(DATA@pheno, adjustedPhenotypes)
    
  7. Perform GWAS
    # Intercept-only model on the adjusted trait column, e.g. adjusted_height ~ 1
    gwasFormula <- as.formula(paste0("adjusted_", trait, " ~ 1"))
    coeffs <- GWAS(
        formula = gwasFormula,
        data = DATA,
        method = "rayOLS",
        i = samples, # restrict the analysis to the cohort samples
        chunkSize = 2500,
        nCores = 1,
        verbose = TRUE
    ) # will take ~1.5 hours
    # Persist the results under outputPath
    outputFile <- paste0(outputPath, "/GWAS.RData")
    save(coeffs, file = outputFile)
    

GWAS from scratch

  1. Load libraries
    library(BGData)
    
  2. Define paths
    # Root of the UKB pipeline output; the input paths in later steps are built from it.
    inputPath <- "/mnt/research/quantgen/projects/UKB/PIPELINE500/output"
    # Placeholder: replace with the directory that should receive GWAS.RData.
    outputPath <- "..."
    
  3. Load cohort of unrelated whites
    # Read the cohort's sample IDs as character strings
    cohortPath <- paste0(inputPath, "/cohorts/whites_unrelated_0.05/calls/cohort.txt")
    samples <- scan(file = cohortPath, what = "", quiet = TRUE)
    
  4. Load phenotypes
    # The raw phenotypes are stored as an R workspace image; load() restores its
    # objects into the current environment.
    phenotypesPath <- paste0(inputPath, "/phenotypes/whites/calls/phenotypes.RData")
    load(phenotypesPath) # loads 'phenotypes' -- this will take a while ...
    
  5. Adjust trait by sex, age, batch, center, and first five PCs after filtering for outliers
    # Genotyping batch assignments, used as an adjustment covariate
    batchesPath <- paste0(inputPath, "/batches/whites/calls/batches.RData")
    load(batchesPath) # loads 'batches'
    
    # Decomposition whose left singular vectors (SVD$u) serve as the PCs
    pcPath <- paste0(inputPath, "/PCs/whites/calls/SVD_5000.RData")
    load(pcPath) # loads 'SVD'
    
    # Assemble one row per cohort sample: IDs, adjustment covariates, and the raw trait.
    # NOTE(review): assumes phenotypes, batches and SVD$u can all be indexed by the
    # IDs in 'samples' (i.e. carry them as row names) -- confirm.
    adjustedPhenotypes <- data.frame(
        FID = phenotypes[samples, "eid"], # eid is used for both FID and IID
        IID = phenotypes[samples, "eid"],
        sex = phenotypes[samples, "Sex (0.0)"],
        age = phenotypes[samples, "Age when attended assessment centre (0.0)"],
        batch = batches[samples, "batch"],
        center = phenotypes[samples, "UK Biobank assessment centre (0.0)"],
        pc1 = SVD$u[samples, 1], # first five columns of SVD$u
        pc2 = SVD$u[samples, 2],
        pc3 = SVD$u[samples, 3],
        pc4 = SVD$u[samples, 4],
        pc5 = SVD$u[samples, 5],
        height = phenotypes[samples, "Standing height (0.0)"], # raw trait to be adjusted
        stringsAsFactors = FALSE
    )
    
    # Treat heights below 147 or above 210 as outliers and set them to missing.
    # The two conditions are combined with OR: no value can be both < 147 and
    # > 210, so an AND filter would never flag anything. Already-missing heights
    # are excluded from the mask so it contains no NA.
    outlierFilter <- !is.na(adjustedPhenotypes$height) &
        (adjustedPhenotypes$height < 147 | adjustedPhenotypes$height > 210)
    adjustedPhenotypes$height[outlierFilter] <- NA
    
    # Regress the trait on the covariates; sex, batch and center enter as factors.
    # na.exclude drops incomplete rows for fitting but makes residuals() pad them
    # back with NA, so the residual vector lines up with the data frame rows.
    phenotypeAdjustment <- lm(
        formula = height ~ factor(sex) + age + factor(batch) + factor(center) +
                           pc1 + pc2 + pc3 + pc4 + pc5,
        data = adjustedPhenotypes,
        na.action = na.exclude
    )
    # The residuals are the adjusted trait used as the GWAS response
    adjustedPhenotypes$adjusted_height <- residuals(phenotypeAdjustment)
    
  6. Load genotypes
    bedPath <- paste0(inputPath, "/BED/whites/calls/chromosomes")
    # 'pattern' is a regular expression, not a glob: anchor on the literal ".bed"
    # extension so that only files ending in .bed are picked up.
    bedFiles <- list.files(bedPath, pattern = "\\.bed$", full.names = TRUE)
    # Wrap each BED file as a BEDMatrix and link them column-wise into one matrix
    X <- do.call(ColumnLinkedMatrix, lapply(bedFiles, function(bedFile) {
        BEDMatrix(bedFile, simple_names = TRUE)
    }))
    
  7. Perform GWAS
    # Wrap the genotypes in a BGData object and attach the adjusted phenotypes
    DATA <- as.BGData(X)
    DATA@pheno <- orderedMerge(DATA@pheno, adjustedPhenotypes)
    # Intercept-only model on the covariate-adjusted trait
    gwasFormula <- adjusted_height ~ 1
    coeffs <- GWAS(
        formula = gwasFormula,
        data = DATA,
        method = "rayOLS",
        i = samples, # restrict the analysis to the cohort samples
        chunkSize = 2500,
        nCores = 1,
        verbose = TRUE
    ) # will take ~1.5 hours
    # Persist the results under outputPath
    outputFile <- paste0(outputPath, "/GWAS.RData")
    save(coeffs, file = outputFile)