Example: BipEx (advanced) - ajaynadig/bhr GitHub Wiki
BHR Example: Burden Heritability in the BipEx dataset
Description
This file contains the code to run a BHR analysis of bipolar disorder, from publicly available summary statistics to final estimates.
Because this trait is binary, and we have only summary statistics, we compute per-allele effect sizes from case and control allele counts (Equation 33 in BHR paper). In the case of a continuous trait, one should compute per-allele effect sizes using a linear model.
Loading and filtering variants.
The source file, "Bipex_variants_results.tsv", can be downloaded at this URL: https://bipex.broadinstitute.org/downloads
The baseline model file, "baseline_model.txt", can be downloaded at this URL: https://github.com/ajaynadig/bhr/tree/master/reference_files
baseline_model <- read.table("~/Documents/oconnor_rotation/rarevariantproject/final_manuscript_repo/bhr/reference_files/ms_baseline_oe5.txt")
bp_variantlevel_bipex <- bigreadr::fread2("~/Downloads/BipEx_variant_results.tsv.gz")
variant_filter = bp_variantlevel_bipex$group == "Bipolar Disorder" & #Filter to Bipolar Disorder counts
bp_variantlevel_bipex$in_analysis == TRUE & #Use variant filter from Palmer et al, 2022
!is.na(bp_variantlevel_bipex$gene_id) & #Remove variants with NA gene ID
str_detect(bp_variantlevel_bipex$locus, "^chr\\d") #Subset to autosomal variants
#Subset variants
bp_variantlevel_bipex <- bp_variantlevel_bipex[variant_filter,
c("gene_id",
"consequence",
"ac_case",
"ac_ctrl",
"locus",
"mpc")]
Wrangling summary statistics into format for BHR
wrangle_sumstats <- function(table,n_cases,n_controls, var_filter) {
#Filter to variants of interest
table = table[var_filter,]
#Compute sample prevalence, will be used to compute per-sd beta
prevalence = n_cases/(n_cases+n_controls)
#Compute variant MAF in cases, will be used to compute per-sd beta
table$AF_case = table$ac_case/(2*n_cases)
#Compute variant MAFoverall, will be used to compute per-sd beta
table$AF = (table$ac_case + table$ac_ctrl)/(2*(n_cases + n_controls))
#calculate per-sd betas
table$beta = (2 * (table$AF_case - table$AF) * prevalence)/sqrt(2 * table$AF * (1 - table$AF) * prevalence * (1 - prevalence))
#calculate variant variances
table$twopq = 2*table$AF * (1 - table$AF)
#convert betas from per-sd (i.e. sqrt(variance explained)) to per-allele (i.e. in units of phenotype) betas.
#per-allele are the usual betas reported by an exome wide association study.
table$beta_perallele = table$beta/sqrt(table$twopq)
#aggregate into gene-level table.
#position doesn't need to be super precise, as it is only used to order genes for jackknife
#N = sum of case and control counts
sumstats = data.frame(gene = table$gene_id,
AF = table$AF,
beta = table$beta_perallele,
gene_position = parse_number(sapply(strsplit(table$locus, split = ":"), function(x) x[2](/ajaynadig/bhr/wiki/2))),
chromosome = parse_number(sapply(strsplit(table$locus, split = ":"), function(x) x[1](/ajaynadig/bhr/wiki/1))),
N = n_cases + n_controls,
phenotype_key = "BP")
#we have found that in these smaller sample analyses, there are some genes with
#large burden scores that are clearly outliers
#we remove one such gene, TTN
sumstats <- sumstats[sumstats$gene != "ENSG00000155657",]
return(sumstats)
}
bp_sumstats_ptv = wrangle_sumstats(bp_variantlevel_bipex,
14210, #N from Palmer et al, 2022
14422, #N from Palmer et al, 2022
bp_variantlevel_bipex$consequence == "ptv")
bp_sumstats_missenseMPC2 = wrangle_sumstats(bp_variantlevel_bipex[!is.na(bp_variantlevel_bipex$mpc),],
14210,
14422,
bp_variantlevel_bipex$consequence[!is.na(bp_variantlevel_bipex$mpc)] %in% c("damaging_missense", "other_missense") &
bp_variantlevel_bipex$mpc[!is.na(bp_variantlevel_bipex$mpc)] > 2)
bp_sumstats_synonymous = wrangle_sumstats(bp_variantlevel_bipex,
14210,
14422,
bp_variantlevel_bipex$consequence == "synonymous")
Running BHR
#install BHR
#devtools::install_github("ajaynadig/bhr", force = TRUE)
bp_ptv_bhr <- BHR(trait1_sumstats = bp_sumstats_ptv,
annotations = list(baseline_model), #baseline model including constraint annotations
num_blocks = 100, #number of blocks for jackknife
mode = "univariate") #run in univariate mode to compute burden h2
bp_missenseMPC2_bhr <- BHR(bp_sumstats_missenseMPC2,
annotations = list(baseline_model),
num_blocks = 100,
mode = "univariate")
bp_synonymous_bhr <- BHR(bp_sumstats_synonymous,
annotations = list(baseline_model),
num_blocks = 100,
mode = "univariate")
#convert observed scale to liability scale h2
#calculate sample prevalence
bp_sample_prevalence = 14210/(14210 + 14422)
#this population prevalence estimate is from Ferrari et al, 2016 Bipolar Disorders
population_prevalence_bp = 0.007
obs2lia_factor <- function(K, P){
X <- qnorm(K,lower.tail=FALSE)
z <- (1/sqrt(2*pi))*(exp(-(X**2)/2))
factor <- (K*(1-K)*K*(1-K))/(P*(1-P)*(z**2))
return(factor)
}
bp_scalingfactor = obs2lia_factor(population_prevalence_bp,bp_sample_prevalence)
print(paste0("BP PTV Burden Heritability: ",
round(bp_ptv_bhr$mixed_model$heritabilities[1,5] * bp_scalingfactor, 4),
" ",
"(",
round(bp_ptv_bhr$mixed_model$heritabilities[2,5] * bp_scalingfactor, 4),
")"))
print(paste0("BP Missense MPC > 2 Burden Heritability: ",
round(bp_missenseMPC2_bhr$mixed_model$heritabilities[1,5] * bp_scalingfactor, 4),
" ",
"(",
round(bp_missenseMPC2_bhr$mixed_model$heritabilities[2,5] * bp_scalingfactor, 4),
")"))
print(paste0("BP Synonymous Burden Heritability: ",
round(bp_synonymous_bhr$mixed_model$heritabilities[1,5] * bp_scalingfactor, 4),
" ",
"(",
round(bp_synonymous_bhr$mixed_model$heritabilities[2,5] * bp_scalingfactor, 4),
")"))
"BP PTV Burden Heritability: 0.0179 (0.003)"
"BP Missense MPC > 2 Burden Heritability: 0.0021 (0.0011)"
"BP Synonymous Burden Heritability: 0.0015 (0.0021)"