data encryption - reworkhow/JWAS.jl GitHub Wiki
JWAS (branch: encryption)
Raw data
data simulation
using JWAS,DataFrames,CSV,Statistics,Random
# Step 2: Read data
n=50
p=100
genotypes_raw = rand(MersenneTwister(1),[0.0,1.0,2.0],n,p)
genotypes_raw = genotypes_raw.-mean(genotypes_raw,dims=1) #center
phenotypes_raw = DataFrame(ID=1:n,cg=[repeat(["cg1"],25);repeat(["cg2"],25)],batch=[repeat(["batch1"],20);repeat(["batch2"],30)],y_raw=rand(MersenneTwister(2),n))
phenotypes_raw[:,:y_raw]=phenotypes_raw[:,:y_raw].-mean(phenotypes_raw[:,:y_raw]) #center
run JWAS
G=0.001
genotypes_raw = get_genotypes(genotypes_raw,G;G_is_marker_variance = true,
method="RR-BLUP",center=false,quality_control=false);
# Step 3: Build Model Equations
model_equation_raw ="y_raw = cg + batch + genotypes_raw"
R=1.0
model_raw = build_model(model_equation_raw,R);
set_random(model_raw,"cg");
set_random(model_raw,"batch");
# Step 6: Run Analysis
out_raw=runMCMC(model_raw,phenotypes_raw,double_precision=true,chain_length=50000);
Encrypted data
data simulation
- The names of the encrypted incidence matrix must be "name_i". For example, for "batch" with 2 levels, the names must be "batch_1","batch_2".
n=50
p=100
genotypes_raw = rand(MersenneTwister(1),[0.0,1.0,2.0],n,p)
genotypes_raw = genotypes_raw.-mean(genotypes_raw,dims=1) #center
phenotypes_raw = DataFrame(ID=1:n,cg=[repeat(["cg1"],25);repeat(["cg2"],25)],y_raw=rand(MersenneTwister(2),n))
phenotypes_raw[:,:y_raw]=phenotypes_raw[:,:y_raw].-mean(phenotypes_raw[:,:y_raw]) #center
phenotypes_raw[:,:y_raw]'phenotypes_raw[:,:y_raw]
#enctyption
P=lq(rand(MersenneTwister(3),n,n)).Q
P=Matrix(P) #get orthogonal matrix from QR decomposition
genotypes_encrypted=P*genotypes_raw
incidence_matrix1=[ones(25) zeros(25)
zeros(25) ones(25)]
incidence_matrix2=[ones(20) zeros(20)
zeros(30) ones(30)]
incidence_matrix=[incidence_matrix1 incidence_matrix2]
phenotypes_encrypted=DataFrame(ID=1:n,cg_1=P*incidence_matrix[:,1],cg_2=P*incidence_matrix[:,2],
batch_1=P*incidence_matrix[:,3],batch_2=P*incidence_matrix[:,4],
y_encrypted=P*phenotypes_raw[:,:y_raw])
Run JWAS
- set
encryption=true
in runMCMC()
G=0.001
genotypes_encrypted = get_genotypes(genotypes_encrypted,G;G_is_marker_variance = true,
method="RR-BLUP",center=false,quality_control=false);
# Step 3: Build Model Equations
model_equation_encrypted ="y_encrypted = cg + batch + genotypes_encrypted"
R=1.0
model_encrypted = build_model(model_equation_encrypted,R);
set_random(model_encrypted,"cg");
set_random(model_encrypted,"batch");
# Step 6: Run Analysis
out_encrypted=runMCMC(model_encrypted,phenotypes_encrypted,encryption=true,double_precision=true,chain_length=50000);
Compare results from raw v.s. encrypted
ebv_raw=out_raw["EBV_y_raw"][:,:EBV]
ebv_encrypted=out_encrypted["EBV_y_encrypted"][:,:EBV]
ebv_encrypted=P'*ebv_encrypted
cor(marker_effect_raw,marker_effect_encrypted) #0.9775076
cor(ebv_raw,ebv_encrypted) #0.9924604
#also similar residual variance, genetic varance
using Plots
scatter(marker_effect_raw,marker_effect_encrypted)