Skip to content

Commit 99cf6aa

Browse files
Treatment are now allele strings
1 parent 74a4239 commit 99cf6aa

File tree

3 files changed

+47
-13
lines changed

3 files changed

+47
-13
lines changed

Manifest.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
julia_version = "1.10.4"
44
manifest_format = "2.0"
5-
project_hash = "2b58dcc0ffd21f9ddd37c869a6c32c55af516134"
5+
project_hash = "b1a916e4f68d2fb953c6eba21c0fc7546e7dc5c6"
66

77
[[deps.ARFFFiles]]
88
deps = ["CategoricalArrays", "Dates", "Parsers", "Tables"]

src/inputs_from_config.jl

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ end
167167
"""
168168
function treatments_from_variant(variant::String, dataset::DataFrame)
169169
variant_levels = sort(levels(dataset[!, variant], skipmissing=true))
170-
return Dict{Symbol, Vector{UInt8}}(Symbol(variant)=>variant_levels)
170+
return Dict{Symbol, Vector{String}}(Symbol(variant)=>variant_levels)
171171
end
172172

173173
function estimands_from_gwas(dataset, variants, outcomes, confounders;
@@ -198,14 +198,35 @@ function read_bed_chromosome(bedprefix)
198198
return SnpData(bed_file, famnm=fam_file, bimnm=bim_file)
199199
end
200200

201+
"""
    map_allele(value, allele1, allele2)

Translate a single genotype code into a two-allele genotype string.

# Arguments
- `value`: genotype code, compared against `0x00`, `0x01`, `0x02` and `0x03`
  (presumably the 2-bit PLINK BED encoding stored in a `SnpArray`; confirm).
- `allele1`, `allele2`: allele labels, interpolated as-is into the result.

# Returns
- `"\$allele1\$allele1"` for `0x00`
- `missing` for `0x01`
- `"\$allele1\$allele2"` for `0x02`
- `"\$allele2\$allele2"` for `0x03`

# Throws
- `ArgumentError` for any other code, so that an unexpected value cannot slip
  into the genotype table as `nothing`.
"""
function map_allele(value, allele1, allele2)
    value == 0x00 && return "$allele1$allele1"
    value == 0x01 && return missing
    value == 0x02 && return "$allele1$allele2"
    value == 0x03 && return "$allele2$allele2"
    throw(ArgumentError(
        "unsupported genotype code $(repr(value)); expected 0x00, 0x01, 0x02 or 0x03"
    ))
end
212+
213+
"""
    convert_string(snpdata)

Build a `DataFrame` of genotype strings from `snpdata`.

For each column index in `1:snpdata.snps`, the genotype codes in that column of
`snpdata.snparray` are translated with `map_allele`, using the `allele1` and
`allele2` entries of the matching row of `snpdata.snp_info`.

# Returns
A `DataFrame` with one column per SNP, named after `snpdata.snp_info."snpid"`,
and one row per row of `snpdata.snparray`.
"""
function convert_string(snpdata)
    snp_info = snpdata.snp_info
    # Typed container instead of an untyped `[]` (which would be a Vector{Any}).
    genotype_columns = AbstractVector[]
    sizehint!(genotype_columns, snpdata.snps)
    for col in 1:snpdata.snps
        allele1 = snp_info[col, "allele1"]
        allele2 = snp_info[col, "allele2"]
        # A view avoids materialising a copy of the column just to iterate over it.
        codes = @view snpdata.snparray[:, col]
        push!(genotype_columns, [map_allele(code, allele1, allele2) for code in codes])
    end
    # The columns were freshly allocated above, so the DataFrame can own them as-is.
    return DataFrame(genotype_columns, snp_info."snpid"; copycols=false)
end
224+
201225
function get_genotypes_from_beds(bedprefix)
202226
snpdata = read_bed_chromosome(bedprefix)
203-
genotypes = DataFrame(convert(Matrix{UInt8}, snpdata.snparray), snpdata.snp_info."snpid")
204-
genotype_map = Union{UInt8, Missing}[0, missing, 1, 2]
205-
for col in names(genotypes)
206-
genotypes[!, col] = [genotype_map[x+1] for x in genotypes[!, col]]
207-
end
227+
genotypes = convert_string(snpdata)
208228
insertcols!(genotypes, 1, :SAMPLE_ID => snpdata.person_info."iid")
229+
209230
return genotypes
210231
end
211232

test/inputs_from_gwas_config.jl

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,27 @@ function get_summary_stats(estimands)
1919
return sort(combine(groupby(results, :OUTCOME), nrow), :OUTCOME)
2020
end
2121

22-
function check_estimands_levels_order(estimands)
22+
function check_estimands_levels_order(estimands, snp_info)
2323
for Ψ in estimands
2424
# If the two components are present, the first is the 0 -> 1 and the second is the 1 -> 2
2525
variant = only(keys(Ψ.args[1].treatment_values))
26+
variant_info = filter(:snpid=>x->x==String(variant),snp_info)
27+
allele1, allele2 = variant_info.allele1[1], variant_info.allele2[1]
28+
29+
# Here, we check if the order is sufficient to be able to compute non-linear effects; any of these combinations will do.
2630
if length(Ψ.args) == 2
27-
@test Ψ.args[1].treatment_values[variant] == (control = 0x00, case = 0x01)
28-
@test Ψ.args[2].treatment_values[variant] == (control = 0x01, case = 0x02)
31+
@test (Ψ.args[1].treatment_values[variant] == (control = allele1*allele1, case = allele1*allele2) &&
32+
Ψ.args[2].treatment_values[variant] == (control = allele1*allele2, case = allele2*allele2)) ||
33+
(Ψ.args[1].treatment_values[variant] == (control = allele2*allele2, case = allele1*allele2) &&
34+
Ψ.args[2].treatment_values[variant] == (control = allele1*allele2, case = allele1*allele1))
2935
else
3036
# Otherwise we check they are one or the other
3137
arg = only(Ψ.args)
32-
@test arg.treatment_values[variant]==(control = 0x00, case = 0x01) ||
33-
arg.treatment_values[variant]==( control = 0x01, case = 0x02)
38+
@test arg.treatment_values[variant] == (control = allele1*allele1, case = allele1*allele2) ||
39+
arg.treatment_values[variant] == (control = allele2*allele2, case = allele1*allele2) ||
40+
arg.treatment_values[variant] == (control = allele1*allele2, case = allele2*allele2) ||
41+
arg.treatment_values[variant] == (control = allele1*allele2, case = allele1*allele1)
42+
3443
end
3544
end
3645
end
@@ -48,6 +57,10 @@ end
4857
"--positivity-constraint=0"
4958
])
5059
TargeneCore.julia_main()
60+
61+
# Define SNP information to check string allele definitions
62+
snpdata = read_bed_chromosome(joinpath(TESTDIR, "data", "ukbb", "genotypes" , "ukbb_1."))
63+
snp_info = select(DataFrame(snpdata.snp_info), [:snpid, :allele1, :allele2])
5164
# Check dataset
5265
dataset = DataFrame(Arrow.Table(joinpath(tmpdir, "final.data.arrow")))
5366
@test size(dataset) == (1940, 886)
@@ -68,7 +81,7 @@ end
6881
nrow = repeat([875], 5)
6982
)
7083

71-
check_estimands_levels_order(estimands)
84+
check_estimands_levels_order(estimands, snp_info)
7285
end
7386

7487
@testset "Test inputs_from_config gwas: positivity constraint" begin

0 commit comments

Comments
 (0)