@@ -12,7 +12,7 @@ read_data(filepath) = CSV.read(filepath, DataFrame, types=Dict(:SAMPLE_ID => Str
12
12
13
13
function write_tmle_inputs (outprefix, final_dataset, parameters; batch_size= nothing )
14
14
# Write final_dataset
15
- CSV . write (string (outprefix, " .data.csv " ), final_dataset)
15
+ Arrow . write (string (outprefix, " .data.arrow " ), final_dataset)
16
16
# Write param_files
17
17
if batch_size != = nothing
18
18
for (batch_id, batch) in enumerate (Iterators. partition (parameters, batch_size))
@@ -56,22 +56,13 @@ all_snps_called(found_variants::Set{<:AbstractString}, variants::Set{<:AbstractS
56
56
variants == found_variants
57
57
58
58
"""
59
- genotypes_encoding(variant; asint=true )
59
+ genotypes_encoding(variant)
60
60
61
- If asint is true then the number of minor alleles is reported, otherwise string genotypes are reported.
61
+ String genotypes are reported.
62
62
"""
63
- function genotypes_encoding (variant; asint= true )
64
- minor = minor_allele (variant)
63
+ function genotypes_encoding (variant)
65
64
all₁, all₂ = alleles (variant)
66
- if asint
67
- if all₁ == minor
68
- return [2 , 1 , 0 ]
69
- else
70
- return [0 , 1 , 2 ]
71
- end
72
- else
73
- return [all₁* all₁, all₁* all₂, all₂* all₂]
74
- end
65
+ return [all₁* all₁, all₁* all₂, all₂* all₂]
75
66
end
76
67
77
68
NotAllVariantsFoundError (found_snps, snp_list) =
@@ -83,7 +74,7 @@ NotBiAllelicOrUnphasedVariantError(rsid) = ArgumentError(string("Variant: ", rsi
83
74
84
75
This function assumes the UK-Biobank structure
85
76
"""
86
- function call_genotypes (bgen_prefix:: String , variants:: Set{<:AbstractString} , threshold:: Real ; asint = true )
77
+ function call_genotypes (bgen_prefix:: String , variants:: Set{<:AbstractString} , threshold:: Real )
87
78
chr_dir_, prefix_ = splitdir (bgen_prefix)
88
79
chr_dir = chr_dir_ == " " ? " ." : chr_dir_
89
80
genotypes = nothing
@@ -102,7 +93,7 @@ function call_genotypes(bgen_prefix::String, variants::Set{<:AbstractString}, th
102
93
continue
103
94
end
104
95
minor_allele_dosage! (bgenfile, variant)
105
- variant_genotypes = genotypes_encoding (variant; asint = asint )
96
+ variant_genotypes = genotypes_encoding (variant)
106
97
probabilities = probabilities! (bgenfile, variant)
107
98
size (probabilities, 1 ) != 3 && throw (NotBiAllelicOrUnphasedVariantError (rsid_))
108
99
chr_genotypes[! , rsid_] = call_genotypes (probabilities, variant_genotypes, threshold)
0 commit comments