Skip to content

Commit 33c3ff8

Browse files
authored
Merge pull request #185 from TARGENE/brh_data_source
Add functionality to run from_actors mode with more than one TF
2 parents 4dcf5c4 + 45af342 commit 33c3ff8

File tree

8 files changed

+124
-18
lines changed

8 files changed

+124
-18
lines changed

src/confounders.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,4 @@ function adapt_flashpca(parsed_args)
106106
pcs = CSV.File(parsed_args["input"], drop=["FID"]) |> DataFrame
107107
rename!(pcs, :IID => :SAMPLE_ID)
108108
CSV.write(parsed_args["output"], pcs)
109-
end
109+
end

src/tmle_inputs/from_actors.jl

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,15 @@ function treatments_from_actors(bqtl_file, env_file, trans_actors_prefix)
1111
bqtls, transactors, extraT
1212
end
1313

14+
filter_snps_by_tf(df::DataFrame, tf_name::AbstractString) = filter(row -> row.TF == tf_name, df)
15+
filter_snps_by_tf(df::DataFrame, tf_name::Nothing) = df
16+
filter_snps_by_tf(df_vector, tf_name::AbstractString) = [filter_snps_by_tf(df, tf_name) for df in df_vector]
17+
filter_snps_by_tf(df_vector, tf_name::Nothing) = df_vector
1418

1519
combine_trans_actors(trans_actors::Vector{DataFrame}, extraT::DataFrame, order) = combinations([trans_actors..., extraT], order)
1620
combine_trans_actors(trans_actors::Vector{DataFrame}, extraT::Nothing, order) = combinations(trans_actors, order)
1721
combine_trans_actors(trans_actors::Nothing, extraT::DataFrame, order) = [[extraT]]
1822

19-
2023
function combine_by_bqtl(bqtls::DataFrame, trans_actors::Union{Vector{DataFrame}, Nothing}, extraT::Union{DataFrame, Nothing}, order::Int)
2124
treatment_combinations = Vector{Symbol}[]
2225
if order == 1
@@ -44,8 +47,11 @@ all_variants(bqtls::DataFrame, transactors::Vector{DataFrame}) = Set(vcat(bqtls.
4447

4548

4649
read_snps_from_csv(path::Nothing) = nothing
47-
read_snps_from_csv(path::String) = unique(CSV.read(path, DataFrame; select=[:ID, :CHR]), :ID)
48-
50+
function read_snps_from_csv(path::String)
51+
df = CSV.read(path, DataFrame)
52+
df = "TF" in names(df) ? unique(df[:, [:ID, :CHR, :TF]], [:ID, :TF]) : unique(df[:, [:ID, :CHR]], :ID)
53+
return(df)
54+
end
4955

5056
trans_actors_from_prefix(trans_actors_prefix::Nothing) = nothing
5157
function trans_actors_from_prefix(trans_actors_prefix::String)
@@ -156,7 +162,14 @@ function parameters_from_actors(bqtls, transactors, data, variables, orders, out
156162
for order in orders
157163
# First generate the `T` section
158164
treatment_combinations = TargeneCore.combine_by_bqtl(bqtls, transactors, extraT_df, order)
165+
# If there are duplicates here, remove them
166+
treatment_combinations = unique(treatment_combinations)
159167
for treatments in treatment_combinations
168+
# If RSID is duplicated in treatments, skip
169+
if length(treatments) != length(unique(treatments))
170+
continue
171+
end
172+
160173
addParameters!(parameters, treatments, variables, data; positivity_constraint=positivity_constraint)
161174

162175
if batch_size !== nothing && size(parameters, 1) >= batch_size
@@ -208,11 +221,19 @@ function tmle_inputs_from_actors(parsed_args)
208221

209222
# Parameter files
210223
variables = TargeneCore.get_variables(pcs, traits, extraW, extraC, extraT)
211-
TargeneCore.parameters_from_actors(
212-
bqtls, transactors, data, variables, orders, outprefix;
224+
225+
# Loop through each TF present in bqtls file
226+
tfs = "TF" in names(bqtls) ? unique(bqtls.TF) : [nothing]
227+
for tf in tfs
228+
outprefix_tf = tf !== nothing ? string(outprefix,".",tf) : outprefix
229+
bqtls_tf = TargeneCore.filter_snps_by_tf(bqtls, tf)
230+
transactors_tf = TargeneCore.filter_snps_by_tf(transactors, tf)
231+
TargeneCore.parameters_from_actors(
232+
bqtls_tf, transactors_tf, data, variables, orders, outprefix_tf;
213233
positivity_constraint=positivity_constraint, batch_size=batch_size
214-
)
234+
)
235+
end
215236

216237
# write data
217238
Arrow.write(string(outprefix, ".data.arrow"), data)
218-
end
239+
end

src/tmle_inputs/tmle_inputs.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
const CHR_REG = r"chr[1-9]+"
22

3-
param_batch_name(outprefix, batch_id) = string(outprefix, ".param_", batch_id, ".yaml")
3+
param_batch_name(outprefix, batch_id) = string(outprefix, ".param_", batch_id, ".yaml")
44

55

66
"""

test/confounders.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ end
5656

5757
clean(parsed_args)
5858

59-
# No qc file provided
59+
# No QC file provided
6060
parsed_args = Dict(
6161
"input" => SnpArrays.datadir("mouse"),
6262
"output" => joinpath("data", "filtered-mouse"),
@@ -138,4 +138,4 @@ end
138138

139139
end;
140140

141-
true
141+
true
File renamed without changes.

test/data/bqtls_2.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
ID,CHR,TF
2+
RSID_17,12,TF1
3+
RSID_99,12,TF1
4+
RSID_17,12,TF2
5+
RSID_198,12,TF2

test/data/trans_actors_3.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ID,CHR,TF
2+
RSID_102,2,TF1
3+
RSID_2,chr,TF2

test/tmle_inputs/from_actors.jl

Lines changed: 84 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ end
222222
@test_throws ArgumentError TargeneCore.treatments_from_actors(1, nothing, nothing)
223223
@test_throws ArgumentError TargeneCore.treatments_from_actors(nothing, 1, nothing)
224224

225-
bqtl_file = joinpath("data", "bqtls.csv")
225+
bqtl_file = joinpath("data", "bqtls_1.csv")
226226
trans_actors_prefix = joinpath("data", "trans_actors_1.csv")
227227
env_file = joinpath("data", "extra_treatments.txt")
228228
# bqtls and trans_actors
@@ -259,7 +259,7 @@ end
259259
# - Order 1,2
260260
parsed_args = Dict(
261261
"from-actors" => Dict{String, Any}(
262-
"bqtls" => joinpath("data", "bqtls.csv"),
262+
"bqtls" => joinpath("data", "bqtls_1.csv"),
263263
"trans-actors-prefix" => joinpath("data", "trans_actors_1.csv"),
264264
"extra-covariates" => joinpath("data", "extra_covariates.txt"),
265265
"extra-treatments" => joinpath("data", "extra_treatments.txt"),
@@ -333,7 +333,7 @@ end
333333
# - batched
334334
parsed_args = Dict(
335335
"from-actors" => Dict{String, Any}(
336-
"bqtls" => joinpath("data", "bqtls.csv"),
336+
"bqtls" => joinpath("data", "bqtls_1.csv"),
337337
"trans-actors-prefix" => joinpath("data", "trans_actors_2"),
338338
"extra-covariates" => nothing,
339339
"extra-treatments" => nothing,
@@ -361,10 +361,10 @@ end
361361
@test size(traits) == (490, 13)
362362

363363
# Parameter files:
364-
ouparameters_1 = parameters_from_yaml("final.param_1.yaml")
365-
@test size(ouparameters_1, 1) == 100
366-
ouparameters_2 = parameters_from_yaml("final.param_2.yaml")
367-
outparameters = vcat(ouparameters_1, ouparameters_2)
364+
outparameters_1 = parameters_from_yaml("final.param_1.yaml")
365+
@test size(outparameters_1, 1) == 100
366+
outparameters_2 = parameters_from_yaml("final.param_2.yaml")
367+
outparameters = vcat(outparameters_1, outparameters_2)
368368

369369
found_targets = Dict(
370370
:BINARY_1 => 0,
@@ -405,6 +405,83 @@ end
405405
cleanup()
406406
end
407407

408+
@testset "Test tmle_inputs from-actors: scenario 3" begin
409+
# Scenario:
410+
# - Trans-actors
411+
# - Extra Treatment
412+
# - Extra Covariates
413+
# - Order 1,2
414+
# - More than 1 TF present
415+
parsed_args = Dict(
416+
"from-actors" => Dict{String, Any}(
417+
"bqtls" => joinpath("data", "bqtls_2.csv"),
418+
"trans-actors-prefix" => joinpath("data", "trans_actors_3.csv"),
419+
"extra-covariates" => joinpath("data", "extra_covariates.txt"),
420+
"extra-treatments" => joinpath("data", "extra_treatments.txt"),
421+
"extra-confounders" => nothing,
422+
"orders" => "1,2",
423+
),
424+
"traits" => joinpath("data", "traits_1.csv"),
425+
"pcs" => joinpath("data", "pcs.csv"),
426+
"call-threshold" => 0.8,
427+
"%COMMAND%" => "from-actors",
428+
"bgen-prefix" => joinpath("data", "ukbb", "imputed" ,"ukbb"),
429+
"out-prefix" => "final",
430+
"batch-size" => nothing,
431+
"positivity-constraint" => 0.
432+
)
433+
bqtls = Symbol.(unique(CSV.read(parsed_args["from-actors"]["bqtls"], DataFrame).ID))
434+
tmle_inputs(parsed_args)
435+
436+
## Dataset file
437+
trait_data = DataFrame(Arrow.Table("final.data.arrow"))
438+
@test names(trait_data) == [
439+
"SAMPLE_ID", "BINARY_1", "BINARY_2", "CONTINUOUS_1", "CONTINUOUS_2",
440+
"COV_1", "21003", "22001", "TREAT_1", "PC1", "PC2", "RSID_2", "RSID_102",
441+
"RSID_17", "RSID_198", "RSID_99"]
442+
@test size(trait_data) == (490, 16)
443+
444+
## Parameter file:
445+
outparameters = [parameters_from_yaml("final.TF1.param_1.yaml"), parameters_from_yaml("final.TF2.param_1.yaml")]
446+
found_targets = Dict(
447+
:BINARY_1 => 0,
448+
:CONTINUOUS_2 => 0,
449+
:CONTINUOUS_1 => 0,
450+
:BINARY_2 => 0
451+
)
452+
for tf in [1,2]
453+
outparameters_tf = outparameters[tf]
454+
for Ψ in outparameters_tf
455+
if Ψ isa ATE
456+
ntreatments = length(Ψ.treatment)
457+
if ntreatments > 1
458+
@test all(Ψ.treatment[index].case == Ψ.treatment[index].control for index in 2:ntreatments)
459+
end
460+
else
461+
@test Ψ isa IATE
462+
@test all(cc.case != cc.control for cc in Ψ.treatment)
463+
end
464+
@test Ψ.covariates == [:COV_1, Symbol("21003"), Symbol("22001")]
465+
@test Ψ.confounders == [:PC1, :PC2]
466+
# The first treatment will be a bqtl
467+
@test keys(Ψ.treatment)[1] in bqtls
468+
@test Ψ.treatment[1].case isa AbstractString
469+
@test length(Ψ.treatment) in [1, 2]
470+
found_targets[Ψ.target] += 1
471+
end
472+
end
473+
# The number of parameters with various targets should be the same
474+
@test all(x == found_targets[:BINARY_1] for x in values(found_targets))
475+
# This is difficult to really check the ordering
476+
# Those correspond to the simple bQTL ATE
477+
for tf in [1,2]
478+
first_treatments = keys(outparameters[tf][1].treatment)
479+
@test all(keys(Ψ.treatment) == first_treatments for Ψ in outparameters[tf][1:12])
480+
end
481+
482+
cleanup()
483+
end
484+
408485
end
409486

410487
true

0 commit comments

Comments
 (0)