Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
6a03213
GWIS configuration
joshua-slaughter Nov 17, 2024
c9ebfa5
missing true in gwis test file
joshua-slaughter Nov 18, 2024
4019b38
fixing GWIS test string output
joshua-slaughter Nov 18, 2024
f21d4d1
Changes from GWIS to GWIES; Updated tests and generalized estimand cr…
joshua-slaughter Mar 18, 2025
1b1a740
Adding unwatched files for testing GWEIS
joshua-slaughter Mar 18, 2025
8d63364
Update CI for actions
joshua-slaughter Mar 18, 2025
5e575f7
correct behavior to do first order interactions iteratively across li…
joshua-slaughter Mar 30, 2025
b1ef821
variant mapping file output; gwis is now under gwas functions specifi…
joshua-slaughter Apr 12, 2025
04d2ac5
adding genotype/variant mapping for interpretability of gwas results
joshua-slaughter Apr 13, 2025
02322ab
fix typo
joshua-slaughter Apr 13, 2025
79cb895
Undo premature changes
joshua-slaughter Apr 13, 2025
1c3453f
up manifest
joshua-slaughter Apr 23, 2025
0a4c105
Merge branch 'gwis' of github.com:TARGENE/TargeneCore.jl into gwis
joshua-slaughter Apr 23, 2025
99976c1
Errors in Project.toml
joshua-slaughter Apr 23, 2025
e016964
allow duplicate variant names for estimation
joshua-slaughter May 12, 2025
4516d04
Merge branch 'gwis' of github.com:TARGENE/TargeneCore.jl into gwis
joshua-slaughter May 12, 2025
5ede1f0
Revert "Merge branch 'gwis' of github.com:TARGENE/TargeneCore.jl into…
joshua-slaughter May 12, 2025
abef210
harmonization of estimands; 0x00 will be encoded as major-major relat…
joshua-slaughter May 26, 2025
086ee83
allele orientation fix
joshua-slaughter May 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.10.4"
manifest_format = "2.0"
project_hash = "2b58dcc0ffd21f9ddd37c869a6c32c55af516134"
project_hash = "b1a916e4f68d2fb953c6eba21c0fc7546e7dc5c6"

[[deps.ARFFFiles]]
deps = ["CategoricalArrays", "Dates", "Parsers", "Tables"]
Expand Down
42 changes: 40 additions & 2 deletions src/inputs_from_config.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,22 @@ function estimands_from_variants(
estimand_constructor,
outcomes,
confounders;
extra_treatments=[],
outcome_extra_covariates=[],
positivity_constraint=0.,
verbosity=1
)
estimands = []
for variant in variants
treatments = treatments_from_variant(variant, dataset)

if isempty(extra_treatments)
treatments = treatments_from_variant(variant, dataset)
elseif length(extra_treatments) == 1
treatments = Dict(treatments_from_variant(variant, dataset)..., treatments_from_variant(string(extra_treatments[1]), dataset)...)
else
error("GWIS mode only supports pairwise interaction with one extra treatment.")
end

local Ψ
try
Ψ = factorialEstimands(
Expand Down Expand Up @@ -171,13 +180,15 @@ function treatments_from_variant(variant::String, dataset::DataFrame)
end

function estimands_from_gwas(dataset, variants, outcomes, confounders;
extra_treatments=extra_treatments,
outcome_extra_covariates = [],
positivity_constraint=0.,
verbosity=0
)
variants_groups = Iterators.partition(variants, length(variants) ÷ Threads.nthreads())
estimands_tasks = map(variants_groups) do variants
Threads.@spawn estimands_from_variants(variants, dataset, ATE, outcomes, confounders;
extra_treatments=extra_treatments,
outcome_extra_covariates=outcome_extra_covariates,
positivity_constraint=positivity_constraint,
verbosity=verbosity
Expand All @@ -187,6 +198,24 @@ function estimands_from_gwas(dataset, variants, outcomes, confounders;
return vcat(estimands_partitions...)
end

"""
    estimands_from_gwis(dataset, variants, outcomes, confounders;
        extra_treatments=[],
        outcome_extra_covariates=[],
        positivity_constraint=0.,
        verbosity=0
    )

Build the genome-wide interaction estimands for `variants`.

`variants` is split into contiguous chunks; each chunk is passed to
`estimands_from_variants` with `AIE` as the estimand constructor on its own task
(`Threads.@spawn`), and the per-chunk results are concatenated in chunk order.

# Keywords
- `extra_treatments`: forwarded to `estimands_from_variants`. Defaults to `[]`, the same
  default `estimands_from_variants` declares, so the keyword can be omitted.
- `outcome_extra_covariates`, `positivity_constraint`, `verbosity`: forwarded unchanged
  to `estimands_from_variants`.

# Returns
The concatenation (`vcat`) of the collections returned for each chunk.
"""
function estimands_from_gwis(dataset, variants, outcomes, confounders;
    extra_treatments=[],
    outcome_extra_covariates=[],
    positivity_constraint=0.,
    verbosity=0
    )
    # A floor division by the thread count yields 0 whenever there are fewer variants
    # than threads, and `Iterators.partition` throws on a partition length of 0.
    # Rounding up (and clamping to 1) also keeps the number of tasks <= nthreads.
    chunk_size = max(1, cld(length(variants), Threads.nthreads()))
    variants_groups = Iterators.partition(variants, chunk_size)
    estimands_tasks = map(variants_groups) do variants_group
        Threads.@spawn estimands_from_variants(variants_group, dataset, AIE, outcomes, confounders;
            extra_treatments=extra_treatments,
            outcome_extra_covariates=outcome_extra_covariates,
            positivity_constraint=positivity_constraint,
            verbosity=verbosity
        )
    end
    # `fetch` blocks until each task is done and rethrows any failure from the task.
    estimands_partitions = fetch.(estimands_tasks)
    return vcat(estimands_partitions...)
end

get_only_file_with_suffix(files, suffix) = files[only(findall(x -> endswith(x, suffix), files))]

Expand All @@ -210,7 +239,7 @@ function get_genotypes_from_beds(bedprefix)
end

function make_genotypes(genotype_prefix, config, call_threshold)
genotypes = if config["type"] == "gwas"
genotypes = if config["type"] == "gwas"|| config["type"] == "gwis"
get_genotypes_from_beds(genotype_prefix)
else
variants_set = Set(retrieve_variants_list(config["variants"]))
Expand Down Expand Up @@ -263,10 +292,19 @@ function inputs_from_config(config_file, genotypes_prefix, traits_file, pcs_file
elseif config_type == "gwas"
variants = filter(!=("SAMPLE_ID"), names(genotypes))
estimands_from_gwas(dataset, variants, outcomes, confounders;
extra_treatments=extra_treatments,
outcome_extra_covariates=outcome_extra_covariates,
positivity_constraint=positivity_constraint,
verbosity=verbosity
)
elseif config_type == "gwis"
variants = filter(!=("SAMPLE_ID"), names(genotypes))
estimands_from_gwis(dataset, variants, outcomes, confounders;
extra_treatments=extra_treatments,
outcome_extra_covariates=outcome_extra_covariates,
positivity_constraint=positivity_constraint,
verbosity=verbosity
)
else
throw(ArgumentError(string("Unknown extraction type: ", config_type, ", use any of: (flat, groups, gwas)")))
end
Expand Down
10 changes: 10 additions & 0 deletions test/data/config_gwis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Extraction type. "gwis" makes inputs_from_config build its estimands with
# estimands_from_gwis.
type: gwis

# Treatment combined with every variant. estimands_from_variants raises an error when
# more than one entry is listed here.
extra_treatments:
- 22001

# NOTE(review): presumably forwarded as the `outcome_extra_covariates` keyword of
# estimands_from_gwis; confirm where inputs_from_config reads this key.
outcome_extra_covariates:
- COV_1

# NOTE(review): presumably extra confounders used in addition to the PCs; the code that
# reads this key is not shown here, confirm in inputs_from_config.
extra_confounders:
- 21003
113 changes: 113 additions & 0 deletions test/inputs_from_gwis_config.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
module TestGwisEstimands

using Test
using SnpArrays
using TargeneCore
using Arrow
using DataFrames
using Serialization
using TMLE
using CSV

# Test directory of the TargeneCore package; every fixture path below is built from it.
TESTDIR = joinpath(pkgdir(TargeneCore), "test")

# Evaluate testutils.jl inside this test module.
include(joinpath(TESTDIR, "testutils.jl"))

"""
    get_summary_stats(estimands)

Count the estimands per outcome.

Returns a `DataFrame` with one row per distinct outcome (column `OUTCOME`) and the
number of estimands targeting it (column `nrow`), sorted by `OUTCOME`.
"""
function get_summary_stats(estimands)
    outcome_of_each = map(TargeneCore.get_outcome, estimands)
    table = DataFrame(ESTIMAND = estimands, OUTCOME = outcome_of_each)
    counts_by_outcome = combine(groupby(table, :OUTCOME), nrow)
    return sort(counts_by_outcome, :OUTCOME)
end

"""
    check_estimands_levels_order(estimands)

Assert, with `@test`, that the components of every estimand use the expected
control/case levels for the variant.

The variant is taken to be the second key of the first component's `treatment_values`.
An estimand with two components must go `0x00 -> 0x01` then `0x01 -> 0x02`; otherwise
it must have exactly one component, which may be either of those two transitions.
"""
function check_estimands_levels_order(estimands)
    first_transition = (control = 0x00, case = 0x01)
    second_transition = (control = 0x01, case = 0x02)
    for Ψ in estimands
        components = Ψ.args
        variant = collect(keys(components[1].treatment_values))[2]
        if length(components) != 2
            # `only` throws unless there is exactly one component.
            levels = only(components).treatment_values[variant]
            @test levels == first_transition || levels == second_transition
        else
            # Both components present: they must appear in increasing order.
            @test components[1].treatment_values[variant] == first_transition
            @test components[2].treatment_values[variant] == second_transition
        end
    end
end

@testset "Test inputs_from_config gwis: no positivity constraint" begin
    # Run the command-line entry point end to end, writing outputs to a scratch directory.
    outdir = mktempdir()
    datadir = joinpath(TESTDIR, "data")
    cli_args = [
        "estimation-inputs",
        joinpath(datadir, "config_gwis.yaml"),
        string("--traits-file=", joinpath(datadir, "ukbb_traits.csv")),
        string("--pcs-file=", joinpath(datadir, "ukbb_pcs.csv")),
        string("--genotypes-prefix=", joinpath(datadir, "ukbb", "genotypes", "ukbb_1.")),
        string("--outprefix=", joinpath(outdir, "final")),
        "--batchsize=5",
        "--verbosity=0",
        "--positivity-constraint=0"
    ]
    copy!(ARGS, cli_args)
    TargeneCore.julia_main()

    # The dataset written next to the estimands has the expected shape.
    arrow_path = joinpath(outdir, "final.data.arrow")
    dataset = DataFrame(Arrow.Table(arrow_path))
    @test size(dataset) == (1940, 886)

    # Gather the estimands from every serialized batch file.
    estimands = []
    batch_files = filter(endswith("jls"), readdir(outdir, join=true))
    for batch_file in batch_files
        append!(estimands, deserialize(batch_file).estimands)
    end
    @test all(Ψ -> Ψ isa JointEstimand, estimands)

    # There are 875 variants in the dataset, hence 875 estimands per outcome.
    expected_stats = DataFrame(
        OUTCOME = [:BINARY_1, :BINARY_2, :CONTINUOUS_1, :CONTINUOUS_2, :TREAT_1],
        nrow = fill(875, 5)
    )
    @test get_summary_stats(estimands) == expected_stats

    check_estimands_levels_order(estimands)
end


@testset "Test inputs_from_config gwis: positivity constraint" begin
    # Same end-to-end run as without a constraint, but with a positivity constraint of 0.2.
    outdir = mktempdir()
    datadir = joinpath(TESTDIR, "data")
    cli_args = [
        "estimation-inputs",
        joinpath(datadir, "config_gwis.yaml"),
        string("--traits-file=", joinpath(datadir, "ukbb_traits.csv")),
        string("--pcs-file=", joinpath(datadir, "ukbb_pcs.csv")),
        string("--genotypes-prefix=", joinpath(datadir, "ukbb", "genotypes", "ukbb_1.")),
        string("--outprefix=", joinpath(outdir, "final")),
        "--batchsize=5",
        "--verbosity=0",
        "--positivity-constraint=0.2"
    ]
    copy!(ARGS, cli_args)
    TargeneCore.julia_main()

    # The dataset shape is expected to match the unconstrained run.
    arrow_path = joinpath(outdir, "final.data.arrow")
    dataset = DataFrame(Arrow.Table(arrow_path))
    @test size(dataset) == (1940, 886)

    # Gather the estimands from every serialized batch file.
    estimands = []
    batch_files = filter(endswith("jls"), readdir(outdir, join=true))
    for batch_file in batch_files
        append!(estimands, deserialize(batch_file).estimands)
    end
    @test all(Ψ -> Ψ isa JointEstimand, estimands)

    # The positivity constraint reduces the number of variants: 142 estimands per outcome.
    expected_stats = DataFrame(
        OUTCOME = [:BINARY_1, :BINARY_2, :CONTINUOUS_1, :CONTINUOUS_2, :TREAT_1],
        nrow = fill(142, 5)
    )
    @test get_summary_stats(estimands) == expected_stats

    check_estimands_levels_order(estimands)
end


end
# Value returned by `include` for this file; runtests.jl wraps that include in `@test`.
true
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ TESTDIR = joinpath(pkgdir(TargeneCore), "test")
@test include(joinpath(TESTDIR, "inputs_from_estimands.jl"))
@test include(joinpath(TESTDIR, "inputs_from_config.jl"))
@test include(joinpath(TESTDIR, "inputs_from_gwas_config.jl"))
@test include(joinpath(TESTDIR, "inputs_from_gwis_config.jl"))
@test include(joinpath(TESTDIR, "sieve_variance.jl"))
end
Loading