# Tests for the `estimation-inputs` command run with GWEIS configuration files.
# Each scenario runs the command-line entry point, then checks the generated dataset
# and the serialized estimands.
module TestGweisEstimands

using Test
using SnpArrays
using TargeneCore
using Arrow
using DataFrames
using Serialization
using TMLE
using CSV

# Root of the package test directory; the fixtures used below live in `TESTDIR/data`.
TESTDIR = joinpath(pkgdir(TargeneCore), "test")

include(joinpath(TESTDIR, "testutils.jl"))

"""
    get_summary_stats(estimands)

Return a `DataFrame` with one row per outcome (column `OUTCOME`) and the number of
estimands targeting that outcome (column `nrow`), sorted by `OUTCOME`.
"""
function get_summary_stats(estimands)
    outcomes = [TargeneCore.get_outcome(Ψ) for Ψ in estimands]
    results = DataFrame(ESTIMAND = estimands, OUTCOME = outcomes)
    return sort(combine(groupby(results, :OUTCOME), nrow), :OUTCOME)
end

"""
    check_estimands_levels_interactions(estimands)

Check the variant's control/case levels of every estimand in `estimands`.

The variant is taken to be the last key of the first component's `treatment_values`.
If an estimand has two components, the first must encode the 0 -> 1 change and the
second the 1 -> 2 change. Otherwise the estimand must have exactly one component,
encoding one of these two changes.
"""
function check_estimands_levels_interactions(estimands)
    for Ψ in estimands
        # The variant should always be the last key
        variant = last(collect(keys(Ψ.args[1].treatment_values)))
        if length(Ψ.args) == 2
            @test Ψ.args[1].treatment_values[variant] == (control = 0x00, case = 0x01)
            @test Ψ.args[2].treatment_values[variant] == (control = 0x01, case = 0x02)
        else
            # `only` throws unless there is exactly one component
            arg = only(Ψ.args)
            @test arg.treatment_values[variant] == (control = 0x00, case = 0x01) ||
                arg.treatment_values[variant] == (control = 0x01, case = 0x02)
        end
    end
end

"""
    run_gweis_estimation_inputs(config_file; positivity_constraint)

Run the `estimation-inputs` command through `TargeneCore.julia_main()` and return the
temporary directory holding the outputs (written with the `final` prefix).

`config_file` is a file name inside `TESTDIR/data`. `positivity_constraint` is appended
verbatim (via `string`) to the `--positivity-constraint=` flag.
"""
function run_gweis_estimation_inputs(config_file; positivity_constraint)
    tmpdir = mktempdir()
    datadir = joinpath(TESTDIR, "data")
    # `julia_main` reads its arguments from the global `ARGS`, so overwrite it in place.
    copy!(ARGS, [
        "estimation-inputs",
        joinpath(datadir, config_file),
        string("--traits-file=", joinpath(datadir, "ukbb_traits.csv")),
        string("--pcs-file=", joinpath(datadir, "ukbb_pcs.csv")),
        string("--genotypes-prefix=", joinpath(datadir, "ukbb", "genotypes", "ukbb_1.")),
        string("--outprefix=", joinpath(tmpdir, "final")),
        "--batchsize=5",
        "--verbosity=0",
        string("--positivity-constraint=", positivity_constraint),
    ])
    TargeneCore.julia_main()
    return tmpdir
end

"""
    load_gweis_estimands(outdir)

Deserialize every file in `outdir` whose name ends with `jls` and return the
concatenation of their `estimands` fields.
"""
function load_gweis_estimands(outdir)
    estimands = []
    for file in readdir(outdir; join = true)
        endswith(file, "jls") || continue
        append!(estimands, deserialize(file).estimands)
    end
    return estimands
end

@testset "Test inputs_from_config gweis: no positivity constraint" begin
    tmpdir = run_gweis_estimation_inputs(
        "config_gweis_first_order.yaml"; positivity_constraint = "0"
    )
    # Check dataset
    dataset = DataFrame(Arrow.Table(joinpath(tmpdir, "final.data.arrow")))
    @test size(dataset) == (1940, 886)

    # Check estimands
    estimands = load_gweis_estimands(tmpdir)
    @test all(e isa JointEstimand for e in estimands)

    # There are 875 variants in the dataset
    summary_stats = get_summary_stats(estimands)
    @test summary_stats == DataFrame(
        OUTCOME = [:BINARY_1, :BINARY_2, :CONTINUOUS_1, :CONTINUOUS_2, :TREAT_1],
        nrow = fill(875, 5),
    )

    check_estimands_levels_interactions(estimands)
end

@testset "Test inputs_from_config gweis: positivity constraint" begin
    tmpdir = run_gweis_estimation_inputs(
        "config_gweis_first_order.yaml"; positivity_constraint = "0.2"
    )
    # Check dataset
    dataset = DataFrame(Arrow.Table(joinpath(tmpdir, "final.data.arrow")))
    @test size(dataset) == (1940, 886)

    # Check estimands
    estimands = load_gweis_estimands(tmpdir)
    @test all(e isa JointEstimand for e in estimands)

    # The positivity constraint reduces the number of variants
    summary_stats = get_summary_stats(estimands)
    @test summary_stats == DataFrame(
        OUTCOME = [:BINARY_1, :BINARY_2, :CONTINUOUS_1, :CONTINUOUS_2, :TREAT_1],
        nrow = fill(142, 5),
    )

    check_estimands_levels_interactions(estimands)
end

@testset "Test inputs_from_config gweis: no positivity constraint and four-point interaction" begin
    tmpdir = run_gweis_estimation_inputs(
        "config_gweis_higher_order.yaml"; positivity_constraint = "0"
    )
    # Check dataset
    dataset = DataFrame(Arrow.Table(joinpath(tmpdir, "final.data.arrow")))
    @test size(dataset) == (1940, 886)

    # Check estimands
    estimands = load_gweis_estimands(tmpdir)
    @test all(e isa JointEstimand for e in estimands)

    # There are 875 variants in the dataset
    summary_stats = get_summary_stats(estimands)
    @test summary_stats == DataFrame(
        OUTCOME = [:CONTINUOUS_1, :CONTINUOUS_2, :TREAT_1],
        nrow = fill(875, 3),
    )

    check_estimands_levels_interactions(estimands)
end

@testset "Test inputs_from_config gweis: positivity constraint and four-point interaction" begin
    tmpdir = run_gweis_estimation_inputs(
        "config_gweis_higher_order.yaml"; positivity_constraint = "0.02"
    )
    # Check dataset
    dataset = DataFrame(Arrow.Table(joinpath(tmpdir, "final.data.arrow")))
    @test size(dataset) == (1940, 886)

    # Check estimands
    estimands = load_gweis_estimands(tmpdir)
    @test all(e isa JointEstimand for e in estimands)

    # There are 784 treatments in the dataset after positivity_constraint
    summary_stats = get_summary_stats(estimands)
    @test summary_stats == DataFrame(
        OUTCOME = [:CONTINUOUS_1, :CONTINUOUS_2, :TREAT_1],
        nrow = fill(784, 3),
    )

    check_estimands_levels_interactions(estimands)
end

end

true