Skip to content

Commit 436b1ea

Browse files
authored
Merge pull request #778 from jeromekelleher/mutations-over-samples-not-missing
Fix bugs in missing data handling.
2 parents c5c639f + 5b4a541 commit 436b1ea

File tree

5 files changed

+394
-25
lines changed

5 files changed

+394
-25
lines changed

c/CHANGELOG.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@
1212
from the MRCAs of samples in the simplified tree sequence back to the roots
1313
in the input tree sequence (:user:`jeromekelleher`, :issue:`775`, :pr:`782`).
1414

15+
**Bugfixes**
16+
17+
- :issue:`777` - Mutations over isolated samples were incorrectly decoded as
18+
missing data. (:user:`jeromekelleher`, :pr:`778`)
19+
20+
- :issue:`776` - Fix a segfault when a partial list of samples
21+
was provided to the ``variants`` iterator. (:user:`jeromekelleher`, :pr:`778`)
22+
1523
---------------------
1624
[0.99.4] - 2020-08-12
1725
---------------------

c/tests/test_genotypes.c

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ test_simplest_missing_data_user_alleles(void)
9393
tsk_variant_t *var;
9494
const char *alleles[] = { "A", NULL };
9595
int ret;
96+
tsk_id_t samples[] = { 0 };
9697

9798
tsk_treeseq_from_text(&ts, 1, nodes, "", NULL, sites, NULL, NULL, NULL, 0);
9899
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
@@ -110,6 +111,17 @@ test_simplest_missing_data_user_alleles(void)
110111
CU_ASSERT_EQUAL_FATAL(ret, 0);
111112
tsk_vargen_free(&vargen);
112113

114+
ret = tsk_vargen_init(&vargen, &ts, samples, 1, alleles, 0);
115+
CU_ASSERT_EQUAL_FATAL(ret, 0);
116+
ret = tsk_vargen_next(&vargen, &var);
117+
CU_ASSERT_EQUAL_FATAL(ret, 1);
118+
CU_ASSERT_EQUAL(var->site->position, 0.0);
119+
CU_ASSERT_TRUE(var->has_missing_data);
120+
CU_ASSERT_EQUAL(var->genotypes.i8[0], TSK_MISSING_DATA);
121+
ret = tsk_vargen_next(&vargen, &var);
122+
CU_ASSERT_EQUAL_FATAL(ret, 0);
123+
tsk_vargen_free(&vargen);
124+
113125
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, TSK_16_BIT_GENOTYPES);
114126
CU_ASSERT_EQUAL_FATAL(ret, 0);
115127
ret = tsk_vargen_next(&vargen, &var);
@@ -122,6 +134,17 @@ test_simplest_missing_data_user_alleles(void)
122134
CU_ASSERT_EQUAL_FATAL(ret, 0);
123135
tsk_vargen_free(&vargen);
124136

137+
ret = tsk_vargen_init(&vargen, &ts, samples, 1, alleles, TSK_16_BIT_GENOTYPES);
138+
CU_ASSERT_EQUAL_FATAL(ret, 0);
139+
ret = tsk_vargen_next(&vargen, &var);
140+
CU_ASSERT_EQUAL_FATAL(ret, 1);
141+
CU_ASSERT_EQUAL(var->site->position, 0.0);
142+
CU_ASSERT_TRUE(var->has_missing_data);
143+
CU_ASSERT_EQUAL(var->genotypes.i8[0], TSK_MISSING_DATA);
144+
ret = tsk_vargen_next(&vargen, &var);
145+
CU_ASSERT_EQUAL_FATAL(ret, 0);
146+
tsk_vargen_free(&vargen);
147+
125148
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);
126149
CU_ASSERT_EQUAL_FATAL(ret, 0);
127150
ret = tsk_vargen_next(&vargen, &var);
@@ -137,6 +160,161 @@ test_simplest_missing_data_user_alleles(void)
137160
tsk_treeseq_free(&ts);
138161
}
139162

163+
static void
164+
test_simplest_missing_data_mutations(void)
165+
{
166+
const char *nodes = "1 0 0\n"
167+
"1 0 0\n";
168+
const char *sites = "0.0 A\n";
169+
const char *mutations = "0 0 T -1\n";
170+
tsk_treeseq_t ts;
171+
tsk_vargen_t vargen;
172+
tsk_variant_t *var;
173+
const char *alleles[] = { "A", "T", NULL };
174+
int ret;
175+
tsk_id_t samples[] = { 0 };
176+
177+
tsk_treeseq_from_text(&ts, 1, nodes, "", NULL, sites, mutations, NULL, NULL, 0);
178+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
179+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 1);
180+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 1);
181+
182+
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, alleles, 0);
183+
CU_ASSERT_EQUAL_FATAL(ret, 0);
184+
ret = tsk_vargen_next(&vargen, &var);
185+
CU_ASSERT_EQUAL_FATAL(ret, 1);
186+
CU_ASSERT_EQUAL(var->site->position, 0.0);
187+
CU_ASSERT_TRUE(var->has_missing_data);
188+
CU_ASSERT_EQUAL(var->genotypes.i8[0], 1);
189+
CU_ASSERT_EQUAL(var->genotypes.i8[1], TSK_MISSING_DATA);
190+
ret = tsk_vargen_next(&vargen, &var);
191+
CU_ASSERT_EQUAL_FATAL(ret, 0);
192+
tsk_vargen_free(&vargen);
193+
194+
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, alleles, TSK_16_BIT_GENOTYPES);
195+
CU_ASSERT_EQUAL_FATAL(ret, 0);
196+
ret = tsk_vargen_next(&vargen, &var);
197+
CU_ASSERT_EQUAL_FATAL(ret, 1);
198+
CU_ASSERT_EQUAL(var->site->position, 0.0);
199+
CU_ASSERT_TRUE(var->has_missing_data);
200+
CU_ASSERT_EQUAL(var->genotypes.i16[0], 1);
201+
CU_ASSERT_EQUAL(var->genotypes.i16[1], TSK_MISSING_DATA);
202+
ret = tsk_vargen_next(&vargen, &var);
203+
CU_ASSERT_EQUAL_FATAL(ret, 0);
204+
tsk_vargen_free(&vargen);
205+
206+
ret = tsk_vargen_init(&vargen, &ts, samples, 1, alleles, 0);
207+
CU_ASSERT_EQUAL_FATAL(ret, 0);
208+
tsk_vargen_print_state(&vargen, _devnull);
209+
ret = tsk_vargen_next(&vargen, &var);
210+
CU_ASSERT_EQUAL_FATAL(ret, 1);
211+
CU_ASSERT_EQUAL(var->site->position, 0.0);
212+
CU_ASSERT_FALSE(var->has_missing_data);
213+
CU_ASSERT_EQUAL(var->genotypes.i8[0], 1);
214+
ret = tsk_vargen_next(&vargen, &var);
215+
CU_ASSERT_EQUAL_FATAL(ret, 0);
216+
tsk_vargen_free(&vargen);
217+
218+
ret = tsk_vargen_init(&vargen, &ts, samples, 1, alleles, TSK_16_BIT_GENOTYPES);
219+
CU_ASSERT_EQUAL_FATAL(ret, 0);
220+
tsk_vargen_print_state(&vargen, _devnull);
221+
ret = tsk_vargen_next(&vargen, &var);
222+
CU_ASSERT_EQUAL_FATAL(ret, 1);
223+
CU_ASSERT_EQUAL(var->site->position, 0.0);
224+
CU_ASSERT_FALSE(var->has_missing_data);
225+
CU_ASSERT_EQUAL(var->genotypes.i16[0], 1);
226+
ret = tsk_vargen_next(&vargen, &var);
227+
CU_ASSERT_EQUAL_FATAL(ret, 0);
228+
tsk_vargen_free(&vargen);
229+
230+
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);
231+
CU_ASSERT_EQUAL_FATAL(ret, 0);
232+
ret = tsk_vargen_next(&vargen, &var);
233+
CU_ASSERT_EQUAL_FATAL(ret, 1);
234+
CU_ASSERT_EQUAL(var->site->position, 0.0);
235+
CU_ASSERT_FALSE(var->has_missing_data);
236+
CU_ASSERT_EQUAL(var->genotypes.i8[0], 1);
237+
CU_ASSERT_EQUAL(var->genotypes.i8[1], 0);
238+
ret = tsk_vargen_next(&vargen, &var);
239+
CU_ASSERT_EQUAL_FATAL(ret, 0);
240+
tsk_vargen_free(&vargen);
241+
242+
tsk_treeseq_free(&ts);
243+
}
244+
245+
static void
246+
test_simplest_missing_data_mutations_all_samples(void)
247+
{
248+
const char *nodes = "1 0 0\n"
249+
"1 0 0\n";
250+
const char *sites = "0.0 A\n";
251+
const char *mutations = "0 0 T -1\n"
252+
"0 1 T -1\n";
253+
tsk_treeseq_t ts;
254+
tsk_vargen_t vargen;
255+
tsk_variant_t *var;
256+
const char *alleles[] = { "A", "T", NULL };
257+
int ret;
258+
tsk_id_t samples[] = { 0, 1 };
259+
260+
tsk_treeseq_from_text(&ts, 1, nodes, "", NULL, sites, mutations, NULL, NULL, 0);
261+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
262+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 1);
263+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 2);
264+
265+
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, alleles, 0);
266+
CU_ASSERT_EQUAL_FATAL(ret, 0);
267+
ret = tsk_vargen_next(&vargen, &var);
268+
CU_ASSERT_EQUAL_FATAL(ret, 1);
269+
CU_ASSERT_EQUAL(var->site->position, 0.0);
270+
CU_ASSERT_FALSE(var->has_missing_data);
271+
CU_ASSERT_EQUAL(var->genotypes.i8[0], 1);
272+
CU_ASSERT_EQUAL(var->genotypes.i8[1], 1);
273+
ret = tsk_vargen_next(&vargen, &var);
274+
CU_ASSERT_EQUAL_FATAL(ret, 0);
275+
tsk_vargen_free(&vargen);
276+
277+
ret = tsk_vargen_init(&vargen, &ts, samples, 2, alleles, 0);
278+
CU_ASSERT_EQUAL_FATAL(ret, 0);
279+
tsk_vargen_print_state(&vargen, _devnull);
280+
ret = tsk_vargen_next(&vargen, &var);
281+
CU_ASSERT_EQUAL_FATAL(ret, 1);
282+
CU_ASSERT_EQUAL(var->site->position, 0.0);
283+
CU_ASSERT_FALSE(var->has_missing_data);
284+
CU_ASSERT_EQUAL(var->genotypes.i8[0], 1);
285+
CU_ASSERT_EQUAL(var->genotypes.i8[1], 1);
286+
ret = tsk_vargen_next(&vargen, &var);
287+
CU_ASSERT_EQUAL_FATAL(ret, 0);
288+
tsk_vargen_free(&vargen);
289+
290+
ret = tsk_vargen_init(&vargen, &ts, samples, 2, alleles, TSK_16_BIT_GENOTYPES);
291+
CU_ASSERT_EQUAL_FATAL(ret, 0);
292+
tsk_vargen_print_state(&vargen, _devnull);
293+
ret = tsk_vargen_next(&vargen, &var);
294+
CU_ASSERT_EQUAL_FATAL(ret, 1);
295+
CU_ASSERT_EQUAL(var->site->position, 0.0);
296+
CU_ASSERT_FALSE(var->has_missing_data);
297+
CU_ASSERT_EQUAL(var->genotypes.i16[0], 1);
298+
CU_ASSERT_EQUAL(var->genotypes.i16[1], 1);
299+
ret = tsk_vargen_next(&vargen, &var);
300+
CU_ASSERT_EQUAL_FATAL(ret, 0);
301+
tsk_vargen_free(&vargen);
302+
303+
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);
304+
CU_ASSERT_EQUAL_FATAL(ret, 0);
305+
ret = tsk_vargen_next(&vargen, &var);
306+
CU_ASSERT_EQUAL_FATAL(ret, 1);
307+
CU_ASSERT_EQUAL(var->site->position, 0.0);
308+
CU_ASSERT_FALSE(var->has_missing_data);
309+
CU_ASSERT_EQUAL(var->genotypes.i8[0], 1);
310+
CU_ASSERT_EQUAL(var->genotypes.i8[1], 1);
311+
ret = tsk_vargen_next(&vargen, &var);
312+
CU_ASSERT_EQUAL_FATAL(ret, 0);
313+
tsk_vargen_free(&vargen);
314+
315+
tsk_treeseq_free(&ts);
316+
}
317+
140318
static void
141319
test_single_tree_user_alleles(void)
142320
{
@@ -733,6 +911,9 @@ main(int argc, char **argv)
733911
{ "test_simplest_missing_data", test_simplest_missing_data },
734912
{ "test_simplest_missing_data_user_alleles",
735913
test_simplest_missing_data_user_alleles },
914+
{ "test_simplest_missing_data_mutations", test_simplest_missing_data_mutations },
915+
{ "test_simplest_missing_data_mutations_all_samples",
916+
test_simplest_missing_data_mutations_all_samples },
736917
{ "test_single_tree_user_alleles", test_single_tree_user_alleles },
737918
{ "test_single_tree_char_alphabet", test_single_tree_char_alphabet },
738919
{ "test_single_tree_binary_alphabet", test_single_tree_binary_alphabet },

0 commit comments

Comments
 (0)