Skip to content

Commit 99a9b34

Browse files
committed
Check mutation parents column is correct for a tree sequence to be valid
1 parent d30fb27 commit 99a9b34

18 files changed

+585
-172
lines changed

c/CHANGELOG.rst

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,32 @@
77
- Remove ``tsk_diff_iter_t`` and associated functions.
88
(:user:`benjeffery`, :pr:`3221`, :issue:`2797`).
99

10+
- ``tsk_treeseq_init`` now requires that mutation parents in the table collection
11+
are correct and consistent with the topology of the tree at each mutation site.
12+
Returns ``TSK_ERR_BAD_MUTATION_PARENT`` if this is not the case, or
13+
``TSK_ERR_MUTATION_PARENT_AFTER_CHILD`` if the mutations are not in an order
14+
compatible with the correct mutation parent.
15+
(:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`).
16+
17+
**Features**
18+
19+
- Add ``TSK_TS_INIT_COMPUTE_MUTATION_PARENTS`` to ``tsk_treeseq_init``
20+
to compute mutation parents from the tree sequence topology.
21+
Note that the mutations must be in the correct order.
22+
(:user:`benjeffery`, :issue:`2757`, :pr:`3212`).
23+
24+
- Add ``TSK_CHECK_MUTATION_PARENTS`` option to ``tsk_table_collection_check_integrity``
25+
to check that mutation parents are consistent with the tree sequence topology.
26+
This option implies ``TSK_CHECK_TREES``.
27+
(:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`).
28+
29+
- Add the ``TSK_NO_CHECK_INTEGRITY`` option to ``tsk_table_collection_compute_mutation_parents``
30+
to skip the integrity checks that are normally run when computing mutation parents.
31+
This is useful for speeding up the computation of mutation parents when the
32+
tree sequence is certainly known to be valid.
33+
(:user:`benjeffery`, :pr:`3212`).
34+
35+
1036
--------------------
1137
[1.1.4] - 2025-03-31
1238
--------------------

c/tests/test_tables.c

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9038,7 +9038,7 @@ test_sort_tables_mutation_times(void)
90389038
CU_ASSERT_EQUAL_FATAL(ret, 0);
90399039

90409040
/* Check to make sure we have legal mutations */
9041-
ret = tsk_treeseq_init(&ts, &tables, 0);
9041+
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
90429042
CU_ASSERT_EQUAL_FATAL(ret, 0);
90439043

90449044
ret = tsk_treeseq_copy_tables(&ts, &t1, 0);
@@ -10717,6 +10717,64 @@ test_table_collection_check_integrity_bad_indexes(void)
1071710717
tsk_table_collection_free(&tables);
1071810718
}
1071910719

10720+
static void
10721+
test_check_integrity_bad_mutation_parent_topology(void)
10722+
{
10723+
int ret;
10724+
tsk_id_t ret_trees;
10725+
tsk_table_collection_t tables;
10726+
const char *sites = "0 0\n";
10727+
/* Make a mutation on a parallel branch the parent*/
10728+
const char *bad_mutations = "0 0 1 -1\n"
10729+
"0 1 1 0\n";
10730+
10731+
/* A mutation above is set as child*/
10732+
const char *reverse_mutations = "0 0 1 -1\n"
10733+
"0 4 1 0\n";
10734+
10735+
const char *reverse_sites = "0.5 0\n"
10736+
"0 0\n";
10737+
10738+
ret = tsk_table_collection_init(&tables, 0);
10739+
CU_ASSERT_EQUAL_FATAL(ret, 0);
10740+
10741+
tables.sequence_length = 1;
10742+
parse_nodes(single_tree_ex_nodes, &tables.nodes);
10743+
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
10744+
parse_edges(single_tree_ex_edges, &tables.edges);
10745+
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);
10746+
parse_sites(sites, &tables.sites);
10747+
CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 1);
10748+
parse_mutations(bad_mutations, &tables.mutations);
10749+
CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 2);
10750+
tables.sequence_length = 1.0;
10751+
10752+
ret = tsk_table_collection_build_index(&tables, 0);
10753+
CU_ASSERT_EQUAL_FATAL(ret, 0);
10754+
10755+
ret_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
10756+
CU_ASSERT_EQUAL_FATAL(ret_trees, 1);
10757+
ret_trees
10758+
= tsk_table_collection_check_integrity(&tables, TSK_CHECK_MUTATION_PARENTS);
10759+
CU_ASSERT_EQUAL_FATAL(ret_trees, TSK_ERR_BAD_MUTATION_PARENT);
10760+
10761+
parse_mutations(reverse_mutations, &tables.mutations);
10762+
ret_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
10763+
CU_ASSERT_EQUAL_FATAL(ret_trees, 1);
10764+
ret_trees
10765+
= tsk_table_collection_check_integrity(&tables, TSK_CHECK_MUTATION_PARENTS);
10766+
CU_ASSERT_EQUAL_FATAL(ret_trees, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
10767+
10768+
/* Now check that TSK_CHECK_MUTATION_PARENTS implies TSK_CHECK_TREES
10769+
by triggering an error with reversed sites */
10770+
parse_sites(reverse_sites, &tables.sites);
10771+
ret_trees
10772+
= tsk_table_collection_check_integrity(&tables, TSK_CHECK_MUTATION_PARENTS);
10773+
CU_ASSERT_EQUAL_FATAL(ret_trees, TSK_ERR_UNSORTED_SITES);
10774+
10775+
tsk_table_collection_free(&tables);
10776+
}
10777+
1072010778
static void
1072110779
test_table_collection_subset_with_options(tsk_flags_t options)
1072210780
{
@@ -11771,6 +11829,8 @@ main(int argc, char **argv)
1177111829
test_table_collection_check_integrity_bad_indexes_example },
1177211830
{ "test_table_collection_check_integrity_bad_indexes",
1177311831
test_table_collection_check_integrity_bad_indexes },
11832+
{ "test_check_integrity_bad_mutation_parent_topology",
11833+
test_check_integrity_bad_mutation_parent_topology },
1177411834
{ "test_table_collection_subset", test_table_collection_subset },
1177511835
{ "test_table_collection_subset_unsorted",
1177611836
test_table_collection_subset_unsorted },

c/tests/test_trees.c

Lines changed: 115 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3815,16 +3815,16 @@ test_simplest_mutation_edges(void)
38153815
"1 2 2 0\n";
38163816
const char *sites = "0.5 0\n"
38173817
"1.5 0\n";
3818-
const char *mutations = "0 1 1\n"
3818+
const char *mutations = "0 2 1\n"
3819+
"0 1 1\n"
38193820
"0 0 1\n"
3820-
"0 2 1\n"
3821-
"1 0 1\n"
3821+
"1 2 1\n"
38223822
"1 1 1\n"
3823-
"1 2 1\n";
3823+
"1 0 1\n";
38243824
tsk_treeseq_t ts;
38253825
tsk_tree_t tree;
38263826
/* We have mutations over roots, samples and just isolated nodes */
3827-
tsk_id_t mutation_edges[] = { -1, 0, -1, 1, -1, -1 };
3827+
tsk_id_t mutation_edges[] = { -1, -1, 0, -1, -1, 1 };
38283828
tsk_size_t i, j, k, t;
38293829
tsk_mutation_t mut;
38303830
tsk_site_t site;
@@ -4167,7 +4167,7 @@ test_single_tree_bad_mutations(void)
41674167
ret = tsk_treeseq_init(&ts, &tables, load_flags);
41684168
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
41694169
tsk_treeseq_free(&ts);
4170-
tables.mutations.parent[3] = TSK_NULL;
4170+
tables.mutations.parent[3] = 2;
41714171

41724172
/* time < node time */
41734173
tables.mutations.time[2] = 0;
@@ -8559,6 +8559,112 @@ test_init_take_ownership_no_edge_metadata(void)
85598559
tsk_treeseq_free(&ts);
85608560
}
85618561

8562+
static void
8563+
test_init_compute_mutation_parents(void)
8564+
{
8565+
int ret;
8566+
tsk_table_collection_t *tables, *tables2;
8567+
tsk_treeseq_t ts;
8568+
const char *sites = "0 0\n";
8569+
/* Make a mutation on a parallel branch the parent*/
8570+
const char *bad_mutations = "0 0 1 -1\n"
8571+
"0 1 1 0\n";
8572+
8573+
tables = tsk_malloc(sizeof(tsk_table_collection_t));
8574+
CU_ASSERT_NOT_EQUAL_FATAL(tables, NULL);
8575+
tables2 = tsk_malloc(sizeof(tsk_table_collection_t));
8576+
CU_ASSERT_NOT_EQUAL_FATAL(tables2, NULL);
8577+
8578+
CU_ASSERT_FATAL(tables != NULL);
8579+
ret = tsk_table_collection_init(tables, 0);
8580+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8581+
8582+
tables->sequence_length = 1;
8583+
parse_nodes(single_tree_ex_nodes, &tables->nodes);
8584+
CU_ASSERT_EQUAL_FATAL(tables->nodes.num_rows, 7);
8585+
parse_edges(single_tree_ex_edges, &tables->edges);
8586+
CU_ASSERT_EQUAL_FATAL(tables->edges.num_rows, 6);
8587+
parse_sites(sites, &tables->sites);
8588+
CU_ASSERT_EQUAL_FATAL(tables->sites.num_rows, 1);
8589+
parse_mutations(bad_mutations, &tables->mutations);
8590+
CU_ASSERT_EQUAL_FATAL(tables->mutations.num_rows, 2);
8591+
tables->sequence_length = 1.0;
8592+
ret = tsk_table_collection_copy(tables, tables2, 0);
8593+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8594+
8595+
ret = tsk_treeseq_init(&ts, tables, TSK_TS_INIT_BUILD_INDEXES);
8596+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_MUTATION_PARENT);
8597+
tsk_treeseq_free(&ts);
8598+
8599+
ret = tsk_treeseq_init(
8600+
&ts, tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
8601+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8602+
tsk_treeseq_free(&ts);
8603+
8604+
/* When we use take ownership, the check of parents shouldn't overwrite them*/
8605+
ret = tsk_treeseq_init(&ts, tables, TSK_TAKE_OWNERSHIP | TSK_TS_INIT_BUILD_INDEXES);
8606+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_MUTATION_PARENT);
8607+
CU_ASSERT_EQUAL(tables->mutations.parent[0], -1);
8608+
CU_ASSERT_EQUAL(tables->mutations.parent[1], 0);
8609+
tsk_treeseq_free(&ts);
8610+
8611+
/* When we use take ownership and compute, the tables are overwritten*/
8612+
ret = tsk_treeseq_init(&ts, tables2,
8613+
TSK_TAKE_OWNERSHIP | TSK_TS_INIT_BUILD_INDEXES
8614+
| TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
8615+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8616+
CU_ASSERT_EQUAL(tables2->mutations.parent[0], -1);
8617+
CU_ASSERT_EQUAL(tables2->mutations.parent[1], -1);
8618+
8619+
/* Don't need to free tables as we took ownership */
8620+
tsk_treeseq_free(&ts);
8621+
}
8622+
8623+
static void
8624+
test_init_compute_mutation_parents_errors(void)
8625+
{
8626+
int ret;
8627+
tsk_id_t row_ret;
8628+
tsk_table_collection_t tables;
8629+
tsk_treeseq_t ts;
8630+
const char *sites = "0.5 0\n"
8631+
"0 0\n";
8632+
8633+
ret = tsk_table_collection_init(&tables, 0);
8634+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8635+
8636+
tables.sequence_length = 1;
8637+
parse_nodes(single_tree_ex_nodes, &tables.nodes);
8638+
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
8639+
parse_edges(single_tree_ex_edges, &tables.edges);
8640+
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);
8641+
parse_sites(sites, &tables.sites);
8642+
CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 2);
8643+
tables.sequence_length = 1.0;
8644+
8645+
ret = tsk_treeseq_init(
8646+
&ts, &tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
8647+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_SITES);
8648+
tsk_treeseq_free(&ts);
8649+
8650+
tsk_site_table_clear(&tables.sites);
8651+
row_ret = tsk_site_table_add_row(&tables.sites, 0.5, "A", 1, NULL, 0);
8652+
CU_ASSERT_EQUAL_FATAL(row_ret, 0);
8653+
row_ret = tsk_mutation_table_add_row(
8654+
&tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, "A", 1, NULL, 0);
8655+
CU_ASSERT_EQUAL_FATAL(row_ret, 0);
8656+
row_ret = tsk_mutation_table_add_row(
8657+
&tables.mutations, 0, 4, TSK_NULL, TSK_UNKNOWN_TIME, "A", 1, NULL, 0);
8658+
CU_ASSERT_EQUAL_FATAL(row_ret, 1);
8659+
8660+
ret = tsk_treeseq_init(
8661+
&ts, &tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
8662+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
8663+
tsk_treeseq_free(&ts);
8664+
8665+
tsk_table_collection_free(&tables);
8666+
}
8667+
85628668
int
85638669
main(int argc, char **argv)
85648670
{
@@ -8759,6 +8865,9 @@ main(int argc, char **argv)
87598865
test_extend_haplotypes_conflicting_times },
87608866
{ "test_init_take_ownership_no_edge_metadata",
87618867
test_init_take_ownership_no_edge_metadata },
8868+
{ "test_init_compute_mutation_parents", test_init_compute_mutation_parents },
8869+
{ "test_init_compute_mutation_parents_errors",
8870+
test_init_compute_mutation_parents_errors },
87628871
{ NULL, NULL },
87638872
};
87648873

c/tests/testlib.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,8 @@ tsk_treeseq_from_text(tsk_treeseq_t *ts, double sequence_length, const char *nod
767767
tsk_table_collection_t tables;
768768
tsk_id_t max_population_id;
769769
tsk_size_t j;
770+
tsk_flags_t ts_flags;
771+
bool all_parents_null;
770772

771773
CU_ASSERT_FATAL(ts != NULL);
772774
CU_ASSERT_FATAL(nodes != NULL);
@@ -807,7 +809,21 @@ tsk_treeseq_from_text(tsk_treeseq_t *ts, double sequence_length, const char *nod
807809
}
808810
}
809811

810-
ret = tsk_treeseq_init(ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
812+
/* If all mutation.parent are TSK_NULL, use TSK_TS_COMPUTE_MUTATION_PARENTS flag too
813+
*/
814+
ts_flags = TSK_TS_INIT_BUILD_INDEXES;
815+
all_parents_null = true;
816+
for (j = 0; j < tables.mutations.num_rows; j++) {
817+
if (tables.mutations.parent[j] != TSK_NULL) {
818+
all_parents_null = false;
819+
break;
820+
}
821+
}
822+
if (all_parents_null) {
823+
ts_flags |= TSK_TS_INIT_COMPUTE_MUTATION_PARENTS;
824+
}
825+
826+
ret = tsk_treeseq_init(ts, &tables, ts_flags);
811827
/* tsk_treeseq_print_state(ts, stdout); */
812828
if (ret != 0) {
813829
printf("\nret = %s\n", tsk_strerror(ret));

c/tskit/core.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,12 @@ tsk_strerror_internal(int err)
347347
"(TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME)";
348348
break;
349349

350+
case TSK_ERR_BAD_MUTATION_PARENT:
351+
ret = "A mutation's parent is not consistent with the topology of the tree. "
352+
"Use compute_mutation_parents to set the parents correctly."
353+
"(TSK_ERR_BAD_MUTATION_PARENT)";
354+
break;
355+
350356
/* Migration errors */
351357
case TSK_ERR_UNSORTED_MIGRATIONS:
352358
ret = "Migrations must be sorted by time. (TSK_ERR_UNSORTED_MIGRATIONS)";

c/tskit/core.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,12 @@ Some mutations have TSK_UNKNOWN_TIME in an algorithm where that's
510510
disallowed (use compute_mutation_times?).
511511
*/
512512
#define TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME -510
513+
514+
/**
515+
A mutation's parent was not consistent with the topology of the tree.
516+
*/
517+
#define TSK_ERR_BAD_MUTATION_PARENT -511
518+
513519
/** @} */
514520

515521
/**

0 commit comments

Comments
 (0)