Skip to content

Commit 0f65728

Browse files
Clarify the semantics of skipping sort on sites/mutations.
1 parent b232615 commit 0f65728

File tree

5 files changed

+92
-9
lines changed

5 files changed

+92
-9
lines changed

c/tests/test_tables.c

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2978,6 +2978,81 @@ test_copy_table_collection(void)
29782978
tsk_treeseq_free(&ts);
29792979
}
29802980

2981+
static void
2982+
test_sort_tables_offsets(void)
2983+
{
2984+
int ret;
2985+
tsk_treeseq_t *ts;
2986+
tsk_table_collection_t tables, copy;
2987+
tsk_bookmark_t bookmark;
2988+
2989+
ts = caterpillar_tree(10, 5, 5);
2990+
ret = tsk_treeseq_copy_tables(ts, &tables, 0);
2991+
CU_ASSERT_EQUAL_FATAL(ret, 0);
2992+
2993+
ret = tsk_table_collection_sort(&tables, NULL, 0);
2994+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_MIGRATIONS_NOT_SUPPORTED);
2995+
2996+
tsk_migration_table_clear(&tables.migrations);
2997+
ret = tsk_table_collection_sort(&tables, NULL, 0);
2998+
CU_ASSERT_EQUAL_FATAL(ret, 0);
2999+
3000+
/* Check that setting edge offset = len(edges) does nothing */
3001+
reverse_edges(&tables);
3002+
ret = tsk_table_collection_copy(&tables, &copy, 0);
3003+
CU_ASSERT_EQUAL_FATAL(ret, 0);
3004+
memset(&bookmark, 0, sizeof(bookmark));
3005+
bookmark.edges = tables.edges.num_rows;
3006+
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
3007+
CU_ASSERT_EQUAL_FATAL(ret, 0);
3008+
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy));
3009+
3010+
ret = tsk_table_collection_sort(&tables, NULL, 0);
3011+
CU_ASSERT_EQUAL_FATAL(ret, 0);
3012+
CU_ASSERT_FATAL(tables.sites.num_rows > 2);
3013+
CU_ASSERT_FATAL(tables.mutations.num_rows > 2);
3014+
3015+
/* Check that setting mutation and site offset = to the len
3016+
* of the tables leaves them untouched. */
3017+
reverse_mutations(&tables);
3018+
/* Swap the positions of the first two sites, as a quick way
3019+
* to disorder the site table */
3020+
tables.sites.position[0] = tables.sites.position[1];
3021+
tables.sites.position[1] = 0;
3022+
ret = tsk_table_collection_copy(&tables, &copy, TSK_NO_INIT);
3023+
CU_ASSERT_EQUAL_FATAL(ret, 0);
3024+
memset(&bookmark, 0, sizeof(bookmark));
3025+
bookmark.sites = tables.sites.num_rows;
3026+
bookmark.mutations = tables.mutations.num_rows;
3027+
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
3028+
CU_ASSERT_EQUAL_FATAL(ret, 0);
3029+
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy));
3030+
3031+
/* Anything other than len(table) leads to an error for sites
3032+
* and mutations, and we can't specify one without the other. */
3033+
memset(&bookmark, 0, sizeof(bookmark));
3034+
bookmark.sites = tables.sites.num_rows;
3035+
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
3036+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
3037+
3038+
memset(&bookmark, 0, sizeof(bookmark));
3039+
bookmark.mutations = tables.mutations.num_rows;
3040+
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
3041+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
3042+
3043+
memset(&bookmark, 0, sizeof(bookmark));
3044+
bookmark.sites = tables.sites.num_rows - 1;
3045+
bookmark.mutations = tables.mutations.num_rows - 1;
3046+
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
3047+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
3048+
3049+
tsk_table_collection_free(&tables);
3050+
tsk_table_collection_free(&copy);
3051+
tsk_treeseq_free(ts);
3052+
free(ts);
3053+
}
3054+
3055+
29813056
static void
29823057
test_sort_tables_drops_indexes_with_options(tsk_flags_t tc_options)
29833058
{
@@ -3128,7 +3203,7 @@ test_sort_tables_errors(void)
31283203
memset(&pos, 0, sizeof(pos));
31293204
pos.migrations = 1;
31303205
ret = tsk_table_collection_sort(&tables, &pos, 0);
3131-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
3206+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
31323207

31333208
memset(&pos, 0, sizeof(pos));
31343209
pos.sites = 1;
@@ -4552,6 +4627,7 @@ main(int argc, char **argv)
45524627
test_link_ancestors_samples_and_ancestors_overlap },
45534628
{ "test_link_ancestors_multiple_to_single_tree",
45544629
test_link_ancestors_multiple_to_single_tree },
4630+
{ "test_sort_tables_offsets", test_sort_tables_offsets },
45554631
{ "test_sort_tables_drops_indexes", test_sort_tables_drops_indexes },
45564632
{ "test_sort_tables_edge_metadata", test_sort_tables_edge_metadata },
45574633
{ "test_sort_tables_no_edge_metadata", test_sort_tables_no_edge_metadata },

c/tskit/core.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,8 +351,9 @@ tsk_strerror_internal(int err)
351351
ret = "Migrations not currently supported by this operation";
352352
break;
353353
case TSK_ERR_SORT_OFFSET_NOT_SUPPORTED:
354-
ret = "Specifying position for mutation, sites or migrations is not "
355-
"supported";
354+
ret = "Sort offsets for sites and mutations must be either 0 "
355+
"or the length of the respective tables. Intermediate values "
356+
"are not supported";
356357
break;
357358
case TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED:
358359
ret = "Only binary mutations are supported for this operation";

c/tskit/tables.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4557,7 +4557,7 @@ tsk_table_sorter_run(tsk_table_sorter_t *self, tsk_bookmark_t *start)
45574557
edge_start = start->edges;
45584558

45594559
if (start->migrations != 0) {
4560-
ret = TSK_ERR_SORT_OFFSET_NOT_SUPPORTED;
4560+
ret = TSK_ERR_MIGRATIONS_NOT_SUPPORTED;
45614561
goto out;
45624562
}
45634563
/* We only allow sites and mutations to be specified as a way to

c/tskit/tables.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,8 +2383,12 @@ Positions in tables that are not sorted (``individual``, ``node``, ``population`
23832383
and ``provenance``) are ignored and can be set to arbitrary values.
23842384
23852385
.. warning:: The current implementation only supports specifying a start
2386-
position for the ``edge`` table. Specifying a non-zero ``migration``,
2387-
``site`` or ``mutation`` start position results in an error.
2386+
position for the ``edge`` table and in a limited form for the
2387+
``site`` and ``mutation`` tables. Specifying a non-zero ``migration``,
2388+
start position results in an error. The start positions for the
2389+
``site`` and ``mutation`` tables can either be 0 or the length of the
2390+
respective tables, allowing these tables to either be fully sorted, or
2391+
not sorted at all.
23882392
23892393
The table collection will always be unindexed after sort successfully completes.
23902394

python/tests/test_topology.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5698,9 +5698,9 @@ def verify(self, ts):
56985698
# Called by the examples in ExampleTopologyMixin
56995699
samples = ts.samples()
57005700
self.verify_keep_input_roots(ts, samples[:2])
5701-
# self.verify_keep_input_roots(ts, samples[:3])
5702-
# self.verify_keep_input_roots(ts, samples[:-1])
5703-
# self.verify_keep_input_roots(ts, samples)
5701+
self.verify_keep_input_roots(ts, samples[:3])
5702+
self.verify_keep_input_roots(ts, samples[:-1])
5703+
self.verify_keep_input_roots(ts, samples)
57045704

57055705
def verify_keep_input_roots(self, ts, samples):
57065706
ts_with_roots, node_map = self.do_simplify(
@@ -5762,6 +5762,8 @@ def verify_keep_input_roots(self, ts, samples):
57625762
new_mutations = {
57635763
mut.metadata: mut for mut in new_site.mutations
57645764
}
5765+
# Just make sure the metadata is actually unique.
5766+
self.assertEqual(len(new_mutations), len(new_site.mutations))
57655767
input_site = input_sites[position]
57665768
for input_mutation in input_site.mutations:
57675769
if input_mutation.node in root_path:

0 commit comments

Comments
 (0)