Skip to content

Commit 6f885de

Browse files
Removed the squash loop from simplifier_extract_ancestry.
1 parent 0f65728 commit 6f885de

File tree

6 files changed

+82
-35
lines changed

6 files changed

+82
-35
lines changed

c/CHANGELOG.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,19 @@
66

77
- The macro ``TSK_IMPUTE_MISSING_DATA`` is renamed to ``TSK_ISOLATED_NOT_MISSING``
88

9+
**New features**
10+
11+
- Add a ``TSK_KEEP_INPUT_ROOTS`` option to simplify that, when enabled, adds edges
12+
from the MRCAs of samples in the simplified tree sequence back to the roots
13+
in the input tree sequence (:user:`jeromekelleher`, :issue:`775`, :pr:`782`).
14+
915
---------------------
1016
[0.99.4] - 2020-08-12
1117
---------------------
1218

1319
**Note**
1420

15-
- The ``TSK_VERSION_PATCH`` macro was incorrectly set to ``4`` for 0.99.3, so both
21+
- The ``TSK_VERSION_PATCH`` macro was incorrectly set to ``4`` for 0.99.3, so both
1622
0.99.4 and 0.99.3 have the same value.
1723

1824
**Changes**
@@ -70,7 +76,7 @@
7076

7177
- New methods to perform set operations on table collections.
7278
``tsk_table_collection_subset`` subsets and reorders table collections by nodes
73-
(:user:`mufernando`, :user:`petrelharp`, :pr:`663`, :pr:`690`).
79+
(:user:`mufernando`, :user:`petrelharp`, :pr:`663`, :pr:`690`).
7480
``tsk_table_collection_union`` forms the node-wise union of two table collections
7581
(:user:`mufernando`, :user:`petrelharp`, :issue:`381`, :pr:`623`).
7682

c/tests/test_tables.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3052,7 +3052,6 @@ test_sort_tables_offsets(void)
30523052
free(ts);
30533053
}
30543054

3055-
30563055
static void
30573056
test_sort_tables_drops_indexes_with_options(tsk_flags_t tc_options)
30583057
{

c/tests/test_trees.c

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2566,6 +2566,69 @@ test_simplest_reduce_site_topology(void)
25662566
tsk_table_collection_free(&tables);
25672567
}
25682568

2569+
static void
2570+
test_simplest_simplify_defragment(void)
2571+
{
2572+
const char *nodes = "0 2 -1\n"
2573+
"0 2 -1\n"
2574+
"0 2 -1\n"
2575+
"0 2 -1\n"
2576+
"0 2 -1\n"
2577+
"0 2 -1\n"
2578+
"0 1 -1\n"
2579+
"0 1 -1\n"
2580+
"0 1 -1\n"
2581+
"0 1 -1\n"
2582+
"0 1 -1\n"
2583+
"0 1 -1\n"
2584+
"1 0 -1\n"
2585+
"1 0 -1\n"
2586+
"1 0 -1\n"
2587+
"1 0 -1\n"
2588+
"1 0 -1\n"
2589+
"1 0 -1\n";
2590+
const char *edges = "0.00000000 0.20784841 8 12\n"
2591+
"0.00000000 0.42202433 8 15\n"
2592+
"0.00000000 0.63541014 8 16\n"
2593+
"0.42202433 1.00000000 9 15\n"
2594+
"0.00000000 1.00000000 9 17\n"
2595+
"0.00000000 1.00000000 10 14\n"
2596+
"0.20784841 1.00000000 11 12\n"
2597+
"0.00000000 1.00000000 11 13\n"
2598+
"0.63541014 1.00000000 11 16\n"
2599+
"0.00000000 1.00000000 0 10\n"
2600+
"0.62102072 1.00000000 1 9\n"
2601+
"0.00000000 1.00000000 1 11\n"
2602+
"0.00000000 0.26002984 2 6\n"
2603+
"0.26002984 1.00000000 2 6\n"
2604+
"0.00000000 0.62102072 2 9\n"
2605+
"0.55150554 1.00000000 3 8\n"
2606+
"0.00000000 1.00000000 4 7\n"
2607+
"0.00000000 0.55150554 5 8\n";
2608+
2609+
tsk_id_t samples[] = { 12, 13, 14, 15, 16, 17 };
2610+
tsk_table_collection_t tables;
2611+
int ret;
2612+
2613+
/* This was the simplest example I could find that exercised the
2614+
* inner loops of the simplifier_extract_ancestry function */
2615+
ret = tsk_table_collection_init(&tables, 0);
2616+
CU_ASSERT_EQUAL_FATAL(ret, 0);
2617+
tables.sequence_length = 1;
2618+
parse_nodes(nodes, &tables.nodes);
2619+
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 18);
2620+
parse_edges(edges, &tables.edges);
2621+
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 18);
2622+
2623+
ret = tsk_table_collection_simplify(&tables, samples, 6, 0, NULL);
2624+
CU_ASSERT_EQUAL_FATAL(ret, 0);
2625+
2626+
CU_ASSERT_EQUAL(tables.nodes.num_rows, 10);
2627+
CU_ASSERT_EQUAL(tables.edges.num_rows, 10);
2628+
2629+
tsk_table_collection_free(&tables);
2630+
}
2631+
25692632
static void
25702633
test_simplest_population_filter(void)
25712634
{
@@ -5915,6 +5978,7 @@ main(int argc, char **argv)
59155978
{ "test_simplest_overlapping_unary_edges_internal_samples_simplify",
59165979
test_simplest_overlapping_unary_edges_internal_samples_simplify },
59175980
{ "test_simplest_reduce_site_topology", test_simplest_reduce_site_topology },
5981+
{ "test_simplest_simplify_defragment", test_simplest_simplify_defragment },
59185982
{ "test_simplest_population_filter", test_simplest_population_filter },
59195983
{ "test_simplest_individual_filter", test_simplest_individual_filter },
59205984
{ "test_simplest_map_mutations", test_simplest_map_mutations },

c/tskit/tables.c

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6102,7 +6102,7 @@ simplifier_extract_ancestry(
61026102
int ret = 0;
61036103
tsk_segment_t *x = self->ancestor_map_head[input_id];
61046104
tsk_segment_t y; /* y is the segment that has been removed */
6105-
tsk_segment_t *x_head, *x_tail, *x_prev, *seg_left, *seg_right;
6105+
tsk_segment_t *x_head, *x_prev, *seg_left, *seg_right;
61066106

61076107
x_head = NULL;
61086108
x_prev = NULL;
@@ -6152,22 +6152,8 @@ simplifier_extract_ancestry(
61526152
}
61536153
}
61546154

6155-
x = x_head;
6156-
x_tail = x_head;
6157-
while (x != NULL) {
6158-
x_tail = x;
6159-
if (x->next != NULL) {
6160-
assert(x->right <= x->next->left);
6161-
if (x->next->left == x->right && x->node == x->next->node) {
6162-
// Squash out (and free) the x.next segment.
6163-
x->right = x->next->right;
6164-
x->next = x->next->next;
6165-
}
6166-
}
6167-
x = x->next;
6168-
}
61696155
self->ancestor_map_head[input_id] = x_head;
6170-
self->ancestor_map_tail[input_id] = x_tail;
6156+
self->ancestor_map_tail[input_id] = x_prev;
61716157
out:
61726158
return ret;
61736159
}

python/CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ SVG drawing improvements and many others.
126126
- Add ``tree_sequence.to_macs()`` function to convert tree sequence to MACS
127127
format (:user:`winni2k`, :pr:`727`)
128128

129+
- Add a ``keep_input_roots`` option to simplify that, when enabled, adds edges
130+
from the MRCAs of samples in the simplified tree sequence back to the roots
131+
in the input tree sequence (:user:`jeromekelleher`, :issue:`775`, :pr:`782`).
132+
129133
**Bugfixes**
130134

131135
- :issue:`453` - Fix LibraryError when ``tree.newick()`` is called with large node time

python/tests/simplify.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -359,23 +359,11 @@ def extract_ancestry(self, edge):
359359
x_head = x
360360
x_prev = x
361361
x = x.next
362-
363-
# We could probably do the squashing and tail tracking
364-
# as part of the pass above, but keeping it simpler for now.
362+
# Note - we had some code to defragment segments in the output
363+
# chain here, but couldn't find an example where it needed to
364+
# be called. So it looks like squashing isn't necessary here.
365365
self.A_head[edge.child] = x_head
366-
x = x_head
367-
x_tail = x_head
368-
while x is not None:
369-
x_tail = x
370-
if x.next is not None:
371-
assert x.right <= x.next.left
372-
if x.next.left == x.right and x.node == x.next.node:
373-
# Squash out the x.next segment
374-
x.right = x.next.right
375-
x.next = x.next.next
376-
x = x.next
377-
self.A_tail[edge.child] = x_tail
378-
366+
self.A_tail[edge.child] = x_prev
379367
return S
380368

381369
def process_parent_edges(self, edges):

0 commit comments

Comments
 (0)