Skip to content

Commit ab5ef8f

Browse files
Merge pull request #2786 from jeromekelleher/incremental-trees
Modular incremental trees
2 parents 67f2ef3 + 2c264be commit ab5ef8f

File tree

7 files changed

+1097
-86
lines changed

7 files changed

+1097
-86
lines changed

c/tests/test_trees.c

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,97 @@ verify_individual_nodes(tsk_treeseq_t *ts)
175175
}
176176
}
177177

178+
static void
179+
verify_tree_pos(const tsk_treeseq_t *ts, tsk_size_t num_trees, tsk_id_t *tree_parents)
180+
{
181+
int ret;
182+
const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);
183+
const tsk_id_t *edges_parent = ts->tables->edges.parent;
184+
const tsk_id_t *edges_child = ts->tables->edges.child;
185+
tsk_tree_position_t tree_pos;
186+
tsk_id_t *known_parent;
187+
tsk_id_t *parent = tsk_malloc(N * sizeof(*parent));
188+
tsk_id_t u, index, j, e;
189+
bool valid;
190+
191+
CU_ASSERT_FATAL(parent != NULL);
192+
193+
ret = tsk_tree_position_init(&tree_pos, ts, 0);
194+
CU_ASSERT_EQUAL_FATAL(ret, 0);
195+
196+
for (u = 0; u < (tsk_id_t) N; u++) {
197+
parent[u] = TSK_NULL;
198+
}
199+
200+
for (index = 0; index < (tsk_id_t) num_trees; index++) {
201+
known_parent = tree_parents + N * (tsk_size_t) index;
202+
203+
valid = tsk_tree_position_next(&tree_pos);
204+
CU_ASSERT_TRUE(valid);
205+
CU_ASSERT_EQUAL(index, tree_pos.index);
206+
207+
for (j = tree_pos.out.start; j < tree_pos.out.stop; j++) {
208+
e = tree_pos.out.order[j];
209+
parent[edges_child[e]] = TSK_NULL;
210+
}
211+
212+
for (j = tree_pos.in.start; j < tree_pos.in.stop; j++) {
213+
e = tree_pos.in.order[j];
214+
parent[edges_child[e]] = edges_parent[e];
215+
}
216+
217+
for (u = 0; u < (tsk_id_t) N; u++) {
218+
CU_ASSERT_EQUAL(parent[u], known_parent[u]);
219+
}
220+
}
221+
222+
valid = tsk_tree_position_next(&tree_pos);
223+
CU_ASSERT_FALSE(valid);
224+
for (j = tree_pos.out.start; j < tree_pos.out.stop; j++) {
225+
e = tree_pos.out.order[j];
226+
parent[edges_child[e]] = TSK_NULL;
227+
}
228+
for (u = 0; u < (tsk_id_t) N; u++) {
229+
CU_ASSERT_EQUAL(parent[u], TSK_NULL);
230+
}
231+
232+
for (index = (tsk_id_t) num_trees - 1; index >= 0; index--) {
233+
known_parent = tree_parents + N * (tsk_size_t) index;
234+
235+
valid = tsk_tree_position_prev(&tree_pos);
236+
CU_ASSERT_TRUE(valid);
237+
CU_ASSERT_EQUAL(index, tree_pos.index);
238+
239+
for (j = tree_pos.out.start; j > tree_pos.out.stop; j--) {
240+
e = tree_pos.out.order[j];
241+
parent[edges_child[e]] = TSK_NULL;
242+
}
243+
244+
for (j = tree_pos.in.start; j > tree_pos.in.stop; j--) {
245+
CU_ASSERT_FATAL(j >= 0);
246+
e = tree_pos.in.order[j];
247+
parent[edges_child[e]] = edges_parent[e];
248+
}
249+
250+
for (u = 0; u < (tsk_id_t) N; u++) {
251+
CU_ASSERT_EQUAL(parent[u], known_parent[u]);
252+
}
253+
}
254+
255+
valid = tsk_tree_position_prev(&tree_pos);
256+
CU_ASSERT_FALSE(valid);
257+
for (j = tree_pos.out.start; j > tree_pos.out.stop; j--) {
258+
e = tree_pos.out.order[j];
259+
parent[edges_child[e]] = TSK_NULL;
260+
}
261+
for (u = 0; u < (tsk_id_t) N; u++) {
262+
CU_ASSERT_EQUAL(parent[u], TSK_NULL);
263+
}
264+
265+
tsk_tree_position_free(&tree_pos);
266+
tsk_safe_free(parent);
267+
}
268+
178269
static void
179270
verify_trees(tsk_treeseq_t *ts, tsk_size_t num_trees, tsk_id_t *parents)
180271
{
@@ -233,6 +324,8 @@ verify_trees(tsk_treeseq_t *ts, tsk_size_t num_trees, tsk_id_t *parents)
233324
CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(ts), breakpoints[j]);
234325

235326
tsk_tree_free(&tree);
327+
328+
verify_tree_pos(ts, num_trees, parents);
236329
}
237330

238331
static tsk_tree_t *
@@ -5233,6 +5326,65 @@ test_single_tree_tracked_samples(void)
52335326
tsk_tree_free(&tree);
52345327
}
52355328

5329+
static void
5330+
test_single_tree_tree_pos(void)
5331+
{
5332+
tsk_treeseq_t ts;
5333+
tsk_tree_position_t tree_pos;
5334+
bool valid;
5335+
int ret;
5336+
5337+
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
5338+
NULL, NULL, NULL, 0);
5339+
5340+
ret = tsk_tree_position_init(&tree_pos, &ts, 0);
5341+
CU_ASSERT_EQUAL_FATAL(ret, 0);
5342+
valid = tsk_tree_position_next(&tree_pos);
5343+
CU_ASSERT_FATAL(valid);
5344+
5345+
CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);
5346+
CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);
5347+
CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 0);
5348+
CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 6);
5349+
CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);
5350+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);
5351+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 0);
5352+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
5353+
5354+
valid = tsk_tree_position_next(&tree_pos);
5355+
CU_ASSERT_FATAL(!valid);
5356+
5357+
tsk_tree_position_print_state(&tree_pos, _devnull);
5358+
5359+
CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);
5360+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);
5361+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 6);
5362+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
5363+
5364+
valid = tsk_tree_position_prev(&tree_pos);
5365+
CU_ASSERT_FATAL(valid);
5366+
5367+
CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);
5368+
CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);
5369+
CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 5);
5370+
CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, -1);
5371+
CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_removal_order);
5372+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);
5373+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);
5374+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);
5375+
5376+
valid = tsk_tree_position_prev(&tree_pos);
5377+
CU_ASSERT_FATAL(!valid);
5378+
5379+
CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);
5380+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);
5381+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, -1);
5382+
CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);
5383+
5384+
tsk_tree_position_free(&tree_pos);
5385+
tsk_treeseq_free(&ts);
5386+
}
5387+
52365388
/*=======================================================
52375389
* Multi tree tests.
52385390
*======================================================*/
@@ -8185,6 +8337,7 @@ main(int argc, char **argv)
81858337
{ "test_single_tree_map_mutations_internal_samples",
81868338
test_single_tree_map_mutations_internal_samples },
81878339
{ "test_single_tree_tracked_samples", test_single_tree_tracked_samples },
8340+
{ "test_single_tree_tree_pos", test_single_tree_tree_pos },
81888341

81898342
/* Multi tree tests */
81908343
{ "test_simple_multi_tree", test_simple_multi_tree },

c/tskit/haplotype_matching.c

Lines changed: 31 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ int
209209
tsk_ls_hmm_free(tsk_ls_hmm_t *self)
210210
{
211211
tsk_tree_free(&self->tree);
212-
tsk_diff_iter_free(&self->diffs);
212+
tsk_tree_position_free(&self->tree_pos);
213213
tsk_safe_free(self->recombination_rate);
214214
tsk_safe_free(self->mutation_rate);
215215
tsk_safe_free(self->recombination_rate);
@@ -248,9 +248,8 @@ tsk_ls_hmm_reset(tsk_ls_hmm_t *self)
248248
tsk_memset(self->transition_parent, 0xff,
249249
self->max_transitions * sizeof(*self->transition_parent));
250250

251-
/* This is safe because we've already zero'd out the memory. */
252-
tsk_diff_iter_free(&self->diffs);
253-
ret = tsk_diff_iter_init_from_ts(&self->diffs, self->tree_sequence, false);
251+
tsk_tree_position_free(&self->tree_pos);
252+
ret = tsk_tree_position_init(&self->tree_pos, self->tree_sequence, 0);
254253
if (ret != 0) {
255254
goto out;
256255
}
@@ -306,47 +305,48 @@ tsk_ls_hmm_update_tree(tsk_ls_hmm_t *self)
306305
int ret = 0;
307306
tsk_id_t *restrict parent = self->parent;
308307
tsk_id_t *restrict T_index = self->transition_index;
308+
const tsk_id_t *restrict edges_child = self->tree_sequence->tables->edges.child;
309+
const tsk_id_t *restrict edges_parent = self->tree_sequence->tables->edges.parent;
309310
tsk_value_transition_t *restrict T = self->transitions;
310-
tsk_edge_list_node_t *record;
311-
tsk_edge_list_t records_out, records_in;
312-
tsk_edge_t edge;
313-
double left, right;
314-
tsk_id_t u;
311+
tsk_id_t u, c, p, j, e;
315312
tsk_value_transition_t *vt;
316313

317-
ret = tsk_diff_iter_next(&self->diffs, &left, &right, &records_out, &records_in);
318-
if (ret < 0) {
319-
goto out;
320-
}
314+
tsk_tree_position_next(&self->tree_pos);
315+
tsk_bug_assert(self->tree_pos.index != -1);
316+
tsk_bug_assert(self->tree_pos.index == self->tree.index);
321317

322-
for (record = records_out.head; record != NULL; record = record->next) {
323-
u = record->edge.child;
318+
for (j = self->tree_pos.out.start; j < self->tree_pos.out.stop; j++) {
319+
e = self->tree_pos.out.order[j];
320+
c = edges_child[e];
321+
u = c;
324322
if (T_index[u] == TSK_NULL) {
325323
/* Ensure the subtree we're detaching has a transition at the root */
326324
while (T_index[u] == TSK_NULL) {
327325
u = parent[u];
328326
tsk_bug_assert(u != TSK_NULL);
329327
}
330328
tsk_bug_assert(self->num_transitions < self->max_transitions);
331-
T_index[record->edge.child] = (tsk_id_t) self->num_transitions;
332-
T[self->num_transitions].tree_node = record->edge.child;
329+
T_index[c] = (tsk_id_t) self->num_transitions;
330+
T[self->num_transitions].tree_node = c;
333331
T[self->num_transitions].value = T[T_index[u]].value;
334332
self->num_transitions++;
335333
}
336-
parent[record->edge.child] = TSK_NULL;
334+
parent[c] = TSK_NULL;
337335
}
338336

339-
for (record = records_in.head; record != NULL; record = record->next) {
340-
edge = record->edge;
341-
parent[edge.child] = edge.parent;
342-
u = edge.parent;
343-
if (parent[edge.parent] == TSK_NULL) {
337+
for (j = self->tree_pos.in.start; j < self->tree_pos.in.stop; j++) {
338+
e = self->tree_pos.in.order[j];
339+
c = edges_child[e];
340+
p = edges_parent[e];
341+
parent[c] = p;
342+
u = p;
343+
if (parent[p] == TSK_NULL) {
344344
/* Grafting onto a new root. */
345-
if (T_index[record->edge.parent] == TSK_NULL) {
346-
T_index[edge.parent] = (tsk_id_t) self->num_transitions;
345+
if (T_index[p] == TSK_NULL) {
346+
T_index[p] = (tsk_id_t) self->num_transitions;
347347
tsk_bug_assert(self->num_transitions < self->max_transitions);
348-
T[self->num_transitions].tree_node = edge.parent;
349-
T[self->num_transitions].value = T[T_index[edge.child]].value;
348+
T[self->num_transitions].tree_node = p;
349+
T[self->num_transitions].value = T[T_index[c]].value;
350350
self->num_transitions++;
351351
}
352352
} else {
@@ -356,18 +356,17 @@ tsk_ls_hmm_update_tree(tsk_ls_hmm_t *self)
356356
}
357357
tsk_bug_assert(u != TSK_NULL);
358358
}
359-
tsk_bug_assert(T_index[u] != -1 && T_index[edge.child] != -1);
360-
if (T[T_index[u]].value == T[T_index[edge.child]].value) {
361-
vt = &T[T_index[edge.child]];
359+
tsk_bug_assert(T_index[u] != -1 && T_index[c] != -1);
360+
if (T[T_index[u]].value == T[T_index[c]].value) {
361+
vt = &T[T_index[c]];
362362
/* Mark the value transition as unusued */
363363
vt->value = -1;
364364
vt->tree_node = TSK_NULL;
365-
T_index[edge.child] = TSK_NULL;
365+
T_index[c] = TSK_NULL;
366366
}
367367
}
368368

369369
ret = tsk_ls_hmm_remove_dead_roots(self);
370-
out:
371370
return ret;
372371
}
373372

c/tskit/haplotype_matching.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* MIT License
33
*
4-
* Copyright (c) 2019-2022 Tskit Developers
4+
* Copyright (c) 2019-2023 Tskit Developers
55
*
66
* Permission is hereby granted, free of charge, to any person obtaining a copy
77
* of this software and associated documentation files (the "Software"), to deal
@@ -98,7 +98,10 @@ typedef struct _tsk_ls_hmm_t {
9898
tsk_size_t num_nodes;
9999
/* state */
100100
tsk_tree_t tree;
101-
tsk_diff_iter_t diffs;
101+
/* NOTE: this tree_position will be redundant once we integrate the top-level
102+
* tree class with this.
103+
*/
104+
tsk_tree_position_t tree_pos;
102105
tsk_id_t *parent;
103106
/* The probability value transitions on the tree */
104107
tsk_value_transition_t *transitions;

0 commit comments

Comments
 (0)