Skip to content

Commit 88fe00d

Browse files
molpopgenmergify[bot]
authored andcommitted
Refactor edge difference iterator to work from table collections as well
as from tree sequences.
1 parent 9025f57 commit 88fe00d

File tree

7 files changed

+258
-199
lines changed

7 files changed

+258
-199
lines changed

c/tests/test_tables.c

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* MIT License
33
*
4-
* Copyright (c) 2019-2022 Tskit Developers
4+
* Copyright (c) 2019-2023 Tskit Developers
55
*
66
* Permission is hereby granted, free of charge, to any person obtaining a copy
77
* of this software and associated documentation files (the "Software"), to deal
@@ -23,6 +23,7 @@
2323
*/
2424

2525
#include "testlib.h"
26+
#include "tskit/core.h"
2627
#include <tskit/tables.h>
2728

2829
#include <float.h>
@@ -10420,6 +10421,35 @@ test_table_collection_delete_older(void)
1042010421
tsk_treeseq_free(&ts);
1042110422
}
1042210423

10424+
static void
10425+
test_table_collection_edge_diffs_errors(void)
10426+
{
10427+
int ret;
10428+
tsk_id_t ret_id;
10429+
tsk_table_collection_t tables;
10430+
tsk_diff_iter_t iter;
10431+
10432+
ret = tsk_table_collection_init(&tables, 0);
10433+
CU_ASSERT_EQUAL(ret, 0);
10434+
tables.sequence_length = 1;
10435+
ret_id
10436+
= tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0, -1, -1, NULL, 0);
10437+
CU_ASSERT_EQUAL(ret_id, 0);
10438+
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1, -1, -1, NULL, 0);
10439+
CU_ASSERT_EQUAL(ret_id, 1);
10440+
ret = (int) tsk_edge_table_add_row(&tables.edges, 0., 1., 1, 0, NULL, 0);
10441+
CU_ASSERT_EQUAL(ret, 0);
10442+
10443+
ret = tsk_diff_iter_init(&iter, &tables, -1, 0);
10444+
CU_ASSERT_EQUAL(ret, TSK_ERR_TABLES_NOT_INDEXED);
10445+
10446+
tables.nodes.time[0] = 1;
10447+
ret = tsk_diff_iter_init(&iter, &tables, -1, 0);
10448+
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING);
10449+
10450+
tsk_table_collection_free(&tables);
10451+
}
10452+
1042310453
int
1042410454
main(int argc, char **argv)
1042510455
{
@@ -10542,6 +10572,8 @@ main(int argc, char **argv)
1054210572
{ "test_table_collection_takeset_indexes",
1054310573
test_table_collection_takeset_indexes },
1054410574
{ "test_table_collection_delete_older", test_table_collection_delete_older },
10575+
{ "test_table_collection_edge_diffs_errors",
10576+
test_table_collection_edge_diffs_errors },
1054510577
{ NULL, NULL },
1054610578
};
1054710579

c/tests/test_trees.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* MIT License
33
*
4-
* Copyright (c) 2019-2022 Tskit Developers
4+
* Copyright (c) 2019-2023 Tskit Developers
55
*
66
* Permission is hereby granted, free of charge, to any person obtaining a copy
77
* of this software and associated documentation files (the "Software"), to deal
@@ -407,7 +407,7 @@ verify_tree_diffs(tsk_treeseq_t *ts, tsk_flags_t options)
407407
child[j] = TSK_NULL;
408408
sib[j] = TSK_NULL;
409409
}
410-
ret = tsk_diff_iter_init(&iter, ts, options);
410+
ret = tsk_diff_iter_init_from_ts(&iter, ts, options);
411411
CU_ASSERT_EQUAL_FATAL(ret, 0);
412412
ret = tsk_tree_init(&tree, ts, 0);
413413
CU_ASSERT_EQUAL_FATAL(ret, 0);

c/tskit/haplotype_matching.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* MIT License
33
*
4-
* Copyright (c) 2019-2022 Tskit Developers
4+
* Copyright (c) 2019-2023 Tskit Developers
55
*
66
* Permission is hereby granted, free of charge, to any person obtaining a copy
77
* of this software and associated documentation files (the "Software"), to deal
@@ -250,7 +250,7 @@ tsk_ls_hmm_reset(tsk_ls_hmm_t *self)
250250

251251
/* This is safe because we've already zero'd out the memory. */
252252
tsk_diff_iter_free(&self->diffs);
253-
ret = tsk_diff_iter_init(&self->diffs, self->tree_sequence, false);
253+
ret = tsk_diff_iter_init_from_ts(&self->diffs, self->tree_sequence, false);
254254
if (ret != 0) {
255255
goto out;
256256
}

c/tskit/tables.c

Lines changed: 163 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* MIT License
33
*
4-
* Copyright (c) 2019-2022 Tskit Developers
4+
* Copyright (c) 2019-2023 Tskit Developers
55
* Copyright (c) 2017-2018 University of Oxford
66
*
77
* Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -13010,3 +13010,165 @@ tsk_squash_edges(tsk_edge_t *edges, tsk_size_t num_edges, tsk_size_t *num_output
1301013010
out:
1301113011
return ret;
1301213012
}
13013+
13014+
/* ======================================================== *
13015+
* Tree diff iterator.
13016+
* ======================================================== */
13017+
13018+
int TSK_WARN_UNUSED
13019+
tsk_diff_iter_init(tsk_diff_iter_t *self, const tsk_table_collection_t *tables,
13020+
tsk_id_t num_trees, tsk_flags_t options)
13021+
{
13022+
int ret = 0;
13023+
13024+
tsk_bug_assert(tables != NULL);
13025+
tsk_memset(self, 0, sizeof(tsk_diff_iter_t));
13026+
self->num_nodes = tables->nodes.num_rows;
13027+
self->num_edges = tables->edges.num_rows;
13028+
self->tables = tables;
13029+
self->insertion_index = 0;
13030+
self->removal_index = 0;
13031+
self->tree_left = 0;
13032+
self->tree_index = -1;
13033+
if (num_trees < 0) {
13034+
num_trees = tsk_table_collection_check_integrity(self->tables, TSK_CHECK_TREES);
13035+
if (num_trees < 0) {
13036+
ret = (int) num_trees;
13037+
goto out;
13038+
}
13039+
}
13040+
self->last_index = num_trees;
13041+
13042+
if (options & TSK_INCLUDE_TERMINAL) {
13043+
self->last_index = self->last_index + 1;
13044+
}
13045+
self->edge_list_nodes = tsk_malloc(self->num_edges * sizeof(*self->edge_list_nodes));
13046+
if (self->edge_list_nodes == NULL) {
13047+
ret = TSK_ERR_NO_MEMORY;
13048+
goto out;
13049+
}
13050+
out:
13051+
return ret;
13052+
}
13053+
13054+
int
13055+
tsk_diff_iter_free(tsk_diff_iter_t *self)
13056+
{
13057+
tsk_safe_free(self->edge_list_nodes);
13058+
return 0;
13059+
}
13060+
13061+
void
13062+
tsk_diff_iter_print_state(const tsk_diff_iter_t *self, FILE *out)
13063+
{
13064+
fprintf(out, "tree_diff_iterator state\n");
13065+
fprintf(out, "num_edges = %lld\n", (long long) self->num_edges);
13066+
fprintf(out, "insertion_index = %lld\n", (long long) self->insertion_index);
13067+
fprintf(out, "removal_index = %lld\n", (long long) self->removal_index);
13068+
fprintf(out, "tree_left = %f\n", self->tree_left);
13069+
fprintf(out, "tree_index = %lld\n", (long long) self->tree_index);
13070+
}
13071+
13072+
int TSK_WARN_UNUSED
13073+
tsk_diff_iter_next(tsk_diff_iter_t *self, double *ret_left, double *ret_right,
13074+
tsk_edge_list_t *edges_out_ret, tsk_edge_list_t *edges_in_ret)
13075+
{
13076+
int ret = 0;
13077+
tsk_id_t k;
13078+
const double sequence_length = self->tables->sequence_length;
13079+
double left = self->tree_left;
13080+
double right = sequence_length;
13081+
tsk_size_t next_edge_list_node = 0;
13082+
tsk_edge_list_node_t *out_head = NULL;
13083+
tsk_edge_list_node_t *out_tail = NULL;
13084+
tsk_edge_list_node_t *in_head = NULL;
13085+
tsk_edge_list_node_t *in_tail = NULL;
13086+
tsk_edge_list_node_t *w = NULL;
13087+
tsk_edge_list_t edges_out;
13088+
tsk_edge_list_t edges_in;
13089+
const tsk_edge_table_t *edges = &self->tables->edges;
13090+
const tsk_id_t *insertion_order = self->tables->indexes.edge_insertion_order;
13091+
const tsk_id_t *removal_order = self->tables->indexes.edge_removal_order;
13092+
13093+
tsk_memset(&edges_out, 0, sizeof(edges_out));
13094+
tsk_memset(&edges_in, 0, sizeof(edges_in));
13095+
13096+
if (self->tree_index + 1 < self->last_index) {
13097+
/* First we remove the stale records */
13098+
while (self->removal_index < (tsk_id_t) self->num_edges
13099+
&& left == edges->right[removal_order[self->removal_index]]) {
13100+
k = removal_order[self->removal_index];
13101+
tsk_bug_assert(next_edge_list_node < self->num_edges);
13102+
w = &self->edge_list_nodes[next_edge_list_node];
13103+
next_edge_list_node++;
13104+
w->edge.id = k;
13105+
w->edge.left = edges->left[k];
13106+
w->edge.right = edges->right[k];
13107+
w->edge.parent = edges->parent[k];
13108+
w->edge.child = edges->child[k];
13109+
w->edge.metadata = edges->metadata + edges->metadata_offset[k];
13110+
w->edge.metadata_length
13111+
= edges->metadata_offset[k + 1] - edges->metadata_offset[k];
13112+
w->next = NULL;
13113+
w->prev = NULL;
13114+
if (out_head == NULL) {
13115+
out_head = w;
13116+
out_tail = w;
13117+
} else {
13118+
out_tail->next = w;
13119+
w->prev = out_tail;
13120+
out_tail = w;
13121+
}
13122+
self->removal_index++;
13123+
}
13124+
edges_out.head = out_head;
13125+
edges_out.tail = out_tail;
13126+
13127+
/* Now insert the new records */
13128+
while (self->insertion_index < (tsk_id_t) self->num_edges
13129+
&& left == edges->left[insertion_order[self->insertion_index]]) {
13130+
k = insertion_order[self->insertion_index];
13131+
tsk_bug_assert(next_edge_list_node < self->num_edges);
13132+
w = &self->edge_list_nodes[next_edge_list_node];
13133+
next_edge_list_node++;
13134+
w->edge.id = k;
13135+
w->edge.left = edges->left[k];
13136+
w->edge.right = edges->right[k];
13137+
w->edge.parent = edges->parent[k];
13138+
w->edge.child = edges->child[k];
13139+
w->edge.metadata = edges->metadata + edges->metadata_offset[k];
13140+
w->edge.metadata_length
13141+
= edges->metadata_offset[k + 1] - edges->metadata_offset[k];
13142+
w->next = NULL;
13143+
w->prev = NULL;
13144+
if (in_head == NULL) {
13145+
in_head = w;
13146+
in_tail = w;
13147+
} else {
13148+
in_tail->next = w;
13149+
w->prev = in_tail;
13150+
in_tail = w;
13151+
}
13152+
self->insertion_index++;
13153+
}
13154+
edges_in.head = in_head;
13155+
edges_in.tail = in_tail;
13156+
13157+
right = sequence_length;
13158+
if (self->insertion_index < (tsk_id_t) self->num_edges) {
13159+
right = TSK_MIN(right, edges->left[insertion_order[self->insertion_index]]);
13160+
}
13161+
if (self->removal_index < (tsk_id_t) self->num_edges) {
13162+
right = TSK_MIN(right, edges->right[removal_order[self->removal_index]]);
13163+
}
13164+
self->tree_index++;
13165+
ret = TSK_TREE_OK;
13166+
}
13167+
*edges_out_ret = edges_out;
13168+
*edges_in_ret = edges_in;
13169+
*ret_left = left;
13170+
*ret_right = right;
13171+
/* Set the left coordinate for the next tree */
13172+
self->tree_left = right;
13173+
return ret;
13174+
}

c/tskit/tables.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,30 @@ typedef struct {
670670
bool store_pairs;
671671
} tsk_identity_segments_t;
672672

673+
/* Diff iterator. */
674+
typedef struct _tsk_edge_list_node_t {
675+
tsk_edge_t edge;
676+
struct _tsk_edge_list_node_t *next;
677+
struct _tsk_edge_list_node_t *prev;
678+
} tsk_edge_list_node_t;
679+
680+
typedef struct {
681+
tsk_edge_list_node_t *head;
682+
tsk_edge_list_node_t *tail;
683+
} tsk_edge_list_t;
684+
685+
typedef struct {
686+
tsk_size_t num_nodes;
687+
tsk_size_t num_edges;
688+
double tree_left;
689+
const tsk_table_collection_t *tables;
690+
tsk_id_t insertion_index;
691+
tsk_id_t removal_index;
692+
tsk_id_t tree_index;
693+
tsk_id_t last_index;
694+
tsk_edge_list_node_t *edge_list_nodes;
695+
} tsk_diff_iter_t;
696+
673697
/****************************************************************************/
674698
/* Common function options */
675699
/****************************************************************************/
@@ -892,6 +916,16 @@ top-level information of the table collections being compared.
892916
#define TSK_CLEAR_PROVENANCE (1 << 2)
893917
/** @} */
894918

919+
/* For the edge diff iterator */
920+
#define TSK_INCLUDE_TERMINAL (1 << 0)
921+
922+
/** @brief Value returned by seeking methods when they have successfully
923+
seeked to a non-null tree.
924+
925+
@ingroup TREE_API_SEEKING_GROUP
926+
*/
927+
#define TSK_TREE_OK 1
928+
895929
/****************************************************************************/
896930
/* Function signatures */
897931
/****************************************************************************/
@@ -4417,6 +4451,19 @@ int tsk_identity_segments_get(const tsk_identity_segments_t *self, tsk_id_t a,
44174451
void tsk_identity_segments_print_state(tsk_identity_segments_t *self, FILE *out);
44184452
int tsk_identity_segments_free(tsk_identity_segments_t *self);
44194453

4454+
/* Edge differences */
4455+
4456+
/* Internal API - currently used in a few places, but a better API is envisaged
4457+
* at some point.
4458+
* IMPORTANT: tskit-rust uses this API, so don't break without discussing!
4459+
*/
4460+
int tsk_diff_iter_init(tsk_diff_iter_t *self, const tsk_table_collection_t *tables,
4461+
tsk_id_t num_trees, tsk_flags_t options);
4462+
int tsk_diff_iter_free(tsk_diff_iter_t *self);
4463+
int tsk_diff_iter_next(tsk_diff_iter_t *self, double *left, double *right,
4464+
tsk_edge_list_t *edges_out, tsk_edge_list_t *edges_in);
4465+
void tsk_diff_iter_print_state(const tsk_diff_iter_t *self, FILE *out);
4466+
44204467
#ifdef __cplusplus
44214468
}
44224469
#endif

0 commit comments

Comments
 (0)