Skip to content

Commit b06a718

Browse files
petrelharp authored and mergify[bot] committed
Implement extend edges
1 parent 6e99c11 commit b06a718

File tree

15 files changed

+1413
-1
lines changed

15 files changed

+1413
-1
lines changed

c/CHANGELOG.rst

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
--------
2+
UPCOMING
3+
--------
4+
5+
**Features**
6+
7+
- Add the `tsk_treeseq_extend_edges` method that can compress a tree sequence
8+
by extending edges into adjacent trees and thus creating unary nodes in those
9+
trees (:user:`petrelharp`, :user:`hfr1tze`, :user:`avabamf`, :pr:`2651`).
10+
111
--------------------
212
[1.1.2] - 2023-05-17
313
--------------------
@@ -24,7 +34,7 @@
2434
(:user:`jeromekelleher`, :issue:`2662`, :pr:`2663`).
2535

2636
- Guarantee that unfiltered tables are not written to unnecessarily
27-
during simplify (:user:`jeromekelleher` :pr:`2619`).
37+
during simplify (:user:`jeromekelleher`, :pr:`2619`).
2838

2939
- Add `x_table_keep_rows` methods to provide efficient in-place table subsetting
3040
(:user:`jeromekelleher`, :pr:`2700`).

c/tests/test_trees.c

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8212,6 +8212,165 @@ test_split_edges_errors(void)
82128212
tsk_treeseq_free(&ts);
82138213
}
82148214

8215+
static void
8216+
test_extend_edges_simple(void)
8217+
{
8218+
int ret;
8219+
tsk_treeseq_t ts, ets;
8220+
const char *nodes = "1 0 -1 -1\n"
8221+
"1 0 -1 -1\n"
8222+
"0 2.0 -1 -1\n";
8223+
const char *edges = "0 10 2 0\n"
8224+
"0 10 2 1\n";
8225+
const char *sites = "0.0 0\n"
8226+
"1.0 0\n";
8227+
const char *mutations = "0 0 1 -1 0.5\n"
8228+
"1 1 1 -1 0.5\n";
8229+
8230+
tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
8231+
ret = tsk_treeseq_extend_edges(&ts, 10, 0, &ets);
8232+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8233+
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, ets.tables, 0));
8234+
tsk_treeseq_free(&ts);
8235+
8236+
tsk_treeseq_free(&ets);
8237+
}
8238+
8239+
static void
8240+
test_extend_edges_errors(void)
8241+
{
8242+
int ret;
8243+
tsk_treeseq_t ts, ets;
8244+
const char *nodes = "1 0 -1 -1\n"
8245+
"1 0 -1 -1\n"
8246+
"0 2.0 -1 -1\n";
8247+
const char *edges = "0 10 2 0\n"
8248+
"0 10 2 1\n";
8249+
const char *sites = "0.0 0\n"
8250+
"1.0 0\n";
8251+
const char *mutations = "0 0 1 -1 0.5\n"
8252+
"1 1 1 -1 0.5\n";
8253+
const char *mutations_no_time = "0 0 1 -1\n"
8254+
"1 1 1 -1\n";
8255+
// left, right, node source, dest, time
8256+
const char *migrations = "0 10 0 0 1 0.5\n"
8257+
"0 10 0 1 0 1.5\n";
8258+
8259+
tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
8260+
ret = tsk_treeseq_extend_edges(&ts, -2, 0, &ets);
8261+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EXTEND_EDGES_BAD_MAXITER);
8262+
tsk_treeseq_free(&ts);
8263+
8264+
tsk_treeseq_from_text(
8265+
&ts, 10, nodes, edges, migrations, sites, mutations, NULL, NULL, 0);
8266+
ret = tsk_treeseq_extend_edges(&ts, 10, 0, &ets);
8267+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
8268+
tsk_treeseq_free(&ts);
8269+
8270+
tsk_treeseq_from_text(
8271+
&ts, 10, nodes, edges, NULL, sites, mutations_no_time, NULL, NULL, 0);
8272+
ret = tsk_treeseq_extend_edges(&ts, 10, 0, &ets);
8273+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME);
8274+
tsk_treeseq_free(&ts);
8275+
8276+
tsk_treeseq_free(&ets);
8277+
}
8278+
8279+
static void
8280+
assert_equal_except_edges_and_mutation_nodes(
8281+
const tsk_treeseq_t *ts1, const tsk_treeseq_t *ts2)
8282+
{
8283+
tsk_table_collection_t t1, t2;
8284+
int ret;
8285+
8286+
ret = tsk_table_collection_copy(ts1->tables, &t1, 0);
8287+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8288+
8289+
ret = tsk_table_collection_copy(ts2->tables, &t2, 0);
8290+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8291+
8292+
tsk_memset(t1.mutations.node, 0, t1.mutations.num_rows * sizeof(*t1.mutations.node));
8293+
tsk_memset(t2.mutations.node, 0, t2.mutations.num_rows * sizeof(*t2.mutations.node));
8294+
8295+
tsk_edge_table_clear(&t1.edges);
8296+
tsk_edge_table_clear(&t2.edges);
8297+
8298+
CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));
8299+
8300+
tsk_table_collection_free(&t1);
8301+
tsk_table_collection_free(&t2);
8302+
}
8303+
8304+
static void
8305+
test_extend_edges(void)
8306+
{
8307+
int ret, max_iter;
8308+
tsk_treeseq_t ts, ets;
8309+
/* 7 and 8 should be extended to the whole sequence
8310+
8311+
6 6 6 6
8312+
+-+-+ +-+-+ +-+-+ +-+-+
8313+
| | 7 | | 8 | |
8314+
| | ++-+ | | +-++ | |
8315+
4 5 4 | | 4 | 5 4 5
8316+
+++ +++ +++ | | | | +++ +++ +++
8317+
0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3
8318+
*/
8319+
8320+
const char *nodes = "1 0 -1 -1\n"
8321+
"1 0 -1 -1\n"
8322+
"1 0 -1 -1\n"
8323+
"1 0 -1 -1\n"
8324+
"0 1.0 -1 -1\n"
8325+
"0 1.0 -1 -1\n"
8326+
"0 3.0 -1 -1\n"
8327+
"0 2.0 -1 -1\n"
8328+
"0 2.0 -1 -1\n";
8329+
// l, r, p, c
8330+
const char *edges = "0 10 4 0\n"
8331+
"0 5 4 1\n"
8332+
"7 10 4 1\n"
8333+
"0 2 5 2\n"
8334+
"5 10 5 2\n"
8335+
"0 2 5 3\n"
8336+
"5 10 5 3\n"
8337+
"2 5 7 2\n"
8338+
"2 5 7 4\n"
8339+
"5 7 8 1\n"
8340+
"5 7 8 5\n"
8341+
"2 5 6 3\n"
8342+
"0 2 6 4\n"
8343+
"5 10 6 4\n"
8344+
"0 2 6 5\n"
8345+
"7 10 6 5\n"
8346+
"2 5 6 7\n"
8347+
"5 7 6 8\n";
8348+
const char *sites = "0.0 0\n"
8349+
"9.0 0\n";
8350+
const char *mutations = "0 4 1 -1 2.5\n"
8351+
"0 4 2 0 1.5\n"
8352+
"1 5 1 -1 2.5\n"
8353+
"1 5 2 2 1.5\n";
8354+
8355+
tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
8356+
8357+
for (max_iter = 1; max_iter < 10; max_iter++) {
8358+
ret = tsk_treeseq_extend_edges(&ts, max_iter, 0, &ets);
8359+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8360+
assert_equal_except_edges_and_mutation_nodes(&ts, &ets);
8361+
CU_ASSERT_TRUE(ets.tables->edges.num_rows >= 12);
8362+
tsk_treeseq_free(&ets);
8363+
}
8364+
8365+
ret = tsk_treeseq_extend_edges(&ts, 10, 0, &ets);
8366+
CU_ASSERT_EQUAL_FATAL(ret, 0);
8367+
CU_ASSERT_EQUAL_FATAL(ets.tables->nodes.num_rows, 9);
8368+
CU_ASSERT_EQUAL_FATAL(ets.tables->edges.num_rows, 12);
8369+
tsk_treeseq_free(&ets);
8370+
8371+
tsk_treeseq_free(&ts);
8372+
}
8373+
82158374
static void
82168375
test_init_take_ownership_no_edge_metadata(void)
82178376
{
@@ -8431,6 +8590,9 @@ main(int argc, char **argv)
84318590
{ "test_split_edges_no_populations", test_split_edges_no_populations },
84328591
{ "test_split_edges_populations", test_split_edges_populations },
84338592
{ "test_split_edges_errors", test_split_edges_errors },
8593+
{ "test_extend_edges_simple", test_extend_edges_simple },
8594+
{ "test_extend_edges_errors", test_extend_edges_errors },
8595+
{ "test_extend_edges", test_extend_edges },
84348596
{ "test_init_take_ownership_no_edge_metadata",
84358597
test_init_take_ownership_no_edge_metadata },
84368598
{ NULL, NULL },

c/tskit/core.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,11 @@ tsk_strerror_internal(int err)
330330
"values for any single site. "
331331
"(TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN)";
332332
break;
333+
case TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME:
334+
ret = "Some mutation times are marked 'unknown' for a method that requires "
335+
"no unknown times. (Use compute_mutation_times to add times?) "
336+
"(TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME)";
337+
break;
333338

334339
/* Migration errors */
335340
case TSK_ERR_UNSORTED_MIGRATIONS:
@@ -615,6 +620,11 @@ tsk_strerror_internal(int err)
615620
"if an individual has nodes from more than one time. "
616621
"(TSK_ERR_INDIVIDUAL_TIME_MISMATCH)";
617622
break;
623+
624+
case TSK_ERR_EXTEND_EDGES_BAD_MAXITER:
625+
ret = "Maximum number of iterations must be positive. "
626+
"(TSK_ERR_EXTEND_EDGES_BAD_MAXITER)";
627+
break;
618628
}
619629
return ret;
620630
}

c/tskit/core.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,11 @@ the edge on which it occurs, and wasn't TSK_UNKNOWN_TIME.
500500
A single site had a mixture of known mutation times and TSK_UNKNOWN_TIME
501501
*/
502502
#define TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN -509
503+
/**
504+
Some mutations have time TSK_UNKNOWN_TIME, but the requested method requires
505+
every mutation time to be known (use compute_mutation_times to add times?).
506+
*/
507+
#define TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME -510
503508
/** @} */
504509

505510
/**
@@ -865,6 +870,16 @@ An individual had nodes from more than one time
865870
*/
866871
#define TSK_ERR_INDIVIDUAL_TIME_MISMATCH -1704
867872
/** @} */
873+
874+
/**
875+
@defgroup EXTEND_EDGES_ERROR_GROUP Extend edges errors.
876+
@{
877+
*/
878+
/**
879+
The maximum number of iterations (max_iter) given to extend edges was not positive.
880+
*/
881+
#define TSK_ERR_EXTEND_EDGES_BAD_MAXITER -1800
882+
/** @} */
868883
// clang-format on
869884

870885
/* This bit is 0 for any errors originating from kastore */

0 commit comments

Comments
 (0)