Skip to content

Commit 2f58af6

Browse files
Merge pull request #172 from awohns/kc-distance-branch-lengths
kc-distance with branch lengths
2 parents 8fe05cd + b5a02a9 commit 2f58af6

File tree

11 files changed

+1227
-2
lines changed

11 files changed

+1227
-2
lines changed

c/tests/test_core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ test_strerror(void)
3232
{
3333
int j;
3434
const char *msg;
35-
int max_error_code = 1024; /* totally arbitrary */
35+
int max_error_code = 8192; /* totally arbitrary */
3636

3737
for (j = 0; j < max_error_code; j++) {
3838
msg = tsk_strerror(-j);

c/tests/test_trees.c

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4420,6 +4420,292 @@ test_internal_sample_sample_sets(void)
44204420
}
44214421

44224422

4423+
4424+
/*=======================================================
4425+
* KC Distance tests.
4426+
*=======================================================*/
4427+
4428+
static void
4429+
test_single_tree_kc(void)
4430+
{
4431+
int ret;
4432+
tsk_treeseq_t ts;
4433+
tsk_tree_t t, other_t;
4434+
double result = 0;
4435+
4436+
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges,
4437+
NULL, NULL, NULL, NULL, NULL);
4438+
ret = tsk_tree_init(&t, &ts, 0);
4439+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4440+
ret = tsk_tree_first(&t);
4441+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4442+
ret = tsk_tree_init(&other_t, &ts, 0);
4443+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4444+
ret = tsk_tree_first(&other_t);
4445+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4446+
ret = tsk_tree_copy(&t, &other_t, TSK_NO_INIT);
4447+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4448+
check_trees_identical(&t, &other_t);
4449+
ret = tsk_tree_kc_distance(&t, &other_t, 0, &result);
4450+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4451+
CU_ASSERT_EQUAL_FATAL(result, 0);
4452+
ret = tsk_tree_kc_distance(&t, &other_t, 1, &result);
4453+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4454+
CU_ASSERT_EQUAL_FATAL(result, 0);
4455+
tsk_treeseq_free(&ts);
4456+
tsk_tree_free(&t);
4457+
tsk_tree_free(&other_t);
4458+
}
4459+
4460+
static void
4461+
test_two_trees_kc(void)
4462+
{
4463+
const char *nodes =
4464+
"1 0 0\n"
4465+
"1 0 0\n"
4466+
"1 0 0\n"
4467+
"0 2 0\n"
4468+
"0 3 0\n";
4469+
const char *nodes_other =
4470+
"1 0 0\n"
4471+
"1 0 0\n"
4472+
"1 0 0\n"
4473+
"0 4 0\n"
4474+
"0 6 0\n";
4475+
const char *edges =
4476+
"0 1 3 0,1\n"
4477+
"0 1 4 2,3\n";
4478+
int ret;
4479+
tsk_treeseq_t ts, other_ts;
4480+
tsk_tree_t t, other_t;
4481+
double result = 0;
4482+
4483+
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL);
4484+
ret = tsk_tree_init(&t, &ts, 0);
4485+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4486+
ret = tsk_tree_first(&t);
4487+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4488+
tsk_treeseq_from_text(&other_ts, 1, nodes_other, edges, NULL, NULL, NULL, NULL, NULL);
4489+
ret = tsk_tree_init(&other_t, &other_ts, 0);
4490+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4491+
ret = tsk_tree_first(&other_t);
4492+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4493+
ret = tsk_tree_kc_distance(&t, &other_t, 0, &result);
4494+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4495+
CU_ASSERT_EQUAL_FATAL(result, 0);
4496+
ret = tsk_tree_kc_distance(&t, &other_t, 1, &result);
4497+
CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 4.243, 1e-2);
4498+
tsk_treeseq_free(&ts);
4499+
tsk_treeseq_free(&other_ts);
4500+
tsk_tree_free(&t);
4501+
tsk_tree_free(&other_t);
4502+
}
4503+
4504+
static void
4505+
test_empty_tree_kc(void)
4506+
{
4507+
tsk_treeseq_t ts;
4508+
tsk_table_collection_t tables;
4509+
tsk_tree_t t;
4510+
tsk_id_t v;
4511+
int ret;
4512+
double result = 0;
4513+
4514+
ret = tsk_table_collection_init(&tables, 0);
4515+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4516+
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
4517+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SEQUENCE_LENGTH);
4518+
tsk_treeseq_free(&ts);
4519+
tables.sequence_length = 1.0;
4520+
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
4521+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4522+
4523+
verify_empty_tree_sequence(&ts, 1.0);
4524+
4525+
ret = tsk_tree_init(&t, &ts, 0);
4526+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4527+
ret = tsk_tree_first(&t);
4528+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4529+
CU_ASSERT_EQUAL_FATAL(t.left_root, TSK_NULL);
4530+
CU_ASSERT_EQUAL_FATAL(t.left, 0);
4531+
CU_ASSERT_EQUAL_FATAL(t.right, 1);
4532+
CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, 0, &v), TSK_ERR_NODE_OUT_OF_BOUNDS);
4533+
4534+
ret = tsk_tree_kc_distance(&t, &t, 0, &result);
4535+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_ROOTS);
4536+
4537+
tsk_tree_free(&t);
4538+
tsk_treeseq_free(&ts);
4539+
tsk_table_collection_free(&tables);
4540+
}
4541+
4542+
static void
4543+
test_nonbinary_tree_kc(void)
4544+
{
4545+
const char *nodes =
4546+
"1 0 0\n"
4547+
"1 0 0\n"
4548+
"1 0 0\n"
4549+
"1 0 0\n"
4550+
"0 1 0";
4551+
const char *edges =
4552+
"0 1 4 0,1,2,3\n";
4553+
tsk_treeseq_t ts;
4554+
tsk_tree_t t;
4555+
int ret;
4556+
double result = 0;
4557+
4558+
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL);
4559+
ret = tsk_tree_init(&t, &ts, 0);
4560+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4561+
ret = tsk_tree_first(&t);
4562+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4563+
tsk_tree_kc_distance(&t, &t, 0, &result);
4564+
CU_ASSERT_EQUAL_FATAL(result, 0);
4565+
tsk_treeseq_free(&ts);
4566+
tsk_tree_free(&t);
4567+
}
4568+
4569+
static void
4570+
test_nonzero_samples_kc(void)
4571+
{
4572+
const char *nodes =
4573+
"0 0 0\n" /* unused node at the start */
4574+
"1 0 0\n"
4575+
"1 0 0\n"
4576+
"0 1 0";
4577+
const char *edges =
4578+
"0 1 3 1,2\n";
4579+
tsk_treeseq_t ts;
4580+
tsk_tree_t t;
4581+
int ret;
4582+
double result = 0;
4583+
4584+
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL);
4585+
ret = tsk_tree_init(&t, &ts, 0);
4586+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4587+
ret = tsk_tree_first(&t);
4588+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4589+
ret = tsk_tree_kc_distance(&t, &t, 0, &result);
4590+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4591+
CU_ASSERT_EQUAL_FATAL(result, 0);
4592+
tsk_treeseq_free(&ts);
4593+
tsk_tree_free(&t);
4594+
}
4595+
4596+
static void
4597+
test_internal_samples_kc(void)
4598+
{
4599+
const char *nodes =
4600+
"1 0 0\n"
4601+
"1 0 0\n"
4602+
"1 1 0";
4603+
const char *edges =
4604+
"0 1 2 0,1\n";
4605+
tsk_treeseq_t ts;
4606+
tsk_tree_t t;
4607+
int ret;
4608+
double result = 0;
4609+
4610+
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL);
4611+
ret = tsk_tree_init(&t, &ts, 0);
4612+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4613+
ret = tsk_tree_first(&t);
4614+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4615+
ret = tsk_tree_kc_distance(&t, &t, 0, &result);
4616+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INTERNAL_SAMPLES);
4617+
tsk_treeseq_free(&ts);
4618+
tsk_tree_free(&t);
4619+
}
4620+
4621+
static void
4622+
test_unequal_sample_size_kc(void)
4623+
{
4624+
const char *nodes =
4625+
"1 0 0\n"
4626+
"1 0 0\n"
4627+
"1 0 0\n"
4628+
"0 2 0\n"
4629+
"0 3 0\n";
4630+
const char *nodes_other =
4631+
"1 0 0\n"
4632+
"1 0 0\n"
4633+
"0 1 0\n";
4634+
const char *edges =
4635+
"0 1 3 0,1\n"
4636+
"0 1 4 2,3\n";
4637+
const char *edges_other =
4638+
"0 1 2 0,1\n";
4639+
int ret;
4640+
tsk_treeseq_t ts, other_ts;
4641+
tsk_tree_t t, other_t;
4642+
double result = 0;
4643+
4644+
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL);
4645+
ret = tsk_tree_init(&t, &ts, 0);
4646+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4647+
ret = tsk_tree_first(&t);
4648+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4649+
tsk_treeseq_from_text(&other_ts, 1, nodes_other, edges_other,
4650+
NULL, NULL, NULL, NULL, NULL);
4651+
ret = tsk_tree_init(&other_t, &other_ts, 0);
4652+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4653+
ret = tsk_tree_first(&other_t);
4654+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4655+
ret = tsk_tree_kc_distance(&t, &other_t, 0, &result);
4656+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SAMPLE_SIZE_MISMATCH);
4657+
tsk_treeseq_free(&ts);
4658+
tsk_treeseq_free(&other_ts);
4659+
tsk_tree_free(&t);
4660+
tsk_tree_free(&other_t);
4661+
}
4662+
4663+
static void
4664+
test_unequal_samples_kc(void)
4665+
{
4666+
const char *nodes =
4667+
"1 0 0\n"
4668+
"1 0 0\n"
4669+
"1 0 0\n"
4670+
"0 2 0\n"
4671+
"0 3 0\n";
4672+
const char *nodes_other =
4673+
"0 0 0\n" /* Unused node at the start */
4674+
"1 0 0\n"
4675+
"1 0 0\n"
4676+
"1 0 0\n"
4677+
"0 2 0\n"
4678+
"0 3 0\n";
4679+
const char *edges =
4680+
"0 1 3 0,1\n"
4681+
"0 1 4 2,3\n";
4682+
const char *edges_other =
4683+
"0 1 4 1,2\n"
4684+
"0 1 5 3,4\n";
4685+
int ret;
4686+
tsk_treeseq_t ts, other_ts;
4687+
tsk_tree_t t, other_t;
4688+
double result = 0;
4689+
4690+
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL);
4691+
ret = tsk_tree_init(&t, &ts, 0);
4692+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4693+
ret = tsk_tree_first(&t);
4694+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4695+
tsk_treeseq_from_text(&other_ts, 1, nodes_other, edges_other,
4696+
NULL, NULL, NULL, NULL, NULL);
4697+
ret = tsk_tree_init(&other_t, &other_ts, 0);
4698+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4699+
ret = tsk_tree_first(&other_t);
4700+
CU_ASSERT_EQUAL_FATAL(ret, 1);
4701+
ret = tsk_tree_kc_distance(&t, &other_t, 0, &result);
4702+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SAMPLES_NOT_EQUAL);
4703+
tsk_treeseq_free(&ts);
4704+
tsk_treeseq_free(&other_ts);
4705+
tsk_tree_free(&t);
4706+
tsk_tree_free(&other_t);
4707+
}
4708+
44234709
/*=======================================================
44244710
* Miscellaneous tests.
44254711
*======================================================*/
@@ -5108,6 +5394,16 @@ main(int argc, char **argv)
51085394
{"test_nonbinary_sample_sets", test_nonbinary_sample_sets},
51095395
{"test_internal_sample_sample_sets", test_internal_sample_sample_sets},
51105396

5397+
/*KC_Distance tests */
5398+
{"test_single_tree_kc", test_single_tree_kc},
5399+
{"test_two_trees_kc", test_two_trees_kc},
5400+
{"test_empty_tree_kc", test_empty_tree_kc},
5401+
{"test_nonbinary_tree_kc", test_nonbinary_tree_kc},
5402+
{"test_nonzero_samples_kc", test_nonzero_samples_kc},
5403+
{"test_internal_samples_kc", test_internal_samples_kc},
5404+
{"test_unequal_sample_size_kc", test_unequal_sample_size_kc},
5405+
{"test_unequal_samples_kc", test_unequal_samples_kc},
5406+
51115407
/* Misc */
51125408
{"test_tree_errors", test_tree_errors},
51135409
{"test_tree_copy_flags", test_tree_copy_flags},

c/tskit/core.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,20 @@ tsk_strerror_internal(int err)
373373
"represented. Either generate variants (which support missing "
374374
"data) or use the impute missing data option.";
375375
break;
376+
377+
/* Distance metric errors */
378+
case TSK_ERR_SAMPLE_SIZE_MISMATCH:
379+
ret = "Cannot compare trees with different numbers of samples.";
380+
break;
381+
case TSK_ERR_SAMPLES_NOT_EQUAL:
382+
ret = "Samples must be identical in trees to compare.";
383+
break;
384+
case TSK_ERR_INTERNAL_SAMPLES:
385+
ret = "Internal samples are not supported.";
386+
break;
387+
case TSK_ERR_MULTIPLE_ROOTS:
388+
ret = "Trees with multiple roots not supported.";
389+
break;
376390
}
377391
return ret;
378392
}

c/tskit/core.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,12 @@ of tskit.
223223
#define TSK_ERR_MUST_IMPUTE_NON_SAMPLES -1100
224224
#define TSK_ERR_MUST_IMPUTE_HAPLOTYPES -1101
225225

226+
/* Distance metric errors */
227+
#define TSK_ERR_SAMPLE_SIZE_MISMATCH -1200
228+
#define TSK_ERR_SAMPLES_NOT_EQUAL -1201
229+
#define TSK_ERR_INTERNAL_SAMPLES -1202
230+
#define TSK_ERR_MULTIPLE_ROOTS -1203
231+
226232

227233
/* This bit is 0 for any errors originating from kastore */
228234
#define TSK_KAS_ERR_BIT 14

c/tskit/stats.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ int tsk_ld_calc_get_r2_array(tsk_ld_calc_t *self, tsk_id_t a, int direction,
4848
tsk_size_t max_sites, double max_distance,
4949
double *r2, tsk_size_t *num_r2_values);
5050

51-
5251
#ifdef __cplusplus
5352
}
5453
#endif

0 commit comments

Comments
 (0)