Skip to content

Commit d651881

Browse files
jeromekellehermergify[bot]
authored andcommitted
Support sample sets in divmat
1 parent 77faade commit d651881

File tree

10 files changed

+732
-266
lines changed

10 files changed

+732
-266
lines changed

c/tests/test_stats.c

Lines changed: 97 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* MIT License
33
*
4-
* Copyright (c) 2019-2023 Tskit Developers
4+
* Copyright (c) 2019-2024 Tskit Developers
55
*
66
* Permission is hereby granted, free of charge, to any person obtaining a copy
77
* of this software and associated documentation files (the "Software"), to deal
@@ -287,7 +287,8 @@ verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)
287287
ts, n, sample_set_sizes, samples, n * n, index_tuples, 0, NULL, options, D1);
288288
CU_ASSERT_EQUAL_FATAL(ret, 0);
289289

290-
ret = tsk_treeseq_divergence_matrix(ts, 0, NULL, 0, NULL, options, D2);
290+
ret = tsk_treeseq_divergence_matrix(
291+
ts, n, sample_set_sizes, samples, 0, NULL, options, D2);
291292
CU_ASSERT_EQUAL_FATAL(ret, 0);
292293

293294
for (j = 0; j < n; j++) {
@@ -1057,19 +1058,42 @@ test_single_tree_divergence_matrix(void)
10571058
int ret;
10581059
double result[16];
10591060
double D_branch[16] = { 0, 2, 6, 6, 2, 0, 6, 6, 6, 6, 0, 4, 6, 6, 4, 0 };
1060-
double D_site[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1061+
double D_site[16] = { 0, 1, 1, 0, 1, 0, 2, 1, 1, 2, 0, 1, 0, 1, 1, 0 };
10611062

1062-
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
1063-
NULL, NULL, NULL, 0);
1063+
tsk_size_t sample_set_sizes[] = { 2, 2 };
10641064

1065-
ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, 0, NULL, TSK_STAT_BRANCH, result);
1065+
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
1066+
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
1067+
1068+
ret = tsk_treeseq_divergence_matrix(
1069+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
10661070
CU_ASSERT_EQUAL_FATAL(ret, 0);
10671071
assert_arrays_almost_equal(16, result, D_branch);
10681072

1069-
ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, 0, NULL, TSK_STAT_SITE, result);
1073+
ret = tsk_treeseq_divergence_matrix(
1074+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
10701075
CU_ASSERT_EQUAL_FATAL(ret, 0);
10711076
assert_arrays_almost_equal(16, result, D_site);
10721077

1078+
ret = tsk_treeseq_divergence_matrix(
1079+
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);
1080+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1081+
1082+
ret = tsk_treeseq_divergence_matrix(
1083+
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_BRANCH, result);
1084+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1085+
1086+
sample_set_sizes[0] = 3;
1087+
sample_set_sizes[1] = 1;
1088+
ret = tsk_treeseq_divergence_matrix(
1089+
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_BRANCH, result);
1090+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1091+
ret = tsk_treeseq_divergence_matrix(
1092+
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);
1093+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1094+
1095+
/* assert_arrays_almost_equal(4, result, D_site); */
1096+
10731097
verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
10741098
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
10751099
verify_divergence_matrix(&ts, TSK_STAT_SITE);
@@ -1083,7 +1107,7 @@ test_single_tree_divergence_matrix_internal_samples(void)
10831107
{
10841108
tsk_treeseq_t ts;
10851109
int ret;
1086-
double result[16];
1110+
double *result = malloc(16 * sizeof(double));
10871111
double D[16] = { 0, 2, 4, 3, 2, 0, 4, 3, 4, 4, 0, 1, 3, 3, 1, 0 };
10881112

10891113
const char *nodes = "1 0 -1 -1\n" /* 2.00┊ 6 ┊ */
@@ -1109,14 +1133,35 @@ test_single_tree_divergence_matrix_internal_samples(void)
11091133
"3 3 T -1\n"
11101134
"4 4 T -1\n"
11111135
"5 5 T -1\n";
1136+
tsk_id_t samples[] = { 0, 1, 2, 5 };
1137+
tsk_size_t sizes[] = { 1, 1, 1, 1 };
11121138

11131139
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
11141140

1115-
ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, 0, NULL, TSK_STAT_BRANCH, result);
1141+
ret = tsk_treeseq_divergence_matrix(
1142+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
1143+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1144+
assert_arrays_almost_equal(16, result, D);
1145+
ret = tsk_treeseq_divergence_matrix(
1146+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
11161147
CU_ASSERT_EQUAL_FATAL(ret, 0);
11171148
assert_arrays_almost_equal(16, result, D);
11181149

1119-
ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, 0, NULL, TSK_STAT_SITE, result);
1150+
ret = tsk_treeseq_divergence_matrix(
1151+
&ts, 4, sizes, samples, 0, NULL, TSK_STAT_BRANCH, result);
1152+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1153+
assert_arrays_almost_equal(16, result, D);
1154+
ret = tsk_treeseq_divergence_matrix(
1155+
&ts, 4, sizes, samples, 0, NULL, TSK_STAT_SITE, result);
1156+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1157+
assert_arrays_almost_equal(16, result, D);
1158+
1159+
ret = tsk_treeseq_divergence_matrix(
1160+
&ts, 4, NULL, samples, 0, NULL, TSK_STAT_BRANCH, result);
1161+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1162+
assert_arrays_almost_equal(16, result, D);
1163+
ret = tsk_treeseq_divergence_matrix(
1164+
&ts, 4, NULL, samples, 0, NULL, TSK_STAT_SITE, result);
11201165
CU_ASSERT_EQUAL_FATAL(ret, 0);
11211166
assert_arrays_almost_equal(16, result, D);
11221167

@@ -1126,6 +1171,7 @@ test_single_tree_divergence_matrix_internal_samples(void)
11261171
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
11271172

11281173
tsk_treeseq_free(&ts);
1174+
free(result);
11291175
}
11301176

11311177
static void
@@ -1164,7 +1210,8 @@ test_single_tree_divergence_matrix_multi_root(void)
11641210

11651211
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
11661212

1167-
ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, 0, NULL, TSK_STAT_BRANCH, result);
1213+
ret = tsk_treeseq_divergence_matrix(
1214+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
11681215
CU_ASSERT_EQUAL_FATAL(ret, 0);
11691216
assert_arrays_almost_equal(16, result, D_branch);
11701217

@@ -2424,73 +2471,84 @@ test_simplest_divergence_matrix(void)
24242471
"1 0 0\n"
24252472
"0 1 0\n";
24262473
const char *edges = "0 1 2 0,1\n";
2474+
const char *sites = "0.1 A\n"
2475+
"0.6 A\n";
2476+
const char *mutations = "0 0 B -1\n"
2477+
"1 0 B -1\n";
24272478
tsk_treeseq_t ts;
24282479
tsk_id_t sample_ids[] = { 0, 1 };
24292480
double D_branch[4] = { 0, 2, 2, 0 };
2430-
double D_site[4] = { 0, 0, 0, 0 };
2481+
double D_site[4] = { 0, 2, 2, 0 };
24312482
double result[4];
24322483
int ret;
24332484

2434-
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
2485+
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
24352486

24362487
ret = tsk_treeseq_divergence_matrix(
2437-
&ts, 2, sample_ids, 0, NULL, TSK_STAT_BRANCH, result);
2488+
&ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_BRANCH, result);
24382489
CU_ASSERT_EQUAL_FATAL(ret, 0);
24392490
assert_arrays_almost_equal(4, D_branch, result);
24402491

2441-
ret = tsk_treeseq_divergence_matrix(
2442-
&ts, 2, sample_ids, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);
2492+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL,
2493+
TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);
24432494
CU_ASSERT_EQUAL_FATAL(ret, 0);
24442495
assert_arrays_almost_equal(4, D_branch, result);
24452496

2446-
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 0, NULL, 0, result);
2497+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);
24472498
CU_ASSERT_EQUAL_FATAL(ret, 0);
24482499
assert_arrays_almost_equal(4, D_site, result);
24492500

24502501
ret = tsk_treeseq_divergence_matrix(
2451-
&ts, 2, sample_ids, 0, NULL, TSK_STAT_SPAN_NORMALISE, result);
2502+
&ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_SPAN_NORMALISE, result);
24522503
CU_ASSERT_EQUAL_FATAL(ret, 0);
24532504
assert_arrays_almost_equal(4, D_site, result);
24542505

24552506
ret = tsk_treeseq_divergence_matrix(
2456-
&ts, 2, sample_ids, 0, NULL, TSK_STAT_SITE, result);
2507+
&ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_SITE, result);
24572508
CU_ASSERT_EQUAL_FATAL(ret, 0);
24582509
assert_arrays_almost_equal(4, D_site, result);
24592510

2460-
ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, 0, NULL, TSK_STAT_BRANCH, result);
2511+
ret = tsk_treeseq_divergence_matrix(
2512+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
24612513
CU_ASSERT_EQUAL_FATAL(ret, 0);
24622514
assert_arrays_almost_equal(4, D_branch, result);
24632515

2464-
ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, 0, NULL, TSK_STAT_SITE, result);
2516+
ret = tsk_treeseq_divergence_matrix(
2517+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
24652518
CU_ASSERT_EQUAL_FATAL(ret, 0);
24662519
assert_arrays_almost_equal(4, D_site, result);
24672520

2468-
ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, 0, NULL, TSK_STAT_NODE, result);
2521+
ret = tsk_treeseq_divergence_matrix(
2522+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_NODE, result);
24692523
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
24702524

24712525
ret = tsk_treeseq_divergence_matrix(
2472-
&ts, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);
2526+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_POLARISED, result);
24732527
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_POLARISED_UNSUPPORTED);
24742528

24752529
ret = tsk_treeseq_divergence_matrix(
2476-
&ts, 0, NULL, 0, NULL, TSK_STAT_SITE | TSK_STAT_BRANCH, result);
2530+
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE | TSK_STAT_BRANCH, result);
24772531
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);
24782532

24792533
sample_ids[0] = -1;
2480-
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 0, NULL, 0, result);
2534+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);
24812535
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
24822536

24832537
sample_ids[0] = 3;
2484-
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 0, NULL, 0, result);
2538+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);
24852539
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
24862540

24872541
sample_ids[0] = 1;
2488-
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 0, NULL, 0, result);
2542+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);
24892543
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
24902544
ret = tsk_treeseq_divergence_matrix(
2491-
&ts, 2, sample_ids, 0, NULL, TSK_STAT_BRANCH, result);
2545+
&ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_BRANCH, result);
24922546
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
24932547

2548+
sample_ids[0] = 2;
2549+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, NULL, 0, result);
2550+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLES);
2551+
24942552
tsk_treeseq_free(&ts);
24952553
}
24962554

@@ -2515,39 +2573,39 @@ test_simplest_divergence_matrix_windows(void)
25152573

25162574
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
25172575

2518-
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 2, windows, 0, result);
2576+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 2, windows, 0, result);
25192577
CU_ASSERT_EQUAL_FATAL(ret, 0);
25202578
assert_arrays_almost_equal(8, D_site, result);
25212579
ret = tsk_treeseq_divergence_matrix(
2522-
&ts, 2, sample_ids, 2, windows, TSK_STAT_BRANCH, result);
2580+
&ts, 2, NULL, sample_ids, 2, windows, TSK_STAT_BRANCH, result);
25232581
CU_ASSERT_EQUAL_FATAL(ret, 0);
25242582
assert_arrays_almost_equal(8, D_branch, result);
25252583

25262584
/* Windows for the second half */
25272585
ret = tsk_treeseq_divergence_matrix(
2528-
&ts, 2, sample_ids, 1, windows + 1, TSK_STAT_SITE, result);
2586+
&ts, 2, NULL, sample_ids, 1, windows + 1, TSK_STAT_SITE, result);
25292587
CU_ASSERT_EQUAL_FATAL(ret, 0);
25302588
assert_arrays_almost_equal(4, D_site, result);
25312589
ret = tsk_treeseq_divergence_matrix(
2532-
&ts, 2, sample_ids, 1, windows + 1, TSK_STAT_BRANCH, result);
2590+
&ts, 2, NULL, sample_ids, 1, windows + 1, TSK_STAT_BRANCH, result);
25332591
CU_ASSERT_EQUAL_FATAL(ret, 0);
25342592
assert_arrays_almost_equal(4, D_branch, result);
25352593

2536-
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 0, windows, 0, result);
2594+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 0, windows, 0, result);
25372595
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);
25382596

25392597
windows[0] = -1;
2540-
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 2, windows, 0, result);
2598+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 2, windows, 0, result);
25412599
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);
25422600

25432601
windows[0] = 0.45;
25442602
windows[2] = 1.5;
2545-
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 2, windows, 0, result);
2603+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 2, windows, 0, result);
25462604
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);
25472605

25482606
windows[0] = 0.55;
25492607
windows[2] = 1.0;
2550-
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 2, windows, 0, result);
2608+
ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, sample_ids, 2, windows, 0, result);
25512609
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);
25522610

25532611
tsk_treeseq_free(&ts);
@@ -2558,7 +2616,7 @@ test_simplest_divergence_matrix_internal_sample(void)
25582616
{
25592617
const char *nodes = "1 0 0\n"
25602618
"1 0 0\n"
2561-
"0 1 0\n";
2619+
"1 1 0\n";
25622620
const char *edges = "0 1 2 0,1\n";
25632621
tsk_treeseq_t ts;
25642622
tsk_id_t sample_ids[] = { 0, 1, 2 };
@@ -2570,12 +2628,12 @@ test_simplest_divergence_matrix_internal_sample(void)
25702628
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
25712629

25722630
ret = tsk_treeseq_divergence_matrix(
2573-
&ts, 3, sample_ids, 0, NULL, TSK_STAT_BRANCH, result);
2631+
&ts, 3, NULL, sample_ids, 0, NULL, TSK_STAT_BRANCH, result);
25742632
CU_ASSERT_EQUAL_FATAL(ret, 0);
25752633
assert_arrays_almost_equal(9, D_branch, result);
25762634

25772635
ret = tsk_treeseq_divergence_matrix(
2578-
&ts, 3, sample_ids, 0, NULL, TSK_STAT_SITE, result);
2636+
&ts, 3, NULL, sample_ids, 0, NULL, TSK_STAT_SITE, result);
25792637
CU_ASSERT_EQUAL_FATAL(ret, 0);
25802638
assert_arrays_almost_equal(9, D_site, result);
25812639

c/tests/test_trees.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8008,9 +8008,10 @@ test_time_uncalibrated(void)
80088008
TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, sigma);
80098009
CU_ASSERT_EQUAL_FATAL(ret, 0);
80108010

8011-
ret = tsk_treeseq_divergence_matrix(&ts2, 0, NULL, 0, NULL, TSK_STAT_BRANCH, result);
8011+
ret = tsk_treeseq_divergence_matrix(
8012+
&ts2, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
80128013
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_UNCALIBRATED);
8013-
ret = tsk_treeseq_divergence_matrix(&ts2, 0, NULL, 0, NULL,
8014+
ret = tsk_treeseq_divergence_matrix(&ts2, 0, NULL, NULL, 0, NULL,
80148015
TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, result);
80158016
CU_ASSERT_EQUAL_FATAL(ret, 0);
80168017

c/tests/testlib.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -966,16 +966,6 @@ tskit_suite_init(void)
966966
return CUE_SUCCESS;
967967
}
968968

969-
void
970-
assert_arrays_almost_equal(tsk_size_t len, double *a, double *b)
971-
{
972-
tsk_size_t j;
973-
974-
for (j = 0; j < len; j++) {
975-
CU_ASSERT_DOUBLE_EQUAL(a[j], b[j], 1e-9);
976-
}
977-
}
978-
979969
static int
980970
tskit_suite_cleanup(void)
981971
{

c/tests/testlib.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,16 @@ void parse_individuals(const char *text, tsk_individual_table_t *individual_tabl
5454

5555
void unsort_edges(tsk_edge_table_t *edges, size_t start);
5656

57-
void assert_arrays_almost_equal(tsk_size_t len, double *a, double *b);
57+
/* Use a macro so we can get line numbers at roughly the right place */
58+
#define assert_arrays_almost_equal(len, a, b) \
59+
{ \
60+
do { \
61+
tsk_size_t _j; \
62+
for (_j = 0; _j < len; _j++) { \
63+
CU_ASSERT_DOUBLE_EQUAL(a[_j], b[_j], 1e-9); \
64+
} \
65+
} while (0); \
66+
}
5867

5968
extern const char *single_tree_ex_nodes;
6069
extern const char *single_tree_ex_edges;

0 commit comments

Comments
 (0)