Skip to content

Commit 6c156b7

Browse files
Merge pull request #312 from jeromekelleher/remove-pairwise-diversity
Removed old pairwise_diversity code.
2 parents 23fa1a8 + 9c0c13d commit 6c156b7

File tree

8 files changed

+44
-403
lines changed

8 files changed

+44
-403
lines changed

c/dev-tools/dev-cli.c

Lines changed: 2 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -157,29 +157,6 @@ print_ld_matrix(tsk_treeseq_t *ts)
157157
}
158158
}
159159

160-
static void
161-
print_stats(tsk_treeseq_t *ts)
162-
{
163-
int ret = 0;
164-
uint32_t j;
165-
size_t num_samples = tsk_treeseq_get_num_samples(ts) / 2;
166-
tsk_id_t *sample = malloc(num_samples * sizeof(tsk_id_t));
167-
double pi;
168-
169-
if (sample == NULL) {
170-
fatal_error("no memory");
171-
}
172-
for (j = 0; j < num_samples; j++) {
173-
sample[j] = (tsk_id_t) j;
174-
}
175-
ret = tsk_treeseq_get_pairwise_diversity(ts, sample, num_samples, &pi);
176-
if (ret != 0) {
177-
fatal_library_error(ret, "get_pairwise_diversity");
178-
}
179-
printf("pi = %f\n", pi);
180-
free(sample);
181-
}
182-
183160
static void
184161
print_newick_trees(tsk_treeseq_t *ts)
185162
{
@@ -292,16 +269,6 @@ run_newick(const char *filename, int TSK_UNUSED(verbose))
292269
tsk_treeseq_free(&ts);
293270
}
294271

295-
static void
296-
run_stats(const char *filename, int TSK_UNUSED(verbose))
297-
{
298-
tsk_treeseq_t ts;
299-
300-
load_tree_sequence(&ts, filename);
301-
print_stats(&ts);
302-
tsk_treeseq_free(&ts);
303-
}
304-
305272
static void
306273
run_simplify(const char *input_filename, const char *output_filename, size_t num_samples,
307274
bool filter_sites, int verbose)
@@ -399,14 +366,6 @@ main(int argc, char** argv)
399366
void* argtable6[] = {cmd6, verbose6, infiles6, end6};
400367
int nerrors6;
401368

402-
/* SYNTAX 7: stats [-v] <input-file> */
403-
struct arg_rex *cmd7 = arg_rex1(NULL, NULL, "stats", NULL, REG_ICASE, NULL);
404-
struct arg_lit *verbose7 = arg_lit0("v", "verbose", NULL);
405-
struct arg_file *infiles7 = arg_file1(NULL, NULL, NULL, NULL);
406-
struct arg_end *end7 = arg_end(20);
407-
void* argtable7[] = {cmd7, verbose7, infiles7, end7};
408-
int nerrors7;
409-
410369
int exitcode = EXIT_SUCCESS;
411370
const char *progname = "main";
412371

@@ -419,7 +378,6 @@ main(int argc, char** argv)
419378
nerrors4 = arg_parse(argc, argv, argtable4);
420379
nerrors5 = arg_parse(argc, argv, argtable5);
421380
nerrors6 = arg_parse(argc, argv, argtable6);
422-
nerrors7 = arg_parse(argc, argv, argtable7);
423381

424382
if (nerrors1 == 0) {
425383
run_simplify(infiles1->filename[0], outfiles1->filename[0],
@@ -432,11 +390,9 @@ main(int argc, char** argv)
432390
} else if (nerrors4 == 0) {
433391
run_variants(infiles4->filename[0], verbose4->count);
434392
} else if (nerrors5 == 0) {
435-
run_stats(infiles5->filename[0], verbose5->count);
393+
run_print(infiles5->filename[0], verbose5->count);
436394
} else if (nerrors6 == 0) {
437-
run_print(infiles6->filename[0], verbose6->count);
438-
} else if (nerrors7 == 0) {
439-
run_newick(infiles7->filename[0], verbose7->count);
395+
run_newick(infiles6->filename[0], verbose6->count);
440396
} else {
441397
/* We get here if the command line matched none of the possible syntaxes */
442398
if (cmd1->count > 0) {
@@ -463,10 +419,6 @@ main(int argc, char** argv)
463419
arg_print_errors(stdout, end6, progname);
464420
printf("usage: %s ", progname);
465421
arg_print_syntax(stdout, argtable6, "\n");
466-
} else if (cmd7->count > 0) {
467-
arg_print_errors(stdout, end7, progname);
468-
printf("usage: %s ", progname);
469-
arg_print_syntax(stdout, argtable7, "\n");
470422
} else {
471423
/* no correct cmd literals were given, so we cant presume which syntax was intended */
472424
printf("%s: missing command.\n",progname);
@@ -476,7 +428,6 @@ main(int argc, char** argv)
476428
printf("usage 4: %s ", progname); arg_print_syntax(stdout, argtable4, "\n");
477429
printf("usage 5: %s ", progname); arg_print_syntax(stdout, argtable5, "\n");
478430
printf("usage 6: %s ", progname); arg_print_syntax(stdout, argtable6, "\n");
479-
printf("usage 7: %s ", progname); arg_print_syntax(stdout, argtable7, "\n");
480431
}
481432
}
482433

@@ -486,7 +437,6 @@ main(int argc, char** argv)
486437
arg_freetable(argtable4, sizeof(argtable4) / sizeof(argtable4[0]));
487438
arg_freetable(argtable5, sizeof(argtable5) / sizeof(argtable5[0]));
488439
arg_freetable(argtable6, sizeof(argtable6) / sizeof(argtable6[0]));
489-
arg_freetable(argtable7, sizeof(argtable7) / sizeof(argtable7[0]));
490440

491441
return exitcode;
492442
}

c/tests/test_stats.c

Lines changed: 5 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -29,22 +29,6 @@
2929
#include <stdlib.h>
3030
#include <float.h>
3131

32-
static tsk_size_t
33-
get_max_site_mutations(tsk_treeseq_t *ts)
34-
{
35-
int ret;
36-
tsk_id_t j;
37-
tsk_size_t max_mutations = 0;
38-
tsk_site_t site;
39-
40-
for (j = 0; j < (tsk_id_t) tsk_treeseq_get_num_sites(ts); j++) {
41-
ret = tsk_treeseq_get_site(ts, j, &site);
42-
CU_ASSERT_EQUAL_FATAL(ret, 0);
43-
max_mutations = TSK_MAX(max_mutations, site.mutations_length);
44-
}
45-
return max_mutations;
46-
}
47-
4832
static bool
4933
multi_mutations_exist(tsk_treeseq_t *ts, tsk_id_t start, tsk_id_t end)
5034
{
@@ -227,36 +211,6 @@ verify_ld(tsk_treeseq_t *ts)
227211
free(num_site_mutations);
228212
}
229213

230-
static void
231-
verify_pairwise_diversity(tsk_treeseq_t *ts)
232-
{
233-
int ret;
234-
size_t num_samples = tsk_treeseq_get_num_samples(ts);
235-
tsk_id_t *samples;
236-
uint32_t j;
237-
double pi;
238-
tsk_size_t max_site_mutations = get_max_site_mutations(ts);
239-
240-
ret = tsk_treeseq_get_pairwise_diversity(ts, NULL, 0, &pi);
241-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
242-
ret = tsk_treeseq_get_pairwise_diversity(ts, NULL, 1, &pi);
243-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
244-
ret = tsk_treeseq_get_pairwise_diversity(ts, NULL, num_samples + 1, &pi);
245-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
246-
247-
samples = tsk_treeseq_get_samples(ts);
248-
249-
for (j = 2; j < num_samples; j++) {
250-
ret = tsk_treeseq_get_pairwise_diversity(ts, samples, j, &pi);
251-
if (max_site_mutations <= 1) {
252-
CU_ASSERT_EQUAL_FATAL(ret, 0);
253-
CU_ASSERT_TRUE_FATAL(pi >= 0);
254-
} else {
255-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);
256-
}
257-
}
258-
}
259-
260214
/* FIXME: this test is weak and should check the return value somehow.
261215
* We should also have simplest and single tree tests along with separate
262216
* tests for the error conditions. This should be done as part of the general
@@ -1023,17 +977,6 @@ test_single_tree_ld(void)
1023977
tsk_treeseq_free(&ts);
1024978
}
1025979

1026-
static void
1027-
test_single_tree_pairwise_diversity(void)
1028-
{
1029-
tsk_treeseq_t ts;
1030-
1031-
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges,
1032-
NULL, single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL);
1033-
verify_pairwise_diversity(&ts);
1034-
tsk_treeseq_free(&ts);
1035-
}
1036-
1037980
static void
1038981
test_single_tree_mean_descendants(void)
1039982
{
@@ -1095,17 +1038,6 @@ test_paper_ex_ld(void)
10951038
tsk_treeseq_free(&ts);
10961039
}
10971040

1098-
static void
1099-
test_paper_ex_pairwise_diversity(void)
1100-
{
1101-
tsk_treeseq_t ts;
1102-
1103-
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges,
1104-
NULL, paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL);
1105-
verify_pairwise_diversity(&ts);
1106-
tsk_treeseq_free(&ts);
1107-
}
1108-
11091041
static void
11101042
test_paper_ex_mean_descendants(void)
11111043
{
@@ -1173,26 +1105,23 @@ test_paper_ex_diversity(void)
11731105
tsk_treeseq_t ts;
11741106
tsk_id_t samples[] = {0, 1, 2, 3};
11751107
tsk_size_t sample_set_sizes = 4;
1176-
double pi1, pi2;
1108+
double pi;
11771109
int ret;
11781110

11791111
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges,
11801112
NULL, paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL);
11811113

11821114
ret = tsk_treeseq_diversity(&ts, 1, &sample_set_sizes, samples, 0, NULL,
1183-
&pi1, TSK_STAT_SITE);
1115+
&pi, TSK_STAT_SITE);
11841116
CU_ASSERT_EQUAL_FATAL(ret, 0);
1185-
/* This function will probably be removed, but it's a handy test for now */
1186-
ret = tsk_treeseq_get_pairwise_diversity(&ts, samples, 4, &pi2);
1187-
CU_ASSERT_EQUAL_FATAL(ret, 0);
1188-
CU_ASSERT_DOUBLE_EQUAL_FATAL(pi1, pi2, 1e-6);
1117+
CU_ASSERT_DOUBLE_EQUAL_FATAL(pi, 1.5, 1e-6);
11891118

11901119
/* A sample set size of 1 leads to NaN */
11911120
sample_set_sizes = 1;
11921121
ret = tsk_treeseq_diversity(&ts, 1, &sample_set_sizes, samples, 0, NULL,
1193-
&pi1, TSK_STAT_SITE);
1122+
&pi, TSK_STAT_SITE);
11941123
CU_ASSERT_EQUAL_FATAL(ret, 0);
1195-
CU_ASSERT(isnan((float) pi1));
1124+
CU_ASSERT(isnan((float) pi));
11961125

11971126
tsk_treeseq_free(&ts);
11981127
}
@@ -1681,17 +1610,6 @@ test_nonbinary_ex_ld(void)
16811610
tsk_treeseq_free(&ts);
16821611
}
16831612

1684-
static void
1685-
test_nonbinary_ex_pairwise_diversity(void)
1686-
{
1687-
tsk_treeseq_t ts;
1688-
1689-
tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges,
1690-
NULL, nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL);
1691-
verify_pairwise_diversity(&ts);
1692-
tsk_treeseq_free(&ts);
1693-
}
1694-
16951613
static void
16961614
test_nonbinary_ex_mean_descendants(void)
16971615
{
@@ -1756,15 +1674,13 @@ main(int argc, char **argv)
17561674
{"test_empty_ts_afs", test_empty_ts_afs},
17571675

17581676
{"test_single_tree_ld", test_single_tree_ld},
1759-
{"test_single_tree_pairwise_diversity", test_single_tree_pairwise_diversity},
17601677
{"test_single_tree_mean_descendants", test_single_tree_mean_descendants},
17611678
{"test_single_tree_genealogical_nearest_neighbours",
17621679
test_single_tree_genealogical_nearest_neighbours},
17631680
{"test_single_tree_general_stat", test_single_tree_general_stat},
17641681
{"test_single_tree_general_stat_errors", test_single_tree_general_stat_errors},
17651682

17661683
{"test_paper_ex_ld", test_paper_ex_ld},
1767-
{"test_paper_ex_pairwise_diversity", test_paper_ex_pairwise_diversity},
17681684
{"test_paper_ex_mean_descendants", test_paper_ex_mean_descendants},
17691685
{"test_paper_ex_genealogical_nearest_neighbours",
17701686
test_paper_ex_genealogical_nearest_neighbours},
@@ -1798,7 +1714,6 @@ main(int argc, char **argv)
17981714
{"test_paper_ex_afs", test_paper_ex_afs},
17991715

18001716
{"test_nonbinary_ex_ld", test_nonbinary_ex_ld},
1801-
{"test_nonbinary_ex_pairwise_diversity", test_nonbinary_ex_pairwise_diversity},
18021717
{"test_nonbinary_ex_mean_descendants", test_nonbinary_ex_mean_descendants},
18031718
{"test_nonbinary_ex_genealogical_nearest_neighbours",
18041719
test_nonbinary_ex_genealogical_nearest_neighbours},

c/tskit/trees.c

Lines changed: 1 addition & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -566,66 +566,8 @@ tsk_treeseq_is_sample(tsk_treeseq_t *self, tsk_id_t u)
566566
return ret;
567567
}
568568

569-
/* Accessors for records */
569+
/* Stats functions */
570570

571-
int TSK_WARN_UNUSED
572-
tsk_treeseq_get_pairwise_diversity(tsk_treeseq_t *self,
573-
tsk_id_t *samples, size_t num_samples, double *pi)
574-
{
575-
int ret = 0;
576-
tsk_tree_t *tree = NULL;
577-
double result, denom, n, count;
578-
tsk_site_t *sites;
579-
tsk_size_t j, k, num_sites;
580-
581-
if (num_samples < 2 || num_samples > self->num_samples) {
582-
ret = TSK_ERR_BAD_PARAM_VALUE;
583-
goto out;
584-
}
585-
n = (double) num_samples;
586-
tree = malloc(sizeof(tsk_tree_t));
587-
if (tree == NULL) {
588-
ret = TSK_ERR_NO_MEMORY;
589-
goto out;
590-
}
591-
ret = tsk_tree_init(tree, self, TSK_SAMPLE_COUNTS);
592-
if (ret != 0) {
593-
goto out;
594-
}
595-
ret = tsk_tree_set_tracked_samples(tree, num_samples, samples);
596-
if (ret != 0) {
597-
goto out;
598-
}
599-
/* Allocation done; move onto main algorithm. */
600-
result = 0.0;
601-
for (ret = tsk_tree_first(tree); ret == 1; ret = tsk_tree_next(tree)) {
602-
ret = tsk_tree_get_sites(tree, &sites, &num_sites);
603-
if (ret != 0) {
604-
goto out;
605-
}
606-
for (j = 0; j < num_sites; j++) {
607-
if (sites[j].mutations_length != 1) {
608-
ret = TSK_ERR_ONLY_INFINITE_SITES;
609-
goto out;
610-
}
611-
for (k = 0; k < sites[j].mutations_length; k++) {
612-
count = (double) tree->num_tracked_samples[sites[j].mutations[k].node];
613-
result += count * (n - count);
614-
}
615-
}
616-
}
617-
if (ret != 0) {
618-
goto out;
619-
}
620-
denom = (n * (n - 1)) / 2.0;
621-
*pi = result / denom;
622-
out:
623-
if (tree != NULL) {
624-
tsk_tree_free(tree);
625-
free(tree);
626-
}
627-
return ret;
628-
}
629571

630572
#define GET_2D_ROW(array, row_len, row) (array + (((size_t) (row_len)) * (size_t) row))
631573

c/tskit/trees.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,6 @@ int tsk_treeseq_simplify(tsk_treeseq_t *self, tsk_id_t *samples,
196196
/* TODO do these belong in trees or stats? They should probably be in stats.
197197
* Keep them here for now until we figure out the correct interface.
198198
*/
199-
int tsk_treeseq_get_pairwise_diversity(tsk_treeseq_t *self,
200-
tsk_id_t *samples, size_t num_samples, double *pi);
201199
int tsk_treeseq_genealogical_nearest_neighbours(tsk_treeseq_t *self,
202200
tsk_id_t *focal, size_t num_focal,
203201
tsk_id_t **reference_sets, size_t *reference_set_size, size_t num_reference_sets,

0 commit comments

Comments
 (0)