Skip to content

Commit ee696bd

Browse files
Add multi-chromosome WF example
endpoint fix multichromosome profiling added multichom singlethreaded Fix compile
1 parent 44c7d7c commit ee696bd

File tree

4 files changed

+399
-0
lines changed

4 files changed

+399
-0
lines changed

c/examples/haploid_wright_fisher.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@ main(int argc, char **argv)
9292
check_tsk_error(ret);
9393
srand((unsigned)atoi(argv[5]));
9494
simulate(&tables, atoi(argv[1]), atoi(argv[2]), atoi(argv[3]));
95+
96+
/* Sort and index so that the result can be opened as a tree sequence */
97+
ret = tsk_table_collection_sort(&tables, NULL, 0);
98+
check_tsk_error(ret);
99+
ret = tsk_table_collection_build_index(&tables, 0);
100+
check_tsk_error(ret);
95101
ret = tsk_table_collection_dump(&tables, argv[4], 0);
96102
check_tsk_error(ret);
97103

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <assert.h>
4+
#include <err.h>
5+
#include <string.h>
6+
7+
#include <pthread.h>
8+
#include <tskit/tables.h>
9+
10+
#define check_tsk_error(val) \
11+
if (val < 0) { \
12+
errx(EXIT_FAILURE, "line %d: %s\n", __LINE__, tsk_strerror(val)); \
13+
}
14+
15+
static void
16+
init_tables(tsk_table_collection_t *tcs, int num_chroms)
17+
{
18+
int j, ret;
19+
20+
for (j = 0; j < num_chroms; j++) {
21+
ret = tsk_table_collection_init(&tcs[j], 0);
22+
check_tsk_error(ret);
23+
if (j > 0) {
24+
tsk_node_table_free(&tcs[j].nodes);
25+
}
26+
}
27+
}
28+
29+
static void
30+
free_tables(tsk_table_collection_t *tcs, int num_chroms)
31+
{
32+
int j;
33+
34+
for (j = 0; j < num_chroms; j++) {
35+
if (j > 0) {
36+
/* Must not double free node table columns. */
37+
memset(&tcs[j].nodes, 0, sizeof(tcs[j].nodes));
38+
}
39+
tsk_table_collection_free(&tcs[j]);
40+
}
41+
}
42+
43+
static void
44+
join_tables(tsk_table_collection_t *tcs, int num_chroms)
45+
{
46+
int j, ret;
47+
48+
for (j = 1; j < num_chroms; j++) {
49+
ret = tsk_edge_table_extend(
50+
&tcs[0].edges, &tcs[j].edges, tcs[j].edges.num_rows, NULL, 0);
51+
check_tsk_error(ret);
52+
}
53+
/* Get all the squashable edges next to each other */
54+
ret = tsk_table_collection_sort(&tcs[0], NULL, 0);
55+
check_tsk_error(ret);
56+
ret = tsk_edge_table_squash(&tcs[0].edges);
57+
check_tsk_error(ret);
58+
/* We need to sort again after squash */
59+
ret = tsk_table_collection_sort(&tcs[0], NULL, 0);
60+
check_tsk_error(ret);
61+
ret = tsk_table_collection_build_index(&tcs[0], 0);
62+
check_tsk_error(ret);
63+
}
64+
65+
struct chunk_work {
66+
int chunk;
67+
tsk_table_collection_t *tc;
68+
int *samples;
69+
int N;
70+
};
71+
72+
void *
73+
simplify_chunk(void *arg)
74+
{
75+
int ret;
76+
struct chunk_work *work = (struct chunk_work *) arg;
77+
tsk_size_t edges_before = work->tc->edges.num_rows;
78+
79+
ret = tsk_table_collection_sort(work->tc, NULL, 0);
80+
check_tsk_error(ret);
81+
ret = tsk_table_collection_simplify(work->tc, work->samples, work->N,
82+
TSK_SIMPLIFY_NO_FILTER_NODES | TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS, NULL);
83+
check_tsk_error(ret);
84+
/* NOTE: this printf makes helgrind complain */
85+
printf("\tchunk %d: %lld -> %lld\n", work->chunk, (long long) edges_before,
86+
(long long) work->tc->edges.num_rows);
87+
88+
return NULL;
89+
}
90+
91+
void
92+
sort_and_simplify_all(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N)
93+
{
94+
int j, ret;
95+
struct chunk_work work[num_chroms];
96+
pthread_t threads[num_chroms];
97+
98+
for (j = 1; j < num_chroms; j++) {
99+
tcs[j].nodes = tcs[0].nodes;
100+
}
101+
102+
for (j = 0; j < num_chroms; j++) {
103+
work[j].chunk = j;
104+
work[j].tc = &tcs[j];
105+
work[j].samples = samples;
106+
work[j].N = N;
107+
108+
ret = pthread_create(&threads[j], NULL, simplify_chunk, (void *) &work[j]);
109+
if (ret != 0) {
110+
errx(EXIT_FAILURE, "Pthread create failed");
111+
}
112+
/* simplify_chunk((void *) &work[j]); */
113+
}
114+
for (j = 0; j < num_chroms; j++) {
115+
ret = pthread_join(threads[j], NULL);
116+
if (ret != 0) {
117+
errx(EXIT_FAILURE, "Pthread join failed");
118+
}
119+
}
120+
}
121+
122+
void
123+
simplify_tables(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N)
124+
{
125+
int j, k, ret;
126+
const tsk_size_t num_nodes = tcs[0].nodes.num_rows;
127+
tsk_bool_t *keep_nodes = malloc(num_nodes * sizeof(*keep_nodes));
128+
tsk_id_t *node_id_map = malloc(num_nodes * sizeof(*node_id_map));
129+
tsk_id_t *edge_child, *edge_parent;
130+
131+
if (keep_nodes == NULL || node_id_map == NULL) {
132+
errx(EXIT_FAILURE, "Out of memory");
133+
}
134+
135+
printf("Simplify %lld nodes\n", (long long) tcs[0].nodes.num_rows);
136+
sort_and_simplify_all(tcs, num_chroms, samples, N);
137+
138+
for (j = 0; j < num_nodes; j++) {
139+
keep_nodes[j] = false;
140+
}
141+
for (j = 0; j < N; j++) {
142+
keep_nodes[samples[j]] = true;
143+
}
144+
145+
for (j = 0; j < num_chroms; j++) {
146+
edge_child = tcs[j].edges.child;
147+
edge_parent = tcs[j].edges.parent;
148+
for (k = 0; k < tcs[j].edges.num_rows; k++) {
149+
keep_nodes[edge_child[k]] = true;
150+
keep_nodes[edge_parent[k]] = true;
151+
}
152+
}
153+
tsk_node_table_keep_rows(&tcs[0].nodes, keep_nodes, 0, node_id_map);
154+
printf("\tdone: %lld nodes\n", (long long) tcs[0].nodes.num_rows);
155+
156+
/* Remap node references */
157+
for (j = 0; j < num_chroms; j++) {
158+
edge_child = tcs[j].edges.child;
159+
edge_parent = tcs[j].edges.parent;
160+
for (k = 0; k < tcs[j].edges.num_rows; k++) {
161+
edge_child[k] = node_id_map[edge_child[k]];
162+
edge_parent[k] = node_id_map[edge_parent[k]];
163+
}
164+
ret = tsk_table_collection_check_integrity(&tcs[j], 0);
165+
check_tsk_error(ret);
166+
}
167+
for (j = 0; j < N; j++) {
168+
samples[j] = node_id_map[samples[j]];
169+
}
170+
free(keep_nodes);
171+
free(node_id_map);
172+
}
173+
174+
void
175+
simulate(
176+
tsk_table_collection_t *tcs, int num_chroms, int N, int T, int simplify_interval)
177+
{
178+
tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent;
179+
bool left_is_first;
180+
double chunk_left, chunk_right;
181+
int ret, j, t, b, k;
182+
183+
assert(simplify_interval != 0); // leads to division by zero
184+
buffer = malloc(2 * N * sizeof(tsk_id_t));
185+
if (buffer == NULL) {
186+
errx(EXIT_FAILURE, "Out of memory");
187+
}
188+
for (k = 0; k < num_chroms; k++) {
189+
tcs[k].sequence_length = num_chroms;
190+
}
191+
parents = buffer;
192+
for (j = 0; j < N; j++) {
193+
parents[j]
194+
= tsk_node_table_add_row(&tcs[0].nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0);
195+
check_tsk_error(parents[j]);
196+
}
197+
b = 0;
198+
for (t = T - 1; t >= 0; t--) {
199+
/* Alternate between using the first and last N values in the buffer */
200+
parents = buffer + (b * N);
201+
b = (b + 1) % 2;
202+
children = buffer + (b * N);
203+
for (j = 0; j < N; j++) {
204+
child = tsk_node_table_add_row(
205+
&tcs[0].nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0);
206+
check_tsk_error(child);
207+
/* NOTE: the use of rand() is discouraged for
208+
* research code and proper random number generator
209+
* libraries should be preferred.
210+
*/
211+
left_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)];
212+
right_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)];
213+
left_is_first = rand() < 0.5;
214+
chunk_left = 0.0;
215+
for (k = 0; k < num_chroms; k++) {
216+
chunk_right = chunk_left + rand() / (1. + RAND_MAX);
217+
/* a very tiny chance that right and left are equal */
218+
if (chunk_right > chunk_left) {
219+
ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_left, chunk_right,
220+
left_is_first ? left_parent : right_parent, child, NULL, 0);
221+
check_tsk_error(ret);
222+
}
223+
chunk_left += 1.0;
224+
if (chunk_right < chunk_left) {
225+
ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_right, chunk_left,
226+
left_is_first ? right_parent : left_parent, child, NULL, 0);
227+
check_tsk_error(ret);
228+
}
229+
}
230+
children[j] = child;
231+
}
232+
if (t % simplify_interval == 0) {
233+
simplify_tables(tcs, num_chroms, children, N);
234+
}
235+
}
236+
/* Set the sample flags for final generation */
237+
for (j = 0; j < N; j++) {
238+
tcs[0].nodes.flags[children[j]] = TSK_NODE_IS_SAMPLE;
239+
}
240+
free(buffer);
241+
}
242+
243+
int
244+
main(int argc, char **argv)
245+
{
246+
// int ret;
247+
int num_chroms;
248+
249+
if (argc != 7) {
250+
errx(EXIT_FAILURE, "usage: N T simplify-interval output seed num-chroms");
251+
}
252+
253+
num_chroms = atoi(argv[6]);
254+
tsk_table_collection_t tcs[num_chroms];
255+
256+
srand((unsigned) atoi(argv[5]));
257+
init_tables(tcs, num_chroms);
258+
simulate(tcs, num_chroms, atoi(argv[1]), atoi(argv[2]), atoi(argv[3]));
259+
join_tables(tcs, num_chroms);
260+
// ret = tsk_table_collection_dump(&tcs[0], argv[4], 0);
261+
// check_tsk_error(ret);
262+
free_tables(tcs, num_chroms);
263+
264+
return 0;
265+
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <assert.h>
4+
#include <err.h>
5+
#include <string.h>
6+
7+
#include <tskit/tables.h>
8+
9+
#define check_tsk_error(val) \
10+
if (val < 0) { \
11+
errx(EXIT_FAILURE, "line %d: %s\n", __LINE__, tsk_strerror(val)); \
12+
}
13+
14+
void
15+
simulate(
16+
tsk_table_collection_t *tables, int num_chroms, int N, int T, int simplify_interval)
17+
{
18+
tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent;
19+
bool left_is_first;
20+
double chunk_left, chunk_right;
21+
int ret, j, t, b, k;
22+
23+
assert(simplify_interval != 0); // leads to division by zero
24+
buffer = malloc(2 * N * sizeof(tsk_id_t));
25+
if (buffer == NULL) {
26+
errx(EXIT_FAILURE, "Out of memory");
27+
}
28+
tables->sequence_length = num_chroms;
29+
parents = buffer;
30+
for (j = 0; j < N; j++) {
31+
parents[j]
32+
= tsk_node_table_add_row(&tables->nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0);
33+
check_tsk_error(parents[j]);
34+
}
35+
b = 0;
36+
for (t = T - 1; t >= 0; t--) {
37+
/* Alternate between using the first and last N values in the buffer */
38+
parents = buffer + (b * N);
39+
b = (b + 1) % 2;
40+
children = buffer + (b * N);
41+
for (j = 0; j < N; j++) {
42+
child = tsk_node_table_add_row(
43+
&tables->nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0);
44+
check_tsk_error(child);
45+
/* NOTE: the use of rand() is discouraged for
46+
* research code and proper random number generator
47+
* libraries should be preferred.
48+
*/
49+
left_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)];
50+
right_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)];
51+
left_is_first = rand() < 0.5;
52+
chunk_left = 0.0;
53+
for (k = 0; k < num_chroms; k++) {
54+
chunk_right = chunk_left + rand() / (1. + RAND_MAX);
55+
/* a very tiny chance that right and left are equal */
56+
if (chunk_right > chunk_left) {
57+
ret = tsk_edge_table_add_row(&tables->edges, chunk_left, chunk_right,
58+
left_is_first ? left_parent : right_parent, child, NULL, 0);
59+
check_tsk_error(ret);
60+
}
61+
chunk_left += 1.0;
62+
if (chunk_right < chunk_left) {
63+
ret = tsk_edge_table_add_row(&tables->edges, chunk_right, chunk_left,
64+
left_is_first ? right_parent : left_parent, child, NULL, 0);
65+
check_tsk_error(ret);
66+
}
67+
}
68+
children[j] = child;
69+
}
70+
if (t % simplify_interval == 0) {
71+
printf("Simplify at generation %lld: (%lld nodes %lld edges)",
72+
(long long) t,
73+
(long long) tables->nodes.num_rows,
74+
(long long) tables->edges.num_rows);
75+
/* Note: Edges must be sorted for simplify to work, and we use a brute force
76+
* approach of sorting each time here for simplicity. This is inefficient. */
77+
ret = tsk_table_collection_sort(tables, NULL, 0);
78+
check_tsk_error(ret);
79+
ret = tsk_table_collection_simplify(tables, children, N, 0, NULL);
80+
check_tsk_error(ret);
81+
printf(" -> (%lld nodes %lld edges)\n",
82+
(long long) tables->nodes.num_rows,
83+
(long long) tables->edges.num_rows);
84+
for (j = 0; j < N; j++) {
85+
children[j] = j;
86+
}
87+
}
88+
}
89+
/* Set the sample flags for final generation */
90+
for (j = 0; j < N; j++) {
91+
tables->nodes.flags[children[j]] = TSK_NODE_IS_SAMPLE;
92+
}
93+
free(buffer);
94+
}
95+
96+
int
97+
main(int argc, char **argv)
98+
{
99+
int ret;
100+
tsk_table_collection_t tables;
101+
102+
if (argc != 7) {
103+
errx(EXIT_FAILURE, "usage: N T simplify-interval output seed num-chroms");
104+
}
105+
ret = tsk_table_collection_init(&tables, 0);
106+
check_tsk_error(ret);
107+
srand((unsigned)atoi(argv[5]));
108+
simulate(&tables, atoi(argv[6]), atoi(argv[1]), atoi(argv[2]), atoi(argv[3]));
109+
110+
// /* Sort and index so that the result can be opened as a tree sequence */
111+
// ret = tsk_table_collection_sort(&tables, NULL, 0);
112+
// check_tsk_error(ret);
113+
// ret = tsk_table_collection_build_index(&tables, 0);
114+
// check_tsk_error(ret);
115+
// ret = tsk_table_collection_dump(&tables, argv[4], 0);
116+
// check_tsk_error(ret);
117+
118+
tsk_table_collection_free(&tables);
119+
return 0;
120+
}

0 commit comments

Comments
 (0)