Skip to content

Commit e6bff82

Browse files
Merge pull request #2665 from jeromekelleher/multichrom-example
Multichrom example
2 parents 44c7d7c + f82a135 commit e6bff82

File tree

5 files changed

+431
-0
lines changed

5 files changed

+431
-0
lines changed

c/examples/haploid_wright_fisher.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@ main(int argc, char **argv)
9292
check_tsk_error(ret);
9393
srand((unsigned)atoi(argv[5]));
9494
simulate(&tables, atoi(argv[1]), atoi(argv[2]), atoi(argv[3]));
95+
96+
/* Sort and index so that the result can be opened as a tree sequence */
97+
ret = tsk_table_collection_sort(&tables, NULL, 0);
98+
check_tsk_error(ret);
99+
ret = tsk_table_collection_build_index(&tables, 0);
100+
check_tsk_error(ret);
95101
ret = tsk_table_collection_dump(&tables, argv[4], 0);
96102
check_tsk_error(ret);
97103

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <assert.h>
4+
#include <err.h>
5+
#include <string.h>
6+
7+
#include <pthread.h>
8+
#include <tskit/tables.h>
9+
10+
#define check_tsk_error(val) \
11+
if (val < 0) { \
12+
errx(EXIT_FAILURE, "line %d: %s\n", __LINE__, tsk_strerror(val)); \
13+
}
14+
15+
static void
16+
init_tables(tsk_table_collection_t *tcs, int num_chroms)
17+
{
18+
int j, ret;
19+
20+
for (j = 0; j < num_chroms; j++) {
21+
ret = tsk_table_collection_init(&tcs[j], 0);
22+
check_tsk_error(ret);
23+
if (j > 0) {
24+
tsk_node_table_free(&tcs[j].nodes);
25+
}
26+
}
27+
}
28+
29+
static void
30+
free_tables(tsk_table_collection_t *tcs, int num_chroms)
31+
{
32+
int j;
33+
34+
for (j = 0; j < num_chroms; j++) {
35+
if (j > 0) {
36+
/* Must not double free node table columns. */
37+
memset(&tcs[j].nodes, 0, sizeof(tcs[j].nodes));
38+
}
39+
tsk_table_collection_free(&tcs[j]);
40+
}
41+
}
42+
43+
static void
44+
join_tables(tsk_table_collection_t *tcs, int num_chroms)
45+
{
46+
int j, ret;
47+
48+
for (j = 1; j < num_chroms; j++) {
49+
ret = tsk_edge_table_extend(
50+
&tcs[0].edges, &tcs[j].edges, tcs[j].edges.num_rows, NULL, 0);
51+
check_tsk_error(ret);
52+
}
53+
/* Get all the squashable edges next to each other */
54+
ret = tsk_table_collection_sort(&tcs[0], NULL, 0);
55+
check_tsk_error(ret);
56+
ret = tsk_edge_table_squash(&tcs[0].edges);
57+
check_tsk_error(ret);
58+
/* We need to sort again after squash */
59+
ret = tsk_table_collection_sort(&tcs[0], NULL, 0);
60+
check_tsk_error(ret);
61+
ret = tsk_table_collection_build_index(&tcs[0], 0);
62+
check_tsk_error(ret);
63+
}
64+
65+
struct chunk_work {
66+
int chunk;
67+
tsk_table_collection_t *tc;
68+
int *samples;
69+
int N;
70+
};
71+
72+
void *
73+
simplify_chunk(void *arg)
74+
{
75+
int ret;
76+
struct chunk_work *work = (struct chunk_work *) arg;
77+
tsk_size_t edges_before = work->tc->edges.num_rows;
78+
79+
ret = tsk_table_collection_sort(work->tc, NULL, 0);
80+
check_tsk_error(ret);
81+
ret = tsk_table_collection_simplify(work->tc, work->samples, work->N,
82+
TSK_SIMPLIFY_NO_FILTER_NODES | TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS, NULL);
83+
check_tsk_error(ret);
84+
/* NOTE: this printf makes helgrind complain */
85+
printf("\tchunk %d: %lld -> %lld\n", work->chunk, (long long) edges_before,
86+
(long long) work->tc->edges.num_rows);
87+
88+
return NULL;
89+
}
90+
91+
void
92+
sort_and_simplify_all(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N)
93+
{
94+
int j, ret;
95+
struct chunk_work work[num_chroms];
96+
pthread_t threads[num_chroms];
97+
98+
for (j = 1; j < num_chroms; j++) {
99+
tcs[j].nodes = tcs[0].nodes;
100+
}
101+
102+
for (j = 0; j < num_chroms; j++) {
103+
work[j].chunk = j;
104+
work[j].tc = &tcs[j];
105+
work[j].samples = samples;
106+
work[j].N = N;
107+
108+
ret = pthread_create(&threads[j], NULL, simplify_chunk, (void *) &work[j]);
109+
if (ret != 0) {
110+
errx(EXIT_FAILURE, "Pthread create failed");
111+
}
112+
/* simplify_chunk((void *) &work[j]); */
113+
}
114+
for (j = 0; j < num_chroms; j++) {
115+
ret = pthread_join(threads[j], NULL);
116+
if (ret != 0) {
117+
errx(EXIT_FAILURE, "Pthread join failed");
118+
}
119+
}
120+
}
121+
122+
void
123+
simplify_tables(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N)
124+
{
125+
int j, k, num_edges, ret;
126+
const tsk_size_t num_nodes = tcs[0].nodes.num_rows;
127+
tsk_bool_t *keep_nodes = malloc(num_nodes * sizeof(*keep_nodes));
128+
tsk_id_t *node_id_map = malloc(num_nodes * sizeof(*node_id_map));
129+
tsk_id_t *edge_child, *edge_parent;
130+
131+
if (keep_nodes == NULL || node_id_map == NULL) {
132+
errx(EXIT_FAILURE, "Out of memory");
133+
}
134+
135+
printf("Simplify %lld nodes\n", (long long) tcs[0].nodes.num_rows);
136+
sort_and_simplify_all(tcs, num_chroms, samples, N);
137+
138+
for (j = 0; j < num_nodes; j++) {
139+
keep_nodes[j] = false;
140+
tcs[0].nodes.flags[j] &= (~TSK_NODE_IS_SAMPLE);
141+
}
142+
for (j = 0; j < N; j++) {
143+
keep_nodes[samples[j]] = true;
144+
tcs[0].nodes.flags[samples[j]] |= TSK_NODE_IS_SAMPLE;
145+
}
146+
147+
for (j = 0; j < num_chroms; j++) {
148+
edge_child = tcs[j].edges.child;
149+
edge_parent = tcs[j].edges.parent;
150+
num_edges = tcs[j].edges.num_rows;
151+
for (k = 0; k < num_edges; k++) {
152+
keep_nodes[edge_child[k]] = true;
153+
keep_nodes[edge_parent[k]] = true;
154+
}
155+
}
156+
tsk_node_table_keep_rows(&tcs[0].nodes, keep_nodes, 0, node_id_map);
157+
printf("\tdone: %lld nodes\n", (long long) tcs[0].nodes.num_rows);
158+
159+
/* Remap node references */
160+
for (j = 0; j < num_chroms; j++) {
161+
edge_child = tcs[j].edges.child;
162+
edge_parent = tcs[j].edges.parent;
163+
num_edges = tcs[j].edges.num_rows;
164+
for (k = 0; k < num_edges; k++) {
165+
edge_child[k] = node_id_map[edge_child[k]];
166+
edge_parent[k] = node_id_map[edge_parent[k]];
167+
}
168+
ret = tsk_table_collection_check_integrity(&tcs[j], 0);
169+
check_tsk_error(ret);
170+
}
171+
for (j = 0; j < N; j++) {
172+
samples[j] = node_id_map[samples[j]];
173+
}
174+
free(keep_nodes);
175+
free(node_id_map);
176+
}
177+
178+
void
179+
simulate(
180+
tsk_table_collection_t *tcs, int num_chroms, int N, int T, int simplify_interval)
181+
{
182+
tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent;
183+
bool left_is_first;
184+
double chunk_left, chunk_right;
185+
int ret, j, t, b, k;
186+
187+
assert(simplify_interval != 0); // leads to division by zero
188+
buffer = malloc(2 * N * sizeof(tsk_id_t));
189+
if (buffer == NULL) {
190+
errx(EXIT_FAILURE, "Out of memory");
191+
}
192+
for (k = 0; k < num_chroms; k++) {
193+
tcs[k].sequence_length = num_chroms;
194+
}
195+
parents = buffer;
196+
for (j = 0; j < N; j++) {
197+
parents[j]
198+
= tsk_node_table_add_row(&tcs[0].nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0);
199+
check_tsk_error(parents[j]);
200+
}
201+
b = 0;
202+
for (t = T - 1; t >= 0; t--) {
203+
/* Alternate between using the first and last N values in the buffer */
204+
parents = buffer + (b * N);
205+
b = (b + 1) % 2;
206+
children = buffer + (b * N);
207+
for (j = 0; j < N; j++) {
208+
child = tsk_node_table_add_row(
209+
&tcs[0].nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0);
210+
check_tsk_error(child);
211+
/* NOTE: the use of rand() is discouraged for
212+
* research code and proper random number generator
213+
* libraries should be preferred.
214+
*/
215+
left_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)];
216+
right_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)];
217+
left_is_first = rand() < 0.5;
218+
chunk_left = 0.0;
219+
for (k = 0; k < num_chroms; k++) {
220+
chunk_right = chunk_left + rand() / (1. + RAND_MAX);
221+
/* a very tiny chance that right and left are equal */
222+
if (chunk_right > chunk_left) {
223+
ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_left, chunk_right,
224+
left_is_first ? left_parent : right_parent, child, NULL, 0);
225+
check_tsk_error(ret);
226+
}
227+
chunk_left += 1.0;
228+
if (chunk_right < chunk_left) {
229+
ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_right, chunk_left,
230+
left_is_first ? right_parent : left_parent, child, NULL, 0);
231+
check_tsk_error(ret);
232+
}
233+
}
234+
children[j] = child;
235+
}
236+
if (t % simplify_interval == 0) {
237+
simplify_tables(tcs, num_chroms, children, N);
238+
}
239+
}
240+
/* Set the sample flags for final generation */
241+
for (j = 0; j < N; j++) {
242+
tcs[0].nodes.flags[children[j]] = TSK_NODE_IS_SAMPLE;
243+
}
244+
free(buffer);
245+
}
246+
247+
int
248+
main(int argc, char **argv)
249+
{
250+
int ret;
251+
int num_chroms;
252+
253+
if (argc != 7) {
254+
errx(EXIT_FAILURE, "usage: N T simplify-interval output seed num-chroms");
255+
}
256+
257+
num_chroms = atoi(argv[6]);
258+
tsk_table_collection_t tcs[num_chroms];
259+
260+
srand((unsigned) atoi(argv[5]));
261+
init_tables(tcs, num_chroms);
262+
simulate(tcs, num_chroms, atoi(argv[1]), atoi(argv[2]), atoi(argv[3]));
263+
join_tables(tcs, num_chroms);
264+
ret = tsk_table_collection_dump(&tcs[0], argv[4], 0);
265+
check_tsk_error(ret);
266+
free_tables(tcs, num_chroms);
267+
268+
return 0;
269+
}

0 commit comments

Comments
 (0)