|
| 1 | +#include <stdio.h> |
| 2 | +#include <stdlib.h> |
| 3 | +#include <assert.h> |
| 4 | +#include <err.h> |
| 5 | +#include <string.h> |
| 6 | + |
| 7 | +#include <pthread.h> |
| 8 | +#include <tskit/tables.h> |
| 9 | + |
| 10 | +#define check_tsk_error(val) \ |
| 11 | + if (val < 0) { \ |
| 12 | + errx(EXIT_FAILURE, "line %d: %s\n", __LINE__, tsk_strerror(val)); \ |
| 13 | + } |
| 14 | + |
| 15 | +static void |
| 16 | +init_tables(tsk_table_collection_t *tcs, int num_chroms) |
| 17 | +{ |
| 18 | + int j, ret; |
| 19 | + |
| 20 | + for (j = 0; j < num_chroms; j++) { |
| 21 | + ret = tsk_table_collection_init(&tcs[j], 0); |
| 22 | + check_tsk_error(ret); |
| 23 | + if (j > 0) { |
| 24 | + tsk_node_table_free(&tcs[j].nodes); |
| 25 | + } |
| 26 | + } |
| 27 | +} |
| 28 | + |
| 29 | +static void |
| 30 | +free_tables(tsk_table_collection_t *tcs, int num_chroms) |
| 31 | +{ |
| 32 | + int j; |
| 33 | + |
| 34 | + for (j = 0; j < num_chroms; j++) { |
| 35 | + if (j > 0) { |
| 36 | + /* Must not double free node table columns. */ |
| 37 | + memset(&tcs[j].nodes, 0, sizeof(tcs[j].nodes)); |
| 38 | + } |
| 39 | + tsk_table_collection_free(&tcs[j]); |
| 40 | + } |
| 41 | +} |
| 42 | + |
| 43 | +static void |
| 44 | +join_tables(tsk_table_collection_t *tcs, int num_chroms) |
| 45 | +{ |
| 46 | + int j, ret; |
| 47 | + |
| 48 | + for (j = 1; j < num_chroms; j++) { |
| 49 | + ret = tsk_edge_table_extend( |
| 50 | + &tcs[0].edges, &tcs[j].edges, tcs[j].edges.num_rows, NULL, 0); |
| 51 | + check_tsk_error(ret); |
| 52 | + } |
| 53 | + /* Get all the squashable edges next to each other */ |
| 54 | + ret = tsk_table_collection_sort(&tcs[0], NULL, 0); |
| 55 | + check_tsk_error(ret); |
| 56 | + ret = tsk_edge_table_squash(&tcs[0].edges); |
| 57 | + check_tsk_error(ret); |
| 58 | + /* We need to sort again after squash */ |
| 59 | + ret = tsk_table_collection_sort(&tcs[0], NULL, 0); |
| 60 | + check_tsk_error(ret); |
| 61 | + ret = tsk_table_collection_build_index(&tcs[0], 0); |
| 62 | + check_tsk_error(ret); |
| 63 | +} |
| 64 | + |
| 65 | +struct chunk_work { |
| 66 | + int chunk; |
| 67 | + tsk_table_collection_t *tc; |
| 68 | + int *samples; |
| 69 | + int N; |
| 70 | +}; |
| 71 | + |
| 72 | +void * |
| 73 | +simplify_chunk(void *arg) |
| 74 | +{ |
| 75 | + int ret; |
| 76 | + struct chunk_work *work = (struct chunk_work *) arg; |
| 77 | + tsk_size_t edges_before = work->tc->edges.num_rows; |
| 78 | + |
| 79 | + ret = tsk_table_collection_sort(work->tc, NULL, 0); |
| 80 | + check_tsk_error(ret); |
| 81 | + ret = tsk_table_collection_simplify(work->tc, work->samples, work->N, |
| 82 | + TSK_SIMPLIFY_NO_FILTER_NODES | TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS, NULL); |
| 83 | + check_tsk_error(ret); |
| 84 | + /* NOTE: this printf makes helgrind complain */ |
| 85 | + printf("\tchunk %d: %lld -> %lld\n", work->chunk, (long long) edges_before, |
| 86 | + (long long) work->tc->edges.num_rows); |
| 87 | + |
| 88 | + return NULL; |
| 89 | +} |
| 90 | + |
| 91 | +void |
| 92 | +sort_and_simplify_all(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N) |
| 93 | +{ |
| 94 | + int j, ret; |
| 95 | + struct chunk_work work[num_chroms]; |
| 96 | + pthread_t threads[num_chroms]; |
| 97 | + |
| 98 | + for (j = 1; j < num_chroms; j++) { |
| 99 | + tcs[j].nodes = tcs[0].nodes; |
| 100 | + } |
| 101 | + |
| 102 | + for (j = 0; j < num_chroms; j++) { |
| 103 | + work[j].chunk = j; |
| 104 | + work[j].tc = &tcs[j]; |
| 105 | + work[j].samples = samples; |
| 106 | + work[j].N = N; |
| 107 | + |
| 108 | + ret = pthread_create(&threads[j], NULL, simplify_chunk, (void *) &work[j]); |
| 109 | + if (ret != 0) { |
| 110 | + errx(EXIT_FAILURE, "Pthread create failed"); |
| 111 | + } |
| 112 | + /* simplify_chunk((void *) &work[j]); */ |
| 113 | + } |
| 114 | + for (j = 0; j < num_chroms; j++) { |
| 115 | + ret = pthread_join(threads[j], NULL); |
| 116 | + if (ret != 0) { |
| 117 | + errx(EXIT_FAILURE, "Pthread join failed"); |
| 118 | + } |
| 119 | + } |
| 120 | +} |
| 121 | + |
| 122 | +void |
| 123 | +simplify_tables(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N) |
| 124 | +{ |
| 125 | + int j, k, num_edges, ret; |
| 126 | + const tsk_size_t num_nodes = tcs[0].nodes.num_rows; |
| 127 | + tsk_bool_t *keep_nodes = malloc(num_nodes * sizeof(*keep_nodes)); |
| 128 | + tsk_id_t *node_id_map = malloc(num_nodes * sizeof(*node_id_map)); |
| 129 | + tsk_id_t *edge_child, *edge_parent; |
| 130 | + |
| 131 | + if (keep_nodes == NULL || node_id_map == NULL) { |
| 132 | + errx(EXIT_FAILURE, "Out of memory"); |
| 133 | + } |
| 134 | + |
| 135 | + printf("Simplify %lld nodes\n", (long long) tcs[0].nodes.num_rows); |
| 136 | + sort_and_simplify_all(tcs, num_chroms, samples, N); |
| 137 | + |
| 138 | + for (j = 0; j < num_nodes; j++) { |
| 139 | + keep_nodes[j] = false; |
| 140 | + tcs[0].nodes.flags[j] &= (~TSK_NODE_IS_SAMPLE); |
| 141 | + } |
| 142 | + for (j = 0; j < N; j++) { |
| 143 | + keep_nodes[samples[j]] = true; |
| 144 | + tcs[0].nodes.flags[samples[j]] |= TSK_NODE_IS_SAMPLE; |
| 145 | + } |
| 146 | + |
| 147 | + for (j = 0; j < num_chroms; j++) { |
| 148 | + edge_child = tcs[j].edges.child; |
| 149 | + edge_parent = tcs[j].edges.parent; |
| 150 | + num_edges = tcs[j].edges.num_rows; |
| 151 | + for (k = 0; k < num_edges; k++) { |
| 152 | + keep_nodes[edge_child[k]] = true; |
| 153 | + keep_nodes[edge_parent[k]] = true; |
| 154 | + } |
| 155 | + } |
| 156 | + tsk_node_table_keep_rows(&tcs[0].nodes, keep_nodes, 0, node_id_map); |
| 157 | + printf("\tdone: %lld nodes\n", (long long) tcs[0].nodes.num_rows); |
| 158 | + |
| 159 | + /* Remap node references */ |
| 160 | + for (j = 0; j < num_chroms; j++) { |
| 161 | + edge_child = tcs[j].edges.child; |
| 162 | + edge_parent = tcs[j].edges.parent; |
| 163 | + num_edges = tcs[j].edges.num_rows; |
| 164 | + for (k = 0; k < num_edges; k++) { |
| 165 | + edge_child[k] = node_id_map[edge_child[k]]; |
| 166 | + edge_parent[k] = node_id_map[edge_parent[k]]; |
| 167 | + } |
| 168 | + ret = tsk_table_collection_check_integrity(&tcs[j], 0); |
| 169 | + check_tsk_error(ret); |
| 170 | + } |
| 171 | + for (j = 0; j < N; j++) { |
| 172 | + samples[j] = node_id_map[samples[j]]; |
| 173 | + } |
| 174 | + free(keep_nodes); |
| 175 | + free(node_id_map); |
| 176 | +} |
| 177 | + |
| 178 | +void |
| 179 | +simulate( |
| 180 | + tsk_table_collection_t *tcs, int num_chroms, int N, int T, int simplify_interval) |
| 181 | +{ |
| 182 | + tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent; |
| 183 | + bool left_is_first; |
| 184 | + double chunk_left, chunk_right; |
| 185 | + int ret, j, t, b, k; |
| 186 | + |
| 187 | + assert(simplify_interval != 0); // leads to division by zero |
| 188 | + buffer = malloc(2 * N * sizeof(tsk_id_t)); |
| 189 | + if (buffer == NULL) { |
| 190 | + errx(EXIT_FAILURE, "Out of memory"); |
| 191 | + } |
| 192 | + for (k = 0; k < num_chroms; k++) { |
| 193 | + tcs[k].sequence_length = num_chroms; |
| 194 | + } |
| 195 | + parents = buffer; |
| 196 | + for (j = 0; j < N; j++) { |
| 197 | + parents[j] |
| 198 | + = tsk_node_table_add_row(&tcs[0].nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0); |
| 199 | + check_tsk_error(parents[j]); |
| 200 | + } |
| 201 | + b = 0; |
| 202 | + for (t = T - 1; t >= 0; t--) { |
| 203 | + /* Alternate between using the first and last N values in the buffer */ |
| 204 | + parents = buffer + (b * N); |
| 205 | + b = (b + 1) % 2; |
| 206 | + children = buffer + (b * N); |
| 207 | + for (j = 0; j < N; j++) { |
| 208 | + child = tsk_node_table_add_row( |
| 209 | + &tcs[0].nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0); |
| 210 | + check_tsk_error(child); |
| 211 | + /* NOTE: the use of rand() is discouraged for |
| 212 | + * research code and proper random number generator |
| 213 | + * libraries should be preferred. |
| 214 | + */ |
| 215 | + left_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)]; |
| 216 | + right_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)]; |
| 217 | + left_is_first = rand() < 0.5; |
| 218 | + chunk_left = 0.0; |
| 219 | + for (k = 0; k < num_chroms; k++) { |
| 220 | + chunk_right = chunk_left + rand() / (1. + RAND_MAX); |
| 221 | + /* a very tiny chance that right and left are equal */ |
| 222 | + if (chunk_right > chunk_left) { |
| 223 | + ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_left, chunk_right, |
| 224 | + left_is_first ? left_parent : right_parent, child, NULL, 0); |
| 225 | + check_tsk_error(ret); |
| 226 | + } |
| 227 | + chunk_left += 1.0; |
| 228 | + if (chunk_right < chunk_left) { |
| 229 | + ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_right, chunk_left, |
| 230 | + left_is_first ? right_parent : left_parent, child, NULL, 0); |
| 231 | + check_tsk_error(ret); |
| 232 | + } |
| 233 | + } |
| 234 | + children[j] = child; |
| 235 | + } |
| 236 | + if (t % simplify_interval == 0) { |
| 237 | + simplify_tables(tcs, num_chroms, children, N); |
| 238 | + } |
| 239 | + } |
| 240 | + /* Set the sample flags for final generation */ |
| 241 | + for (j = 0; j < N; j++) { |
| 242 | + tcs[0].nodes.flags[children[j]] = TSK_NODE_IS_SAMPLE; |
| 243 | + } |
| 244 | + free(buffer); |
| 245 | +} |
| 246 | + |
| 247 | +int |
| 248 | +main(int argc, char **argv) |
| 249 | +{ |
| 250 | + int ret; |
| 251 | + int num_chroms; |
| 252 | + |
| 253 | + if (argc != 7) { |
| 254 | + errx(EXIT_FAILURE, "usage: N T simplify-interval output seed num-chroms"); |
| 255 | + } |
| 256 | + |
| 257 | + num_chroms = atoi(argv[6]); |
| 258 | + tsk_table_collection_t tcs[num_chroms]; |
| 259 | + |
| 260 | + srand((unsigned) atoi(argv[5])); |
| 261 | + init_tables(tcs, num_chroms); |
| 262 | + simulate(tcs, num_chroms, atoi(argv[1]), atoi(argv[2]), atoi(argv[3])); |
| 263 | + join_tables(tcs, num_chroms); |
| 264 | + ret = tsk_table_collection_dump(&tcs[0], argv[4], 0); |
| 265 | + check_tsk_error(ret); |
| 266 | + free_tables(tcs, num_chroms); |
| 267 | + |
| 268 | + return 0; |
| 269 | +} |
0 commit comments