Skip to content

Commit d3029a6

Browse files
committed
Perf
1 parent 24cf27a commit d3029a6

File tree

2 files changed

+147
-53
lines changed

2 files changed

+147
-53
lines changed

c/tskit/genotypes.c

Lines changed: 145 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -49,43 +49,140 @@ tsk_haplotype_ctz64(uint64_t x)
4949
#endif
5050
}
5151

52+
static inline uint32_t
53+
tsk_haplotype_popcount64(uint64_t value)
54+
{
55+
#if defined(_MSC_VER)
56+
return (uint32_t) __popcnt64(value);
57+
#else
58+
return (uint32_t) __builtin_popcountll(value);
59+
#endif
60+
}
61+
62+
static inline void
63+
tsk_haplotype_bitset_clear(tsk_haplotype_t *self, tsk_size_t idx)
64+
{
65+
tsk_size_t word = idx >> 6;
66+
uint64_t mask = UINT64_C(1) << (idx & 63);
67+
if ((self->unresolved_bits[word] & mask) == 0) {
68+
return;
69+
}
70+
self->unresolved_bits[word] &= ~mask;
71+
if (self->unresolved_counts[word] > 0) {
72+
self->unresolved_counts[word]--;
73+
}
74+
}
75+
5276
static inline void
53-
tsk_haplotype_bitset_clear(uint64_t *bits, tsk_size_t idx)
77+
tsk_haplotype_clear_word_bit(tsk_haplotype_t *self, tsk_size_t word, uint64_t mask)
78+
{
79+
if ((self->unresolved_bits[word] & mask) != 0) {
80+
self->unresolved_bits[word] &= ~mask;
81+
if (self->unresolved_counts[word] > 0) {
82+
self->unresolved_counts[word]--;
83+
}
84+
}
85+
}
86+
87+
static inline bool
88+
tsk_haplotype_bitset_test(const uint64_t *bits, tsk_size_t idx)
5489
{
5590
tsk_size_t word = idx >> 6;
5691
uint64_t mask = UINT64_C(1) << (idx & 63);
57-
bits[word] &= ~mask;
92+
return (bits[word] & mask) != 0;
5893
}
5994

6095
static inline tsk_size_t
6196
tsk_haplotype_bitset_next(
62-
const uint64_t *bits, tsk_size_t num_words, tsk_size_t start, tsk_size_t limit)
97+
const tsk_haplotype_t *self, tsk_size_t start, tsk_size_t limit)
6398
{
6499
tsk_size_t word = start >> 6;
100+
tsk_size_t word_limit = (limit + 63) >> 6;
65101
uint64_t mask, value;
66102

67-
if (start >= limit || word >= num_words) {
103+
if (start >= limit || word >= self->num_bit_words) {
68104
return limit;
69105
}
70106
mask = UINT64_MAX << (start & 63);
71-
value = bits[word] & mask;
107+
value = self->unresolved_bits[word] & mask;
72108
while (value == 0) {
73109
word++;
74-
if (word >= num_words) {
110+
if (word >= word_limit || word >= self->num_bit_words) {
75111
return limit;
76112
}
77-
value = bits[word];
113+
while (word < word_limit && word < self->num_bit_words
114+
&& self->unresolved_counts[word] == 0) {
115+
word++;
116+
}
117+
if (word >= word_limit || word >= self->num_bit_words) {
118+
return limit;
119+
}
120+
value = self->unresolved_bits[word];
78121
}
79122
start = (word << 6) + tsk_haplotype_ctz64(value);
80123
return start < limit ? start : limit;
81124
}
82125

126+
static bool
127+
tsk_haplotype_find_next_uncovered(tsk_haplotype_t *self, tsk_size_t start,
128+
tsk_size_t end, const int32_t *interval_start, const int32_t *interval_end,
129+
tsk_size_t interval_count, tsk_size_t *out_index)
130+
{
131+
if (start >= end) {
132+
return false;
133+
}
134+
tsk_size_t word = start >> 6;
135+
tsk_size_t last_word = (end - 1) >> 6;
136+
if (word >= self->num_bit_words) {
137+
return false;
138+
}
139+
uint64_t start_mask = UINT64_MAX << (start & 63);
140+
for (; word <= last_word && word < self->num_bit_words; word++) {
141+
if (self->unresolved_counts[word] == 0) {
142+
continue;
143+
}
144+
uint64_t word_bits = self->unresolved_bits[word];
145+
if (word == (start >> 6)) {
146+
word_bits &= start_mask;
147+
}
148+
if (word == last_word) {
149+
uint64_t end_mask = UINT64_MAX >> (63 - ((end - 1) & 63));
150+
word_bits &= end_mask;
151+
}
152+
while (word_bits != 0) {
153+
uint64_t lowest_bit = word_bits & (~word_bits + 1);
154+
tsk_size_t bit = tsk_haplotype_ctz64(word_bits);
155+
word_bits ^= lowest_bit;
156+
tsk_size_t bit_index = (word << 6) + bit;
157+
if (bit_index >= end) {
158+
break;
159+
}
160+
bool covered = false;
161+
for (tsk_size_t p = 0; p < interval_count; p++) {
162+
if (interval_start[p] <= (int32_t) bit_index
163+
&& (int32_t) bit_index < interval_end[p]) {
164+
covered = true;
165+
break;
166+
}
167+
}
168+
if (!covered) {
169+
*out_index = bit_index;
170+
return true;
171+
}
172+
}
173+
start_mask = UINT64_MAX;
174+
}
175+
return false;
176+
}
177+
83178
static void
84-
tsk_haplotype_reset_bitset(const tsk_haplotype_t *self)
179+
tsk_haplotype_reset_bitset(tsk_haplotype_t *self)
85180
{
86181
if (self->num_bit_words > 0) {
87182
tsk_memcpy(self->unresolved_bits, self->initial_bits,
88183
self->num_bit_words * sizeof(*self->unresolved_bits));
184+
tsk_memcpy(self->unresolved_counts, self->initial_counts,
185+
self->num_bit_words * sizeof(*self->unresolved_counts));
89186
}
90187
}
91188

@@ -379,21 +476,31 @@ tsk_haplotype_alloc_bitset(tsk_haplotype_t *self)
379476
if (self->num_bit_words == 0) {
380477
self->unresolved_bits = NULL;
381478
self->initial_bits = NULL;
479+
self->unresolved_counts = NULL;
480+
self->initial_counts = NULL;
382481
return 0;
383482
}
384483
self->unresolved_bits
385484
= tsk_malloc(self->num_bit_words * sizeof(*self->unresolved_bits));
386485
self->initial_bits = tsk_malloc(self->num_bit_words * sizeof(*self->initial_bits));
387-
if (self->unresolved_bits == NULL || self->initial_bits == NULL) {
486+
self->unresolved_counts
487+
= tsk_malloc(self->num_bit_words * sizeof(*self->unresolved_counts));
488+
self->initial_counts
489+
= tsk_malloc(self->num_bit_words * sizeof(*self->initial_counts));
490+
if (self->unresolved_bits == NULL || self->initial_bits == NULL
491+
|| self->unresolved_counts == NULL || self->initial_counts == NULL) {
388492
return tsk_trace_error(TSK_ERR_NO_MEMORY);
389493
}
390494
for (j = 0; j < self->num_bit_words; j++) {
391-
self->initial_bits[j] = UINT64_MAX;
392-
}
393-
if ((tsk_size_t) self->num_sites % 64 != 0) {
394-
uint32_t bits = (uint32_t)((tsk_size_t) self->num_sites & 63);
395-
self->initial_bits[self->num_bit_words - 1] = (UINT64_C(1) << bits) - 1;
495+
uint64_t word = UINT64_MAX;
496+
if (j == self->num_bit_words - 1 && (tsk_size_t) self->num_sites % 64 != 0) {
497+
uint32_t bits = (uint32_t)((tsk_size_t) self->num_sites & 63);
498+
word = (UINT64_C(1) << bits) - 1;
499+
}
500+
self->initial_bits[j] = word;
501+
self->initial_counts[j] = tsk_haplotype_popcount64(word);
396502
}
503+
tsk_haplotype_reset_bitset(self);
397504
return 0;
398505
}
399506

@@ -517,11 +624,9 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
517624
for (int32_t m = mut_start; m < mut_end; m++) {
518625
int32_t site = self->node_mutation_sites[m];
519626
if (site >= 0 && site < self->num_sites
520-
&& tsk_haplotype_bitset_next(
521-
bits, self->num_bit_words, (tsk_size_t) site, (tsk_size_t) site + 1)
522-
== (tsk_size_t) site) {
627+
&& tsk_haplotype_bitset_test(bits, (tsk_size_t) site)) {
523628
haplotype[site] = (int8_t) self->node_mutation_states[m];
524-
tsk_haplotype_bitset_clear(bits, (tsk_size_t) site);
629+
tsk_haplotype_bitset_clear(self, (tsk_size_t) site);
525630
}
526631
}
527632

@@ -539,9 +644,10 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
539644
if (start >= end) {
540645
continue;
541646
}
542-
if (tsk_haplotype_bitset_next(
543-
bits, self->num_bit_words, (tsk_size_t) start, (tsk_size_t) end)
544-
< (tsk_size_t) end) {
647+
tsk_size_t uncovered_idx;
648+
if (tsk_haplotype_find_next_uncovered(self, (tsk_size_t) start, (tsk_size_t) end,
649+
self->parent_interval_start, self->parent_interval_end, 0,
650+
&uncovered_idx)) {
545651
self->edge_stack[stack_top] = edge;
546652
self->stack_interval_start[stack_top] = start;
547653
self->stack_interval_end[stack_top] = end;
@@ -562,11 +668,9 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
562668
for (int32_t m = mut_start; m < mut_end; m++) {
563669
int32_t site = self->node_mutation_sites[m];
564670
if (site >= interval_start && site < interval_end
565-
&& tsk_haplotype_bitset_next(bits, self->num_bit_words,
566-
(tsk_size_t) site, (tsk_size_t) site + 1)
567-
== (tsk_size_t) site) {
671+
&& tsk_haplotype_bitset_test(bits, (tsk_size_t) site)) {
568672
haplotype[site] = (int8_t) self->node_mutation_states[m];
569-
tsk_haplotype_bitset_clear(bits, (tsk_size_t) site);
673+
tsk_haplotype_bitset_clear(self, (tsk_size_t) site);
570674
}
571675
}
572676
}
@@ -589,9 +693,10 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
589693
if (parent_start >= parent_end) {
590694
continue;
591695
}
592-
if (tsk_haplotype_bitset_next(bits, self->num_bit_words,
593-
(tsk_size_t) parent_start, (tsk_size_t) parent_end)
594-
< (tsk_size_t) parent_end) {
696+
tsk_size_t uncovered_idx;
697+
if (tsk_haplotype_find_next_uncovered(self, (tsk_size_t) parent_start,
698+
(tsk_size_t) parent_end, self->parent_interval_start,
699+
self->parent_interval_end, parent_count, &uncovered_idx)) {
595700
self->edge_stack[stack_top] = parent_edge;
596701
self->stack_interval_start[stack_top] = parent_start;
597702
self->stack_interval_end[stack_top] = parent_end;
@@ -606,34 +711,19 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
606711
child_stop = 0;
607712
}
608713

609-
idx = tsk_haplotype_bitset_next(bits, self->num_bit_words,
610-
(tsk_size_t) interval_start, (tsk_size_t) interval_end);
611-
while ((int32_t) idx < interval_end) {
612-
bool covered = false;
613-
for (tsk_size_t p = 0; p < parent_count; p++) {
614-
if (self->parent_interval_start[p] <= (int32_t) idx
615-
&& (int32_t) idx < self->parent_interval_end[p]) {
616-
covered = true;
617-
break;
618-
}
619-
}
620-
if (covered) {
621-
idx = tsk_haplotype_bitset_next(
622-
bits, self->num_bit_words, idx + 1, (tsk_size_t) interval_end);
623-
} else {
624-
tsk_haplotype_bitset_clear(bits, idx);
625-
idx = tsk_haplotype_bitset_next(
626-
bits, self->num_bit_words, idx, (tsk_size_t) interval_end);
627-
}
714+
tsk_size_t uncovered_idx;
715+
while (tsk_haplotype_find_next_uncovered(self, (tsk_size_t) interval_start,
716+
(tsk_size_t) interval_end, self->parent_interval_start,
717+
self->parent_interval_end, parent_count, &uncovered_idx)) {
718+
tsk_size_t word_index = uncovered_idx >> 6;
719+
uint64_t mask = UINT64_C(1) << (uncovered_idx & 63);
720+
tsk_haplotype_clear_word_bit(self, word_index, mask);
628721
}
629722
}
630723

631-
idx = tsk_haplotype_bitset_next(
632-
bits, self->num_bit_words, 0, (tsk_size_t) self->num_sites);
633-
while (idx < (tsk_size_t) self->num_sites) {
634-
tsk_haplotype_bitset_clear(bits, idx);
635-
idx = tsk_haplotype_bitset_next(
636-
bits, self->num_bit_words, idx, (tsk_size_t) self->num_sites);
724+
for (tsk_size_t w = 0; w < self->num_bit_words; w++) {
725+
self->unresolved_bits[w] = 0;
726+
self->unresolved_counts[w] = 0;
637727
}
638728

639729
return 0;
@@ -660,6 +750,8 @@ tsk_haplotype_free(tsk_haplotype_t *self)
660750
tsk_safe_free(self->parent_interval_end);
661751
tsk_safe_free(self->unresolved_bits);
662752
tsk_safe_free(self->initial_bits);
753+
tsk_safe_free(self->unresolved_counts);
754+
tsk_safe_free(self->initial_counts);
663755
self->tree_sequence = NULL;
664756
self->site_positions = NULL;
665757
self->initialised = false;

c/tskit/genotypes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ typedef struct {
110110
uint64_t *unresolved_bits;
111111
uint64_t *initial_bits;
112112
tsk_size_t num_bit_words;
113+
uint32_t *unresolved_counts;
114+
uint32_t *initial_counts;
113115
bool initialised;
114116
} tsk_haplotype_t;
115117

0 commit comments

Comments
 (0)