Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 38 additions & 42 deletions c/tests/test_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -531,111 +531,107 @@ test_bit_arrays(void)
{
// NB: This test is only valid for the 32 bit implementation of bit arrays. If we
// were to change the chunk size of a bit array, we'd need to update these tests
tsk_bit_array_t arr;
tsk_bitset_t arr;
tsk_id_t items_truth[64] = { 0 }, items[64] = { 0 };
tsk_size_t n_items = 0, n_items_truth = 0;

// test item retrieval
tsk_bit_array_init(&arr, 90, 1);
tsk_bit_array_get_items(&arr, items, &n_items);
tsk_bitset_init(&arr, 90, 1);
CU_ASSERT_EQUAL_FATAL(arr.len, 1);
CU_ASSERT_EQUAL_FATAL(arr.row_len, 3);
tsk_bitset_get_items(&arr, 0, items, &n_items);
assert_arrays_equal(n_items_truth, items, items_truth);

for (tsk_bit_array_value_t i = 0; i < 20; i++) {
tsk_bit_array_add_bit(&arr, i);
for (tsk_bitset_val_t i = 0; i < 20; i++) {
tsk_bitset_set_bit(&arr, 0, i);
items_truth[n_items_truth] = (tsk_id_t) i;
n_items_truth++;
}
tsk_bit_array_add_bit(&arr, 63);
tsk_bit_array_add_bit(&arr, 65);
tsk_bitset_set_bit(&arr, 0, 63);
tsk_bitset_set_bit(&arr, 0, 65);

// these assertions are only valid for 32-bit values
CU_ASSERT_EQUAL_FATAL(arr.data[0], 1048575);
CU_ASSERT_EQUAL_FATAL(arr.data[1], 2147483648);
CU_ASSERT_EQUAL_FATAL(arr.data[2], 2);

// verify our assumptions about bit array counting
CU_ASSERT_EQUAL_FATAL(tsk_bit_array_count(&arr), 22);
CU_ASSERT_EQUAL_FATAL(tsk_bitset_count(&arr, 0), 22);

tsk_bit_array_get_items(&arr, items, &n_items);
tsk_bitset_get_items(&arr, 0, items, &n_items);
assert_arrays_equal(n_items_truth, items, items_truth);

tsk_memset(items, 0, 64);
tsk_memset(items_truth, 0, 64);
n_items = n_items_truth = 0;
tsk_bit_array_free(&arr);
tsk_bitset_free(&arr);

// create a length-2 array with 64 bit capacity
tsk_bit_array_init(&arr, 64, 2);
tsk_bit_array_t arr_row1, arr_row2;

// select the first and second row
tsk_bit_array_get_row(&arr, 0, &arr_row1);
tsk_bit_array_get_row(&arr, 1, &arr_row2);
// create a length-2 array with 64 bit capacity (two chunks per row)
tsk_bitset_init(&arr, 64, 2);
CU_ASSERT_EQUAL_FATAL(arr.len, 2);
CU_ASSERT_EQUAL_FATAL(arr.row_len, 2);

// fill the first 50 bits of the first row
for (tsk_bit_array_value_t i = 0; i < 50; i++) {
tsk_bit_array_add_bit(&arr_row1, i);
for (tsk_bitset_val_t i = 0; i < 50; i++) {
tsk_bitset_set_bit(&arr, 0, i);
items_truth[n_items_truth] = (tsk_id_t) i;
n_items_truth++;
}

tsk_bit_array_get_items(&arr_row1, items, &n_items);
tsk_bitset_get_items(&arr, 0, items, &n_items);
assert_arrays_equal(n_items_truth, items, items_truth);

tsk_memset(items, 0, 64);
tsk_memset(items_truth, 0, 64);
n_items = n_items_truth = 0;

// fill bits 20-40 of the second row
for (tsk_bit_array_value_t i = 20; i < 40; i++) {
tsk_bit_array_add_bit(&arr_row2, i);
for (tsk_bitset_val_t i = 20; i < 40; i++) {
tsk_bitset_set_bit(&arr, 1, i);
items_truth[n_items_truth] = (tsk_id_t) i;
n_items_truth++;
}

tsk_bit_array_get_items(&arr_row2, items, &n_items);
tsk_bitset_get_items(&arr, 1, items, &n_items);
assert_arrays_equal(n_items_truth, items, items_truth);

tsk_memset(items, 0, 64);
tsk_memset(items_truth, 0, 64);
n_items = n_items_truth = 0;

// verify our assumptions about row selection
CU_ASSERT_EQUAL_FATAL(arr.data[0], 4294967295);
CU_ASSERT_EQUAL_FATAL(arr.data[1], 262143);
CU_ASSERT_EQUAL_FATAL(arr_row1.data[0], 4294967295);
CU_ASSERT_EQUAL_FATAL(arr_row1.data[1], 262143);

CU_ASSERT_EQUAL_FATAL(arr.data[2], 4293918720);
CU_ASSERT_EQUAL_FATAL(arr.data[3], 255);
CU_ASSERT_EQUAL_FATAL(arr_row2.data[0], 4293918720);
CU_ASSERT_EQUAL_FATAL(arr_row2.data[1], 255);
CU_ASSERT_EQUAL_FATAL(arr.data[0], 4294967295); // row1 elem1
CU_ASSERT_EQUAL_FATAL(arr.data[1], 262143); // row1 elem2
CU_ASSERT_EQUAL_FATAL(arr.data[2], 4293918720); // row2 elem1
CU_ASSERT_EQUAL_FATAL(arr.data[3], 255); // row2 elem2

// subtract the second from the first row, store in first
tsk_bit_array_subtract(&arr_row1, &arr_row2);
tsk_bitset_subtract(&arr, 0, &arr, 1);

// verify our assumptions about subtraction
CU_ASSERT_EQUAL_FATAL(arr_row1.data[0], 1048575);
CU_ASSERT_EQUAL_FATAL(arr_row1.data[1], 261888);
CU_ASSERT_EQUAL_FATAL(arr.data[0], 1048575);
CU_ASSERT_EQUAL_FATAL(arr.data[1], 261888);

tsk_bit_array_t int_result;
tsk_bit_array_init(&int_result, 64, 1);
tsk_bitset_t int_result;
tsk_bitset_init(&int_result, 64, 1);
CU_ASSERT_EQUAL_FATAL(int_result.len, 1);
CU_ASSERT_EQUAL_FATAL(int_result.row_len, 2);

// their intersection should be zero
tsk_bit_array_intersect(&arr_row1, &arr_row2, &int_result);
tsk_bitset_intersect(&arr, 0, &arr, 1, &int_result);
CU_ASSERT_EQUAL_FATAL(int_result.data[0], 0);
CU_ASSERT_EQUAL_FATAL(int_result.data[1], 0);

// now, add them back together, storing back in a
tsk_bit_array_add(&arr_row1, &arr_row2);
tsk_bitset_union(&arr, 0, &arr, 1);

// now, their intersection should be the subtracted chunk (20-40)
tsk_bit_array_intersect(&arr_row1, &arr_row2, &int_result);
tsk_bitset_intersect(&arr, 0, &arr, 1, &int_result);
CU_ASSERT_EQUAL_FATAL(int_result.data[0], 4293918720);
CU_ASSERT_EQUAL_FATAL(int_result.data[1], 255);

tsk_bit_array_free(&int_result);
tsk_bit_array_free(&arr);
tsk_bitset_free(&int_result);
tsk_bitset_free(&arr);
}

static void
Expand Down
119 changes: 67 additions & 52 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1260,16 +1260,16 @@ tsk_avl_tree_int_ordered_nodes(const tsk_avl_tree_int_t *self, tsk_avl_node_int_
}

// Bit Array implementation. Allows us to store unsigned integers in a compact manner.
// Currently implemented as an array of 32-bit unsigned integers for ease of counting.
// Currently implemented as an array of 32-bit unsigned integers.

int
tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length)
tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length)
{
int ret = 0;

self->size = (num_bits >> TSK_BIT_ARRAY_CHUNK)
+ (num_bits % TSK_BIT_ARRAY_NUM_BITS ? 1 : 0);
self->data = tsk_calloc(self->size * length, sizeof(*self->data));
self->row_len = (num_bits / TSK_BITSET_BITS) + (num_bits % TSK_BITSET_BITS ? 1 : 0);
self->len = length;
self->data = tsk_calloc(self->row_len * length, sizeof(*self->data));
if (self->data == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
Expand All @@ -1278,96 +1278,111 @@ tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length
return ret;
}

void
tsk_bit_array_get_row(const tsk_bit_array_t *self, tsk_size_t row, tsk_bit_array_t *out)
{
out->size = self->size;
out->data = self->data + (row * self->size);
}
#define BITSET_DATA_ROW(bs, row) (bs)->data + (row) * (bs)->row_len
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, safer to wrap full expression of the macro in parantheses as unexpected things can happen if used with different precedence operators.


void
tsk_bit_array_intersect(
const tsk_bit_array_t *self, const tsk_bit_array_t *other, tsk_bit_array_t *out)
tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out)
{
for (tsk_size_t i = 0; i < self->size; i++) {
out->data[i] = self->data[i] & other->data[i];
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
tsk_bitset_val_t *restrict out_d = out->data;
for (tsk_size_t i = 0; i < self->row_len; i++) {
out_d[i] = self_d[i] & other_d[i];
}
}

void
tsk_bit_array_subtract(tsk_bit_array_t *self, const tsk_bit_array_t *other)
tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
tsk_size_t other_row)
{
for (tsk_size_t i = 0; i < self->size; i++) {
self->data[i] &= ~(other->data[i]);
tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
for (tsk_size_t i = 0; i < self->row_len; i++) {
self_d[i] &= ~(other_d[i]);
}
}

void
tsk_bit_array_add(tsk_bit_array_t *self, const tsk_bit_array_t *other)
tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
tsk_size_t other_row)
{
for (tsk_size_t i = 0; i < self->size; i++) {
self->data[i] |= other->data[i];
tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
for (tsk_size_t i = 0; i < self->row_len; i++) {
self_d[i] |= other_d[i];
}
}

void
tsk_bit_array_add_bit(tsk_bit_array_t *self, const tsk_bit_array_value_t bit)
tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
{
tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK;
self->data[i] |= (tsk_bit_array_value_t) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i));
tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
*(BITSET_DATA_ROW(self, row) + i) |= (tsk_bitset_val_t) 1
<< (bit - (TSK_BITSET_BITS * i));
}

bool
tsk_bit_array_contains(const tsk_bit_array_t *self, const tsk_bit_array_value_t bit)
tsk_bitset_contains(const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
{
tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK;
return self->data[i]
& ((tsk_bit_array_value_t) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i)));
tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
return *(BITSET_DATA_ROW(self, row) + i)
& ((tsk_bitset_val_t) 1 << (bit - (TSK_BITSET_BITS * i)));
}

tsk_size_t
tsk_bit_array_count(const tsk_bit_array_t *self)
static inline uint32_t
popcount(tsk_bitset_val_t v)
{
// Utilizes 12 operations per bit array. NB this only works on 32 bit integers.
// Utilizes 12 operations per chunk. NB this only works on 32 bit integers.
// Taken from:
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// There's a nice breakdown of this algorithm here:
// https://stackoverflow.com/a/109025
// Could probably do better with explicit SIMD (instead of SWAR), but not as
// portable: https://arxiv.org/pdf/1611.07612.pdf
//
// There is one solution to explore further, which uses __builtin_popcountll.
// This option is relatively simple, but requires a 64 bit bit array and also
// involves some compiler flag plumbing (-mpopcnt)
// The gcc/clang compiler flag will -mpopcnt will convert this code to a
// popcnt instruction (most if not all modern CPUs will support this). The
// popcnt instruction will yield some speed improvements, which depend on
// the tree sequence.
//
// NB: 32bit counting is typically faster than 64bit counting for this task.
// (at least on x86-64)

tsk_bit_array_value_t tmp;
tsk_size_t i, count = 0;
v = v - ((v >> 1) & 0x55555555);
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
}

tsk_size_t
tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row)
{
tsk_size_t i = 0;
uint32_t count = 0;
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);

for (i = 0; i < self->size; i++) {
tmp = self->data[i] - ((self->data[i] >> 1) & 0x55555555);
tmp = (tmp & 0x33333333) + ((tmp >> 2) & 0x33333333);
count += (((tmp + (tmp >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
for (i = 0; i < self->row_len; i++) {
count += popcount(self_d[i]);
}
return count;
return (tsk_size_t) count;
}

void
tsk_bit_array_get_items(
const tsk_bit_array_t *self, tsk_id_t *items, tsk_size_t *n_items)
tsk_bitset_get_items(
const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items)
{
// Get the items stored in the row of a bitset.
// Uses a de Bruijn sequence lookup table to determine the lowest bit set. See the
// wikipedia article for more info: https://w.wiki/BYiF
// Uses a de Bruijn sequence lookup table to determine the lowest bit set.
// See the wikipedia article for more info: https://w.wiki/BYiF

tsk_size_t i, n, off;
tsk_bit_array_value_t v, lsb; // least significant bit
tsk_bitset_val_t v, lsb; // least significant bit
static const tsk_id_t lookup[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25,
17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);

n = 0;
for (i = 0; i < self->size; i++) {
v = self->data[i];
off = i * ((tsk_size_t) TSK_BIT_ARRAY_NUM_BITS);
for (i = 0; i < self->row_len; i++) {
v = self_d[i];
off = i * TSK_BITSET_BITS;
if (v == 0) {
continue;
}
Expand All @@ -1381,7 +1396,7 @@ tsk_bit_array_get_items(
}

void
tsk_bit_array_free(tsk_bit_array_t *self)
tsk_bitset_free(tsk_bitset_t *self)
{
tsk_safe_free(self->data);
}
46 changes: 24 additions & 22 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -1104,29 +1104,31 @@ FILE *tsk_get_debug_stream(void);

/* Bit Array functionality */

typedef uint32_t tsk_bit_array_value_t;
// define a 32-bit chunk size for our bitsets.
// this means we'll be able to hold 32 distinct items in each 32 bit uint
#define TSK_BITSET_BITS (tsk_size_t) 32
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#define TSK_BITSET_BITS (tsk_size_t) 32
#define TSK_BITSET_BITS ((tsk_size_t) 32)

Extra parentheses avoid unexpected casts when using the macro

typedef uint32_t tsk_bitset_val_t;

typedef struct {
tsk_size_t size; // Number of chunks per row
tsk_bit_array_value_t *data; // Array data
} tsk_bit_array_t;

#define TSK_BIT_ARRAY_CHUNK 5U
#define TSK_BIT_ARRAY_NUM_BITS (1U << TSK_BIT_ARRAY_CHUNK)

int tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length);
void tsk_bit_array_free(tsk_bit_array_t *self);
void tsk_bit_array_get_row(
const tsk_bit_array_t *self, tsk_size_t row, tsk_bit_array_t *out);
void tsk_bit_array_intersect(
const tsk_bit_array_t *self, const tsk_bit_array_t *other, tsk_bit_array_t *out);
void tsk_bit_array_subtract(tsk_bit_array_t *self, const tsk_bit_array_t *other);
void tsk_bit_array_add(tsk_bit_array_t *self, const tsk_bit_array_t *other);
void tsk_bit_array_add_bit(tsk_bit_array_t *self, const tsk_bit_array_value_t bit);
bool tsk_bit_array_contains(
const tsk_bit_array_t *self, const tsk_bit_array_value_t bit);
tsk_size_t tsk_bit_array_count(const tsk_bit_array_t *self);
void tsk_bit_array_get_items(
const tsk_bit_array_t *self, tsk_id_t *items, tsk_size_t *n_items);
tsk_size_t row_len; // Number of size TSK_BITSET_BITS chunks per row
tsk_size_t len; // Number of rows
tsk_bitset_val_t *data;
} tsk_bitset_t;

int tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length);
void tsk_bitset_free(tsk_bitset_t *self);
void tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out);
void tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row,
const tsk_bitset_t *other, tsk_size_t other_row);
void tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
tsk_size_t other_row);
void tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
bool tsk_bitset_contains(
const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
tsk_size_t tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row);
void tsk_bitset_get_items(
const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items);

#ifdef __cplusplus
}
Expand Down
Loading