Skip to content

Commit 8746674

Browse files
authored
feat: implement roaring64_bitmap_add_offset (and roaring64_bitmap_sub_offset and roaring64_bitmap_add_offset_signed) (#787)
* feat: implement roaring64_bitmap_add_offset In order to allow shifting the full range, take `bool negative, uint64_t offset` rather than a `int64_t` * split to separate `add/sub_offset` functions * refactor: use consistent `r` name for roaring64_bitmap_t * perf: avoid re-searching the ART for the item we just inserted
1 parent ffcc5fb commit 8746674

File tree

3 files changed

+421
-2
lines changed

3 files changed

+421
-2
lines changed

include/roaring/roaring64.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,38 @@ void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min,
512512
*/
513513
void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min,
514514
uint64_t max);
515+
/**
 * Return a copy of the bitmap with all values shifted by offset.
 *
 * If `positive` is true, the shift is added, otherwise subtracted. Values that
 * overflow or underflow uint64_t are dropped. The caller is responsible for
 * freeing the returned bitmap.
 *
 * The direction is passed as a separate `bool` (rather than using a signed
 * int64_t offset) so that shifts by the full uint64_t range are expressible.
 */
roaring64_bitmap_t *roaring64_bitmap_add_offset_signed(
    const roaring64_bitmap_t *r, bool positive, uint64_t offset);
/**
526+
* Return a copy of the bitmap with all values shifted up by offset.
527+
*
528+
* Values that overflow or underflow uint64_t are dropped. The caller is
529+
* responsible for freeing the returned bitmap.
530+
*/
531+
static inline roaring64_bitmap_t *roaring64_bitmap_add_offset(
532+
const roaring64_bitmap_t *r, uint64_t offset) {
533+
return roaring64_bitmap_add_offset_signed(r, true, offset);
534+
}
535+
536+
/**
537+
* Return a copy of the bitmap with all values shifted down by offset.
538+
*
539+
* Values that overflow or underflow uint64_t are dropped. The caller is
540+
* responsible for freeing the returned bitmap.
541+
*/
542+
static inline roaring64_bitmap_t *roaring64_bitmap_sub_offset(
543+
const roaring64_bitmap_t *r, uint64_t offset) {
544+
return roaring64_bitmap_add_offset_signed(r, false, offset);
545+
}
546+
515547
/**
516548
* How many bytes are required to serialize this bitmap.
517549
*

src/roaring64.c

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1919,6 +1919,131 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min,
19191919
}
19201920
}
19211921

1922+
/**
 * Core implementation behind roaring64_bitmap_{add,sub}_offset.
 *
 * Returns a newly allocated bitmap containing every value of `r` shifted by
 * `offset` (added when `positive` is true, subtracted otherwise). Shifted
 * values that fall outside the uint64_t range are dropped. The caller owns
 * (and must free) the returned bitmap; `r` is left untouched.
 */
roaring64_bitmap_t *roaring64_bitmap_add_offset_signed(
    const roaring64_bitmap_t *r, bool positive, uint64_t offset) {
    // A zero shift is the identity: just copy.
    if (offset == 0) {
        return roaring64_bitmap_copy(r);
    }

    roaring64_bitmap_t *answer = roaring64_bitmap_create();

    // Decompose the offset into a signed container-level shift and an
    // intra-container shift. For negative offsets the low 16 bits wrap: e.g.
    // -1 = container_offset(-1) + in_offset(0xffff), because shifting by -1
    // container is a shift of -0x1_0000, so we need to shift up within
    // containers to get back to -1
    uint16_t low16 = (uint16_t)offset;
    int64_t container_offset;
    uint16_t in_offset;
    if (positive) {
        container_offset = (int64_t)(offset >> 16);
        in_offset = low16;
    } else if (low16 == 0) {
        // Offset is an exact multiple of the container width: a pure
        // container-level shift with no intra-container movement.
        container_offset = -(int64_t)(offset >> 16);
        in_offset = 0;
    } else {
        // Unsigned negation wraps mod 2^16, giving the complementary upward
        // in-container shift that pairs with the extra -1 container shift.
        container_offset = -(int64_t)(offset >> 16) - 1;
        in_offset = (uint16_t)-low16;
    }

    art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true);

    // Fast path: containers move as whole units, so each one can be copied
    // straight to its new key with no splitting or merging.
    if (in_offset == 0) {
        while (it.value != NULL) {
            leaf_t leaf = (leaf_t)*it.value;
            // New container index; keys are 48 bits wide, so anything outside
            // [0, 2^48) has over/underflowed and is dropped.
            int64_t k =
                (int64_t)(combine_key(it.key, 0) >> 16) + container_offset;
            if ((uint64_t)k < (uint64_t)1 << 48) {
                uint8_t new_high48[ART_KEY_BYTES];
                split_key((uint64_t)k << 16, new_high48);
                uint8_t typecode = get_typecode(leaf);
                container_t *container =
                    get_copy_of_container(get_container(r, leaf), &typecode,
                                          /*copy_on_write=*/false);
                leaf_t new_leaf = add_container(answer, container, typecode);
                art_insert(&answer->art, new_high48, (art_val_t)new_leaf);
            }
            art_iterator_next(&it);
        }
        return answer;
    }

    // General path: each source container splits into a "lo" part (stays at
    // shifted index k) and a "hi" part (spills into index k+1).
    // Track the most recently inserted hi container so that the next
    // iteration's lo can merge with it without re-searching the ART.
    leaf_t *prev_hi_leaf = NULL;
    int64_t prev_hi_k = -1;

    while (it.value != NULL) {
        leaf_t leaf = (leaf_t)*it.value;
        int64_t k = (int64_t)(combine_key(it.key, 0) >> 16) + container_offset;

        container_t *lo = NULL, *hi = NULL;
        container_t **lo_ptr = NULL, **hi_ptr = NULL;

        // Only request the output halves whose destination index is
        // representable in 48 bits; out-of-range halves are dropped.
        if ((uint64_t)k < (uint64_t)1 << 48) {
            lo_ptr = &lo;
        }
        if ((uint64_t)(k + 1) < (uint64_t)1 << 48) {
            hi_ptr = &hi;
        }
        if (lo_ptr == NULL && hi_ptr == NULL) {
            art_iterator_next(&it);
            continue;
        }

        uint8_t typecode = get_typecode(leaf);
        const container_t *c =
            container_unwrap_shared(get_container(r, leaf), &typecode);
        // Splits `c` shifted by in_offset into *lo_ptr / *hi_ptr (each may be
        // NULL when that half is empty or was not requested).
        container_add_offset(c, typecode, lo_ptr, hi_ptr, in_offset);

        if (lo != NULL) {
            if (prev_hi_leaf != NULL && prev_hi_k == k) {
                // The previous container's hi part landed at the same index:
                // union it with our lo part in place.
                // NOTE(review): assumes the leaf pointer returned by
                // art_insert stays valid until the next insertion into
                // answer->art — confirm against the ART implementation.
                uint8_t existing_type = get_typecode(*prev_hi_leaf);
                container_t *existing_c = get_container(answer, *prev_hi_leaf);
                uint8_t merged_type;
                container_t *merged_c = container_ior(
                    existing_c, existing_type, lo, typecode, &merged_type);
                if (merged_c != existing_c) {
                    container_free(existing_c, existing_type);
                }
                replace_container(answer, prev_hi_leaf, merged_c, merged_type);
                // lo was not consumed by container_ior; release our copy.
                container_free(lo, typecode);
            } else {
                uint8_t lo_high48[ART_KEY_BYTES];
                split_key((uint64_t)k << 16, lo_high48);
                leaf_t new_leaf = add_container(answer, lo, typecode);
                art_insert(&answer->art, lo_high48, (art_val_t)new_leaf);
            }
        }

        // Reset before (possibly) recording this iteration's hi insertion.
        prev_hi_leaf = NULL;
        if (hi != NULL) {
            uint8_t hi_high48[ART_KEY_BYTES];
            split_key((uint64_t)(k + 1) << 16, hi_high48);
            leaf_t new_leaf = add_container(answer, hi, typecode);
            prev_hi_leaf = (leaf_t *)art_insert(&answer->art, hi_high48,
                                                (art_val_t)new_leaf);
            prev_hi_k = k + 1;
        }

        art_iterator_next(&it);
    }

    // Repair containers (e.g., convert low-cardinality bitset containers to
    // array containers after lazy union operations).
    art_iterator_t repair_it = art_init_iterator(&answer->art, /*first=*/true);
    while (repair_it.value != NULL) {
        leaf_t *leaf_ptr = (leaf_t *)repair_it.value;
        uint8_t typecode = get_typecode(*leaf_ptr);
        container_t *repaired = container_repair_after_lazy(
            get_container(answer, *leaf_ptr), &typecode);
        replace_container(answer, leaf_ptr, repaired, typecode);
        art_iterator_next(&repair_it);
    }

    return answer;
}
2046+
19222047
// Returns the number of distinct high 32-bit entries in the bitmap.
19232048
static inline uint64_t count_high32(const roaring64_bitmap_t *r) {
19242049
art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true);

0 commit comments

Comments
 (0)