Skip to content

Commit 29eedab

Browse files
committed
make RoaringBitmap generic over its values
This will allow us to easily implement Roaring64, a 64-bit RoaringBitmap using the TwoLevelRoaringBitmap approach, which is up to 11x faster than the RoaringTreemap approach and reuses most of the Roaring32 code. Note that RoaringBitmap::full has been removed because it won't scale for Roaring64. RoaringTreemap is commented out for now and will be replaced in the next commit.
1 parent 4f8c798 commit 29eedab

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+2271
-2250
lines changed

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ license = "MIT OR Apache-2.0"
1818
[dependencies]
1919
bytemuck = "1.7.3"
2020
byteorder = "1.4.3"
21-
retain_mut = "=0.1.7"
2221
serde = { version = "1.0.139", optional = true }
2322

2423
[features]

benchmarks/benches/datasets.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::path::{Path, PathBuf};
66
use git2::FetchOptions;
77
use once_cell::sync::OnceCell as SyncOnceCell;
88

9-
use roaring::RoaringBitmap;
9+
use roaring::Roaring32;
1010

1111
static INSTANCE: SyncOnceCell<Vec<Dataset>> = SyncOnceCell::new();
1212

@@ -41,7 +41,7 @@ impl IntoIterator for Datasets {
4141

4242
pub struct Dataset {
4343
pub name: String,
44-
pub bitmaps: Vec<RoaringBitmap>,
44+
pub bitmaps: Vec<Roaring32>,
4545
}
4646

4747
fn init_datasets() -> Result<PathBuf, Box<dyn std::error::Error>> {
@@ -186,7 +186,7 @@ fn parse_datasets<P: AsRef<Path>>(path: P) -> Result<Vec<Dataset>, Box<dyn std::
186186
numbers.push(n);
187187
}
188188

189-
let bitmap = RoaringBitmap::from_sorted_iter(numbers.iter().copied())?;
189+
let bitmap = Roaring32::from_sorted_iter(numbers.iter().copied())?;
190190
numbers.clear();
191191
bitmaps.push(bitmap);
192192

benchmarks/benches/lib.rs

Lines changed: 56 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use criterion::{
88
Throughput,
99
};
1010

11-
use roaring::{MultiOps, RoaringBitmap, RoaringTreemap};
11+
use roaring::{MultiOps, Roaring32};
1212

1313
use crate::datasets::Datasets;
1414

@@ -18,13 +18,13 @@ mod datasets;
1818
fn pairwise_binary_op_matrix(
1919
c: &mut Criterion,
2020
op_name: &str,
21-
op_own_own: impl Fn(RoaringBitmap, RoaringBitmap) -> RoaringBitmap,
22-
op_own_ref: impl Fn(RoaringBitmap, &RoaringBitmap) -> RoaringBitmap,
23-
op_ref_own: impl Fn(&RoaringBitmap, RoaringBitmap) -> RoaringBitmap,
24-
op_ref_ref: impl Fn(&RoaringBitmap, &RoaringBitmap) -> RoaringBitmap,
25-
mut op_assign_owned: impl FnMut(&mut RoaringBitmap, RoaringBitmap),
26-
mut op_assign_ref: impl FnMut(&mut RoaringBitmap, &RoaringBitmap),
27-
op_len: impl Fn(&RoaringBitmap, &RoaringBitmap) -> u64,
21+
op_own_own: impl Fn(Roaring32, Roaring32) -> Roaring32,
22+
op_own_ref: impl Fn(Roaring32, &Roaring32) -> Roaring32,
23+
op_ref_own: impl Fn(&Roaring32, Roaring32) -> Roaring32,
24+
op_ref_ref: impl Fn(&Roaring32, &Roaring32) -> Roaring32,
25+
mut op_assign_owned: impl FnMut(&mut Roaring32, Roaring32),
26+
mut op_assign_ref: impl FnMut(&mut Roaring32, &Roaring32),
27+
op_len: impl Fn(&Roaring32, &Roaring32) -> u64,
2828
) {
2929
let mut group = c.benchmark_group(format!("pairwise_{}", op_name));
3030

@@ -120,7 +120,7 @@ fn pairwise_binary_op_matrix(
120120
fn pairwise_binary_op<R, M: Measurement>(
121121
group: &mut BenchmarkGroup<M>,
122122
op_name: &str,
123-
op: impl Fn(RoaringBitmap, RoaringBitmap) -> R,
123+
op: impl Fn(Roaring32, Roaring32) -> R,
124124
) {
125125
for dataset in Datasets {
126126
group.bench_function(BenchmarkId::new(op_name, &dataset.name), |b| {
@@ -152,17 +152,15 @@ fn creation(c: &mut Criterion) {
152152
group.bench_function(BenchmarkId::new("from_sorted_iter", &dataset.name), |b| {
153153
b.iter(|| {
154154
for bitmap_numbers in &dataset_numbers {
155-
black_box(
156-
RoaringBitmap::from_sorted_iter(bitmap_numbers.iter().copied()).unwrap(),
157-
);
155+
black_box(Roaring32::from_sorted_iter(bitmap_numbers.iter().copied()).unwrap());
158156
}
159157
})
160158
});
161159

162160
group.bench_function(BenchmarkId::new("collect", &dataset.name), |b| {
163161
b.iter(|| {
164162
for bitmap_numbers in &dataset_numbers {
165-
black_box(bitmap_numbers.iter().copied().collect::<RoaringBitmap>());
163+
black_box(bitmap_numbers.iter().copied().collect::<Roaring32>());
166164
}
167165
})
168166
});
@@ -408,15 +406,15 @@ fn deserialization(c: &mut Criterion) {
408406
group.bench_function(BenchmarkId::new("deserialize_from", &dataset.name), |b| {
409407
b.iter(|| {
410408
for buf in input.iter() {
411-
black_box(RoaringBitmap::deserialize_from(buf.as_slice()).unwrap());
409+
black_box(Roaring32::deserialize_from(buf.as_slice()).unwrap());
412410
}
413411
});
414412
});
415413

416414
group.bench_function(BenchmarkId::new("deserialize_unchecked_from", &dataset.name), |b| {
417415
b.iter(|| {
418416
for buf in input.iter() {
419-
black_box(RoaringBitmap::deserialize_unchecked_from(buf.as_slice()).unwrap());
417+
black_box(Roaring32::deserialize_unchecked_from(buf.as_slice()).unwrap());
420418
}
421419
});
422420
});
@@ -476,7 +474,7 @@ fn successive_and(c: &mut Criterion) {
476474
group.bench_function(BenchmarkId::new("Multi And Owned", &dataset.name), |b| {
477475
b.iter_batched(
478476
|| dataset.bitmaps.clone(),
479-
|bitmaps: Vec<RoaringBitmap>| black_box(bitmaps.intersection()),
477+
|bitmaps: Vec<Roaring32>| black_box(bitmaps.intersection()),
480478
BatchSize::LargeInput,
481479
);
482480
});
@@ -491,7 +489,7 @@ fn successive_or(c: &mut Criterion) {
491489
for dataset in Datasets {
492490
group.bench_function(BenchmarkId::new("Successive Or Assign Ref", &dataset.name), |b| {
493491
b.iter(|| {
494-
let mut output = RoaringBitmap::new();
492+
let mut output = Roaring32::new();
495493
for bitmap in &dataset.bitmaps {
496494
output |= bitmap;
497495
}
@@ -501,8 +499,8 @@ fn successive_or(c: &mut Criterion) {
501499
group.bench_function(BenchmarkId::new("Successive Or Assign Owned", &dataset.name), |b| {
502500
b.iter_batched(
503501
|| dataset.bitmaps.clone(),
504-
|bitmaps: Vec<RoaringBitmap>| {
505-
let mut output = RoaringBitmap::new();
502+
|bitmaps: Vec<Roaring32>| {
503+
let mut output = Roaring32::new();
506504
for bitmap in bitmaps {
507505
output |= bitmap;
508506
}
@@ -513,7 +511,7 @@ fn successive_or(c: &mut Criterion) {
513511

514512
group.bench_function(BenchmarkId::new("Successive Or Ref Ref", &dataset.name), |b| {
515513
b.iter(|| {
516-
let mut output = RoaringBitmap::new();
514+
let mut output = Roaring32::new();
517515
for bitmap in &dataset.bitmaps {
518516
output = (&output) | bitmap;
519517
}
@@ -527,7 +525,7 @@ fn successive_or(c: &mut Criterion) {
527525
group.bench_function(BenchmarkId::new("Multi Or Owned", &dataset.name), |b| {
528526
b.iter_batched(
529527
|| dataset.bitmaps.clone(),
530-
|bitmaps: Vec<RoaringBitmap>| black_box(bitmaps.union()),
528+
|bitmaps: Vec<Roaring32>| black_box(bitmaps.union()),
531529
BatchSize::LargeInput,
532530
);
533531
});
@@ -541,13 +539,13 @@ fn successive_or(c: &mut Criterion) {
541539

542540
fn is_empty(c: &mut Criterion) {
543541
c.bench_function("is_empty true", |b| {
544-
let bitmap = RoaringBitmap::new();
542+
let bitmap = Roaring32::new();
545543
b.iter(|| {
546544
bitmap.is_empty();
547545
});
548546
});
549547
c.bench_function("is_empty false", |b| {
550-
let mut bitmap = RoaringBitmap::new();
548+
let mut bitmap = Roaring32::new();
551549
bitmap.insert(1);
552550
b.iter(|| {
553551
bitmap.is_empty();
@@ -558,21 +556,21 @@ fn is_empty(c: &mut Criterion) {
558556
fn insert(c: &mut Criterion) {
559557
c.bench_function("create & insert 1", |b| {
560558
b.iter(|| {
561-
let mut bitmap = RoaringBitmap::new();
559+
let mut bitmap = Roaring32::new();
562560
bitmap.insert(black_box(1));
563561
});
564562
});
565563

566564
c.bench_function("insert 1", |b| {
567-
let mut bitmap = RoaringBitmap::new();
565+
let mut bitmap = Roaring32::new();
568566
b.iter(|| {
569567
bitmap.insert(black_box(1));
570568
});
571569
});
572570

573571
c.bench_function("create & insert several", |b| {
574572
b.iter(|| {
575-
let mut bitmap = RoaringBitmap::new();
573+
let mut bitmap = Roaring32::new();
576574
bitmap.insert(black_box(1));
577575
bitmap.insert(black_box(10));
578576
bitmap.insert(black_box(100));
@@ -584,7 +582,7 @@ fn insert(c: &mut Criterion) {
584582
});
585583

586584
c.bench_function("insert several", |b| {
587-
let mut bitmap = RoaringBitmap::new();
585+
let mut bitmap = Roaring32::new();
588586
b.iter(|| {
589587
bitmap.insert(black_box(1));
590588
bitmap.insert(black_box(10));
@@ -599,7 +597,7 @@ fn insert(c: &mut Criterion) {
599597

600598
fn contains(c: &mut Criterion) {
601599
c.bench_function("contains true", |b| {
602-
let mut bitmap: RoaringBitmap = RoaringBitmap::new();
600+
let mut bitmap: Roaring32 = Roaring32::new();
603601
bitmap.insert(1);
604602

605603
b.iter(|| {
@@ -608,7 +606,7 @@ fn contains(c: &mut Criterion) {
608606
});
609607

610608
c.bench_function("contains false", |b| {
611-
let bitmap: RoaringBitmap = RoaringBitmap::new();
609+
let bitmap: Roaring32 = Roaring32::new();
612610

613611
b.iter(|| {
614612
bitmap.contains(black_box(1));
@@ -618,7 +616,7 @@ fn contains(c: &mut Criterion) {
618616

619617
fn remove(c: &mut Criterion) {
620618
c.bench_function("remove 1", |b| {
621-
let mut sub: RoaringBitmap = (0..65_536).collect();
619+
let mut sub: Roaring32 = (0..65_536).collect();
622620
b.iter(|| {
623621
black_box(sub.remove(1000));
624622
});
@@ -627,7 +625,7 @@ fn remove(c: &mut Criterion) {
627625

628626
fn remove_range_bitmap(c: &mut Criterion) {
629627
c.bench_function("remove_range 1", |b| {
630-
let mut sub: RoaringBitmap = (0..65_536).collect();
628+
let mut sub: Roaring32 = (0..65_536).collect();
631629
b.iter(|| {
632630
// carefully delete part of the bitmap
633631
// only the first iteration will actually change something
@@ -641,7 +639,7 @@ fn remove_range_bitmap(c: &mut Criterion) {
641639
// Slower bench that creates a new bitmap on each iteration so that can benchmark
642640
// bitmap to array conversion
643641
b.iter(|| {
644-
let mut sub: RoaringBitmap = (0..65_536).collect();
642+
let mut sub: Roaring32 = (0..65_536).collect();
645643
black_box(sub.remove_range(100..65_536));
646644
assert_eq!(sub.len(), 100);
647645
});
@@ -653,15 +651,15 @@ fn insert_range_bitmap(c: &mut Criterion) {
653651
let mut group = c.benchmark_group("insert_range");
654652
group.throughput(criterion::Throughput::Elements(size as u64));
655653
group.bench_function(format!("from_empty_{}", size), |b| {
656-
let bm = RoaringBitmap::new();
654+
let bm = Roaring32::new();
657655
b.iter_batched(
658656
|| bm.clone(),
659657
|mut bm| black_box(bm.insert_range(0..size)),
660658
criterion::BatchSize::SmallInput,
661659
)
662660
});
663661
group.bench_function(format!("pre_populated_{}", size), |b| {
664-
let mut bm = RoaringBitmap::new();
662+
let mut bm = Roaring32::new();
665663
bm.insert_range(0..size);
666664
b.iter_batched(
667665
|| bm.clone(),
@@ -672,29 +670,29 @@ fn insert_range_bitmap(c: &mut Criterion) {
672670
}
673671
}
674672

675-
fn insert_range_treemap(c: &mut Criterion) {
676-
for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] {
677-
let mut group = c.benchmark_group("insert_range_treemap");
678-
group.throughput(criterion::Throughput::Elements(size));
679-
group.bench_function(format!("from_empty_{}", size), |b| {
680-
let bm = RoaringTreemap::new();
681-
b.iter_batched(
682-
|| bm.clone(),
683-
|mut bm| black_box(bm.insert_range(0..size)),
684-
criterion::BatchSize::SmallInput,
685-
)
686-
});
687-
group.bench_function(format!("pre_populated_{}", size), |b| {
688-
let mut bm = RoaringTreemap::new();
689-
bm.insert_range(0..size);
690-
b.iter_batched(
691-
|| bm.clone(),
692-
|mut bm| black_box(bm.insert_range(0..size)),
693-
criterion::BatchSize::SmallInput,
694-
)
695-
});
696-
}
697-
}
673+
// fn insert_range_treemap(c: &mut Criterion) {
674+
// for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] {
675+
// let mut group = c.benchmark_group("insert_range_treemap");
676+
// group.throughput(criterion::Throughput::Elements(size));
677+
// group.bench_function(format!("from_empty_{}", size), |b| {
678+
// let bm = RoaringTreemap::new();
679+
// b.iter_batched(
680+
// || bm.clone(),
681+
// |mut bm| black_box(bm.insert_range(0..size)),
682+
// criterion::BatchSize::SmallInput,
683+
// )
684+
// });
685+
// group.bench_function(format!("pre_populated_{}", size), |b| {
686+
// let mut bm = RoaringTreemap::new();
687+
// bm.insert_range(0..size);
688+
// b.iter_batched(
689+
// || bm.clone(),
690+
// |mut bm| black_box(bm.insert_range(0..size)),
691+
// criterion::BatchSize::SmallInput,
692+
// )
693+
// });
694+
// }
695+
// }
698696

699697
criterion_group!(
700698
benches,
@@ -713,7 +711,7 @@ criterion_group!(
713711
remove,
714712
remove_range_bitmap,
715713
insert_range_bitmap,
716-
insert_range_treemap,
714+
// insert_range_treemap,
717715
iteration,
718716
is_empty,
719717
serialization,

src/bitmap/mod.rs

Lines changed: 0 additions & 42 deletions
This file was deleted.

0 commit comments

Comments
 (0)