Cydhra
diff --git a/‎.github/workflows/rust.yml‎
Lines changed: 14 additions & 3 deletions b/‎.github/workflows/rust.yml‎
Lines changed: 14 additions & 3 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 5 additions & 0 deletions b/‎Cargo.toml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎benches/bp.rs‎
Lines changed: 147 additions & 0 deletions b/‎benches/bp.rs‎
Lines changed: 147 additions & 0 deletions
diff --git a/‎benches/sparse_equals.rs‎
Lines changed: 2 additions & 2 deletions b/‎benches/sparse_equals.rs‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎readme.md‎
Lines changed: 2 additions & 0 deletions b/‎readme.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/bit_vec/fast_rs_vec/tests.rs‎
Lines changed: 17 additions & 17 deletions b/‎src/bit_vec/fast_rs_vec/tests.rs‎
Lines changed: 17 additions & 17 deletions
diff --git a/‎src/bit_vec/mod.rs‎
Lines changed: 5 additions & 4 deletions b/‎src/bit_vec/mod.rs‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎src/bit_vec/tests.rs‎
Lines changed: 6 additions & 2 deletions b/‎src/bit_vec/tests.rs‎
Lines changed: 6 additions & 2 deletions
@@ -8,16 +8,27 @@ on:
 
 env:
   CARGO_TERM_COLOR: always
-  RUSTFLAGS: -C target-cpu=native
+
 
 jobs:
   build:
-
     runs-on: ubuntu-latest
-
+    env:
+      RUSTFLAGS: -C target-cpu=native
     steps:
     - uses: actions/checkout@v4
     - name: Build
       run: cargo build --verbose --all-features
     - name: Run tests
       run: cargo test --verbose --all-features
+
+  test-fallbacks:
+    runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: -C target-cpu=x86-64
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build
+        run: cargo build --verbose --features serde
+      - name: Run tests
+        run: cargo test --verbose --features serde
@@ -25,6 +25,7 @@ rand = { version = "0.8", features = ["alloc"] }
 
 [features]
 simd = []
+bp_u16_lookup = []
 docsrs = [] # special feature for docs.rs to enable doc_auto_cfg on nightly
 
 [[bench]]
@@ -63,6 +64,10 @@ harness = false
 name = "rmq"
 harness = false
 
+[[bench]]
+name = "bp"
+harness = false
+
 [[bench]]
 name = "elias_fano_construction"
 harness = false
 
@@ -0,0 +1,147 @@
+#![allow(long_running_const_eval)]
+
+use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+use std::cmp::Reverse;
+use std::collections::{BinaryHeap, HashSet};
+use vers_vecs::trees::bp::BpBuilder;
+use vers_vecs::trees::bp::BpTree;
+use vers_vecs::trees::{Tree, TreeBuilder};
+
+mod common;
+
+const BLOCK_SIZE: usize = 1024;
+
+// Builds a random BP tree by decoding a random Prüfer sequence. TODO: the O(n log n) decoding is too slow for the largest trees.
+fn generate_tree<R: Rng>(rng: &mut R, nodes: u64) -> BpTree<BLOCK_SIZE> {
+    // draw a random Prüfer sequence of length nodes - 2 with labels from 0..nodes - 1
+    let mut sequence = vec![0; (nodes - 2) as usize];
+    for i in 0..nodes - 2 {
+        sequence[i as usize] = rng.gen_range(0..nodes - 1);
+    }
+
+    // decode the Prüfer sequence: every node starts at degree 1, plus 1 per occurrence in the sequence
+    let mut degrees = vec![1; nodes as usize];
+    sequence.iter().for_each(|i| degrees[*i as usize] += 1);
+
+    let mut prefix_sum = vec![0; nodes as usize];
+    let mut sum = 0;
+    degrees.iter().enumerate().for_each(|(i, d)| {
+        prefix_sum[i] = sum;
+        sum += d;
+    });
+
+    let mut children = vec![0u64; sum];
+    let mut assigned_children = vec![0; nodes as usize];
+
+    // keep a min-heap of the current degree-one nodes to reduce runtime from O(n^2) to O(n log n)
+    let mut degree_one_set = BinaryHeap::new();
+    degrees
+        .iter()
+        .enumerate()
+        .filter(|(_, &v)| v == 1)
+        .for_each(|(idx, _)| degree_one_set.push(Reverse(idx as u64)));
+
+    sequence.iter().for_each(|&i| {
+        let j = degree_one_set.pop().unwrap().0;
+        children[prefix_sum[i as usize] + assigned_children[i as usize]] = j;
+        children[prefix_sum[j as usize] + assigned_children[j as usize]] = i;
+        degrees[i as usize] -= 1;
+        if degrees[i as usize] == 1 {
+            degree_one_set.push(Reverse(i))
+        }
+
+        degrees[j as usize] -= 1;
+        if degrees[j as usize] == 1 {
+            degree_one_set.push(Reverse(j))
+        }
+
+        assigned_children[i as usize] += 1;
+        assigned_children[j as usize] += 1;
+    });
+
+    assert_eq!(degrees.iter().sum::<usize>(), 2);
+    let u = degree_one_set.pop().unwrap().0;
+    let v = degree_one_set.pop().unwrap().0;
+
+    children[prefix_sum[u as usize] + assigned_children[u as usize]] = v;
+    children[prefix_sum[v as usize] + assigned_children[v as usize]] = u;
+
+    // build the tree with an iterative depth-first traversal starting at node 0
+    let mut bpb = BpBuilder::with_capacity(nodes);
+    let mut stack = Vec::new();
+    let mut visited = HashSet::with_capacity(nodes as usize);
+    visited.insert(0);
+    stack.push((0, 0u64, true));
+    while let Some((depth, node, enter)) = stack.pop() {
+        if enter {
+            bpb.enter_node();
+            stack.push((depth, node, false));
+            for child in children
+                .iter()
+                .take(*prefix_sum.get(node as usize + 1).unwrap_or(&children.len()))
+                .skip(prefix_sum[node as usize])
+            {
+                if visited.insert(*child) {
+                    stack.push((depth + 1, *child, true))
+                }
+            }
+        } else {
+            bpb.leave_node();
+        }
+    }
+
+    bpb.build().unwrap()
+}
+
+fn bench_navigation(b: &mut Criterion) {
+    let mut group = b.benchmark_group("bp");
+    group.plot_config(common::plot_config());
+
+    for l in common::SIZES {
+        // Reseed the rng with a constant for every size, because the measurements depend on the
+        // structure of the generated tree and runs must stay comparable with each other.
+        // This only serves to detect performance changes between runs; it may not give
+        // an accurate summary of the library's runtime
+        let mut rng = StdRng::from_seed([0; 32]);
+
+        let bp = generate_tree(&mut rng, l as u64);
+        let node_handles = (0..l).map(|i| bp.node_handle(i)).collect::<Vec<_>>();
+
+        group.bench_with_input(BenchmarkId::new("parent", l), &l, |b, _| {
+            b.iter_batched(
+                || node_handles[rng.gen_range(0..node_handles.len())],
+                |h| black_box(bp.parent(h)),
+                BatchSize::SmallInput,
+            )
+        });
+
+        group.bench_with_input(BenchmarkId::new("last_child", l), &l, |b, _| {
+            b.iter_batched(
+                || node_handles[rng.gen_range(0..node_handles.len())],
+                |h| black_box(bp.last_child(h)),
+                BatchSize::SmallInput,
+            )
+        });
+
+        group.bench_with_input(BenchmarkId::new("next_sibling", l), &l, |b, _| {
+            b.iter_batched(
+                || node_handles[rng.gen_range(0..node_handles.len())],
+                |h| black_box(bp.next_sibling(h)),
+                BatchSize::SmallInput,
+            )
+        });
+
+        group.bench_with_input(BenchmarkId::new("prev_sibling", l), &l, |b, _| {
+            b.iter_batched(
+                || node_handles[rng.gen_range(0..node_handles.len())],
+                |h| black_box(bp.previous_sibling(h)),
+                BatchSize::SmallInput,
+            )
+        });
+    }
+}
+
+criterion_group!(benches, bench_navigation);
+criterion_main!(benches);
@@ -44,7 +44,7 @@ fn bench(b: &mut Criterion<TimeDiff>) {
 
         for fill_factor in FILL_FACTORS {
             group.bench_with_input(
-                BenchmarkId::new("sparse overhead equal", &fill_factor),
+                BenchmarkId::new("sparse overhead equal", fill_factor),
                 &fill_factor,
                 |b, _| {
                     b.iter_custom(|iters| {
@@ -69,7 +69,7 @@ fn bench(b: &mut Criterion<TimeDiff>) {
             );
 
             group.bench_with_input(
-                BenchmarkId::new("sparse overhead unequal", &fill_factor),
+                BenchmarkId::new("sparse overhead unequal", fill_factor),
                 &fill_factor,
                 |b, _| {
                     b.iter_custom(|iters| {
 
@@ -16,6 +16,7 @@ since the intrinsics speed up both `rank` and `select` operations by a factor of
 - An Elias-Fano encoding of monotone sequences supporting constant-time predecessor/successor queries.
 - Two Range Minimum Query vector structures for constant-time range minimum queries.
 - A Wavelet Matrix supporting `O(k)` rank, select, statistical, predecessor, and successor queries.
+- A succinct tree structure supporting level-ordered and depth-first-ordered tree navigation and subtree queries.
 
 ## Why Vers?
 - Vers is among the fastest publicly available bit vector implementations for rank and select operations.
@@ -33,6 +34,7 @@ It also enables a special iterator for the rank/select bit vector that uses vect
 The feature only works on nightly Rust.
 Enabling it on stable Rust is a no-op, because the required CPU features are not available there.
 - `serde`: Enables serialization and deserialization of the data structures using the `serde` crate.
+- `bp_u16_lookup`: Enables a larger lookup table for BP tree queries. The larger table requires 128 KiB instead of 4 KiB.
 
 ## Benchmarks
 I benchmarked the implementations against publicly available implementations of the same data structures.
 
@@ -45,14 +45,14 @@ fn test_random_data_rank() {
         let data_index = rnd_index / WORD_SIZE;
         let bit_index = rnd_index % WORD_SIZE;
 
-        for i in 0..data_index {
-            expected_rank1 += data[i].count_ones() as usize;
-            expected_rank0 += data[i].count_zeros() as usize;
+        for v in data.iter().take(data_index) {
+            expected_rank1 += v.count_ones() as usize;
+            expected_rank0 += v.count_zeros() as usize;
         }
 
         if bit_index > 0 {
-            expected_rank1 += (data[data_index] & (1 << bit_index) - 1).count_ones() as usize;
-            expected_rank0 += (!data[data_index] & (1 << bit_index) - 1).count_ones() as usize;
+            expected_rank1 += (data[data_index] & ((1 << bit_index) - 1)).count_ones() as usize;
+            expected_rank0 += (!data[data_index] & ((1 << bit_index) - 1)).count_ones() as usize;
         }
 
         assert_eq!(actual_rank1, expected_rank1);
@@ -503,14 +503,14 @@ fn test_custom_iter_behavior() {
     assert!(iter.advance_by(6).is_err());
     assert!(iter.advance_back_by(5).is_ok());
 
-    assert_eq!(rs.iter().skip(2).next(), Some(0));
+    assert_eq!(rs.iter().nth(2), Some(0));
     assert_eq!(rs.iter().count(), 10);
     assert_eq!(rs.iter().skip(2).count(), 8);
     assert_eq!(rs.iter().last(), Some(0));
     assert_eq!(rs.iter().nth(3), Some(1));
     assert_eq!(rs.iter().nth(12), None);
 
-    assert_eq!(rs.clone().into_iter().skip(2).next(), Some(0));
+    assert_eq!(rs.clone().into_iter().nth(2), Some(0));
     assert_eq!(rs.clone().into_iter().count(), 10);
     assert_eq!(rs.clone().into_iter().skip(2).count(), 8);
     assert_eq!(rs.clone().into_iter().last(), Some(0));
@@ -1093,21 +1093,21 @@ fn test_sparse_equals() {
     let rs1 = RsVec::from_bit_vec(bv.clone());
     let rs2 = RsVec::from_bit_vec(bv.clone());
 
-    assert_eq!(rs1.sparse_equals::<false>(&rs2), true);
-    assert_eq!(rs1.sparse_equals::<true>(&rs2), true);
+    assert!(rs1.sparse_equals::<false>(&rs2));
+    assert!(rs1.sparse_equals::<true>(&rs2));
 
     bv.flip_bit(3);
     let rs2 = RsVec::from_bit_vec(bv.clone());
 
-    assert_eq!(rs1.sparse_equals::<false>(&rs2), false);
-    assert_eq!(rs1.sparse_equals::<true>(&rs2), false);
+    assert!(!rs1.sparse_equals::<false>(&rs2));
+    assert!(!rs1.sparse_equals::<true>(&rs2));
 
     bv.flip_bit(3);
     bv.flip_bit(2 * SUPER_BLOCK_SIZE - 1);
     let rs1 = RsVec::from_bit_vec(bv.clone());
 
-    assert_eq!(rs1.sparse_equals::<false>(&rs2), false);
-    assert_eq!(rs1.sparse_equals::<true>(&rs2), false);
+    assert!(!rs1.sparse_equals::<false>(&rs2));
+    assert!(!rs1.sparse_equals::<true>(&rs2));
 }
 
 #[test]
@@ -1137,18 +1137,18 @@ fn test_full_equals() {
     let rs1 = RsVec::from_bit_vec(bv.clone());
     let rs2 = RsVec::from_bit_vec(bv.clone());
 
-    assert_eq!(rs1.full_equals(&rs2), true);
+    assert!(rs1.full_equals(&rs2));
 
     bv.flip_bit(3);
     let rs2 = RsVec::from_bit_vec(bv.clone());
 
-    assert_eq!(rs1.full_equals(&rs2), false);
+    assert!(!rs1.full_equals(&rs2));
 
     bv.flip_bit(3);
     bv.flip_bit(2 * SUPER_BLOCK_SIZE - 1);
     let rs1 = RsVec::from_bit_vec(bv.clone());
 
-    assert_eq!(rs1.full_equals(&rs2), false);
+    assert!(!rs1.full_equals(&rs2));
 }
 
 // fuzzing test for iter1 and iter0 as last ditch fail-safe
@@ -1332,7 +1332,7 @@ fn test_iter1_regression_i8() {
     let mut bv = BitVec::from_zeros(8193);
 
     for idx in &input_on_bits {
-        bv.set(*idx as usize, 1).unwrap();
+        bv.set(*idx, 1).unwrap();
     }
 
     let bv = RsVec::from_bit_vec(bv);
 
@@ -535,7 +535,7 @@ impl BitVec {
             return;
         }
 
-        let new_limb_count = (self.len - n + WORD_SIZE - 1) / WORD_SIZE;
+        let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
 
         // cut off limbs that we no longer need
         if new_limb_count < self.data.len() {
@@ -1019,11 +1019,12 @@ impl BitVec {
     pub fn count_ones(&self) -> u64 {
         let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
             .iter()
-            .map(|limb| limb.count_ones() as u64)
+            .map(|limb| u64::from(limb.count_ones()))
             .sum();
         if self.len % WORD_SIZE > 0 {
-            ones += (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones()
-                as u64;
+            ones += u64::from(
+                (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
+            );
         }
         ones
     }
 
@@ -213,14 +213,18 @@ fn test_custom_iter_behavior() {
     assert!(iter.advance_by(6).is_err());
     assert!(iter.advance_back_by(5).is_ok());
 
-    assert_eq!(bv.iter().skip(2).next(), Some(0));
+    #[allow(clippy::iter_skip_next)]
+    let next = bv.iter().skip(2).next(); // explicit test for skip()
+    assert_eq!(next, Some(0));
     assert_eq!(bv.iter().count(), 10);
     assert_eq!(bv.iter().skip(2).count(), 8);
     assert_eq!(bv.iter().last(), Some(0));
     assert_eq!(bv.iter().nth(3), Some(1));
     assert_eq!(bv.iter().nth(12), None);
 
-    assert_eq!(bv.clone().into_iter().skip(2).next(), Some(0));
+    #[allow(clippy::iter_skip_next)]
+    let next = bv.clone().into_iter().skip(2).next(); // explicit test for skip()
+    assert_eq!(next, Some(0));
     assert_eq!(bv.clone().into_iter().count(), 10);
     assert_eq!(bv.clone().into_iter().skip(2).count(), 8);
     assert_eq!(bv.clone().into_iter().last(), Some(0));