Skip to content

Commit b820503

Browse files
authored
Quality-of-life API improvements. (#32)
* added `into_bit_vec` to RsVec * add `into_parentheses_vec` to `BpTree` * `From<T> for BitVec` implementations for converting RsVec and BpTree back into BitVecs * `impl Extend<BitVec> for BitVec` and `BitVec.extend_bitvec()` to append many bits at once * `BitVec.split_at()` and `.split_at_unchecked()` to split a BitVec into two halves * implement `From<BpTree>` for `RsVec` * ensure `extend` only reallocates once --------- Co-authored-by: Johannes Hengstler
1 parent 5291fab commit b820503

File tree

4 files changed

+323
-0
lines changed

4 files changed

+323
-0
lines changed

src/bit_vec/fast_rs_vec/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,12 @@ impl From<BitVec> for RsVec {
542542
}
543543
}
544544

545+
impl From<RsVec> for BitVec {
546+
fn from(value: RsVec) -> Self {
547+
value.into_bit_vec()
548+
}
549+
}
550+
545551
// iter code in here to keep it more organized
546552
mod iter;
547553
// select code in here to keep it more organized

src/bit_vec/mod.rs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,26 @@ impl BitVec {
736736
self.len += len;
737737
}
738738

739+
/// Append the bits of another bit vector to the end of this vector.
740+
/// If this vector does not contain a multiple of 64 bits, the appended limbs need to be
741+
/// shifted to the left.
742+
/// This function is guaranteed to reallocate the underlying vector at most once.
743+
pub fn extend_bitvec(&mut self, other: &Self) {
744+
// reserve space for the new bits, ensuring at most one re-allocation
745+
self.data
746+
.reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
747+
748+
let full_limbs = other.len() / WORD_SIZE;
749+
for i in 0..full_limbs {
750+
self.append_bits(other.data[i], WORD_SIZE);
751+
}
752+
753+
let partial_bits = other.len % WORD_SIZE;
754+
if partial_bits > 0 {
755+
self.append_bits(other.data[full_limbs], partial_bits);
756+
}
757+
}
758+
739759
/// Return the length of the bit vector. The length is measured in bits.
740760
#[must_use]
741761
pub fn len(&self) -> usize {
@@ -912,6 +932,9 @@ impl BitVec {
912932
/// If the position at the end of the query is larger than the length of the vector,
913933
/// None is returned (even if the query partially overlaps with the vector).
914934
/// If the length of the query is larger than 64, None is returned.
935+
///
936+
/// The bit at `pos` is the least significant bit of the return value, and only the
936+
/// lowest `len` bits of the return value are used.
915938
#[must_use]
916939
pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
917940
if len > WORD_SIZE || len == 0 {
@@ -1192,6 +1215,85 @@ impl BitVec {
11921215
pub fn heap_size(&self) -> usize {
11931216
self.data.len() * size_of::<u64>()
11941217
}
1218+
1219+
/// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1220+
/// right half the remaining bits `at..`. If the split index is larger than the length of the
1221+
/// vector, the vector is returned unmodified in an `Err` variant.
1222+
///
1223+
/// # Errors
1224+
/// If the index is out of bounds, the function will return an error
1225+
/// containing the original vector.
1226+
///
1227+
/// See also: [`split_at_unchecked`]
1228+
pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
1229+
if at > self.len {
1230+
Err(self)
1231+
} else {
1232+
Ok(self.split_at_unchecked(at))
1233+
}
1234+
}
1235+
1236+
/// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1237+
/// right half the remaining bits `at..`.
1238+
///
1239+
/// # Panics
1240+
/// If the index is larger than the length of the vector the function will panic or run
1241+
/// out of memory.
1242+
/// Use [`split_at`] to properly handle this case.
1243+
#[must_use]
1244+
pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
1245+
let other_len = self.len - at;
1246+
let mut other = Self::with_capacity(other_len);
1247+
1248+
if other_len == 0 {
1249+
return (self, other);
1250+
}
1251+
1252+
let first_limb = at / WORD_SIZE;
1253+
let last_limb = self.len / WORD_SIZE;
1254+
1255+
// First, we figure out the number of bits from the first limb to retain in this vector:
1256+
let leading_partial = at % WORD_SIZE;
1257+
1258+
// If the split point is in the last limb, and the vector ends before the last bit, first_limb
1259+
// and last_limb will be equal, and the other half is simply other_len bits off the limb
1260+
// right shifted by the number of bits to retain in this vector.
1261+
if first_limb == last_limb {
1262+
other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
1263+
} else {
1264+
// Otherwise, some range n..last_limb should be copied in their entirety to the other half,
1265+
// with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
1266+
// n=first_limb if the entire first limb belongs in the other half.
1267+
let full_limbs = if leading_partial > 0 {
1268+
// If the split point is inside the first limb, we also have to remember to copy over
1269+
// the trailing bits to the new vector.
1270+
other.append_bits_unchecked(
1271+
self.data[first_limb] >> leading_partial,
1272+
WORD_SIZE - leading_partial,
1273+
);
1274+
first_limb + 1..last_limb
1275+
} else {
1276+
first_limb..last_limb
1277+
};
1278+
1279+
// Copy over any full limbs.
1280+
for i in full_limbs {
1281+
other.append_bits_unchecked(self.data[i], WORD_SIZE);
1282+
}
1283+
1284+
// Finally, if the vector has a partially filled last limb, we need to put those bits
1285+
// in the other half.
1286+
let trailing_partial = self.len % WORD_SIZE;
1287+
if trailing_partial > 0 {
1288+
other.append_bits_unchecked(self.data[last_limb], trailing_partial);
1289+
}
1290+
}
1291+
1292+
// remove the copied bits from the original vector
1293+
self.drop_last(other_len);
1294+
1295+
(self, other)
1296+
}
11951297
}
11961298

11971299
impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
@@ -1216,6 +1318,22 @@ impl From<Vec<u64>> for BitVec {
12161318
}
12171319
}
12181320

1321+
/// Appends every bit vector yielded by the iterator to the end of this vector, in iteration
/// order, using `extend_bitvec` for each of them.
impl Extend<BitVec> for BitVec {
    fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
        iter.into_iter()
            .for_each(|chunk| self.extend_bitvec(&chunk));
    }
}
1328+
1329+
/// Appends every borrowed bit vector yielded by the iterator to the end of this vector, in
/// iteration order, using `extend_bitvec` for each of them.
impl<'t> Extend<&'t BitVec> for BitVec {
    fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
        iter.into_iter()
            .for_each(|chunk| self.extend_bitvec(chunk));
    }
}
1336+
12191337
/// Create a new bit vector from u64 values.
12201338
/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
12211339
/// The function will append the bits of each element to the bit vector in the order they are

src/bit_vec/tests.rs

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,3 +626,190 @@ fn test_unpack() {
626626
assert_eq!(bv.unpack_element(8, 10), None);
627627
assert_eq!(bv.unpack_element(1000, 10), None);
628628
}
629+
630+
#[test]
fn test_extend() {
    // Builds `zero_count` zero bits and appends `one_count` one bits with `extend_bitvec`.
    let zeros_then_ones = |zero_count: usize, one_count: usize| {
        let mut joined = BitVec::from_zeros(zero_count);
        joined.extend_bitvec(&BitVec::from_ones(one_count));
        joined
    };

    // both parts fit into a single limb
    let small = zeros_then_ones(10, 10);
    assert_eq!(small.len, 20);
    assert_eq!(small.get_bits(0, 20), Some(0b11_1111_1111 << 10));

    // appending an empty vector changes nothing
    let mut unchanged = BitVec::from_zeros(10);
    unchanged.extend_bitvec(&BitVec::default());
    assert_eq!(unchanged.len, 10);
    assert_eq!(unchanged.get_bits(0, 10), Some(0));

    // appending to an empty vector
    let mut from_empty = BitVec::default();
    from_empty.extend_bitvec(&BitVec::from_ones(10));
    assert_eq!(from_empty.len, 10);
    assert_eq!(from_empty.get_bits(0, 10), Some(0b11_1111_1111));

    // large vectors spanning many limbs
    let large = zeros_then_ones(1000, 1000);
    assert_eq!(large.len, 2000);
    // sanity check: one word from each part
    assert_eq!(large.get_bits(64, 64), Some(0));
    assert_eq!(large.get_bits(1064, 64), Some(u64::MAX));

    // both parts are exactly one limb long
    let aligned = zeros_then_ones(64, 64);
    assert_eq!(aligned.len, 128);
    assert_eq!(aligned.get_bits(0, 64), Some(0));
    assert_eq!(aligned.get_bits(64, 64), Some(u64::MAX));
}
669+
670+
#[test]
fn test_split_at() {
    // exercises split_at_unchecked directly

    // split at index 2, between the two set bits (1 and 3)
    let mut marked = BitVec::from_zeros(64);
    marked.flip_bit(1);
    marked.flip_bit(3);
    let (head, tail) = marked.split_at_unchecked(2);
    assert_eq!(head.len, 2);
    assert_eq!(tail.len, 62);
    assert_eq!(head.get(0), Some(0));
    assert_eq!(head.get(1), Some(1));
    assert_eq!(tail.get(0), Some(0));
    assert_eq!(tail.get(1), Some(1));
    assert_eq!(tail.get_bits(2, 60), Some(0));

    // split at index 0: everything ends up in the second half
    let (head, tail) = BitVec::from_zeros(1000).split_at_unchecked(0);
    assert_eq!(head.len, 0);
    assert_eq!(tail.len, 1000);
    assert_eq!(tail.get(999), Some(0));

    // split at the end: everything stays in the first half
    let (head, tail) = BitVec::from_zeros(1000).split_at_unchecked(1000);
    assert_eq!(head.len, 1000);
    assert_eq!(tail.len, 0);
    assert_eq!(head.get(999), Some(0));

    // split exactly on a limb boundary
    let (head, tail) = BitVec::from_ones(128).split_at_unchecked(64);
    assert_eq!(head.len, 64);
    assert_eq!(tail.len, 64);
    assert_eq!(head.get_bits(0, 64), Some(u64::MAX));
    assert_eq!(tail.get_bits(0, 64), Some(u64::MAX));

    // split inside a single, partially filled limb
    let (head, tail) = BitVec::from_ones(20).split_at_unchecked(10);
    assert_eq!(head.len, 10);
    assert_eq!(tail.len, 10);

    // split an empty vector
    let (head, tail) = BitVec::default().split_at_unchecked(0);
    assert_eq!(head.len, 0);
    assert_eq!(tail.len, 0);
}
721+
722+
#[test]
fn test_split_at_result() {
    // split at index 1, the set bit ends up in the second half
    let mut two_bits = BitVec::from_zeros(2);
    two_bits.flip_bit(1);
    let (head, tail) = two_bits.split_at(1).expect("failed to split");
    assert_eq!(head.len, 1);
    assert_eq!(tail.len, 1);
    assert_eq!(head.get(0), Some(0));
    assert_eq!(tail.get(0), Some(1));

    // split at index 0
    let (head, tail) = BitVec::from_zeros(2)
        .split_at(0)
        .expect("failed to split");
    assert_eq!(head.len, 0);
    assert_eq!(tail.len, 2);

    // split at the end
    let (head, tail) = BitVec::from_zeros(2)
        .split_at(2)
        .expect("failed to split");
    assert_eq!(head.len, 2);
    assert_eq!(tail.len, 0);

    // split past the end is rejected
    assert!(BitVec::from_zeros(2).split_at(3).is_err());

    // split an empty vector
    let (head, tail) = BitVec::default()
        .split_at(0)
        .expect("failed to split");
    assert!(head.is_empty());
    assert!(tail.is_empty());
}
756+
757+
#[test]
fn test_splitting_limbs() {
    // This test might overlap with test_split_at.
    // It covers all variations of where the split point and the end of the vector can lie
    // relative to limb boundaries. Each case is (total length, split index); the bit at the
    // split index is set, so it must become the first bit of the second half.
    let cases: [(usize, usize); 6] = [
        // split inside a limb, with the end inside the next limb
        (68, 60),
        // split inside a limb, with the complete next limb being the final limb
        (128, 60),
        // split inside a limb, with a complete and then partial limb following
        (140, 60),
        // split at the beginning of a limb, with the end inside the next limb
        (144, 64),
        // split at the beginning of a limb, with the complete next limb being the final limb
        (192, 64),
        // split at the beginning of a limb, with a complete and then partial limb following
        (200, 64),
    ];

    for (total, at) in cases {
        let mut bv = BitVec::from_zeros(total);
        bv.flip_bit(at);
        let (left, right) = bv.split_at(at).expect("failed to split");
        assert_eq!(left.len, at);
        assert_eq!(right.len, total - at);
        assert_eq!(left.get(0), Some(0));
        assert_eq!(right.get(0), Some(1));
    }
}

src/trees/bp/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,18 @@ impl<const BLOCK_SIZE: usize> From<BitVec> for BpTree<BLOCK_SIZE> {
749749
}
750750
}
751751

752+
/// Converts a [`BpTree`] into a [`BitVec`] by consuming it.
///
/// The tree is first turned into its parentheses vector with `into_parentheses_vec`, which is
/// then converted with `into_bit_vec`.
impl<const BLOCK_SIZE: usize> From<BpTree<BLOCK_SIZE>> for BitVec {
    fn from(value: BpTree<BLOCK_SIZE>) -> Self {
        let parentheses = value.into_parentheses_vec();
        parentheses.into_bit_vec()
    }
}
757+
758+
impl<const BLOCK_SIZE: usize> From<BpTree<BLOCK_SIZE>> for RsVec {
759+
fn from(value: BpTree<BLOCK_SIZE>) -> Self {
760+
value.into_parentheses_vec()
761+
}
762+
}
763+
752764
/// An iterator over the children of a node.
753765
/// Calls to `next` return the next child node handle in the order they appear in the parenthesis
754766
/// expression.

0 commit comments

Comments
 (0)