Skip to content

Commit a4feb91

Browse files
committed
Optimized .count() for the iterators.
Added a private count_set_bits() method that counts set bits in O(number of words). Fixed the deprecated rng methods in the benchmark. Added tests and benchmarks for the new .count() functionality (#87).
1 parent c9c5577 commit a4feb91

File tree

2 files changed

+202
-11
lines changed

2 files changed

+202
-11
lines changed

benches/vob.rs

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,9 @@ fn split_off(c: &mut Criterion) {
6565
fn xor(c: &mut Criterion) {
6666
let mut v1 = Vob::with_capacity(N);
6767
let mut rng = Pcg64Mcg::seed_from_u64(RNG_SEED);
68-
v1.extend((0..N).map(|_| rng.gen::<bool>()));
68+
v1.extend((0..N).map(|_| rng.random::<bool>()));
6969
let mut v2 = Vob::with_capacity(N);
70-
v2.extend((0..N).map(|_| rng.gen::<bool>()));
70+
v2.extend((0..N).map(|_| rng.random::<bool>()));
7171

7272
c.bench_function("xor", |b| {
7373
b.iter(|| {
@@ -79,9 +79,9 @@ fn xor(c: &mut Criterion) {
7979
fn or(c: &mut Criterion) {
8080
let mut v1 = Vob::with_capacity(N);
8181
let mut rng = Pcg64Mcg::seed_from_u64(RNG_SEED);
82-
v1.extend((0..N).map(|_| rng.gen::<bool>()));
82+
v1.extend((0..N).map(|_| rng.random::<bool>()));
8383
let mut v2 = Vob::with_capacity(N);
84-
v2.extend((0..N).map(|_| rng.gen::<bool>()));
84+
v2.extend((0..N).map(|_| rng.random::<bool>()));
8585

8686
c.bench_function("or", |b| {
8787
b.iter(|| {
@@ -93,9 +93,9 @@ fn or(c: &mut Criterion) {
9393
fn and(c: &mut Criterion) {
9494
let mut v1 = Vob::with_capacity(N);
9595
let mut rng = Pcg64Mcg::seed_from_u64(RNG_SEED);
96-
v1.extend((0..N).map(|_| rng.gen::<bool>()));
96+
v1.extend((0..N).map(|_| rng.random::<bool>()));
9797
let mut v2 = Vob::with_capacity(N);
98-
v2.extend((0..N).map(|_| rng.gen::<bool>()));
98+
v2.extend((0..N).map(|_| rng.random::<bool>()));
9999

100100
c.bench_function("and", |b| {
101101
b.iter(|| {
@@ -105,38 +105,88 @@ fn and(c: &mut Criterion) {
105105
}
106106

107107
fn from_bytes(c: &mut Criterion) {
108-
let mut rng = rand::thread_rng();
108+
let mut rng = rand::rng();
109109
let mut v1 = [0u8; 1024];
110110
rng.fill(&mut v1);
111111
c.bench_function("from_bytes", |b| b.iter(|| Vob::from_bytes(&v1)));
112112
}
113113

114+
fn iter_bits(c: &mut Criterion) {
115+
let mut a = Vob::with_capacity(N);
116+
let mut rng = Pcg64Mcg::seed_from_u64(RNG_SEED);
117+
a.extend((0..N).map(|_| rng.random::<bool>()));
118+
c.bench_function("iter_bits", |b| {
119+
b.iter(|| a.iter().filter(|_| true).count())
120+
});
121+
}
122+
123+
// This benchmark can later be removed, since it only benchmarks Range::count() as a
124+
// comparison with the old way of doing it. (iter_bits() above)
125+
fn count_bits(c: &mut Criterion) {
126+
let mut a = Vob::with_capacity(N);
127+
let mut rng = Pcg64Mcg::seed_from_u64(RNG_SEED);
128+
a.extend((0..N).map(|_| rng.random::<bool>()));
129+
c.bench_function("count_bits", |b| b.iter(|| a.iter().count()));
130+
}
131+
114132
fn iter_set_bits(c: &mut Criterion) {
115133
let mut a = Vob::with_capacity(N);
116134
let mut rng = Pcg64Mcg::seed_from_u64(RNG_SEED);
117-
a.extend((0..N).map(|_| rng.gen::<bool>()));
118-
c.bench_function("iter_set_bits", |b| b.iter(|| a.iter_set_bits(..).count()));
135+
a.extend((0..N).map(|_| rng.random::<bool>()));
136+
c.bench_function("iter_set_bits", |b| {
137+
b.iter(|| a.iter_set_bits(..).filter(|_| true).count())
138+
});
139+
}
140+
141+
fn count_set_bits(c: &mut Criterion) {
142+
let mut a = Vob::with_capacity(N);
143+
let mut rng = Pcg64Mcg::seed_from_u64(RNG_SEED);
144+
a.extend((0..N).map(|_| rng.random::<bool>()));
145+
c.bench_function("count_set_bits", |b| b.iter(|| a.iter_set_bits(..).count()));
119146
}
120147

121148
fn iter_set_bits_u8(c: &mut Criterion) {
122149
let mut a = Vob::<u8>::new_with_storage_type(N);
123150
let mut rng = Pcg64Mcg::seed_from_u64(RNG_SEED);
124-
a.extend((0..N).map(|_| rng.gen::<bool>()));
151+
a.extend((0..N).map(|_| rng.random::<bool>()));
125152
c.bench_function("iter_set_bits_u8", |b| {
153+
b.iter(|| a.iter_set_bits(..).filter(|_| true).count())
154+
});
155+
}
156+
157+
fn count_set_bits_u8(c: &mut Criterion) {
158+
let mut a = Vob::<u8>::new_with_storage_type(N);
159+
let mut rng = Pcg64Mcg::seed_from_u64(RNG_SEED);
160+
a.extend((0..N).map(|_| rng.random::<bool>()));
161+
c.bench_function("count_set_bits_u8", |b| {
126162
b.iter(|| a.iter_set_bits(..).count())
127163
});
128164
}
129165

130166
fn iter_all_set_bits(c: &mut Criterion) {
131167
let a = Vob::from_elem(true, N);
132168
c.bench_function("iter_all_set_bits", |b| {
169+
b.iter(|| a.iter_set_bits(..).filter(|_| true).count())
170+
});
171+
}
172+
173+
fn count_all_set_bits(c: &mut Criterion) {
174+
let a = Vob::from_elem(true, N);
175+
c.bench_function("count_all_set_bits", |b| {
133176
b.iter(|| a.iter_set_bits(..).count())
134177
});
135178
}
136179

137180
fn iter_all_unset_bits(c: &mut Criterion) {
138181
let a = Vob::from_elem(true, N);
139182
c.bench_function("iter_all_unset_bits", |b| {
183+
b.iter(|| a.iter_unset_bits(..).filter(|_| true).count())
184+
});
185+
}
186+
187+
fn count_all_unset_bits(c: &mut Criterion) {
188+
let a = Vob::from_elem(true, N);
189+
c.bench_function("count_all_unset_bits", |b| {
140190
b.iter(|| a.iter_unset_bits(..).count())
141191
});
142192
}
@@ -152,9 +202,15 @@ criterion_group!(
152202
or,
153203
and,
154204
from_bytes,
205+
iter_bits,
206+
count_bits,
155207
iter_set_bits,
208+
count_set_bits,
156209
iter_set_bits_u8,
210+
count_set_bits_u8,
157211
iter_all_set_bits,
158-
iter_all_unset_bits
212+
count_all_set_bits,
213+
iter_all_unset_bits,
214+
count_all_unset_bits
159215
);
160216
criterion_main!(benches);

src/lib.rs

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,66 @@ impl<T: Debug + PrimInt> Vob<T> {
585585
}
586586
}
587587

588+
/// Counts the number of set bits.
589+
/// This method assumes the range is processed with process_range()
590+
fn count_set_bits(&self, range: Range<usize>) -> usize {
591+
use std::convert::TryFrom;
592+
593+
// Early return for empty ranges
594+
if range.is_empty() {
595+
return 0;
596+
}
597+
let start_off = block_offset::<T>(range.start);
598+
debug_assert!(
599+
start_off < self.len(),
600+
"start_off {} >= self.len {}",
601+
start_off,
602+
self.len()
603+
);
604+
605+
// This -1 arithmetic is safe: empty ranges returned early above, so range.end > 0 here.
606+
let end_off = blocks_required::<T>(range.end) - 1;
607+
608+
if start_off == end_off {
609+
// Range entirely within one word
610+
let b = self.vec[start_off];
611+
let start_bit = range.start % bits_per_block::<T>();
612+
let end_bit = range.end % bits_per_block::<T>();
613+
614+
// Remove bits before start_bit and bits after end_bit
615+
let count = if end_bit == 0 {
616+
// end_bit = 0 means we want everything from start_bit to end of word
617+
// After the right shift, we have what we want
618+
b >> start_bit
619+
} else {
620+
// We want bits from start_bit to end_bit
621+
// After right shift, we need to remove the high bits
622+
(b >> start_bit) << (start_bit + bits_per_block::<T>() - end_bit)
623+
}
624+
.count_ones();
625+
return usize::try_from(count).unwrap();
626+
}
627+
628+
// First word: shift out bits before start_bit
629+
let start_bit = range.start % bits_per_block::<T>();
630+
let mut count = usize::try_from((self.vec[start_off] >> start_bit).count_ones()).unwrap();
631+
632+
// Middle words
633+
for word_idx in (start_off + 1)..end_off {
634+
count += usize::try_from(self.vec[word_idx].count_ones()).unwrap();
635+
}
636+
637+
// Last word: shift out bits after end_bit
638+
let end_bit = range.end % bits_per_block::<T>();
639+
let count_ones = if end_bit == 0 {
640+
// end_bit = 0 means we want to count the entire end_off word
641+
self.vec[end_off].count_ones()
642+
} else {
643+
(self.vec[end_off] << (bits_per_block::<T>() - end_bit)).count_ones()
644+
};
645+
count + usize::try_from(count_ones).unwrap()
646+
}
647+
588648
/// Returns an iterator which efficiently produces the index of each unset bit in the specified
589649
/// range. Assuming appropriate support from your CPU, this is much more efficient than
590650
/// checking each bit individually.
@@ -1082,6 +1142,10 @@ impl<T: Debug + PrimInt> Iterator for Iter<'_, T> {
10821142
fn size_hint(&self) -> (usize, Option<usize>) {
10831143
self.range.size_hint()
10841144
}
1145+
1146+
fn count(self) -> usize {
1147+
self.range.count()
1148+
}
10851149
}
10861150

10871151
impl<T: Debug + PrimInt> DoubleEndedIterator for Iter<'_, T> {
@@ -1148,6 +1212,10 @@ impl<T: Debug + PrimInt> Iterator for IterSetBits<'_, T> {
11481212
fn size_hint(&self) -> (usize, Option<usize>) {
11491213
self.range.size_hint()
11501214
}
1215+
1216+
fn count(self) -> usize {
1217+
self.vob.count_set_bits(self.range)
1218+
}
11511219
}
11521220

11531221
impl<T: Debug + PrimInt> DoubleEndedIterator for IterSetBits<'_, T> {
@@ -1228,6 +1296,12 @@ impl<T: Debug + PrimInt> Iterator for IterUnsetBits<'_, T> {
12281296
fn size_hint(&self) -> (usize, Option<usize>) {
12291297
self.range.size_hint()
12301298
}
1299+
1300+
fn count(self) -> usize {
1301+
// This arithmetic is safe because (self.range.end - self.range.start) is the total number of bits,
1302+
// and self.vob.count_set_bits() always returns a value less than or equal to that.
1303+
(self.range.end - self.range.start) - self.vob.count_set_bits(self.range)
1304+
}
12311305
}
12321306

12331307
impl<T: Debug + PrimInt> DoubleEndedIterator for IterUnsetBits<'_, T> {
@@ -1300,6 +1374,10 @@ impl<T: Debug + PrimInt> Iterator for StorageIter<'_, T> {
13001374
fn size_hint(&self) -> (usize, Option<usize>) {
13011375
self.iter.size_hint()
13021376
}
1377+
1378+
fn count(self) -> usize {
1379+
self.iter.count()
1380+
}
13031381
}
13041382

13051383
#[inline(always)]
@@ -1974,6 +2052,27 @@ mod tests {
19742052
for _ in 0..len {
19752053
vob.push(rng.random());
19762054
}
2055+
// These tests can later be dialed down, as they noticeably slow down every random vob test.
2056+
assert_eq!(
2057+
vob.iter_set_bits(..).count(),
2058+
vob.iter_set_bits(..).filter(|_| true).count()
2059+
);
2060+
assert_eq!(
2061+
vob.iter_unset_bits(..).count(),
2062+
vob.iter_unset_bits(..).filter(|_| true).count()
2063+
);
2064+
if len > 2 {
2065+
// trigger the edge cases of count_set_bits()
2066+
let range = 1..len - 1;
2067+
assert_eq!(
2068+
vob.iter_set_bits(range.clone()).count(),
2069+
vob.iter_set_bits(range.clone()).filter(|_| true).count()
2070+
);
2071+
assert_eq!(
2072+
vob.iter_unset_bits(range.clone()).count(),
2073+
vob.iter_unset_bits(range.clone()).filter(|_| true).count()
2074+
);
2075+
}
19772076
vob
19782077
}
19792078

@@ -2047,4 +2146,40 @@ mod tests {
20472146
v.push(true);
20482147
assert_eq!(v.vec.len(), 1);
20492148
}
2149+
2150+
#[test]
2151+
fn test_count() {
2152+
let mut rng = rand::rng();
2153+
2154+
for test_len in 1..128 {
2155+
let vob = random_vob(test_len);
2156+
assert_eq!(
2157+
vob.iter_storage().count(),
2158+
vob.iter_storage().filter(|_| true).count()
2159+
);
2160+
assert_eq!(vob.iter().count(), vob.iter().filter(|_| true).count());
2161+
for i in 1..test_len - 1 {
2162+
let from = rng.random_range(0..i);
2163+
if from == i {
2164+
continue;
2165+
}
2166+
let to = rng.random_range(from..i);
2167+
assert_eq!(
2168+
vob.iter_set_bits(from..to).count(),
2169+
vob.iter_set_bits(from..to).filter(|_| true).count()
2170+
);
2171+
assert_eq!(
2172+
vob.iter_unset_bits(from..to).count(),
2173+
vob.iter_unset_bits(from..to).filter(|_| true).count()
2174+
);
2175+
}
2176+
}
2177+
}
2178+
2179+
#[test]
2180+
fn test_collect_capacity() {
2181+
// a test to make sure that iter_set_bits().collect() does not always allocate .len() elements
2182+
let vec: Vec<usize> = Vob::from_elem(false, 100).iter_set_bits(..).collect();
2183+
assert_eq!(vec.capacity(), 0);
2184+
}
20502185
}

0 commit comments

Comments
 (0)