Skip to content

Commit 95432ec

Browse files
committed
Do the intersections on containers to ensure correct store type
1 parent 9ba1ca1 commit 95432ec

File tree

5 files changed

+108
-41
lines changed

5 files changed

+108
-41
lines changed

src/bitmap/container.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ impl Container {
3737
self.store.len()
3838
}
3939

40+
pub fn is_empty(&self) -> bool {
41+
self.store.is_empty()
42+
}
43+
4044
pub fn insert(&mut self, index: u16) -> bool {
4145
if self.store.insert(index) {
4246
self.ensure_correct_store();

src/bitmap/ops_with_serialized.rs

Lines changed: 89 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
use bytemuck::{cast_slice_mut, pod_collect_to_vec};
1+
use bytemuck::cast_slice_mut;
22
use byteorder::{LittleEndian, ReadBytesExt};
33
use core::convert::Infallible;
4-
use core::ops::{BitAndAssign, RangeInclusive};
4+
use core::mem;
5+
use core::ops::RangeInclusive;
56
use std::error::Error;
6-
use std::io;
7+
use std::io::{self, SeekFrom};
78

89
use crate::bitmap::container::Container;
910
use crate::bitmap::serialization::{
@@ -39,7 +40,7 @@ impl RoaringBitmap {
3940
// TODO convert this into a trait
4041
pub fn intersection_with_serialized_unchecked<R>(&self, other: R) -> io::Result<RoaringBitmap>
4142
where
42-
R: io::Read,
43+
R: io::Read + io::Seek,
4344
{
4445
RoaringBitmap::intersection_with_serialized_impl::<R, _, Infallible, _, Infallible>(
4546
self,
@@ -56,7 +57,7 @@ impl RoaringBitmap {
5657
b: B,
5758
) -> io::Result<RoaringBitmap>
5859
where
59-
R: io::Read,
60+
R: io::Read + io::Seek,
6061
A: Fn(Vec<u16>) -> Result<ArrayStore, AErr>,
6162
AErr: Error + Send + Sync + 'static,
6263
B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>,
@@ -101,7 +102,7 @@ impl RoaringBitmap {
101102

102103
let mut containers = Vec::with_capacity(size);
103104

104-
// Read each container
105+
// Read each container and skip the useless ones
105106
for i in 0..size {
106107
let key = description_bytes.read_u16::<LittleEndian>()?;
107108
let container = match self.containers.binary_search_by_key(&key, |c| c.key) {
@@ -114,53 +115,100 @@ impl RoaringBitmap {
114115
let is_run_container =
115116
run_container_bitmap.as_ref().map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);
116117

117-
let mut store = if is_run_container {
118+
let store = if is_run_container {
118119
let runs = reader.read_u16::<LittleEndian>()?;
119-
let mut intervals = vec![[0, 0]; runs as usize];
120-
reader.read_exact(cast_slice_mut(&mut intervals))?;
121-
if container.is_none() {
122-
continue;
120+
match container {
121+
Some(_) => {
122+
let mut intervals = vec![[0, 0]; runs as usize];
123+
reader.read_exact(cast_slice_mut(&mut intervals))?;
124+
intervals.iter_mut().for_each(|[s, len]| {
125+
*s = u16::from_le(*s);
126+
*len = u16::from_le(*len);
127+
});
128+
129+
let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
130+
let mut store = Store::with_capacity(cardinality);
131+
intervals.into_iter().try_for_each(
132+
|[s, len]| -> Result<(), io::ErrorKind> {
133+
let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?;
134+
store.insert_range(RangeInclusive::new(s, end));
135+
Ok(())
136+
},
137+
)?;
138+
store
139+
}
140+
None => {
141+
let runs_size = mem::size_of::<u16>() * 2 * runs as usize;
142+
reader.seek(SeekFrom::Current(runs_size as i64))?;
143+
continue;
144+
}
123145
}
124-
intervals.iter_mut().for_each(|[s, len]| {
125-
*s = u16::from_le(*s);
126-
*len = u16::from_le(*len);
127-
});
128-
129-
let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
130-
let mut store = Store::with_capacity(cardinality);
131-
intervals.into_iter().try_for_each(|[s, len]| -> Result<(), io::ErrorKind> {
132-
let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?;
133-
store.insert_range(RangeInclusive::new(s, end));
134-
Ok(())
135-
})?;
136-
store
137146
} else if cardinality <= ARRAY_LIMIT {
138-
let mut values = vec![0; cardinality as usize];
139-
reader.read_exact(cast_slice_mut(&mut values))?;
140-
if container.is_none() {
141-
continue;
147+
match container {
148+
Some(_) => {
149+
let mut values = vec![0; cardinality as usize];
150+
reader.read_exact(cast_slice_mut(&mut values))?;
151+
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
152+
let array =
153+
a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
154+
Store::Array(array)
155+
}
156+
None => {
157+
let array_size = mem::size_of::<u16>() * cardinality as usize;
158+
reader.seek(SeekFrom::Current(array_size as i64))?;
159+
continue;
160+
}
142161
}
143-
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
144-
let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
145-
Store::Array(array)
146162
} else {
147-
let mut values = Box::new([0; BITMAP_LENGTH]);
148-
reader.read_exact(cast_slice_mut(&mut values[..]))?;
149-
if container.is_none() {
150-
continue;
163+
match container {
164+
Some(_) => {
165+
let mut values = Box::new([0; BITMAP_LENGTH]);
166+
reader.read_exact(cast_slice_mut(&mut values[..]))?;
167+
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
168+
let bitmap = b(cardinality, values)
169+
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
170+
Store::Bitmap(bitmap)
171+
}
172+
None => {
173+
let bitmap_size = mem::size_of::<u64>() * BITMAP_LENGTH;
174+
reader.seek(SeekFrom::Current(bitmap_size as i64))?;
175+
continue;
176+
}
151177
}
152-
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
153-
let bitmap = b(cardinality, values)
154-
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
155-
Store::Bitmap(bitmap)
156178
};
157179

158180
if let Some(container) = container {
159-
store &= &container.store;
160-
containers.push(Container { key, store });
181+
let mut tmp_container = Container { key, store };
182+
tmp_container &= container;
183+
if !tmp_container.is_empty() {
184+
containers.push(tmp_container);
185+
}
161186
}
162187
}
163188

164189
Ok(RoaringBitmap { containers })
165190
}
166191
}
192+
193+
#[cfg(test)]
194+
#[cfg(feature = "std")] // We need this to serialize bitmaps
195+
mod test {
196+
use crate::RoaringBitmap;
197+
use proptest::prelude::*;
198+
use std::io::Cursor;
199+
200+
// fast count tests
201+
proptest! {
202+
#[test]
203+
fn intersection_with_serialized_eq_materialized_intersection(
204+
a in RoaringBitmap::arbitrary(),
205+
b in RoaringBitmap::arbitrary()
206+
) {
207+
let mut serialized_bytes_b = Vec::new();
208+
b.serialize_into(&mut serialized_bytes_b).unwrap();
209+
let serialized_bytes_b = &serialized_bytes_b[..];
210+
211+
prop_assert_eq!(a.intersection_with_serialized_unchecked(Cursor::new(serialized_bytes_b)).unwrap(), a & b);
212+
}
213+
}
214+
}

src/bitmap/store/array_store/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,10 @@ impl ArrayStore {
200200
self.vec.len() as u64
201201
}
202202

203+
pub fn is_empty(&self) -> bool {
204+
self.vec.is_empty()
205+
}
206+
203207
pub fn min(&self) -> Option<u16> {
204208
self.vec.first().copied()
205209
}

src/bitmap/store/bitmap_store.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,10 @@ impl BitmapStore {
241241
self.len
242242
}
243243

244+
pub fn is_empty(&self) -> bool {
245+
self.len == 0
246+
}
247+
244248
pub fn min(&self) -> Option<u16> {
245249
self.bits
246250
.iter()

src/bitmap/store/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,13 @@ impl Store {
177177
}
178178
}
179179

180+
pub fn is_empty(&self) -> bool {
181+
match self {
182+
Array(vec) => vec.is_empty(),
183+
Bitmap(bits) => bits.is_empty(),
184+
}
185+
}
186+
180187
pub fn min(&self) -> Option<u16> {
181188
match self {
182189
Array(vec) => vec.min(),

0 commit comments

Comments
 (0)