Skip to content

Commit 88b848b

Browse files
committed
Use containers offsets when available
1 parent b92f110 commit 88b848b

File tree

1 file changed

+97
-8
lines changed

1 file changed

+97
-8
lines changed

src/bitmap/ops_with_serialized.rs

Lines changed: 97 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::ops::RangeInclusive;
88

99
use crate::bitmap::container::Container;
1010
use crate::bitmap::serialization::{
11-
NO_OFFSET_THRESHOLD, OFFSET_BYTES, SERIAL_COOKIE, SERIAL_COOKIE_NO_RUNCONTAINER,
11+
NO_OFFSET_THRESHOLD, SERIAL_COOKIE, SERIAL_COOKIE_NO_RUNCONTAINER,
1212
};
1313
use crate::RoaringBitmap;
1414

@@ -93,21 +93,35 @@ impl RoaringBitmap {
9393
}
9494

9595
// Read the container descriptions
96-
let mut description_bytes = vec![[0u16; 2]; size];
97-
reader.read_exact(cast_slice_mut(&mut description_bytes))?;
98-
description_bytes.iter_mut().for_each(|[ref mut key, ref mut len]| {
96+
let mut descriptions = vec![[0; 2]; size];
97+
reader.read_exact(cast_slice_mut(&mut descriptions))?;
98+
descriptions.iter_mut().for_each(|[ref mut key, ref mut len]| {
9999
*key = u16::from_le(*key);
100100
*len = u16::from_le(*len);
101101
});
102102

103-
104103
if has_offsets {
105-
// I could use these offsets but I am a lazy developer (for now)
106-
reader.seek(SeekFrom::Current((size * OFFSET_BYTES) as i64))?;
104+
let mut offsets = vec![0; size];
105+
reader.read_exact(cast_slice_mut(&mut offsets))?;
106+
offsets.iter_mut().for_each(|offset| *offset = u32::from_le(*offset));
107+
108+
// Loop on the materialized containers if there
109+
// are no more of them than serialized ones.
110+
if self.containers.len() <= size {
111+
return self.intersection_with_serialized_impl_with_offsets(
112+
reader,
113+
a,
114+
b,
115+
&descriptions,
116+
&offsets,
117+
run_container_bitmap.as_deref(),
118+
);
119+
}
107120
}
108121

109122
// Read each container and skip the useless ones
110-
for (i, &[key, len_minus_one]) in description_bytes.iter().enumerate() {
123+
let mut containers = Vec::new();
124+
for (i, &[key, len_minus_one]) in descriptions.iter().enumerate() {
111125
let container = match self.containers.binary_search_by_key(&key, |c| c.key) {
112126
Ok(index) => self.containers.get(index),
113127
Err(_) => None,
@@ -191,6 +205,81 @@ impl RoaringBitmap {
191205

192206
Ok(RoaringBitmap { containers })
193207
}
208+
209+
fn intersection_with_serialized_impl_with_offsets<R, A, AErr, B, BErr>(
210+
&self,
211+
mut reader: R,
212+
a: A,
213+
b: B,
214+
descriptions: &[[u16; 2]],
215+
offsets: &[u32],
216+
run_container_bitmap: Option<&[u8]>,
217+
) -> io::Result<RoaringBitmap>
218+
where
219+
R: io::Read + io::Seek,
220+
A: Fn(Vec<u16>) -> Result<ArrayStore, AErr>,
221+
AErr: Error + Send + Sync + 'static,
222+
B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>,
223+
BErr: Error + Send + Sync + 'static,
224+
{
225+
let mut containers = Vec::new();
226+
for container in &self.containers {
227+
let i = match descriptions.binary_search_by_key(&container.key, |[k, _]| *k) {
228+
Ok(index) => index,
229+
Err(_) => continue,
230+
};
231+
232+
// Seek to the bytes of the container we want.
233+
reader.seek(SeekFrom::Start(offsets[i] as u64))?;
234+
235+
let [key, len_minus_one] = descriptions[i];
236+
let cardinality = u64::from(len_minus_one) + 1;
237+
238+
// If the run container bitmap is present, check if this container is a run container
239+
let is_run_container =
240+
run_container_bitmap.as_ref().map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);
241+
242+
let store = if is_run_container {
243+
let runs = reader.read_u16::<LittleEndian>().unwrap();
244+
let mut intervals = vec![[0, 0]; runs as usize];
245+
reader.read_exact(cast_slice_mut(&mut intervals)).unwrap();
246+
intervals.iter_mut().for_each(|[s, len]| {
247+
*s = u16::from_le(*s);
248+
*len = u16::from_le(*len);
249+
});
250+
251+
let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
252+
let mut store = Store::with_capacity(cardinality);
253+
intervals.into_iter().try_for_each(|[s, len]| -> Result<(), io::ErrorKind> {
254+
let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?;
255+
store.insert_range(RangeInclusive::new(s, end));
256+
Ok(())
257+
})?;
258+
store
259+
} else if cardinality <= ARRAY_LIMIT {
260+
let mut values = vec![0; cardinality as usize];
261+
reader.read_exact(cast_slice_mut(&mut values)).unwrap();
262+
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
263+
let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
264+
Store::Array(array)
265+
} else {
266+
let mut values = Box::new([0; BITMAP_LENGTH]);
267+
reader.read_exact(cast_slice_mut(&mut values[..])).unwrap();
268+
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
269+
let bitmap = b(cardinality, values)
270+
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
271+
Store::Bitmap(bitmap)
272+
};
273+
274+
let mut other_container = Container { key, store };
275+
other_container &= container;
276+
if !other_container.is_empty() {
277+
containers.push(other_container);
278+
}
279+
}
280+
281+
Ok(RoaringBitmap { containers })
282+
}
194283
}
195284

196285
#[cfg(test)]

0 commit comments

Comments
 (0)