Skip to content

Commit 0a11dd0

Browse files
committed
Use container offsets when available
1 parent b92f110 commit 0a11dd0

File tree

1 file changed

+74
-7
lines changed

1 file changed

+74
-7
lines changed

src/bitmap/ops_with_serialized.rs

Lines changed: 74 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::ops::RangeInclusive;
88

99
use crate::bitmap::container::Container;
1010
use crate::bitmap::serialization::{
11-
NO_OFFSET_THRESHOLD, OFFSET_BYTES, SERIAL_COOKIE, SERIAL_COOKIE_NO_RUNCONTAINER,
11+
NO_OFFSET_THRESHOLD, SERIAL_COOKIE, SERIAL_COOKIE_NO_RUNCONTAINER,
1212
};
1313
use crate::RoaringBitmap;
1414

@@ -93,21 +93,88 @@ impl RoaringBitmap {
9393
}
9494

9595
// Read the container descriptions
96-
let mut description_bytes = vec![[0u16; 2]; size];
97-
reader.read_exact(cast_slice_mut(&mut description_bytes))?;
98-
description_bytes.iter_mut().for_each(|[ref mut key, ref mut len]| {
96+
let mut descriptions = vec![[0; 2]; size];
97+
reader.read_exact(cast_slice_mut(&mut descriptions))?;
98+
descriptions.iter_mut().for_each(|[ref mut key, ref mut len]| {
9999
*key = u16::from_le(*key);
100100
*len = u16::from_le(*len);
101101
});
102102

103+
let mut containers = Vec::new();
103104

104105
if has_offsets {
105-
// I could use these offsets but I am a lazy developer (for now)
106-
reader.seek(SeekFrom::Current((size * OFFSET_BYTES) as i64))?;
106+
let mut offsets = vec![0; size];
107+
reader.read_exact(cast_slice_mut(&mut offsets))?;
108+
offsets.iter_mut().for_each(|offset| *offset = u32::from_le(*offset));
109+
110+
// Loop on the materialized containers if there
111+
// are fewer of them than, or as many as, the serialized ones.
112+
if self.containers.len() <= size {
113+
for container in &self.containers {
114+
let i = match descriptions.binary_search_by_key(&container.key, |[k, _]| *k) {
115+
Ok(index) => index,
116+
Err(_) => continue,
117+
};
118+
119+
// Seek to the bytes of the container we want.
120+
reader.seek(SeekFrom::Start(offsets[i] as u64))?;
121+
122+
let [key, len_minus_one] = descriptions[i];
123+
let cardinality = u64::from(len_minus_one) + 1;
124+
125+
// If the run container bitmap is present, check if this container is a run container
126+
let is_run_container = run_container_bitmap
127+
.as_ref()
128+
.map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);
129+
130+
let store = if is_run_container {
131+
let runs = reader.read_u16::<LittleEndian>().unwrap();
132+
let mut intervals = vec![[0, 0]; runs as usize];
133+
reader.read_exact(cast_slice_mut(&mut intervals)).unwrap();
134+
intervals.iter_mut().for_each(|[s, len]| {
135+
*s = u16::from_le(*s);
136+
*len = u16::from_le(*len);
137+
});
138+
139+
let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
140+
let mut store = Store::with_capacity(cardinality);
141+
intervals.into_iter().try_for_each(
142+
|[s, len]| -> Result<(), io::ErrorKind> {
143+
let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?;
144+
store.insert_range(RangeInclusive::new(s, end));
145+
Ok(())
146+
},
147+
)?;
148+
store
149+
} else if cardinality <= ARRAY_LIMIT {
150+
let mut values = vec![0; cardinality as usize];
151+
reader.read_exact(cast_slice_mut(&mut values)).unwrap();
152+
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
153+
let array =
154+
a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
155+
Store::Array(array)
156+
} else {
157+
let mut values = Box::new([0; BITMAP_LENGTH]);
158+
reader.read_exact(cast_slice_mut(&mut values[..])).unwrap();
159+
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
160+
let bitmap = b(cardinality, values)
161+
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
162+
Store::Bitmap(bitmap)
163+
};
164+
165+
let mut other_container = Container { key, store };
166+
other_container &= container;
167+
if !other_container.is_empty() {
168+
containers.push(other_container);
169+
}
170+
}
171+
172+
return Ok(RoaringBitmap { containers });
173+
}
107174
}
108175

109176
// Read each container and skip the useless ones
110-
for (i, &[key, len_minus_one]) in description_bytes.iter().enumerate() {
177+
for (i, &[key, len_minus_one]) in descriptions.iter().enumerate() {
111178
let container = match self.containers.binary_search_by_key(&key, |c| c.key) {
112179
Ok(index) => self.containers.get(index),
113180
Err(_) => None,

0 commit comments

Comments
 (0)