Skip to content

Commit 9ba1ca1

Browse files
committed
Prefer reading the content store by store
1 parent 9be1e4b commit 9ba1ca1

File tree

1 file changed

+48
-27
lines changed

1 file changed

+48
-27
lines changed

src/bitmap/ops_with_serialized.rs

Lines changed: 48 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ impl RoaringBitmap {
5151

5252
fn intersection_with_serialized_impl<R, A, AErr, B, BErr>(
5353
&self,
54-
mut other: R,
54+
mut reader: R,
5555
a: A,
5656
b: B,
5757
) -> io::Result<RoaringBitmap>
@@ -64,9 +64,9 @@ impl RoaringBitmap {
6464
{
6565
// First read the cookie to determine which version of the format we are reading
6666
let (size, has_offsets, has_run_containers) = {
67-
let cookie = other.read_u32::<LittleEndian>()?;
67+
let cookie = reader.read_u32::<LittleEndian>()?;
6868
if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
69-
(other.read_u32::<LittleEndian>()? as usize, true, false)
69+
(reader.read_u32::<LittleEndian>()? as usize, true, false)
7070
} else if (cookie as u16) == SERIAL_COOKIE {
7171
let size = ((cookie >> 16) + 1) as usize;
7272
(size, size >= NO_OFFSET_THRESHOLD, true)
@@ -78,7 +78,7 @@ impl RoaringBitmap {
7878
// Read the run container bitmap if necessary
7979
let run_container_bitmap = if has_run_containers {
8080
let mut bitmap = vec![0u8; (size + 7) / 8];
81-
other.read_exact(&mut bitmap)?;
81+
reader.read_exact(&mut bitmap)?;
8282
Some(bitmap)
8383
} else {
8484
None
@@ -90,54 +90,75 @@ impl RoaringBitmap {
9090

9191
// Read the container descriptions
9292
let mut description_bytes = vec![0u8; size * DESCRIPTION_BYTES];
93-
other.read_exact(&mut description_bytes)?;
94-
let mut description_bytes: Vec<[u16; 2]> = pod_collect_to_vec(&description_bytes);
95-
description_bytes.iter_mut().for_each(|[ref mut k, ref mut c]| {
96-
*k = u16::from_le(*k);
97-
*c = u16::from_le(*c);
98-
});
93+
reader.read_exact(&mut description_bytes)?;
94+
let mut description_bytes = &description_bytes[..];
9995

10096
if has_offsets {
10197
let mut offsets = vec![0u8; size * OFFSET_BYTES];
102-
other.read_exact(&mut offsets)?;
103-
drop(offsets); // Not useful when deserializing into memory
98+
reader.read_exact(&mut offsets)?;
99+
drop(offsets); // We could use these offsets but we are lazy
104100
}
105101

106102
let mut containers = Vec::with_capacity(size);
107-
for container in &self.containers {
108-
let (i, key, cardinality) =
109-
match description_bytes.binary_search_by_key(&container.key, |[k, _]| *k) {
110-
Ok(index) => {
111-
let [key, cardinality] = description_bytes[index];
112-
(index, key, u64::from(cardinality) + 1)
113-
}
114-
Err(_) => continue,
115-
};
103+
104+
// Read each container
105+
for i in 0..size {
106+
let key = description_bytes.read_u16::<LittleEndian>()?;
107+
let container = match self.containers.binary_search_by_key(&key, |c| c.key) {
108+
Ok(index) => self.containers.get(index),
109+
Err(_) => None,
110+
};
111+
let cardinality = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
116112

117113
// If the run container bitmap is present, check if this container is a run container
118114
let is_run_container =
119115
run_container_bitmap.as_ref().map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);
120116

121117
let mut store = if is_run_container {
122-
todo!("support run containers")
118+
let runs = reader.read_u16::<LittleEndian>()?;
119+
let mut intervals = vec![[0, 0]; runs as usize];
120+
reader.read_exact(cast_slice_mut(&mut intervals))?;
121+
if container.is_none() {
122+
continue;
123+
}
124+
intervals.iter_mut().for_each(|[s, len]| {
125+
*s = u16::from_le(*s);
126+
*len = u16::from_le(*len);
127+
});
128+
129+
let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
130+
let mut store = Store::with_capacity(cardinality);
131+
intervals.into_iter().try_for_each(|[s, len]| -> Result<(), io::ErrorKind> {
132+
let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?;
133+
store.insert_range(RangeInclusive::new(s, end));
134+
Ok(())
135+
})?;
136+
store
123137
} else if cardinality <= ARRAY_LIMIT {
124138
let mut values = vec![0; cardinality as usize];
125-
other.read_exact(cast_slice_mut(&mut values))?;
139+
reader.read_exact(cast_slice_mut(&mut values))?;
140+
if container.is_none() {
141+
continue;
142+
}
126143
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
127144
let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
128145
Store::Array(array)
129146
} else {
130147
let mut values = Box::new([0; BITMAP_LENGTH]);
131-
other.read_exact(cast_slice_mut(&mut values[..]))?;
148+
reader.read_exact(cast_slice_mut(&mut values[..]))?;
149+
if container.is_none() {
150+
continue;
151+
}
132152
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
133153
let bitmap = b(cardinality, values)
134154
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
135155
Store::Bitmap(bitmap)
136156
};
137157

138-
store &= &container.store;
139-
140-
containers.push(Container { key, store });
158+
if let Some(container) = container {
159+
store &= &container.store;
160+
containers.push(Container { key, store });
161+
}
141162
}
142163

143164
Ok(RoaringBitmap { containers })

0 commit comments

Comments
 (0)