Skip to content

Commit 6aabe69

Browse files
authored
Merge pull request #255 from RoaringBitmap/deserialize-run-containers
Deserialize Run Containers
2 parents 25251c9 + 95664bd commit 6aabe69

File tree

7 files changed: 80 additions and 20 deletions.

README.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ Unless you explicitly state otherwise, any contribution intentionally submitted
4949
for inclusion in the work by you shall be dual licensed as above, without any
5050
additional terms or conditions.
5151

52-
[github-actions-badge]: https://img.shields.io/github/workflow/status/RoaringBitmap/roaring-rs/Continuous%20integration.svg?style=flat-square
52+
[github-actions-badge]:
53+
https://github.com/RoaringBitmap/roaring-rs/actions/workflows/test.yml/badge.svg
5354
[github-actions]: https://github.com/RoaringBitmap/roaring-rs/actions
5455
[release-badge]: https://img.shields.io/github/release/RoaringBitmap/roaring-rs.svg?style=flat-square
5556
[cargo]: https://crates.io/crates/roaring

src/bitmap/container.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::ops::{
66
use super::store::{self, Store};
77
use super::util;
88

9-
const ARRAY_LIMIT: u64 = 4096;
9+
pub const ARRAY_LIMIT: u64 = 4096;
1010

1111
#[derive(PartialEq, Clone)]
1212
pub struct Container {

src/bitmap/serialization.rs

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,19 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
33
use std::convert::{Infallible, TryFrom};
44
use std::error::Error;
55
use std::io;
6+
use std::ops::RangeInclusive;
67

7-
use super::container::Container;
8-
use crate::bitmap::store::{ArrayStore, BitmapStore, Store};
8+
use crate::bitmap::container::{Container, ARRAY_LIMIT};
9+
use crate::bitmap::store::{ArrayStore, BitmapStore, Store, BITMAP_LENGTH};
910
use crate::RoaringBitmap;
1011

1112
const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
1213
const SERIAL_COOKIE: u16 = 12347;
13-
// TODO: Need this once run containers are supported
14-
// const NO_OFFSET_THRESHOLD: u8 = 4;
14+
const NO_OFFSET_THRESHOLD: usize = 4;
15+
16+
// Sizes of header structures
17+
const DESCRIPTION_BYTES: usize = 4;
18+
const OFFSET_BYTES: usize = 4;
1519

1620
impl RoaringBitmap {
1721
/// Return the size in bytes of the serialized output.
@@ -163,49 +167,81 @@ impl RoaringBitmap {
163167
B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>,
164168
BErr: Error + Send + Sync + 'static,
165169
{
166-
let (size, has_offsets) = {
170+
// First read the cookie to determine which version of the format we are reading
171+
let (size, has_offsets, has_run_containers) = {
167172
let cookie = reader.read_u32::<LittleEndian>()?;
168173
if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
169-
(reader.read_u32::<LittleEndian>()? as usize, true)
174+
(reader.read_u32::<LittleEndian>()? as usize, true, false)
170175
} else if (cookie as u16) == SERIAL_COOKIE {
171-
return Err(io::Error::new(io::ErrorKind::Other, "run containers are unsupported"));
176+
let size = ((cookie >> 16) + 1) as usize;
177+
(size, size >= NO_OFFSET_THRESHOLD, true)
172178
} else {
173179
return Err(io::Error::new(io::ErrorKind::Other, "unknown cookie value"));
174180
}
175181
};
176182

183+
// Read the run container bitmap if necessary
184+
let run_container_bitmap = if has_run_containers {
185+
let mut bitmap = vec![0u8; (size + 7) / 8];
186+
reader.read_exact(&mut bitmap)?;
187+
Some(bitmap)
188+
} else {
189+
None
190+
};
191+
177192
if size > u16::MAX as usize + 1 {
178193
return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
179194
}
180195

181-
let mut description_bytes = vec![0u8; size * 4];
196+
// Read the container descriptions
197+
let mut description_bytes = vec![0u8; size * DESCRIPTION_BYTES];
182198
reader.read_exact(&mut description_bytes)?;
183199
let mut description_bytes = &description_bytes[..];
184200

185201
if has_offsets {
186-
let mut offsets = vec![0u8; size * 4];
202+
let mut offsets = vec![0u8; size * OFFSET_BYTES];
187203
reader.read_exact(&mut offsets)?;
188204
drop(offsets); // Not useful when deserializing into memory
189205
}
190206

191207
let mut containers = Vec::with_capacity(size);
192208

193-
for _ in 0..size {
209+
// Read each container
210+
for i in 0..size {
194211
let key = description_bytes.read_u16::<LittleEndian>()?;
195-
let len = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
212+
let cardinality = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
213+
214+
// If the run container bitmap is present, check if this container is a run container
215+
let is_run_container =
216+
run_container_bitmap.as_ref().map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);
217+
218+
let store = if is_run_container {
219+
let runs = reader.read_u16::<LittleEndian>()?;
220+
let mut intervals = vec![[0, 0]; runs as usize];
221+
reader.read_exact(cast_slice_mut(&mut intervals))?;
222+
intervals.iter_mut().for_each(|[s, len]| {
223+
*s = u16::from_le(*s);
224+
*len = u16::from_le(*len);
225+
});
196226

197-
let store = if len <= 4096 {
198-
let mut values = vec![0; len as usize];
227+
let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
228+
let mut store = Store::with_capacity(cardinality);
229+
intervals.into_iter().for_each(|[s, len]| {
230+
store.insert_range(RangeInclusive::new(s, s + len));
231+
});
232+
store
233+
} else if cardinality <= ARRAY_LIMIT {
234+
let mut values = vec![0; cardinality as usize];
199235
reader.read_exact(cast_slice_mut(&mut values))?;
200236
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
201237
let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
202238
Store::Array(array)
203239
} else {
204-
let mut values = Box::new([0; 1024]);
240+
let mut values = Box::new([0; BITMAP_LENGTH]);
205241
reader.read_exact(cast_slice_mut(&mut values[..]))?;
206242
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
207-
let bitmap =
208-
b(len, values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
243+
let bitmap = b(cardinality, values)
244+
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
209245
Store::Bitmap(bitmap)
210246
};
211247

src/bitmap/store/array_store/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ impl ArrayStore {
2121
ArrayStore { vec: vec![] }
2222
}
2323

24+
pub fn with_capacity(capacity: usize) -> ArrayStore {
25+
ArrayStore { vec: Vec::with_capacity(capacity) }
26+
}
27+
2428
///
2529
/// Create a new ArrayStore from a given vec
2630
/// It is up to the caller to ensure the vec is sorted and deduplicated

src/bitmap/store/mod.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@ use std::ops::{
77
};
88
use std::{slice, vec};
99

10-
use self::bitmap_store::BITMAP_LENGTH;
10+
pub use self::bitmap_store::BITMAP_LENGTH;
1111
use self::Store::{Array, Bitmap};
1212

1313
pub use self::array_store::ArrayStore;
1414
pub use self::bitmap_store::{BitmapIter, BitmapStore};
1515

16+
use crate::bitmap::container::ARRAY_LIMIT;
17+
1618
#[derive(Clone)]
1719
pub enum Store {
1820
Array(ArrayStore),
@@ -31,6 +33,14 @@ impl Store {
3133
Store::Array(ArrayStore::new())
3234
}
3335

36+
pub fn with_capacity(capacity: usize) -> Store {
37+
if capacity <= ARRAY_LIMIT as usize {
38+
Store::Array(ArrayStore::with_capacity(capacity))
39+
} else {
40+
Store::Bitmap(BitmapStore::new())
41+
}
42+
}
43+
3444
pub fn full() -> Store {
3545
Store::Bitmap(BitmapStore::full())
3646
}

tests/bitmapwithruns.bin

46.9 KB
Binary file not shown.

tests/serialization.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use roaring::RoaringBitmap;
44

55
// Test data from https://github.com/RoaringBitmap/RoaringFormatSpec/tree/master/testdata
66
static BITMAP_WITHOUT_RUNS: &[u8] = include_bytes!("bitmapwithoutruns.bin");
7+
static BITMAP_WITH_RUNS: &[u8] = include_bytes!("bitmapwithruns.bin");
78

89
fn test_data_bitmap() -> RoaringBitmap {
910
(0..100)
@@ -21,10 +22,18 @@ fn serialize_and_deserialize(bitmap: &RoaringBitmap) -> RoaringBitmap {
2122
}
2223

2324
#[test]
24-
fn test_deserialize_from_provided_data() {
25+
fn test_deserialize_without_runs_from_provided_data() {
2526
assert_eq!(RoaringBitmap::deserialize_from(BITMAP_WITHOUT_RUNS).unwrap(), test_data_bitmap());
2627
}
2728

29+
#[test]
30+
fn test_deserialize_with_runs_from_provided_data() {
31+
assert_eq!(
32+
RoaringBitmap::deserialize_from(&mut &BITMAP_WITH_RUNS[..]).unwrap(),
33+
test_data_bitmap()
34+
);
35+
}
36+
2837
#[test]
2938
fn test_serialize_into_provided_data() {
3039
let bitmap = test_data_bitmap();

0 commit comments

Comments
 (0)