@@ -3,15 +3,19 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
33use std:: convert:: { Infallible , TryFrom } ;
44use std:: error:: Error ;
55use std:: io;
6+ use std:: ops:: RangeInclusive ;
67
7- use super :: container:: Container ;
8- use crate :: bitmap:: store:: { ArrayStore , BitmapStore , Store } ;
8+ use crate :: bitmap :: container:: { Container , ARRAY_LIMIT } ;
9+ use crate :: bitmap:: store:: { ArrayStore , BitmapStore , Store , BITMAP_LENGTH } ;
910use crate :: RoaringBitmap ;
1011
// Cookie value that opens a stream written without run containers; when the
// reader sees it, the container count follows as a separate little-endian u32.
1112const SERIAL_COOKIE_NO_RUNCONTAINER : u32 = 12346 ;
// Cookie value compared against the low 16 bits of the first u32. When it
// matches, the high 16 bits hold the container count minus one and a
// run-container bitmap is read next.
1213const SERIAL_COOKIE : u16 = 12347 ;
13- // TODO: Need this once run containers are supported
14- // const NO_OFFSET_THRESHOLD: u8 = 4;
// In the SERIAL_COOKIE format, the offset header is only read when the
// container count is at least this value.
14+ const NO_OFFSET_THRESHOLD : usize = 4 ;
15+
16+ // Sizes of header structures
// Bytes per container description; the reader decodes two u16 values
// (key, then cardinality minus one) from each entry.
17+ const DESCRIPTION_BYTES : usize = 4 ;
// Bytes per container entry in the offset header, which the in-memory
// deserializer reads and then discards.
18+ const OFFSET_BYTES : usize = 4 ;
1519
1620impl RoaringBitmap {
1721 /// Return the size in bytes of the serialized output.
@@ -163,49 +167,81 @@ impl RoaringBitmap {
163167 B : Fn ( u64 , Box < [ u64 ; 1024 ] > ) -> Result < BitmapStore , BErr > ,
164168 BErr : Error + Send + Sync + ' static ,
165169 {
166- let ( size, has_offsets) = {
170+ // First read the cookie to determine which version of the format we are reading
171+ let ( size, has_offsets, has_run_containers) = {
167172 let cookie = reader. read_u32 :: < LittleEndian > ( ) ?;
168173 if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
169- ( reader. read_u32 :: < LittleEndian > ( ) ? as usize , true )
174+ ( reader. read_u32 :: < LittleEndian > ( ) ? as usize , true , false )
170175 } else if ( cookie as u16 ) == SERIAL_COOKIE {
171- return Err ( io:: Error :: new ( io:: ErrorKind :: Other , "run containers are unsupported" ) ) ;
176+ let size = ( ( cookie >> 16 ) + 1 ) as usize ;
177+ ( size, size >= NO_OFFSET_THRESHOLD , true )
172178 } else {
173179 return Err ( io:: Error :: new ( io:: ErrorKind :: Other , "unknown cookie value" ) ) ;
174180 }
175181 } ;
176182
183+ // Read the run container bitmap if necessary
184+ let run_container_bitmap = if has_run_containers {
185+ let mut bitmap = vec ! [ 0u8 ; ( size + 7 ) / 8 ] ;
186+ reader. read_exact ( & mut bitmap) ?;
187+ Some ( bitmap)
188+ } else {
189+ None
190+ } ;
191+
177192 if size > u16:: MAX as usize + 1 {
178193 return Err ( io:: Error :: new ( io:: ErrorKind :: Other , "size is greater than supported" ) ) ;
179194 }
180195
181- let mut description_bytes = vec ! [ 0u8 ; size * 4 ] ;
196+ // Read the container descriptions
197+ let mut description_bytes = vec ! [ 0u8 ; size * DESCRIPTION_BYTES ] ;
182198 reader. read_exact ( & mut description_bytes) ?;
183199 let mut description_bytes = & description_bytes[ ..] ;
184200
185201 if has_offsets {
186- let mut offsets = vec ! [ 0u8 ; size * 4 ] ;
202+ let mut offsets = vec ! [ 0u8 ; size * OFFSET_BYTES ] ;
187203 reader. read_exact ( & mut offsets) ?;
188204 drop ( offsets) ; // Not useful when deserializing into memory
189205 }
190206
191207 let mut containers = Vec :: with_capacity ( size) ;
192208
193- for _ in 0 ..size {
209+ // Read each container
210+ for i in 0 ..size {
194211 let key = description_bytes. read_u16 :: < LittleEndian > ( ) ?;
195- let len = u64:: from ( description_bytes. read_u16 :: < LittleEndian > ( ) ?) + 1 ;
212+ let cardinality = u64:: from ( description_bytes. read_u16 :: < LittleEndian > ( ) ?) + 1 ;
213+
214+ // If the run container bitmap is present, check if this container is a run container
215+ let is_run_container =
216+ run_container_bitmap. as_ref ( ) . map_or ( false , |bm| bm[ i / 8 ] & ( 1 << ( i % 8 ) ) != 0 ) ;
217+
218+ let store = if is_run_container {
219+ let runs = reader. read_u16 :: < LittleEndian > ( ) ?;
220+ let mut intervals = vec ! [ [ 0 , 0 ] ; runs as usize ] ;
221+ reader. read_exact ( cast_slice_mut ( & mut intervals) ) ?;
222+ intervals. iter_mut ( ) . for_each ( |[ s, len] | {
223+ * s = u16:: from_le ( * s) ;
224+ * len = u16:: from_le ( * len) ;
225+ } ) ;
196226
197- let store = if len <= 4096 {
198- let mut values = vec ! [ 0 ; len as usize ] ;
227+ let cardinality = intervals. iter ( ) . map ( |[ _, len] | * len as usize ) . sum ( ) ;
228+ let mut store = Store :: with_capacity ( cardinality) ;
229+ intervals. into_iter ( ) . for_each ( |[ s, len] | {
230+ store. insert_range ( RangeInclusive :: new ( s, s + len) ) ;
231+ } ) ;
232+ store
233+ } else if cardinality <= ARRAY_LIMIT {
234+ let mut values = vec ! [ 0 ; cardinality as usize ] ;
199235 reader. read_exact ( cast_slice_mut ( & mut values) ) ?;
200236 values. iter_mut ( ) . for_each ( |n| * n = u16:: from_le ( * n) ) ;
201237 let array = a ( values) . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
202238 Store :: Array ( array)
203239 } else {
204- let mut values = Box :: new ( [ 0 ; 1024 ] ) ;
240+ let mut values = Box :: new ( [ 0 ; BITMAP_LENGTH ] ) ;
205241 reader. read_exact ( cast_slice_mut ( & mut values[ ..] ) ) ?;
206242 values. iter_mut ( ) . for_each ( |n| * n = u64:: from_le ( * n) ) ;
207- let bitmap =
208- b ( len , values ) . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
243+ let bitmap = b ( cardinality , values )
244+ . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
209245 Store :: Bitmap ( bitmap)
210246 } ;
211247
0 commit comments