@@ -8,7 +8,7 @@ use std::ops::RangeInclusive;
88
99use crate :: bitmap:: container:: Container ;
1010use crate :: bitmap:: serialization:: {
11- NO_OFFSET_THRESHOLD , OFFSET_BYTES , SERIAL_COOKIE , SERIAL_COOKIE_NO_RUNCONTAINER ,
11+ NO_OFFSET_THRESHOLD , SERIAL_COOKIE , SERIAL_COOKIE_NO_RUNCONTAINER ,
1212} ;
1313use crate :: RoaringBitmap ;
1414
@@ -93,21 +93,35 @@ impl RoaringBitmap {
9393 }
9494
9595 // Read the container descriptions
96- let mut description_bytes = vec ! [ [ 0u16 ; 2 ] ; size] ;
97- reader. read_exact ( cast_slice_mut ( & mut description_bytes ) ) ?;
98- description_bytes . iter_mut ( ) . for_each ( |[ ref mut key, ref mut len] | {
96+ let mut descriptions = vec ! [ [ 0 ; 2 ] ; size] ;
97+ reader. read_exact ( cast_slice_mut ( & mut descriptions ) ) ?;
98+ descriptions . iter_mut ( ) . for_each ( |[ ref mut key, ref mut len] | {
9999 * key = u16:: from_le ( * key) ;
100100 * len = u16:: from_le ( * len) ;
101101 } ) ;
102102
103-
104103 if has_offsets {
105- // I could use these offsets but I am a lazy developer (for now)
106- reader. seek ( SeekFrom :: Current ( ( size * OFFSET_BYTES ) as i64 ) ) ?;
104+ let mut offsets = vec ! [ 0 ; size] ;
105+ reader. read_exact ( cast_slice_mut ( & mut offsets) ) ?;
106+ offsets. iter_mut ( ) . for_each ( |offset| * offset = u32:: from_le ( * offset) ) ;
107+
108+ // Loop on the materialized containers when there are
109+ // fewer of them than, or as many as, the serialized ones.
110+ if self . containers . len ( ) <= size {
111+ return self . intersection_with_serialized_impl_with_offsets (
112+ reader,
113+ a,
114+ b,
115+ & descriptions,
116+ & offsets,
117+ run_container_bitmap. as_deref ( ) ,
118+ ) ;
119+ }
107120 }
108121
109122 // Read each container and skip the useless ones
110- for ( i, & [ key, len_minus_one] ) in description_bytes. iter ( ) . enumerate ( ) {
123+ let mut containers = Vec :: new ( ) ;
124+ for ( i, & [ key, len_minus_one] ) in descriptions. iter ( ) . enumerate ( ) {
111125 let container = match self . containers . binary_search_by_key ( & key, |c| c. key ) {
112126 Ok ( index) => self . containers . get ( index) ,
113127 Err ( _) => None ,
@@ -191,6 +205,81 @@ impl RoaringBitmap {
191205
192206 Ok ( RoaringBitmap { containers } )
193207 }
208+
209+ fn intersection_with_serialized_impl_with_offsets < R , A , AErr , B , BErr > (
210+ & self ,
211+ mut reader : R ,
212+ a : A ,
213+ b : B ,
214+ descriptions : & [ [ u16 ; 2 ] ] ,
215+ offsets : & [ u32 ] ,
216+ run_container_bitmap : Option < & [ u8 ] > ,
217+ ) -> io:: Result < RoaringBitmap >
218+ where
219+ R : io:: Read + io:: Seek ,
220+ A : Fn ( Vec < u16 > ) -> Result < ArrayStore , AErr > ,
221+ AErr : Error + Send + Sync + ' static ,
222+ B : Fn ( u64 , Box < [ u64 ; 1024 ] > ) -> Result < BitmapStore , BErr > ,
223+ BErr : Error + Send + Sync + ' static ,
224+ {
225+ let mut containers = Vec :: new ( ) ;
226+ for container in & self . containers {
227+ let i = match descriptions. binary_search_by_key ( & container. key , |[ k, _] | * k) {
228+ Ok ( index) => index,
229+ Err ( _) => continue ,
230+ } ;
231+
232+ // Seek to the bytes of the container we want.
233+ reader. seek ( SeekFrom :: Start ( offsets[ i] as u64 ) ) ?;
234+
235+ let [ key, len_minus_one] = descriptions[ i] ;
236+ let cardinality = u64:: from ( len_minus_one) + 1 ;
237+
238+ // If the run container bitmap is present, check if this container is a run container
239+ let is_run_container =
240+ run_container_bitmap. as_ref ( ) . map_or ( false , |bm| bm[ i / 8 ] & ( 1 << ( i % 8 ) ) != 0 ) ;
241+
242+ let store = if is_run_container {
243+ let runs = reader. read_u16 :: < LittleEndian > ( ) . unwrap ( ) ;
244+ let mut intervals = vec ! [ [ 0 , 0 ] ; runs as usize ] ;
245+ reader. read_exact ( cast_slice_mut ( & mut intervals) ) . unwrap ( ) ;
246+ intervals. iter_mut ( ) . for_each ( |[ s, len] | {
247+ * s = u16:: from_le ( * s) ;
248+ * len = u16:: from_le ( * len) ;
249+ } ) ;
250+
251+ let cardinality = intervals. iter ( ) . map ( |[ _, len] | * len as usize ) . sum ( ) ;
252+ let mut store = Store :: with_capacity ( cardinality) ;
253+ intervals. into_iter ( ) . try_for_each ( |[ s, len] | -> Result < ( ) , io:: ErrorKind > {
254+ let end = s. checked_add ( len) . ok_or ( io:: ErrorKind :: InvalidData ) ?;
255+ store. insert_range ( RangeInclusive :: new ( s, end) ) ;
256+ Ok ( ( ) )
257+ } ) ?;
258+ store
259+ } else if cardinality <= ARRAY_LIMIT {
260+ let mut values = vec ! [ 0 ; cardinality as usize ] ;
261+ reader. read_exact ( cast_slice_mut ( & mut values) ) . unwrap ( ) ;
262+ values. iter_mut ( ) . for_each ( |n| * n = u16:: from_le ( * n) ) ;
263+ let array = a ( values) . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
264+ Store :: Array ( array)
265+ } else {
266+ let mut values = Box :: new ( [ 0 ; BITMAP_LENGTH ] ) ;
267+ reader. read_exact ( cast_slice_mut ( & mut values[ ..] ) ) . unwrap ( ) ;
268+ values. iter_mut ( ) . for_each ( |n| * n = u64:: from_le ( * n) ) ;
269+ let bitmap = b ( cardinality, values)
270+ . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
271+ Store :: Bitmap ( bitmap)
272+ } ;
273+
274+ let mut other_container = Container { key, store } ;
275+ other_container &= container;
276+ if !other_container. is_empty ( ) {
277+ containers. push ( other_container) ;
278+ }
279+ }
280+
281+ Ok ( RoaringBitmap { containers } )
282+ }
194283}
195284
196285#[ cfg( test) ]
0 commit comments