1- use bytemuck:: { cast_slice_mut, pod_collect_to_vec } ;
1+ use bytemuck:: cast_slice_mut;
22use byteorder:: { LittleEndian , ReadBytesExt } ;
33use core:: convert:: Infallible ;
4- use core:: ops:: { BitAndAssign , RangeInclusive } ;
4+ use core:: mem;
5+ use core:: ops:: RangeInclusive ;
56use std:: error:: Error ;
6- use std:: io;
7+ use std:: io:: { self , SeekFrom } ;
78
89use crate :: bitmap:: container:: Container ;
910use crate :: bitmap:: serialization:: {
@@ -39,7 +40,7 @@ impl RoaringBitmap {
3940 // TODO convert this into a trait
4041 pub fn intersection_with_serialized_unchecked < R > ( & self , other : R ) -> io:: Result < RoaringBitmap >
4142 where
42- R : io:: Read ,
43+ R : io:: Read + io :: Seek ,
4344 {
4445 RoaringBitmap :: intersection_with_serialized_impl :: < R , _ , Infallible , _ , Infallible > (
4546 self ,
@@ -56,7 +57,7 @@ impl RoaringBitmap {
5657 b : B ,
5758 ) -> io:: Result < RoaringBitmap >
5859 where
59- R : io:: Read ,
60+ R : io:: Read + io :: Seek ,
6061 A : Fn ( Vec < u16 > ) -> Result < ArrayStore , AErr > ,
6162 AErr : Error + Send + Sync + ' static ,
6263 B : Fn ( u64 , Box < [ u64 ; 1024 ] > ) -> Result < BitmapStore , BErr > ,
@@ -101,7 +102,7 @@ impl RoaringBitmap {
101102
102103 let mut containers = Vec :: with_capacity ( size) ;
103104
104- // Read each container
105+ // Read each container and skip the useless ones
105106 for i in 0 ..size {
106107 let key = description_bytes. read_u16 :: < LittleEndian > ( ) ?;
107108 let container = match self . containers . binary_search_by_key ( & key, |c| c. key ) {
@@ -114,53 +115,100 @@ impl RoaringBitmap {
114115 let is_run_container =
115116 run_container_bitmap. as_ref ( ) . map_or ( false , |bm| bm[ i / 8 ] & ( 1 << ( i % 8 ) ) != 0 ) ;
116117
117- let mut store = if is_run_container {
118+ let store = if is_run_container {
118119 let runs = reader. read_u16 :: < LittleEndian > ( ) ?;
119- let mut intervals = vec ! [ [ 0 , 0 ] ; runs as usize ] ;
120- reader. read_exact ( cast_slice_mut ( & mut intervals) ) ?;
121- if container. is_none ( ) {
122- continue ;
120+ match container {
121+ Some ( _) => {
122+ let mut intervals = vec ! [ [ 0 , 0 ] ; runs as usize ] ;
123+ reader. read_exact ( cast_slice_mut ( & mut intervals) ) ?;
124+ intervals. iter_mut ( ) . for_each ( |[ s, len] | {
125+ * s = u16:: from_le ( * s) ;
126+ * len = u16:: from_le ( * len) ;
127+ } ) ;
128+
129+ let cardinality = intervals. iter ( ) . map ( |[ _, len] | * len as usize ) . sum ( ) ;
130+ let mut store = Store :: with_capacity ( cardinality) ;
131+ intervals. into_iter ( ) . try_for_each (
132+ |[ s, len] | -> Result < ( ) , io:: ErrorKind > {
133+ let end = s. checked_add ( len) . ok_or ( io:: ErrorKind :: InvalidData ) ?;
134+ store. insert_range ( RangeInclusive :: new ( s, end) ) ;
135+ Ok ( ( ) )
136+ } ,
137+ ) ?;
138+ store
139+ }
140+ None => {
141+ let runs_size = mem:: size_of :: < u16 > ( ) * 2 * runs as usize ;
142+ reader. seek ( SeekFrom :: Current ( runs_size as i64 ) ) ?;
143+ continue ;
144+ }
123145 }
124- intervals. iter_mut ( ) . for_each ( |[ s, len] | {
125- * s = u16:: from_le ( * s) ;
126- * len = u16:: from_le ( * len) ;
127- } ) ;
128-
129- let cardinality = intervals. iter ( ) . map ( |[ _, len] | * len as usize ) . sum ( ) ;
130- let mut store = Store :: with_capacity ( cardinality) ;
131- intervals. into_iter ( ) . try_for_each ( |[ s, len] | -> Result < ( ) , io:: ErrorKind > {
132- let end = s. checked_add ( len) . ok_or ( io:: ErrorKind :: InvalidData ) ?;
133- store. insert_range ( RangeInclusive :: new ( s, end) ) ;
134- Ok ( ( ) )
135- } ) ?;
136- store
137146 } else if cardinality <= ARRAY_LIMIT {
138- let mut values = vec ! [ 0 ; cardinality as usize ] ;
139- reader. read_exact ( cast_slice_mut ( & mut values) ) ?;
140- if container. is_none ( ) {
141- continue ;
147+ match container {
148+ Some ( _) => {
149+ let mut values = vec ! [ 0 ; cardinality as usize ] ;
150+ reader. read_exact ( cast_slice_mut ( & mut values) ) ?;
151+ values. iter_mut ( ) . for_each ( |n| * n = u16:: from_le ( * n) ) ;
152+ let array =
153+ a ( values) . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
154+ Store :: Array ( array)
155+ }
156+ None => {
157+ let array_size = mem:: size_of :: < u16 > ( ) * cardinality as usize ;
158+ reader. seek ( SeekFrom :: Current ( array_size as i64 ) ) ?;
159+ continue ;
160+ }
142161 }
143- values. iter_mut ( ) . for_each ( |n| * n = u16:: from_le ( * n) ) ;
144- let array = a ( values) . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
145- Store :: Array ( array)
146162 } else {
147- let mut values = Box :: new ( [ 0 ; BITMAP_LENGTH ] ) ;
148- reader. read_exact ( cast_slice_mut ( & mut values[ ..] ) ) ?;
149- if container. is_none ( ) {
150- continue ;
163+ match container {
164+ Some ( _) => {
165+ let mut values = Box :: new ( [ 0 ; BITMAP_LENGTH ] ) ;
166+ reader. read_exact ( cast_slice_mut ( & mut values[ ..] ) ) ?;
167+ values. iter_mut ( ) . for_each ( |n| * n = u64:: from_le ( * n) ) ;
168+ let bitmap = b ( cardinality, values)
169+ . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
170+ Store :: Bitmap ( bitmap)
171+ }
172+ None => {
173+ let bitmap_size = mem:: size_of :: < u64 > ( ) * BITMAP_LENGTH ;
174+ reader. seek ( SeekFrom :: Current ( bitmap_size as i64 ) ) ?;
175+ continue ;
176+ }
151177 }
152- values. iter_mut ( ) . for_each ( |n| * n = u64:: from_le ( * n) ) ;
153- let bitmap = b ( cardinality, values)
154- . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
155- Store :: Bitmap ( bitmap)
156178 } ;
157179
158180 if let Some ( container) = container {
159- store &= & container. store ;
160- containers. push ( Container { key, store } ) ;
181+ let mut tmp_container = Container { key, store } ;
182+ tmp_container &= container;
183+ if !tmp_container. is_empty ( ) {
184+ containers. push ( tmp_container) ;
185+ }
161186 }
162187 }
163188
164189 Ok ( RoaringBitmap { containers } )
165190 }
166191}
192+
193+ #[ cfg( test) ]
194+ #[ cfg( feature = "std" ) ] // We need this to serialize bitmaps
195+ mod test {
196+ use crate :: RoaringBitmap ;
197+ use proptest:: prelude:: * ;
198+ use std:: io:: Cursor ;
199+
200+ // fast count tests
201+ proptest ! {
202+ #[ test]
203+ fn intersection_with_serialized_eq_materialized_intersection(
204+ a in RoaringBitmap :: arbitrary( ) ,
205+ b in RoaringBitmap :: arbitrary( )
206+ ) {
207+ let mut serialized_bytes_b = Vec :: new( ) ;
208+ b. serialize_into( & mut serialized_bytes_b) . unwrap( ) ;
209+ let serialized_bytes_b = & serialized_bytes_b[ ..] ;
210+
211+ prop_assert_eq!( a. intersection_with_serialized_unchecked( Cursor :: new( serialized_bytes_b) ) . unwrap( ) , a & b) ;
212+ }
213+ }
214+ }
0 commit comments