1- use croaring:: Bitset ;
21use vortex_array:: aliases:: hash_map:: HashMap ;
32use vortex_array:: stats:: { ArrayStatisticsCompute , Stat , StatsSet } ;
43use vortex_error:: { vortex_err, VortexResult } ;
@@ -7,87 +6,41 @@ use crate::RoaringBoolArray;
76
87impl ArrayStatisticsCompute for RoaringBoolArray {
98 fn compute_statistics ( & self , stat : Stat ) -> VortexResult < StatsSet > {
10- if self . is_empty ( ) {
11- return Ok ( StatsSet :: new ( ) ) ;
12- }
13-
149 // Only needs to compute IsSorted, IsStrictSorted and RunCount; all other stats have been populated on construction
1510 let bitmap = self . bitmap ( ) ;
16- BitmapStats (
17- bitmap
18- . to_bitset ( )
19- . ok_or_else ( || vortex_err ! ( "Bitmap to Bitset conversion run out of memory" ) ) ?,
20- self . len ( ) ,
21- bitmap. statistics ( ) . cardinality ,
22- )
23- . compute_statistics ( stat)
24- }
25- }
26-
27- // Underlying bitset, length in bits, cardinality (true count) of the bitset
28- struct BitmapStats ( Bitset , usize , u64 ) ;
29-
30- impl ArrayStatisticsCompute for BitmapStats {
31- fn compute_statistics ( & self , _stat : Stat ) -> VortexResult < StatsSet > {
32- let bitset_slice = self . 0 . as_slice ( ) ;
33- let whole_chunks = self . 1 / 64 ;
34- let last_chunk_len = self . 1 % 64 ;
35- let fist_bool = bitset_slice[ 0 ] & 1 == 1 ;
36- let mut stats = RoaringBoolStatsAccumulator :: new ( fist_bool) ;
37- for bits64 in bitset_slice[ 0 ..whole_chunks] . iter ( ) {
38- stats. next ( * bits64) ;
11+ let true_count = bitmap. statistics ( ) . cardinality ;
12+ if matches ! (
13+ stat,
14+ Stat :: TrueCount | Stat :: Min | Stat :: Max | Stat :: IsConstant
15+ ) {
16+ return Ok ( StatsSet :: bools_with_true_count (
17+ true_count as usize ,
18+ self . len ( ) ,
19+ ) ) ;
3920 }
40- stats. next_up_to_length ( bitset_slice[ whole_chunks] , last_chunk_len) ;
41- Ok ( stats. finish ( self . 2 ) )
42- }
43- }
4421
45- struct RoaringBoolStatsAccumulator {
46- prev : bool ,
47- is_sorted : bool ,
48- run_count : usize ,
49- len : usize ,
50- }
51-
52- impl RoaringBoolStatsAccumulator {
53- fn new ( first_value : bool ) -> Self {
54- Self {
55- prev : first_value,
56- is_sorted : true ,
57- run_count : 1 ,
58- len : 0 ,
59- }
60- }
61-
62- pub fn next_up_to_length ( & mut self , next : u64 , len : usize ) {
63- assert ! ( len <= 64 ) ;
64- self . len += len;
65- for i in 0 ..len {
66- let current = ( ( next >> i) & 1 ) == 1 ;
67- // Booleans are ordered false < true, so the values are not sorted if we switch from a true value to a false value
68- if !current && self . prev {
69- self . is_sorted = false ;
70- }
71- if current != self . prev {
72- self . run_count += 1 ;
73- self . prev = current;
74- }
22+ if matches ! ( stat, Stat :: IsSorted | Stat :: IsStrictSorted ) {
23+ let is_sorted = if true_count == 0 || true_count == self . len ( ) as u64 {
24+ true
25+ } else {
26+ let min_idx = bitmap. minimum ( ) . ok_or_else ( || {
27+ vortex_err ! ( "Bitmap has no minimum despite having cardinality > 0" )
28+ } ) ?;
29+ let max_idx = bitmap. maximum ( ) . ok_or_else ( || {
30+ vortex_err ! ( "Bitmap has no maximum despite having cardinality > 0" )
31+ } ) ?;
32+ ( max_idx as usize + 1 == self . len ( ) ) && ( max_idx + 1 - min_idx) as u64 == true_count
33+ } ;
34+
35+ let is_strict_sorted =
36+ is_sorted && ( self . len ( ) <= 1 || ( self . len ( ) == 2 && true_count == 1 ) ) ;
37+ return Ok ( StatsSet :: from ( HashMap :: from ( [
38+ ( Stat :: IsSorted , is_sorted. into ( ) ) ,
39+ ( Stat :: IsStrictSorted , is_strict_sorted. into ( ) ) ,
40+ ] ) ) ) ;
7541 }
76- }
77-
78- pub fn next ( & mut self , next : u64 ) {
79- self . next_up_to_length ( next, 64 )
80- }
8142
82- pub fn finish ( self , cardinality : u64 ) -> StatsSet {
83- StatsSet :: from ( HashMap :: from ( [
84- ( Stat :: IsSorted , self . is_sorted . into ( ) ) ,
85- (
86- Stat :: IsStrictSorted ,
87- ( self . is_sorted && ( self . len < 2 || ( self . len == 2 && cardinality == 1 ) ) ) . into ( ) ,
88- ) ,
89- ( Stat :: RunCount , self . run_count . into ( ) ) ,
90- ] ) )
43+ Ok ( StatsSet :: new ( ) )
9144 }
9245}
9346
@@ -111,7 +64,6 @@ mod test {
11164 assert ! ( !bool_arr. statistics( ) . compute_is_constant( ) . unwrap( ) ) ;
11265 assert ! ( !bool_arr. statistics( ) . compute_min:: <bool >( ) . unwrap( ) ) ;
11366 assert ! ( bool_arr. statistics( ) . compute_max:: <bool >( ) . unwrap( ) ) ;
114- assert_eq ! ( bool_arr. statistics( ) . compute_run_count( ) . unwrap( ) , 5 ) ;
11567 assert_eq ! ( bool_arr. statistics( ) . compute_true_count( ) . unwrap( ) , 4 ) ;
11668 }
11769
0 commit comments