@@ -8,12 +8,12 @@ use std::ops::Range;
88
99use vortex_error:: VortexExpect ;
1010use vortex_error:: VortexResult ;
11+ use vortex_mask:: Mask ;
12+ use vortex_vector:: binaryview:: Ref ;
1113
1214use crate :: arrays:: VarBinViewArray ;
1315use crate :: builders:: ArrayBuilder ;
1416use crate :: builders:: VarBinViewBuilder ;
15- use crate :: validity:: Validity ;
16- use crate :: vtable:: ValidityHelper ;
1717
1818impl VarBinViewArray {
1919 /// Returns a compacted copy of the input array, where all wasted space has been cleaned up. This
@@ -55,28 +55,42 @@ impl VarBinViewArray {
5555 bytes_referenced < buffer_total_bytes || buffer_total_bytes == 0
5656 }
5757
58- // count the number of bytes addressed by the views, not including null
59- // values or any inlined strings.
60- fn count_referenced_bytes ( & self ) -> u64 {
61- match self . validity ( ) {
62- Validity :: AllInvalid => 0u64 ,
63- Validity :: NonNullable | Validity :: AllValid | Validity :: Array ( _) => self
64- . views ( )
65- . iter ( )
66- . enumerate ( )
67- . map ( |( idx, & view) | {
68- if !self . is_valid ( idx) || view. is_inlined ( ) {
69- 0u64
70- } else {
71- view. len ( ) as u64
58+ /// Iterates over all valid, non-inlined views, calling the provided
59+ /// closure for each one.
60+ #[ inline( always) ]
61+ fn iter_valid_views < F > ( & self , mut f : F )
62+ where
63+ F : FnMut ( & Ref ) ,
64+ {
65+ match self . validity_mask ( ) {
66+ Mask :: AllTrue ( _) => {
67+ for & view in self . views ( ) . iter ( ) {
68+ if !view. is_inlined ( ) {
69+ f ( view. as_view ( ) ) ;
70+ }
71+ }
72+ }
73+ Mask :: AllFalse ( _) => { }
74+ Mask :: Values ( v) => {
75+ for ( & view, is_valid) in self . views ( ) . iter ( ) . zip ( v. bit_buffer ( ) . iter ( ) ) {
76+ if is_valid && !view. is_inlined ( ) {
77+ f ( view. as_view ( ) ) ;
7278 }
73- } )
74- . sum ( ) ,
79+ }
80+ }
7581 }
7682 }
7783
84+ /// Count the number of bytes addressed by the views, not including null
85+ /// values or any inlined strings.
86+ fn count_referenced_bytes ( & self ) -> u64 {
87+ let mut total = 0u64 ;
88+ self . iter_valid_views ( |view| total += view. size as u64 ) ;
89+ total
90+ }
91+
7892 pub ( crate ) fn buffer_utilizations ( & self ) -> Vec < BufferUtilization > {
79- let mut utilizations = self
93+ let mut utilizations: Vec < BufferUtilization > = self
8094 . buffers ( )
8195 . iter ( )
8296 . map ( |buf| {
@@ -85,18 +99,9 @@ impl VarBinViewArray {
8599 } )
86100 . collect ( ) ;
87101
88- if matches ! ( self . validity( ) , Validity :: AllInvalid ) {
89- return utilizations;
90- }
91-
92- for ( idx, & view) in self . views ( ) . iter ( ) . enumerate ( ) {
93- if !self . is_valid ( idx) || view. is_inlined ( ) {
94- continue ;
95- }
96- let view = view. as_view ( ) ;
97-
98- utilizations[ view. buffer_index as usize ] . add ( view. offset , view. size )
99- }
102+ self . iter_valid_views ( |view| {
103+ utilizations[ view. buffer_index as usize ] . add ( view. offset , view. size ) ;
104+ } ) ;
100105
101106 utilizations
102107 }
@@ -177,6 +182,7 @@ impl BufferUtilization {
177182
178183#[ cfg( test) ]
179184mod tests {
185+ use rstest:: rstest;
180186 use vortex_buffer:: buffer;
181187
182188 use crate :: IntoArray ;
@@ -433,4 +439,32 @@ mod tests {
433439 ) ;
434440 }
435441 }
442+
443+ const LONG1 : & str = "long string one!" ;
444+ const LONG2 : & str = "long string two!" ;
445+ const SHORT : & str = "x" ;
446+ const EXPECTED_BYTES : u64 = ( LONG1 . len ( ) + LONG2 . len ( ) ) as u64 ;
447+
448+ fn mixed_array ( ) -> VarBinViewArray {
449+ VarBinViewArray :: from_iter_nullable_str ( [ Some ( LONG1 ) , None , Some ( LONG2 ) , Some ( SHORT ) ] )
450+ }
451+
452+ #[ rstest]
453+ #[ case:: non_nullable( VarBinViewArray :: from_iter_str( [ LONG1 , LONG2 , SHORT ] ) , EXPECTED_BYTES , & [ 1.0 ] ) ]
454+ #[ case:: all_valid( VarBinViewArray :: from_iter_nullable_str( [ Some ( LONG1 ) , Some ( LONG2 ) , Some ( SHORT ) ] ) , EXPECTED_BYTES , & [ 1.0 ] ) ]
455+ #[ case:: all_invalid( VarBinViewArray :: from_iter_nullable_str( [ None :: <& str >, None ] ) , 0 , & [ ] ) ]
456+ #[ case:: mixed_validity( mixed_array( ) , EXPECTED_BYTES , & [ 1.0 ] ) ]
457+ fn test_validity_code_paths (
458+ #[ case] arr : VarBinViewArray ,
459+ #[ case] expected_bytes : u64 ,
460+ #[ case] expected_utils : & [ f64 ] ,
461+ ) {
462+ assert_eq ! ( arr. count_referenced_bytes( ) , expected_bytes) ;
463+ let utils: Vec < f64 > = arr
464+ . buffer_utilizations ( )
465+ . iter ( )
466+ . map ( |u| u. overall_utilization ( ) )
467+ . collect ( ) ;
468+ assert_eq ! ( utils, expected_utils) ;
469+ }
436470}
0 commit comments