File tree Expand file tree Collapse file tree 1 file changed +20
-5
lines changed
Expand file tree Collapse file tree 1 file changed +20
-5
lines changed Original file line number Diff line number Diff line change @@ -189,11 +189,26 @@ impl Scheme for VarBinScheme {
189189 _allowed_cascading : usize ,
190190 _excludes : & [ StringCode ] ,
191191 ) -> VortexResult < f64 > {
192- // VarBinScheme doesn't provide compression - it's a format conversion.
193- // VarBinView is generally more efficient than VarBin for most workloads
194- // (especially with small strings that can be inlined).
195- // Return 0.0 to indicate this scheme should not be selected by the compressor.
196- Ok ( 0.0 )
192+ if stats. src . is_empty ( ) {
193+ return Ok ( 1.0 ) ;
194+ }
195+
196+ let src = stats. source ( ) ;
197+
198+ // Calculate VarBinView size using nbytes()
199+ let varbinview_size = src. as_ref ( ) . nbytes ( ) ;
200+
201+ let string_bytes = src. buffers ( ) . iter ( ) . map ( |b| b. len ( ) as u64 ) . sum :: < u64 > ( ) ;
202+
203+ // Determine offset type size based on total string bytes
204+ // Estimate 4-byte offsets when the total string bytes are below u32::MAX, otherwise 8-byte offsets
205+ let offset_type_size = if string_bytes < u32:: MAX as u64 { 4 } else { 8 } ;
206+ let offset_bytes = ( src. len ( ) as u64 + 1 ) * offset_type_size;
207+
208+ let varbin_size = string_bytes + offset_bytes;
209+ assert ! ( varbin_size > 0 , "cannot be empty" ) ;
210+
211+ Ok ( varbinview_size as f64 / varbin_size as f64 )
197212 }
198213
199214 fn compress (
You can’t perform that action at this time.
0 commit comments