@@ -47,9 +47,7 @@ const ROOT_SCHEME_ID: SchemeId = SchemeId {
4747} ;
4848
4949/// Child indices for the compressor's list/listview compression.
50- pub mod root_list_children {
51- /// List elements child.
52- pub const ELEMENTS : usize = 0 ;
50+ mod root_list_children {
5351 /// List/ListView offsets child.
5452 pub const OFFSETS : usize = 1 ;
5553 /// ListView sizes child.
@@ -166,7 +164,7 @@ impl CascadingCompressor {
166164 /// # Errors
167165 ///
168166 /// Returns an error if compression of any sub-array fails.
169- pub fn compress_canonical (
167+ fn compress_canonical (
170168 & self ,
171169 array : Canonical ,
172170 ctx : CompressorContext ,
@@ -326,12 +324,23 @@ impl CascadingCompressor {
326324 let mut best: Option < ( & ' static dyn Scheme , f64 ) > = None ;
327325
328326 for & scheme in schemes {
327+ // Constant detection on a sample is a false positive: the sample being constant
328+ // does not mean the full array is constant.
329+ if ctx. is_sample ( ) && scheme. detects_constant ( ) {
330+ continue ;
331+ }
332+
329333 let ratio = scheme. expected_compression_ratio ( self , data, ctx. clone ( ) ) ?;
330334
331335 tracing:: debug!( scheme = %scheme. id( ) , ratio, "evaluated compression ratio" ) ;
332336
333337 if is_better_ratio ( ratio, & best) {
334338 best = Some ( ( scheme, ratio) ) ;
339+
340+ // Schemes that return f64::MAX (like Constant) cannot be beat, so stop early.
341+ if ratio == f64:: MAX {
342+ break ;
343+ }
335344 }
336345 }
337346
@@ -450,3 +459,79 @@ impl CascadingCompressor {
450459fn is_better_ratio ( ratio : f64 , best : & Option < ( & ' static dyn Scheme , f64 ) > ) -> bool {
451460 ratio. is_finite ( ) && !ratio. is_subnormal ( ) && ratio > 1.0 && best. is_none_or ( |( _, r) | ratio > r)
452461}
462+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builtins::FloatDictScheme;
    use crate::builtins::IntDictScheme;
    use crate::builtins::IntUncompressedScheme;
    use crate::builtins::StringDictScheme;
    use crate::ctx::CompressorContext;
    use crate::scheme::SchemeExt;

    /// Child index used by the FloatDict tests for the values child.
    const DICT_VALUES_CHILD: usize = 0;
    /// Child index used by the FloatDict tests for the codes child.
    const DICT_CODES_CHILD: usize = 1;

    /// Builds the compressor every exclusion test runs against: one uncompressed
    /// integer scheme plus the integer, float and string dictionary schemes.
    fn exclusion_fixture() -> CascadingCompressor {
        CascadingCompressor::new(vec![
            &IntUncompressedScheme,
            &IntDictScheme,
            &FloatDictScheme,
            &StringDictScheme,
        ])
    }

    /// A scheme that already appears in the cascade history is excluded.
    #[test]
    fn test_self_exclusion() {
        let cascade = exclusion_fixture();
        let below_int_dict =
            CompressorContext::default().descend_with_scheme(IntDictScheme.id(), 0);

        // IntDict is part of the history and must not be applied again.
        assert!(cascade.is_excluded(&IntDictScheme, &below_int_dict));
        // IntUncompressed never appeared in the history, so it stays eligible.
        assert!(!cascade.is_excluded(&IntUncompressedScheme, &below_int_dict));
    }

    /// Descending from the root scheme into the list offsets child excludes IntDict.
    #[test]
    fn test_root_exclusion_list_offsets() {
        let cascade = exclusion_fixture();
        let at_list_offsets = CompressorContext::default()
            .descend_with_scheme(ROOT_SCHEME_ID, root_list_children::OFFSETS);

        // Dictionary encoding is ruled out for list offsets.
        assert!(cascade.is_excluded(&IntDictScheme, &at_list_offsets));
        // The uncompressed scheme remains available there.
        assert!(!cascade.is_excluded(&IntUncompressedScheme, &at_list_offsets));
    }

    /// FloatDict cascading into its codes child excludes IntDict.
    #[test]
    fn test_push_rule_float_dict_excludes_int_dict_from_codes() {
        let cascade = exclusion_fixture();
        let at_codes = CompressorContext::default()
            .descend_with_scheme(FloatDictScheme.id(), DICT_CODES_CHILD);

        // IntDict is excluded from the codes child of FloatDict.
        assert!(cascade.is_excluded(&IntDictScheme, &at_codes));
        // IntUncompressed is still allowed.
        assert!(!cascade.is_excluded(&IntUncompressedScheme, &at_codes));
    }

    /// FloatDict cascading into its values child excludes IntDict as well.
    #[test]
    fn test_push_rule_float_dict_excludes_int_dict_from_values() {
        let cascade = exclusion_fixture();
        let at_values = CompressorContext::default()
            .descend_with_scheme(FloatDictScheme.id(), DICT_VALUES_CHILD);

        // The values child gets the same exclusion (ALP propagation replacement).
        assert!(cascade.is_excluded(&IntDictScheme, &at_values));
    }

    /// A fresh context carries no history, so nothing is excluded.
    #[test]
    fn test_no_exclusion_without_history() {
        let cascade = exclusion_fixture();
        let fresh = CompressorContext::default();

        // With an empty history every scheme is eligible.
        assert!(!cascade.is_excluded(&IntDictScheme, &fresh));
        assert!(!cascade.is_excluded(&IntUncompressedScheme, &fresh));
    }
}
0 commit comments