@@ -98,14 +98,6 @@ impl rle::RLEConfig for IntRLEConfig {
9898 fn generate_stats ( array : & ArrayRef ) -> IntegerStats {
9999 IntegerStats :: generate ( & array. to_primitive ( ) )
100100 }
101-
// Every line of this method carries a `-` marker, i.e. it is deleted by this hunk.
// The deleted method wrapped a clone of `values` in `Canonical::Primitive` and passed it,
// together with `ctx`, to `CascadingCompressor::compress_canonical`.
102- fn compress_values (
103- compressor : & CascadingCompressor ,
104- values : & PrimitiveArray ,
105- ctx : CompressorContext ,
106- ) -> VortexResult < ArrayRef > {
107- compressor. compress_canonical ( Canonical :: Primitive ( values. clone ( ) ) , ctx)
108- }
109101}
110102
111103impl RLEStats for IntegerStats {
@@ -134,6 +126,24 @@ impl Scheme for FoRScheme {
134126 is_integer_primitive ( canonical)
135127 }
136128
129+ /// Dict codes always start at 0, so FoR (which subtracts the min) is a no-op.
130+ fn ancestor_exclusions ( & self ) -> Vec < AncestorExclusion > {
131+ vec ! [
132+ AncestorExclusion {
133+ ancestor: IntDictScheme . id( ) ,
134+ children: ChildSelection :: One ( 1 ) ,
135+ } ,
136+ AncestorExclusion {
137+ ancestor: FloatDictScheme . id( ) ,
138+ children: ChildSelection :: One ( 1 ) ,
139+ } ,
140+ AncestorExclusion {
141+ ancestor: StringDictScheme . id( ) ,
142+ children: ChildSelection :: One ( 1 ) ,
143+ } ,
144+ ]
145+ }
146+
137147 fn expected_compression_ratio (
138148 & self ,
139149 _compressor : & CascadingCompressor ,
@@ -225,6 +235,7 @@ impl Scheme for ZigZagScheme {
225235 is_integer_primitive ( canonical)
226236 }
227237
238+ /// Children: encoded=0.
228239 fn num_children ( & self ) -> usize {
229240 1
230241 }
@@ -249,6 +260,24 @@ impl Scheme for ZigZagScheme {
249260 ]
250261 }
251262
263+ /// Dict codes are unsigned integers (0..cardinality). ZigZag only helps negatives.
264+ fn ancestor_exclusions ( & self ) -> Vec < AncestorExclusion > {
265+ vec ! [
266+ AncestorExclusion {
267+ ancestor: IntDictScheme . id( ) ,
268+ children: ChildSelection :: One ( 1 ) ,
269+ } ,
270+ AncestorExclusion {
271+ ancestor: FloatDictScheme . id( ) ,
272+ children: ChildSelection :: One ( 1 ) ,
273+ } ,
274+ AncestorExclusion {
275+ ancestor: StringDictScheme . id( ) ,
276+ children: ChildSelection :: One ( 1 ) ,
277+ } ,
278+ ]
279+ }
280+
252281 fn expected_compression_ratio (
253282 & self ,
254283 compressor : & CascadingCompressor ,
@@ -365,6 +394,7 @@ impl Scheme for SparseScheme {
365394 }
366395 }
367396
397+ /// Children: values=0, indices=1.
368398 fn num_children ( & self ) -> usize {
369399 2
370400 }
@@ -494,6 +524,7 @@ impl Scheme for RunEndScheme {
494524 is_integer_primitive ( canonical)
495525 }
496526
527+ /// Children: values=0, ends=1.
497528 fn num_children ( & self ) -> usize {
498529 2
499530 }
@@ -507,12 +538,23 @@ impl Scheme for RunEndScheme {
507538 } ]
508539 }
509540
510- // TODO(connor): There seems to be stuff missing here...
541+ /// Dict values (child 0) are all unique by definition, so run-end encoding them is
542+ /// pointless. Codes (child 1) can have runs and may benefit from RunEnd.
511543 fn ancestor_exclusions ( & self ) -> Vec < AncestorExclusion > {
512- vec ! [ AncestorExclusion {
513- ancestor: FloatDictScheme . id( ) ,
514- children: ChildSelection :: One ( 0 ) ,
515- } ]
544+ vec ! [
545+ AncestorExclusion {
546+ ancestor: IntDictScheme . id( ) ,
547+ children: ChildSelection :: One ( 0 ) ,
548+ } ,
549+ AncestorExclusion {
550+ ancestor: FloatDictScheme . id( ) ,
551+ children: ChildSelection :: One ( 0 ) ,
552+ } ,
553+ AncestorExclusion {
554+ ancestor: StringDictScheme . id( ) ,
555+ children: ChildSelection :: One ( 0 ) ,
556+ } ,
557+ ]
516558 }
517559
518560 fn expected_compression_ratio (
0 commit comments