33
44//! Float compression schemes.
55
6- use vortex_alp:: ALP ;
76use vortex_alp:: ALPArray ;
87use vortex_alp:: RDEncoder ;
98use vortex_alp:: alp_encode;
109use vortex_array:: ArrayRef ;
1110use vortex_array:: Canonical ;
1211use vortex_array:: IntoArray ;
1312use vortex_array:: ToCanonical ;
14- use vortex_array:: arrays:: PrimitiveArray ;
1513use vortex_array:: dtype:: PType ;
1614use vortex_compressor:: scheme:: ChildSelection ;
1715use vortex_compressor:: scheme:: DescendantExclusion ;
@@ -28,15 +26,12 @@ use crate::Scheme;
2826use crate :: SchemeExt ;
2927use crate :: compress_patches;
3028use crate :: estimate_compression_ratio_with_sampling;
31- use crate :: schemes:: rle;
32- use crate :: schemes:: rle:: RLEScheme ;
33- use crate :: schemes:: rle:: RLEStats ;
3429
3530/// ALP (Adaptive Lossless floating-Point) encoding.
3631#[ derive( Debug , Copy , Clone , PartialEq , Eq ) ]
3732pub struct ALPScheme ;
3833
39- /// ALPRD (ALP with Right Division ) encoding variant.
34+ /// ALPRD (ALP for Real Doubles) encoding variant.
4035#[ derive( Debug , Copy , Clone , PartialEq , Eq ) ]
4136pub struct ALPRDScheme ;
4237
@@ -51,47 +46,14 @@ pub struct NullDominatedSparseScheme;
5146#[ derive( Debug , Copy , Clone , PartialEq , Eq ) ]
5247pub struct PcoScheme ;
5348
54- /// Configuration for float RLE compression.
55- #[ derive( Debug , Copy , Clone , PartialEq , Eq ) ]
56- pub struct FloatRLEConfig ;
57-
5849// Re-export builtin schemes from vortex-compressor.
5950pub use vortex_compressor:: builtins:: FloatConstantScheme ;
6051pub use vortex_compressor:: builtins:: FloatDictScheme ;
6152pub use vortex_compressor:: builtins:: FloatUncompressedScheme ;
6253pub use vortex_compressor:: builtins:: is_float_primitive;
6354pub use vortex_compressor:: stats:: FloatStats ;
6455
65- impl rle:: RLEConfig for FloatRLEConfig {
66- type Stats = FloatStats ;
67-
68- const SCHEME_NAME : & ' static str = "vortex.float.rle" ;
69-
70- fn matches ( canonical : & Canonical ) -> bool {
71- is_float_primitive ( canonical)
72- }
73-
74- fn generate_stats ( array : & ArrayRef ) -> FloatStats {
75- FloatStats :: generate ( & array. to_primitive ( ) )
76- }
77- }
78-
79- impl RLEStats for FloatStats {
80- fn value_count ( & self ) -> u32 {
81- FloatStats :: value_count ( self )
82- }
83-
84- fn average_run_length ( & self ) -> u32 {
85- FloatStats :: average_run_length ( self )
86- }
87-
88- fn source ( & self ) -> & PrimitiveArray {
89- FloatStats :: source ( self )
90- }
91- }
92-
93- /// RLE scheme for float compression.
94- pub const RLE_FLOAT_SCHEME : RLEScheme < FloatRLEConfig > = RLEScheme :: new ( ) ;
56+ pub use crate :: schemes:: rle:: RLE_FLOAT_SCHEME ;
9557
9658impl Scheme for ALPScheme {
9759 fn scheme_name ( & self ) -> & ' static str {
@@ -113,16 +75,14 @@ impl Scheme for ALPScheme {
11375 data : & mut ArrayAndStats ,
11476 ctx : CompressorContext ,
11577 ) -> VortexResult < f64 > {
116- // ALP encodes floats as integers. Without integer compression afterward, the
117- // encoded ints are the same size.
78+ // ALP encodes floats as integers. Without integer compression afterward, the encoded ints
79+ // are the same size.
11880 if ctx. finished_cascading ( ) {
11981 return Ok ( 0.0 ) ;
12082 }
12183
122- let stats = data. float_stats ( ) ;
123-
12484 // We don't support ALP for f16.
125- if stats . source ( ) . ptype ( ) == PType :: F16 {
85+ if data . float_stats ( ) . source ( ) . ptype ( ) == PType :: F16 {
12686 return Ok ( 0.0 ) ;
12787 }
12888
@@ -138,19 +98,16 @@ impl Scheme for ALPScheme {
13898 let stats = data. float_stats ( ) ;
13999
140100 let alp_encoded = alp_encode ( & stats. source ( ) . to_primitive ( ) , None ) ?;
141- let alp = alp_encoded. as_ :: < ALP > ( ) ;
142- let alp_ints = alp. encoded ( ) . to_primitive ( ) ;
143101
144102 // Compress the ALP ints.
145-
146103 let compressed_alp_ints =
147- compressor. compress_child ( & alp_ints . into_array ( ) , & ctx, self . id ( ) , 0 ) ?;
104+ compressor. compress_child ( alp_encoded . encoded ( ) , & ctx, self . id ( ) , 0 ) ?;
148105
149106 // Patches are not compressed. They should be infrequent, and if they are not then we want
150107 // to keep them linear for easy indexing.
151- let patches = alp . patches ( ) . map ( compress_patches) . transpose ( ) ?;
108+ let patches = alp_encoded . patches ( ) . map ( compress_patches) . transpose ( ) ?;
152109
153- Ok ( ALPArray :: new ( compressed_alp_ints, alp . exponents ( ) , patches) . into_array ( ) )
110+ Ok ( ALPArray :: new ( compressed_alp_ints, alp_encoded . exponents ( ) , patches) . into_array ( ) )
154111 }
155112}
156113
@@ -169,9 +126,7 @@ impl Scheme for ALPRDScheme {
169126 data : & mut ArrayAndStats ,
170127 ctx : CompressorContext ,
171128 ) -> VortexResult < f64 > {
172- let stats = data. float_stats ( ) ;
173-
174- if stats. source ( ) . ptype ( ) == PType :: F16 {
129+ if data. float_stats ( ) . source ( ) . ptype ( ) == PType :: F16 {
175130 return Ok ( 0.0 ) ;
176131 }
177132
@@ -235,11 +190,11 @@ impl Scheme for NullDominatedSparseScheme {
235190 let stats = data. float_stats ( ) ;
236191
237192 if stats. value_count ( ) == 0 {
238- // All nulls should use ConstantScheme.
193+ // All-null arrays should use ConstantScheme instead of this scheme.
239194 return Ok ( 0.0 ) ;
240195 }
241196
242- // If the majority is null, will compress well.
197+ // If more than 90% of the values are null, this will compress well.
243198 if stats. null_count ( ) as f64 / stats. source ( ) . len ( ) as f64 > 0.9 {
244199 return Ok ( stats. source ( ) . len ( ) as f64 / stats. value_count ( ) as f64 ) ;
245200 }
@@ -261,12 +216,8 @@ impl Scheme for NullDominatedSparseScheme {
261216
262217 if let Some ( sparse) = sparse_encoded. as_opt :: < Sparse > ( ) {
263218 let indices = sparse. patches ( ) . indices ( ) . to_primitive ( ) . narrow ( ) ?;
264- let compressed_indices = compressor. compress_child (
265- & indices. to_primitive ( ) . into_array ( ) ,
266- & ctx,
267- self . id ( ) ,
268- 0 ,
269- ) ?;
219+ let compressed_indices =
220+ compressor. compress_child ( & indices. into_array ( ) , & ctx, self . id ( ) , 0 ) ?;
270221
271222 SparseArray :: try_new (
272223 compressed_indices,
@@ -326,8 +277,8 @@ mod tests {
326277 use vortex_error:: VortexResult ;
327278 use vortex_fastlanes:: RLE ;
328279
329- use super :: RLE_FLOAT_SCHEME ;
330280 use crate :: BtrBlocksCompressor ;
281+ use crate :: schemes:: rle:: RLE_FLOAT_SCHEME ;
331282
332283 #[ test]
333284 fn test_empty ( ) -> VortexResult < ( ) > {
0 commit comments