11use vortex_array:: array:: PrimitiveArray ;
22use vortex_array:: patches:: Patches ;
3+ use vortex_array:: validity:: Validity ;
34use vortex_array:: variants:: PrimitiveArrayTrait ;
45use vortex_array:: { Array , IntoArray , IntoArrayVariant } ;
6+ use vortex_buffer:: Buffer ;
57use vortex_dtype:: { NativePType , PType } ;
6- use vortex_error:: { vortex_bail, VortexResult , VortexUnwrap } ;
8+ use vortex_error:: { vortex_bail, VortexResult } ;
79use vortex_scalar:: ScalarType ;
810
911use crate :: alp:: { ALPArray , ALPFloat } ;
@@ -24,39 +26,74 @@ macro_rules! match_each_alp_float_ptype {
2426 } )
2527}
2628
27- pub fn alp_encode_components < T > (
/// Encodes a primitive array into an `ALPArray`.
///
/// Obtains the exponents, the encoded array and the optional patches from
/// `alp_encode_components`, then hands them to `ALPArray::try_new`.
///
/// # Errors
///
/// Propagates any error returned by `alp_encode_components` or by `ALPArray::try_new`.
29+ pub fn alp_encode ( parray : & PrimitiveArray ) -> VortexResult < ALPArray > {
30+ let ( exponents, encoded, patches) = alp_encode_components ( parray) ?;
31+ ALPArray :: try_new ( encoded, exponents, patches)
32+ }
33+
/// Computes the ALP components (exponents, encoded array, optional patches) for `parray`.
///
/// Dispatches on the array's `PType`: `F32` and `F64` are forwarded to
/// `alp_encode_components_typed` with the matching native float type.
///
/// # Errors
///
/// Bails with "ALP can only encode f32 and f64" for every other `PType`, and propagates any
/// error returned by the typed implementation.
34+ pub fn alp_encode_components (
35+ parray : & PrimitiveArray ,
36+ ) -> VortexResult < ( Exponents , Array , Option < Patches > ) > {
37+ match parray. ptype ( ) {
38+ PType :: F32 => alp_encode_components_typed :: < f32 > ( parray) ,
39+ PType :: F64 => alp_encode_components_typed :: < f64 > ( parray) ,
40+ _ => vortex_bail ! ( "ALP can only encode f32 and f64" ) ,
41+ }
42+ }
43+
/// Computes the ALP components for a primitive array whose elements are of float type `T`.
///
/// Returns a tuple of:
/// - the exponents chosen by `T::find_best_exponents` over the raw values,
/// - the encoded values wrapped in a `PrimitiveArray` that carries the input's validity,
/// - `Some(Patches)` holding the original values at exceptional positions that are valid in
///   the input, or `None` when no such position remains.
///
/// # Errors
///
/// Propagates any error returned by `values.logical_validity()`.
///
/// NOTE(review): lines marked `-` in this hunk appear to be the previous implementation (and the
/// previous `alp_encode`) retained by the diff rendering; the description above covers the
/// `+`/context lines only — confirm against the applied file.
// NOTE(review): the allow below presumably covers the `as usize` casts applied to the
// exceptional positions further down — confirm.
44+ #[ allow( clippy:: cast_possible_truncation) ]
45+ fn alp_encode_components_typed < T > (
2846 values : & PrimitiveArray ,
29- exponents : Option < Exponents > ,
30- ) -> ( Exponents , Array , Option < Patches > )
47+ ) -> VortexResult < ( Exponents , Array , Option < Patches > ) >
3148where
3249 T : ALPFloat + NativePType ,
3350 T :: ALPInt : NativePType ,
3451 T : ScalarType ,
3552{
36- let ( exponents, encoded, exc_pos, exc) = T :: encode ( values. as_slice :: < T > ( ) , exponents) ;
37- let len = encoded. len ( ) ;
38- (
39- exponents,
40- PrimitiveArray :: new ( encoded, values. validity ( ) ) . into_array ( ) ,
41- ( !exc. is_empty ( ) ) . then ( || {
42- let position_arr = exc_pos. into_array ( ) ;
43- let patch_validity = values. validity ( ) . take ( & position_arr) . vortex_unwrap ( ) ;
44- Patches :: new (
45- len,
46- position_arr,
47- PrimitiveArray :: new ( exc, patch_validity) . into_array ( ) ,
48- )
49- } ) ,
50- )
51- }
// Raw element buffer; it is read regardless of validity.
53+ let values_slice = values. as_slice :: < T > ( ) ;
5254
53- pub fn alp_encode ( parray : & PrimitiveArray ) -> VortexResult < ALPArray > {
54- let ( exponents, encoded, patches) = match parray. ptype ( ) {
55- PType :: F32 => alp_encode_components :: < f32 > ( parray, None ) ,
56- PType :: F64 => alp_encode_components :: < f64 > ( parray, None ) ,
57- _ => vortex_bail ! ( "ALP can only encode f32 and f64" ) ,
// Choose the exponents first, then encode with them; the encoder also reports the positions
// it classified as exceptional.
55+ let exponents = T :: find_best_exponents ( values_slice) ;
56+ let ( encoded, exceptional_positions) = T :: encode_chunkwise ( values. as_slice :: < T > ( ) , exponents) ;
57+
// The encoded array reuses the validity of the input array.
58+ let encoded_array = PrimitiveArray :: new ( encoded, values. validity ( ) ) . into_array ( ) ;
59+
60+ let validity = values. logical_validity ( ) ?;
61+ let n_valid = validity. true_count ( ) ;
62+ // exceptional_positions may contain exceptions at invalid positions (which contain garbage
63+ // data). We remove invalid exceptional positions in order to keep the Patches small.
// Two shortcuts avoid the per-index lookup: with no valid element every exception is dropped,
// and with every element valid the positions are kept unchanged.
64+ let valid_exceptional_positions = if n_valid == 0 {
65+ Buffer :: empty ( )
66+ } else if n_valid == values. len ( ) {
67+ exceptional_positions
68+ } else {
69+ exceptional_positions
70+ . into_iter ( )
71+ // index is a valid usize because it is an index into values.as_slice::<T>()
72+ . filter ( |index| validity. value ( * index as usize ) )
73+ . collect ( )
5874 } ;
59- ALPArray :: try_new ( encoded, exponents, patches)
75+
// Patches are only built when at least one valid exceptional position survived the filter.
76+ let patches = if valid_exceptional_positions. is_empty ( ) {
77+ None
78+ } else {
// Every remaining patch position is valid, so the patch values need no null mask; the variant
// chosen here only follows the nullability of the input dtype.
79+ let patches_validity = if values. dtype ( ) . is_nullable ( ) {
80+ Validity :: AllValid
81+ } else {
82+ Validity :: NonNullable
83+ } ;
// Gather the original (unencoded) values at the surviving positions.
84+ let exceptional_values: Buffer < T > = valid_exceptional_positions
85+ . iter ( )
86+ . map ( |index| values_slice[ * index as usize ] )
87+ . collect ( ) ;
88+ let exceptional_values =
89+ PrimitiveArray :: new ( exceptional_values, patches_validity) . into_array ( ) ;
// The patches are created with the full input length, the positions and their values.
90+ Some ( Patches :: new (
91+ values_slice. len ( ) ,
92+ valid_exceptional_positions. into_array ( ) ,
93+ exceptional_values,
94+ ) )
95+ } ;
96+ Ok ( ( exponents, encoded_array, patches) )
6097}
6198
6299pub fn decompress ( array : ALPArray ) -> VortexResult < PrimitiveArray > {
@@ -140,14 +177,47 @@ mod tests {
140177 . into_primitive( )
141178 . unwrap( )
142179 . as_slice:: <i64 >( ) ,
143- vec![ 1234i64 , 2718 , 1234 , 4000 ] // fill forward
180+ vec![ 1234i64 , 2718 , 3142 , 4000 ]
144181 ) ;
145182 assert_eq ! ( encoded. exponents( ) , Exponents { e: 16 , f: 13 } ) ;
146183
147184 let decoded = decompress ( encoded) . unwrap ( ) ;
148185 assert_eq ! ( values. as_slice( ) , decoded. as_slice:: <f64 >( ) ) ;
149186 }
150187
// Verifies that encoding produces no patches when the input's element at index 2 is marked
// invalid: the encoded array must carry no `Patches`, the encoded integers and exponents must
// match the expected values, and after `decompress` the elements at the valid indices 0, 1 and 3
// must equal the input while index 2 remains invalid.
// NOTE(review): presumably PI (index 2) is the value that would otherwise be exceptional —
// confirm against the companion test without nulls.
188+ #[ test]
189+ #[ allow( clippy:: approx_constant) ] // ALP doesn't like E
190+ fn test_compress_ignores_invalid_exceptional_values ( ) {
191+ let values = buffer ! [ 1.234f64 , 2.718 , f64 :: consts:: PI , 4.0 ] ;
// Only index 2 is null.
192+ let array = PrimitiveArray :: new ( values, Validity :: from_iter ( [ true , true , false , true ] ) ) ;
193+ let encoded = alp_encode ( & array) . unwrap ( ) ;
194+ assert ! ( encoded. patches( ) . is_none( ) ) ;
195+ assert_eq ! (
196+ encoded
197+ . encoded( )
198+ . into_primitive( )
199+ . unwrap( )
200+ . as_slice:: <i64 >( ) ,
201+ vec![ 1234i64 , 2718 , 3142 , 4000 ]
202+ ) ;
203+ assert_eq ! ( encoded. exponents( ) , Exponents { e: 16 , f: 13 } ) ;
204+
// Round-trip: valid positions must compare equal scalar-by-scalar; the null stays null.
205+ let decoded = decompress ( encoded) . unwrap ( ) ;
206+ assert_eq ! (
207+ scalar_at( & decoded, 0 ) . unwrap( ) ,
208+ scalar_at( & array, 0 ) . unwrap( )
209+ ) ;
210+ assert_eq ! (
211+ scalar_at( & decoded, 1 ) . unwrap( ) ,
212+ scalar_at( & array, 1 ) . unwrap( )
213+ ) ;
214+ assert ! ( !decoded. is_valid( 2 ) . unwrap( ) ) ;
215+ assert_eq ! (
216+ scalar_at( & decoded, 3 ) . unwrap( ) ,
217+ scalar_at( & array, 3 ) . unwrap( )
218+ ) ;
219+ }
220+
151221 #[ test]
152222 #[ allow( clippy:: approx_constant) ] // ALP doesn't like E
153223 fn test_nullable_patched_scalar_at ( ) {
@@ -168,6 +238,7 @@ mod tests {
168238 assert ! ( s. is_valid( ) ) ;
169239 }
170240
241+ assert ! ( !encoded. is_valid( 4 ) . unwrap( ) ) ;
171242 let s = scalar_at ( encoded. as_ref ( ) , 4 ) . unwrap ( ) ;
172243 assert ! ( s. is_null( ) ) ;
173244
@@ -190,7 +261,6 @@ mod tests {
190261 ) ;
191262 let alp_arr = alp_encode ( & original) . unwrap ( ) ;
192263 let decompressed = alp_arr. into_primitive ( ) . unwrap ( ) ;
193- assert_eq ! ( original. as_slice:: <f64 >( ) , decompressed. as_slice:: <f64 >( ) ) ;
194264 assert_eq ! ( original. validity( ) , decompressed. validity( ) ) ;
195265 }
196266}
0 commit comments