@@ -11,11 +11,7 @@ mod compute;
1111
1212pub use array:: * ;
1313pub use compress:: * ;
14- use vortex_array:: array:: PrimitiveArray ;
15- use vortex_array:: validity:: Validity ;
16- use vortex_array:: IntoArrayData as _;
1714use vortex_buffer:: { Buffer , BufferMut } ;
18- use vortex_error:: VortexResult ;
1915
2016const SAMPLE_SIZE : usize = 32 ;
2117
@@ -59,47 +55,25 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
5955 /// Convert from the integer type back to the float type using `as`.
6056 fn from_int ( n : Self :: ALPInt ) -> Self ;
6157
62- fn sampled_find_best_exponents (
63- values : & [ Self ] ,
64- validity : & Validity ,
65- ) -> VortexResult < Exponents > {
66- if values. len ( ) <= SAMPLE_SIZE {
67- Self :: find_best_exponents ( values, validity)
68- } else {
69- let validity = validity. take (
70- & PrimitiveArray :: from_iter (
71- ( 0 ..values. len ( ) )
72- . step_by ( values. len ( ) / SAMPLE_SIZE )
73- . map ( |x| x as u64 )
74- . take ( SAMPLE_SIZE ) ,
75- )
76- . into_array ( ) ,
77- ) ?;
78- let values = values
58+ fn find_best_exponents ( values : & [ Self ] ) -> Exponents {
59+ let mut best_exp = Exponents { e : 0 , f : 0 } ;
60+ let mut best_nbytes: usize = usize:: MAX ;
61+
62+ let sample = ( values. len ( ) > SAMPLE_SIZE ) . then ( || {
63+ values
7964 . iter ( )
8065 . step_by ( values. len ( ) / SAMPLE_SIZE )
8166 . take ( SAMPLE_SIZE )
8267 . cloned ( )
83- . collect_vec ( ) ;
84- Self :: find_best_exponents ( & values, & validity)
85- }
86- }
87-
88- fn find_best_exponents ( values : & [ Self ] , validity : & Validity ) -> VortexResult < Exponents > {
89- let mut best_exp = Exponents { e : 0 , f : 0 } ;
90- let mut best_nbytes: usize = usize:: MAX ;
91-
92- assert ! (
93- values. len( ) <= SAMPLE_SIZE ,
94- "{} <= {}" ,
95- values. len( ) ,
96- SAMPLE_SIZE
97- ) ;
68+ . collect_vec ( )
69+ } ) ;
9870
9971 for e in ( 0 ..Self :: MAX_EXPONENT ) . rev ( ) {
10072 for f in 0 ..e {
101- let ( _, encoded, _, exc_patches) =
102- Self :: encode ( values, validity, Some ( Exponents { e, f } ) ) ?;
73+ let ( _, encoded, _, exc_patches) = Self :: encode (
74+ sample. as_deref ( ) . unwrap_or ( values) ,
75+ Some ( Exponents { e, f } ) ,
76+ ) ;
10377
10478 let size = Self :: estimate_encoded_size ( & encoded, & exc_patches) ;
10579 if size < best_nbytes {
@@ -111,7 +85,7 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
11185 }
11286 }
11387
114- Ok ( best_exp)
88+ best_exp
11589 }
11690
11791 #[ inline]
@@ -139,16 +113,11 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
139113 encoded_bytes + patch_bytes
140114 }
141115
142- #[ allow( clippy:: type_complexity) ]
143116 fn encode (
144117 values : & [ Self ] ,
145- validity : & Validity ,
146118 exponents : Option < Exponents > ,
147- ) -> VortexResult < ( Exponents , Buffer < Self :: ALPInt > , Buffer < u64 > , Buffer < Self > ) > {
148- let exponents = match exponents {
149- Some ( exponents) => exponents,
150- None => Self :: sampled_find_best_exponents ( values, validity) ?,
151- } ;
119+ ) -> ( Exponents , Buffer < Self :: ALPInt > , Buffer < u64 > , Buffer < Self > ) {
120+ let exp = exponents. unwrap_or_else ( || Self :: find_best_exponents ( values) ) ;
152121
153122 let mut encoded_output = BufferMut :: < Self :: ALPInt > :: with_capacity ( values. len ( ) ) ;
154123 let mut patch_indices = BufferMut :: < u64 > :: with_capacity ( values. len ( ) ) ;
@@ -161,21 +130,20 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
161130 for chunk in values. chunks ( encode_chunk_size) {
162131 encode_chunk_unchecked (
163132 chunk,
164- exponents ,
133+ exp ,
165134 & mut encoded_output,
166135 & mut patch_indices,
167136 & mut patch_values,
168137 & mut fill_value,
169- validity,
170138 ) ;
171139 }
172140
173- Ok ( (
174- exponents ,
141+ (
142+ exp ,
175143 encoded_output. freeze ( ) ,
176144 patch_indices. freeze ( ) ,
177145 patch_values. freeze ( ) ,
178- ) )
146+ )
179147 }
180148
181149 #[ inline]
@@ -224,7 +192,6 @@ fn encode_chunk_unchecked<T: ALPFloat>(
224192 patch_indices : & mut BufferMut < u64 > ,
225193 patch_values : & mut BufferMut < T > ,
226194 fill_value : & mut Option < T :: ALPInt > ,
227- validity : & Validity ,
228195) {
229196 let num_prev_encoded = encoded_output. len ( ) ;
230197 let num_prev_patches = patch_indices. len ( ) ;
@@ -258,13 +225,12 @@ fn encode_chunk_unchecked<T: ALPFloat>(
258225 // write() is only safe to call more than once because the values are primitive (i.e., Drop is a no-op)
259226 patch_indices_mut[ chunk_patch_index] . write ( i as u64 ) ;
260227 patch_values_mut[ chunk_patch_index] . write ( chunk[ i - num_prev_encoded] ) ;
261- let is_valid_and_an_exception =
262- ( decoded != chunk[ i - num_prev_encoded] ) && validity. is_valid ( i) ;
263- chunk_patch_index += is_valid_and_an_exception as usize ;
228+ chunk_patch_index += ( decoded != chunk[ i - num_prev_encoded] ) as usize ;
264229 }
230+ assert_eq ! ( chunk_patch_index, chunk_patch_count) ;
265231 unsafe {
266- patch_indices. set_len ( num_prev_patches + chunk_patch_index ) ;
267- patch_values. set_len ( num_prev_patches + chunk_patch_index ) ;
232+ patch_indices. set_len ( num_prev_patches + chunk_patch_count ) ;
233+ patch_values. set_len ( num_prev_patches + chunk_patch_count ) ;
268234 }
269235 }
270236
0 commit comments