@@ -11,11 +11,7 @@ mod compute;
1111
1212pub use array:: * ;
1313pub use compress:: * ;
14- use vortex_array:: array:: PrimitiveArray ;
15- use vortex_array:: validity:: Validity ;
16- use vortex_array:: IntoArrayData as _;
1714use vortex_buffer:: { Buffer , BufferMut } ;
18- use vortex_error:: VortexResult ;
1915
2016const SAMPLE_SIZE : usize = 32 ;
2117
@@ -59,47 +55,24 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
5955 /// Convert from the integer type back to the float type using `as`.
6056 fn from_int ( n : Self :: ALPInt ) -> Self ;
6157
62- fn sampled_find_best_exponents (
63- values : & [ Self ] ,
64- validity : & Validity ,
65- ) -> VortexResult < Exponents > {
66- if values. len ( ) <= SAMPLE_SIZE {
67- Self :: find_best_exponents ( values, validity)
68- } else {
69- let validity = validity. take (
70- & PrimitiveArray :: from_iter (
71- ( 0 ..values. len ( ) )
72- . step_by ( values. len ( ) / SAMPLE_SIZE )
73- . map ( |x| x as u64 )
74- . take ( SAMPLE_SIZE ) ,
75- )
76- . into_array ( ) ,
77- ) ?;
78- let values = values
79- . iter ( )
80- . step_by ( values. len ( ) / SAMPLE_SIZE )
81- . take ( SAMPLE_SIZE )
82- . cloned ( )
83- . collect_vec ( ) ;
84- Self :: find_best_exponents ( & values, & validity)
85- }
86- }
87-
88- fn find_best_exponents ( values : & [ Self ] , validity : & Validity ) -> VortexResult < Exponents > {
58+ fn find_best_exponents ( values : & [ Self ] ) -> Exponents {
8959 let mut best_exp = Exponents { e : 0 , f : 0 } ;
9060 let mut best_nbytes: usize = usize:: MAX ;
9161
92- assert ! (
93- values. len( ) <= SAMPLE_SIZE ,
94- "{} <= {}" ,
95- values. len( ) ,
96- SAMPLE_SIZE
97- ) ;
62+ let sample = ( values. len ( ) > SAMPLE_SIZE ) . then ( || {
63+ values
64+ . iter ( )
65+ . step_by ( values. len ( ) / SAMPLE_SIZE )
66+ . cloned ( )
67+ . collect_vec ( )
68+ } ) ;
9869
9970 for e in ( 0 ..Self :: MAX_EXPONENT ) . rev ( ) {
10071 for f in 0 ..e {
101- let ( _, encoded, _, exc_patches) =
102- Self :: encode ( values, validity, Some ( Exponents { e, f } ) ) ?;
72+ let ( _, encoded, _, exc_patches) = Self :: encode (
73+ sample. as_deref ( ) . unwrap_or ( values) ,
74+ Some ( Exponents { e, f } ) ,
75+ ) ;
10376
10477 let size = Self :: estimate_encoded_size ( & encoded, & exc_patches) ;
10578 if size < best_nbytes {
@@ -111,7 +84,7 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
11184 }
11285 }
11386
114- Ok ( best_exp)
87+ best_exp
11588 }
11689
11790 #[ inline]
@@ -139,16 +112,11 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
139112 encoded_bytes + patch_bytes
140113 }
141114
142- #[ allow( clippy:: type_complexity) ]
143115 fn encode (
144116 values : & [ Self ] ,
145- validity : & Validity ,
146117 exponents : Option < Exponents > ,
147- ) -> VortexResult < ( Exponents , Buffer < Self :: ALPInt > , Buffer < u64 > , Buffer < Self > ) > {
148- let exponents = match exponents {
149- Some ( exponents) => exponents,
150- None => Self :: sampled_find_best_exponents ( values, validity) ?,
151- } ;
118+ ) -> ( Exponents , Buffer < Self :: ALPInt > , Buffer < u64 > , Buffer < Self > ) {
119+ let exp = exponents. unwrap_or_else ( || Self :: find_best_exponents ( values) ) ;
152120
153121 let mut encoded_output = BufferMut :: < Self :: ALPInt > :: with_capacity ( values. len ( ) ) ;
154122 let mut patch_indices = BufferMut :: < u64 > :: with_capacity ( values. len ( ) ) ;
@@ -161,21 +129,20 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
161129 for chunk in values. chunks ( encode_chunk_size) {
162130 encode_chunk_unchecked (
163131 chunk,
164- exponents ,
132+ exp ,
165133 & mut encoded_output,
166134 & mut patch_indices,
167135 & mut patch_values,
168136 & mut fill_value,
169- validity,
170137 ) ;
171138 }
172139
173- Ok ( (
174- exponents ,
140+ (
141+ exp ,
175142 encoded_output. freeze ( ) ,
176143 patch_indices. freeze ( ) ,
177144 patch_values. freeze ( ) ,
178- ) )
145+ )
179146 }
180147
181148 #[ inline]
@@ -224,7 +191,6 @@ fn encode_chunk_unchecked<T: ALPFloat>(
224191 patch_indices : & mut BufferMut < u64 > ,
225192 patch_values : & mut BufferMut < T > ,
226193 fill_value : & mut Option < T :: ALPInt > ,
227- validity : & Validity ,
228194) {
229195 let num_prev_encoded = encoded_output. len ( ) ;
230196 let num_prev_patches = patch_indices. len ( ) ;
@@ -258,13 +224,12 @@ fn encode_chunk_unchecked<T: ALPFloat>(
258224 // write() is only safe to call more than once because the values are primitive (i.e., Drop is a no-op)
259225 patch_indices_mut[ chunk_patch_index] . write ( i as u64 ) ;
260226 patch_values_mut[ chunk_patch_index] . write ( chunk[ i - num_prev_encoded] ) ;
261- let is_valid_and_an_exception =
262- ( decoded != chunk[ i - num_prev_encoded] ) && validity. is_valid ( i) ;
263- chunk_patch_index += is_valid_and_an_exception as usize ;
227+ chunk_patch_index += ( decoded != chunk[ i - num_prev_encoded] ) as usize ;
264228 }
229+ assert_eq ! ( chunk_patch_index, chunk_patch_count) ;
265230 unsafe {
266- patch_indices. set_len ( num_prev_patches + chunk_patch_index ) ;
267- patch_values. set_len ( num_prev_patches + chunk_patch_index ) ;
231+ patch_indices. set_len ( num_prev_patches + chunk_patch_count ) ;
232+ patch_values. set_len ( num_prev_patches + chunk_patch_count ) ;
268233 }
269234 }
270235
0 commit comments