Skip to content

Commit c2cd0ab

Browse files
committed
wip
Signed-off-by: Joe Isaacs <[email protected]>
1 parent aafe376 commit c2cd0ab

File tree

8 files changed

+581
-54
lines changed

8 files changed

+581
-54
lines changed

encodings/alp/src/alp/compute/compare.rs

Lines changed: 13 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ use std::fmt::Debug;
66
use vortex_array::arrays::ConstantArray;
77
use vortex_array::compute::{CompareKernel, CompareKernelAdapter, Operator, compare};
88
use vortex_array::{Array, ArrayRef, IntoArray, register_kernel};
9-
use vortex_dtype::NativePType;
109
use vortex_error::{VortexResult, vortex_bail};
1110
use vortex_scalar::{PrimitiveScalar, Scalar};
1211

12+
use super::compare_common::{EncodedComparison, encode_for_comparison};
1313
use crate::{ALPArray, ALPFloat, ALPVTable, match_each_alp_float_ptype};
1414

1515
// TODO(joe): add fuzzing.
@@ -54,6 +54,8 @@ register_kernel!(CompareKernelAdapter(ALPVTable).lift());
5454
/// We can compare a scalar to an ALPArray by encoding the scalar into the ALP domain and comparing
5555
/// the encoded value to the encoded values in the ALPArray. There are fixups when the value doesn't
5656
/// encode into the ALP domain.
57+
///
58+
/// This uses the common `encode_for_comparison` logic shared with the expression pushdown optimization.
5759
fn alp_scalar_compare<F: ALPFloat + Into<Scalar>>(
5860
alp: &ALPArray,
5961
value: F,
@@ -69,59 +71,18 @@ where
6971
}
7072

7173
let exponents = alp.exponents();
72-
// If the scalar doesn't fit into the ALP domain,
73-
// it cannot be equal to any values in the encoded array.
74-
let encoded = F::encode_single(value, alp.exponents());
75-
match encoded {
76-
Some(encoded) => {
77-
let s = ConstantArray::new(encoded, alp.len());
74+
75+
// Use the common comparison logic from compare_common.rs
76+
match encode_for_comparison(value, exponents, operator) {
77+
EncodedComparison::Encoded { value, operator } => {
78+
// Compare the encoded array with the encoded scalar value
79+
let s = ConstantArray::new(value, alp.len());
7880
Ok(Some(compare(alp.encoded(), s.as_ref(), operator)?))
7981
}
80-
None => match operator {
81-
// Since this value is not encodable it cannot be equal to any value in the encoded
82-
// array.
83-
Operator::Eq => Ok(Some(ConstantArray::new(false, alp.len()).into_array())),
84-
// Since this value is not encodable it cannot be equal to any value in the encoded
85-
// array, hence != to all values in the encoded array.
86-
Operator::NotEq => Ok(Some(ConstantArray::new(true, alp.len()).into_array())),
87-
Operator::Gt | Operator::Gte => {
88-
// Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan.
89-
// All values in the encoded array are definitely finite
90-
let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value);
91-
if is_not_finite {
92-
Ok(Some(
93-
ConstantArray::new(value.is_sign_negative(), alp.len()).into_array(),
94-
))
95-
} else {
96-
Ok(Some(compare(
97-
alp.encoded(),
98-
ConstantArray::new(F::encode_above(value, exponents), alp.len()).as_ref(),
99-
// Since the encoded value is unencodable gte is equivalent to gt.
100-
// Consider a value v, between two encodable values v_l (just less) and
101-
// v_a (just above), then for all encodable values (u), u > v <=> u >= v_a
102-
Operator::Gte,
103-
)?))
104-
}
105-
}
106-
Operator::Lt | Operator::Lte => {
107-
// Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan.
108-
// All values in the encoded array are definitely finite
109-
let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value);
110-
if is_not_finite {
111-
Ok(Some(
112-
ConstantArray::new(value.is_sign_positive(), alp.len()).into_array(),
113-
))
114-
} else {
115-
Ok(Some(compare(
116-
alp.encoded(),
117-
ConstantArray::new(F::encode_below(value, exponents), alp.len()).as_ref(),
118-
// Since the scalar value is unencodable, lt is equivalent to lte.
119-
// See Gt | Gte for further explanation.
120-
Operator::Lte,
121-
)?))
122-
}
123-
}
124-
},
82+
EncodedComparison::Constant(result) => {
83+
// Return a constant result for all elements
84+
Ok(Some(ConstantArray::new(result, alp.len()).into_array()))
85+
}
12586
}
12687
}
12788

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Common logic for comparing ALP-encoded arrays with scalar values.
5+
//!
6+
//! This module contains shared logic used by both eager comparison (compare.rs)
7+
//! and lazy comparison pushdown (expr_pushdown.rs).
8+
9+
use vortex_array::compute::Operator;
10+
use vortex_dtype::NativePType;
11+
use vortex_scalar::Scalar;
12+
13+
use crate::{ALPFloat, Exponents};
14+
15+
/// Result of encoding a scalar value for comparison with an ALP-encoded array.
16+
#[derive(Debug, Clone, Copy)]
17+
pub(super) enum EncodedComparison<T> {
18+
/// Compare the encoded array against `value` using `operator`; for unencodable finite scalars `operator` is rewritten to `Gte`/`Lte` and may differ from the caller's operator
19+
Encoded { value: T, operator: Operator },
20+
/// Every element yields the same boolean result (unencodable scalar with Eq/NotEq, or an infinite/NaN scalar), so no element-wise comparison is needed
21+
Constant(bool),
22+
}
23+
24+
/// Determine how to compare an ALP-encoded array with a scalar value.
25+
///
26+
/// This encapsulates the core logic for ALP scalar comparisons of the form `array <operator> value`:
27+
/// - If the scalar encodes cleanly in the ALP domain, compare using the encoded value and the given operator
28+
/// - If not encodable, handle special cases based on the operator:
29+
/// - Eq/NotEq: constant result (false/true)
30+
/// - Gt/Gte: constant result if the scalar is infinite or NaN (true iff its sign is negative), otherwise encode_above with the Gte operator
31+
/// - Lt/Lte: constant result if the scalar is infinite or NaN (true iff its sign is positive), otherwise encode_below with the Lte operator
32+
///
33+
/// # Examples
34+
///
35+
/// ```ignore
36+
/// let exponents = Exponents { e: 3, f: 0 };
37+
/// match encode_for_comparison(1.234f32, exponents, Operator::Gt) {
38+
/// EncodedComparison::Encoded { value, operator } => {
39+
/// // Compare encoded array with encoded value using the returned operator
40+
/// }
41+
/// EncodedComparison::Constant(result) => {
42+
/// // Return constant result for all elements
43+
/// }
44+
/// }
45+
/// ```
46+
pub(super) fn encode_for_comparison<F: ALPFloat + Into<Scalar>>(
47+
value: F,
48+
exponents: Exponents,
49+
operator: Operator,
50+
) -> EncodedComparison<F::ALPInt>
51+
where
52+
F::ALPInt: Into<Scalar>,
53+
{
54+
// Try to encode the scalar into the ALP domain; `None` means it has no exact encoding
55+
let encoded = F::encode_single(value, exponents);
56+
57+
match encoded {
58+
Some(encoded_value) => EncodedComparison::Encoded {
59+
value: encoded_value,
60+
operator,
61+
},
62+
None => {
63+
// Value doesn't encode cleanly - handle special cases per operator
64+
match operator {
65+
// Since this value is not encodable it cannot be equal to any value in the encoded array
66+
Operator::Eq => EncodedComparison::Constant(false),
67+
// Since this value is not encodable it differs from every value in the encoded array
68+
Operator::NotEq => EncodedComparison::Constant(true),
69+
Operator::Gt | Operator::Gte => {
70+
// Per IEEE 754 totalOrder semantics: -NaN < -Inf < finite < +Inf < +NaN
71+
// All values in the encoded array are definitely finite
72+
let is_not_finite =
73+
NativePType::is_infinite(value) || NativePType::is_nan(value);
74+
75+
if is_not_finite {
76+
// Comparing finite array values to a non-finite scalar:
77+
// - finite > -Inf is true, finite > +Inf is false
78+
// - finite > -NaN is true, finite > +NaN is false
79+
// Result depends only on the sign of the non-finite scalar
80+
EncodedComparison::Constant(value.is_sign_negative())
81+
} else {
82+
// For finite unencodable values, use encode_above
83+
// Since the scalar value is unencodable, Gt is equivalent to Gte
84+
// Consider a value v between two encodable values v_l (just less) and
85+
// v_a (just above, presumably what encode_above yields), then for all encodable values u: u > v <=> u >= v_a
86+
EncodedComparison::Encoded {
87+
value: F::encode_above(value, exponents),
88+
operator: Operator::Gte,
89+
}
90+
}
91+
}
92+
Operator::Lt | Operator::Lte => {
93+
// Per IEEE 754 totalOrder semantics: -NaN < -Inf < finite < +Inf < +NaN
94+
// All values in the encoded array are definitely finite
95+
let is_not_finite =
96+
NativePType::is_infinite(value) || NativePType::is_nan(value);
97+
98+
if is_not_finite {
99+
// Comparing finite array values to a non-finite scalar:
100+
// - finite < +Inf is true, finite < -Inf is false
101+
// - finite < +NaN is true, finite < -NaN is false
102+
// Result depends only on the sign of the non-finite scalar (opposite of Gt/Gte)
103+
EncodedComparison::Constant(value.is_sign_positive())
104+
} else {
105+
// For finite unencodable values, use encode_below
106+
// Since the scalar value is unencodable, Lt is equivalent to Lte
107+
// Symmetric to Gt | Gte: for all encodable values u, u < v <=> u <= v_l (presumably what encode_below yields)
108+
EncodedComparison::Encoded {
109+
value: F::encode_below(value, exponents),
110+
operator: Operator::Lte,
111+
}
112+
}
113+
}
114+
}
115+
}
116+
}
117+
}

0 commit comments

Comments
 (0)