|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +//! Common logic for comparing ALP-encoded arrays with scalar values. |
| 5 | +//! |
| 6 | +//! This module contains shared logic used by both eager comparison (compare.rs) |
| 7 | +//! and lazy comparison pushdown (expr_pushdown.rs). |
| 8 | +
|
| 9 | +use vortex_array::compute::Operator; |
| 10 | +use vortex_dtype::NativePType; |
| 11 | +use vortex_scalar::Scalar; |
| 12 | + |
| 13 | +use crate::{ALPFloat, Exponents}; |
| 14 | + |
| 15 | +/// Result of encoding a scalar value for comparison with an ALP-encoded array. |
| 16 | +#[derive(Debug, Clone, Copy)] |
| 17 | +pub(super) enum EncodedComparison<T> { |
| 18 | + /// The scalar encoded cleanly - compare using the encoded value with the original operator |
| 19 | + Encoded { value: T, operator: Operator }, |
| 20 | + /// The scalar doesn't encode - return a constant result for all elements |
| 21 | + Constant(bool), |
| 22 | +} |
| 23 | + |
| 24 | +/// Determine how to compare an ALP-encoded array with a scalar value. |
| 25 | +/// |
| 26 | +/// This encapsulates the core logic for ALP scalar comparisons: |
| 27 | +/// - If the scalar encodes cleanly in the ALP domain, compare using the encoded value |
| 28 | +/// - If not encodable, handle special cases based on the operator: |
| 29 | +/// - Eq/NotEq: constant result (false/true) |
| 30 | +/// - Gt/Gte: use encode_above with Gte operator (handles IEEE 754 totalOrder) |
| 31 | +/// - Lt/Lte: use encode_below with Lte operator (handles IEEE 754 totalOrder) |
| 32 | +/// |
| 33 | +/// # Examples |
| 34 | +/// |
| 35 | +/// ```ignore |
| 36 | +/// let exponents = Exponents { e: 3, f: 0 }; |
| 37 | +/// match encode_for_comparison(1.234f32, exponents, Operator::Gt) { |
| 38 | +/// EncodedComparison::Encoded { value, operator } => { |
| 39 | +/// // Compare encoded array with encoded value using operator |
| 40 | +/// } |
| 41 | +/// EncodedComparison::Constant(result) => { |
| 42 | +/// // Return constant result for all elements |
| 43 | +/// } |
| 44 | +/// } |
| 45 | +/// ``` |
| 46 | +pub(super) fn encode_for_comparison<F: ALPFloat + Into<Scalar>>( |
| 47 | + value: F, |
| 48 | + exponents: Exponents, |
| 49 | + operator: Operator, |
| 50 | +) -> EncodedComparison<F::ALPInt> |
| 51 | +where |
| 52 | + F::ALPInt: Into<Scalar>, |
| 53 | +{ |
| 54 | + // Try to encode the scalar into the ALP domain |
| 55 | + let encoded = F::encode_single(value, exponents); |
| 56 | + |
| 57 | + match encoded { |
| 58 | + Some(encoded_value) => EncodedComparison::Encoded { |
| 59 | + value: encoded_value, |
| 60 | + operator, |
| 61 | + }, |
| 62 | + None => { |
| 63 | + // Value doesn't encode cleanly - handle special cases |
| 64 | + match operator { |
| 65 | + // Since this value is not encodable it cannot be equal to any value in the encoded array |
| 66 | + Operator::Eq => EncodedComparison::Constant(false), |
| 67 | + // Since this value is not encodable it is not equal to all values in the encoded array |
| 68 | + Operator::NotEq => EncodedComparison::Constant(true), |
| 69 | + Operator::Gt | Operator::Gte => { |
| 70 | + // Per IEEE 754 totalOrder semantics: -NaN < -Inf < finite < +Inf < +NaN |
| 71 | + // All values in the encoded array are definitely finite |
| 72 | + let is_not_finite = |
| 73 | + NativePType::is_infinite(value) || NativePType::is_nan(value); |
| 74 | + |
| 75 | + if is_not_finite { |
| 76 | + // Comparing finite values to non-finite: |
| 77 | + // - finite > -Inf is true, finite > +Inf is false |
| 78 | + // - finite > -NaN is true, finite > +NaN is false |
| 79 | + // Result depends on the sign of the non-finite value |
| 80 | + EncodedComparison::Constant(value.is_sign_negative()) |
| 81 | + } else { |
| 82 | + // For finite unencodable values, use encode_above |
| 83 | + // Since the encoded value is unencodable, Gte is equivalent to Gt |
| 84 | + // Consider a value v between two encodable values v_l (just less) and |
| 85 | + // v_a (just above), then for all encodable values u: v > u <=> v_a >= u |
| 86 | + EncodedComparison::Encoded { |
| 87 | + value: F::encode_above(value, exponents), |
| 88 | + operator: Operator::Gte, |
| 89 | + } |
| 90 | + } |
| 91 | + } |
| 92 | + Operator::Lt | Operator::Lte => { |
| 93 | + // Per IEEE 754 totalOrder semantics: -NaN < -Inf < finite < +Inf < +NaN |
| 94 | + // All values in the encoded array are definitely finite |
| 95 | + let is_not_finite = |
| 96 | + NativePType::is_infinite(value) || NativePType::is_nan(value); |
| 97 | + |
| 98 | + if is_not_finite { |
| 99 | + // Comparing finite values to non-finite: |
| 100 | + // - finite < +Inf is true, finite < -Inf is false |
| 101 | + // - finite < +NaN is true, finite < -NaN is false |
| 102 | + // Result depends on the sign of the non-finite value (opposite of Gt/Gte) |
| 103 | + EncodedComparison::Constant(value.is_sign_positive()) |
| 104 | + } else { |
| 105 | + // For finite unencodable values, use encode_below |
| 106 | + // Since the encoded value is unencodable, Lte is equivalent to Lt |
| 107 | + // See Gt | Gte for further explanation |
| 108 | + EncodedComparison::Encoded { |
| 109 | + value: F::encode_below(value, exponents), |
| 110 | + operator: Operator::Lte, |
| 111 | + } |
| 112 | + } |
| 113 | + } |
| 114 | + } |
| 115 | + } |
| 116 | + } |
| 117 | +} |
0 commit comments