|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +use vortex_dtype::DType; |
| 5 | +use vortex_error::{VortexResult, vortex_ensure}; |
| 6 | + |
| 7 | +use crate::expr::Expression; |
| 8 | +use crate::stats::ArrayStats; |
| 9 | +use crate::{Array, ArrayRef}; |
| 10 | + |
| 11 | +/// A array that represents an expression to be evaluated over a child array. |
| 12 | +/// |
| 13 | +/// `ExprArray` enables deferred evaluation of expressions by wrapping a child array |
| 14 | +/// with an expression that operates on it. The expression is not evaluated until the |
| 15 | +/// array is canonicalized/executed. |
| 16 | +/// |
| 17 | +/// # Examples |
| 18 | +/// |
| 19 | +/// ```ignore |
| 20 | +/// // Create an expression that filters an integer array |
| 21 | +/// let data = PrimitiveArray::from_iter([1, 2, 3, 4, 5]); |
| 22 | +/// let expr = gt(root(), lit(3)); // $ > 3 |
| 23 | +/// let expr_array = ExprArray::new_infer_dtype(data.into_array(), expr)?; |
| 24 | +/// |
| 25 | +/// // The expression is evaluated when canonicalized |
| 26 | +/// let result = expr_array.to_canonical(); // Returns BoolArray([false, false, false, true, true]) |
| 27 | +/// ``` |
| 28 | +/// |
| 29 | +/// # Type Safety |
| 30 | +/// |
| 31 | +/// The `dtype` field must match `expr.return_dtype(child.dtype())`. This invariant |
| 32 | +/// is enforced by the safe constructors ([`try_new`](ExprArray::try_new) and |
| 33 | +/// [`new_infer_dtype`](ExprArray::new_infer_dtype)) but can be bypassed |
| 34 | +/// with [`unchecked_new`](ExprArray::unchecked_new) for performance-critical code. |
| 35 | +#[derive(Clone, Debug)] |
| 36 | +pub struct ExprArray { |
| 37 | + /// The underlying array that the expression will operate on. |
| 38 | + pub(super) child: ArrayRef, |
| 39 | + /// The expression to evaluate over the child array. |
| 40 | + pub(super) expr: Expression, |
| 41 | + /// The data type of the result after evaluating the expression. |
| 42 | + pub(super) dtype: DType, |
| 43 | + /// Statistics about the resulting array (may be computed lazily). |
| 44 | + pub(super) stats: ArrayStats, |
| 45 | +} |
| 46 | + |
| 47 | +impl ExprArray { |
| 48 | + /// Creates a new ExprArray with the dtype validated to match the expression's return type. |
| 49 | + pub fn try_new(child: ArrayRef, expr: Expression, dtype: DType) -> VortexResult<Self> { |
| 50 | + let expected_dtype = expr.return_dtype(child.dtype())?; |
| 51 | + vortex_ensure!( |
| 52 | + dtype == expected_dtype, |
| 53 | + "ExprArray dtype mismatch: expected {}, got {}", |
| 54 | + expected_dtype, |
| 55 | + dtype |
| 56 | + ); |
| 57 | + Ok(unsafe { Self::unchecked_new(child, expr, dtype) }) |
| 58 | + } |
| 59 | + |
| 60 | + /// Create a new ExprArray without validating that the dtype matches the expression's return type. |
| 61 | + /// |
| 62 | + /// # Safety |
| 63 | + /// |
| 64 | + /// The caller must ensure that `dtype` matches `expr.return_dtype(child.dtype())`. |
| 65 | + /// Violating this invariant may lead to incorrect results or panics when the array is used. |
| 66 | + pub unsafe fn unchecked_new(child: ArrayRef, expr: Expression, dtype: DType) -> Self { |
| 67 | + Self { |
| 68 | + child, |
| 69 | + expr, |
| 70 | + dtype, |
| 71 | + // TODO(joe): Propagate or compute statistics from the child array and expression. |
| 72 | + stats: ArrayStats::default(), |
| 73 | + } |
| 74 | + } |
| 75 | + |
| 76 | + /// Creates a new ExprArray with the dtype inferred from the expression and child. |
| 77 | + pub fn new_infer_dtype(child: ArrayRef, expr: Expression) -> VortexResult<Self> { |
| 78 | + let dtype = expr.return_dtype(child.dtype())?; |
| 79 | + Ok(unsafe { Self::unchecked_new(child, expr, dtype) }) |
| 80 | + } |
| 81 | + |
| 82 | + pub fn child(&self) -> &ArrayRef { |
| 83 | + &self.child |
| 84 | + } |
| 85 | + |
| 86 | + pub fn expr(&self) -> &Expression { |
| 87 | + &self.expr |
| 88 | + } |
| 89 | +} |
0 commit comments