Skip to content

Commit 1f0321e

Browse files
committed
feat: PyExpr::evaluate
This is primarily useful for diagnosing issues (e.g. #5385). Signed-off-by: Daniel King <[email protected]>
1 parent 35d12a3 commit 1f0321e

File tree

5 files changed

+109
-2
lines changed

5 files changed

+109
-2
lines changed

vortex-python/python/vortex/_lib/expr.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ from datetime import date, datetime
55
from typing import TypeAlias, final
66

77
from typing_extensions import override
8+
from vortex.type_aliases import IntoArray
89

10+
from .arrays import Array
911
from .dtype import DType
1012
from .scalar import ScalarPyType
1113

@@ -23,6 +25,7 @@ class Expr:
2325
def __ge__(self, other: IntoExpr) -> Expr: ...
2426
def __and__(self, other: IntoExpr) -> Expr: ...
2527
def __or__(self, other: IntoExpr) -> Expr: ...
28+
def evaluate(self, array: IntoArray) -> Array: ...
2629

2730
def column(name: str) -> Expr: ...
2831
def root() -> Expr: ...

vortex-python/python/vortex/type_aliases.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright the Vortex contributors
3-
4-
from typing import TypeAlias
3+
from typing import TypeAlias, Union
54

65
import pyarrow as pa
76

@@ -12,6 +11,7 @@
1211
# TypeAliases do not support __doc__.
1312
IntoProjection: TypeAlias = Expr | list[str] | None
1413
IntoArrayIterator: TypeAlias = Array | ArrayIterator | pa.Table | pa.RecordBatchReader
14+
IntoArray: TypeAlias = Union[Array, "pa.Array[pa.Scalar[pa.DataType]]", pa.Table]
1515

1616
# If you make an intersphinx reference to pyarrow.RecordBatchReader in the return type of a function
1717
# *and also* use the IntoProjection type alias in a parameter type, Sphinx thinks the type alias
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
use crate::PyVortex;
2+
use crate::arrays::py::PyPythonArray;
3+
use crate::arrays::{PyArrayRef, native::PyNativeArray};
4+
use crate::arrow::FromPyArrow;
5+
use arrow_array::ffi_stream::ArrowArrayStreamReader;
6+
use arrow_array::{RecordBatchReader as _, make_array};
7+
use arrow_data::ArrayData;
8+
use pyo3::{Bound, FromPyObject, PyAny, PyResult, exceptions::PyTypeError, types::PyAnyMethods};
9+
use vortex::dtype::DType;
10+
use vortex::dtype::arrow::FromArrowType as _;
11+
use vortex::error::VortexResult;
12+
use vortex::iter::{ArrayIteratorAdapter, ArrayIteratorExt};
13+
use vortex::{ArrayRef, arrow::FromArrowArray as _};
14+
15+
/// Extraction wrapper that turns a Python argument (a `vortex.Array`, `pyarrow.Array`, or `pyarrow.Table`) into a Vortex [`ArrayRef`].
16+
pub struct PyIntoArray(PyArrayRef);
17+
18+
impl PyIntoArray {
19+
pub fn inner(&self) -> &ArrayRef {
20+
self.0.inner()
21+
}
22+
23+
#[allow(dead_code)]
24+
pub fn into_inner(self) -> ArrayRef {
25+
self.0.into_inner()
26+
}
27+
}
28+
29+
impl<'py> FromPyObject<'py> for PyIntoArray {
30+
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
31+
if ob.is_instance_of::<PyNativeArray>() || ob.is_instance_of::<PyPythonArray>() {
32+
return PyArrayRef::extract_bound(ob).map(PyIntoArray);
33+
}
34+
35+
let py = ob.py();
36+
let pa = py.import("pyarrow")?;
37+
38+
if ob.is_instance(&pa.getattr("Array")?)? {
39+
let arrow_array_data = ArrayData::from_pyarrow_bound(ob)?;
40+
return Ok(PyIntoArray(PyVortex(ArrayRef::from_arrow(
41+
make_array(arrow_array_data).as_ref(),
42+
false,
43+
))));
44+
}
45+
46+
if ob.is_instance(&pa.getattr("Table")?)? {
47+
let arrow_stream = ArrowArrayStreamReader::from_pyarrow_bound(ob)?;
48+
let dtype = DType::from_arrow(arrow_stream.schema());
49+
let vortex_iter = arrow_stream
50+
.into_iter()
51+
.map(|batch_result| -> VortexResult<_> {
52+
Ok(ArrayRef::from_arrow(batch_result?, false))
53+
});
54+
let array = ArrayIteratorAdapter::new(dtype, vortex_iter).read_all()?;
55+
return Ok(PyIntoArray(PyVortex(array)));
56+
}
57+
58+
Err(PyTypeError::new_err(
59+
"Expected an object that can be converted to a Vortex ArrayRef (vortex.Array, pyarrow.Array, or pyarrow.Table)",
60+
))
61+
}
62+
}

vortex-python/src/arrays/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pub(crate) mod builtins;
55
pub(crate) mod compressed;
66
pub(crate) mod fastlanes;
77
pub(crate) mod from_arrow;
8+
pub mod into_array;
89
mod native;
910
pub(crate) mod py;
1011
mod range_to_sequence;

vortex-python/src/expr/mod.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ use pyo3::types::*;
99
use vortex::dtype::{DType, Nullability, PType};
1010
use vortex::expr::{Binary, Expression, GetItem, Operator, VTableExt, and, lit, not};
1111

12+
use crate::arrays::PyArrayRef;
13+
use crate::arrays::into_array::PyIntoArray;
1214
use crate::dtype::PyDType;
1315
use crate::install_module;
1416
use crate::scalar::factory::scalar_helper;
@@ -164,6 +166,45 @@ impl PyExpr {
164166
fn __getitem__(self_: PyRef<'_, Self>, field: String) -> PyResult<PyExpr> {
165167
get_item(field, self_.clone())
166168
}
169+
170+
/// Evaluate this expression on an in-memory array.
171+
///
172+
/// Examples
173+
/// --------
174+
///
175+
/// Extract one column from a Vortex array:
176+
///
177+
/// ```python
178+
/// >>> import vortex.expr as ve
179+
/// >>> import vortex as vx
180+
/// >>> array = ve.column("a").evaluate(vx.array([{"a": 0, "b": "hello"}, {"a": 1, "b": "goodbye"}]))
181+
/// >>> array.to_arrow_array()
182+
/// <pyarrow.lib.Int64Array object at ...>
183+
/// [
184+
/// 0,
185+
/// 1
186+
/// ]
187+
/// ```
188+
///
189+
/// Evaluating an expression on an Arrow array or table implicitly converts it to a Vortex
190+
/// array:
191+
///
192+
/// >>> import pyarrow as pa
193+
/// >>> array = ve.column("a").evaluate(pa.Table.from_arrays(
194+
/// ... [[0, 1, 2, 3]],
195+
/// ... names=['a'],
196+
/// ... ))
197+
/// >>> array
198+
/// <vortex.PrimitiveArray object at ...>
199+
///
200+
/// See also
201+
/// --------
202+
/// vortex.open : Open an on-disk Vortex array for scanning with an expression.
203+
/// vortex.VortexFile : An on-disk Vortex array ready to scan with an expression.
204+
/// vortex.VortexFile.scan : Scan an on-disk Vortex array with an expression.
205+
fn evaluate(self_: PyRef<'_, Self>, array: PyIntoArray) -> PyResult<PyArrayRef> {
206+
Ok(PyArrayRef::from(self_.evaluate(array.inner())?))
207+
}
167208
}
168209

169210
/// Create an expression that represents a literal value.

0 commit comments

Comments
 (0)