Skip to content

Commit d404eea

Browse files
authored
feat: PyExpr::evaluate (#5388)
This is primarily useful for diagnosing issues (e.g. #5385). --------- Signed-off-by: Daniel King <[email protected]>
1 parent 39004fc commit d404eea

File tree

5 files changed

+118
-2
lines changed

5 files changed

+118
-2
lines changed

vortex-python/python/vortex/_lib/expr.pyi

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ from typing import TypeAlias, final
66

77
from typing_extensions import override
88

9+
from vortex.type_aliases import IntoArray
10+
11+
from .arrays import Array
912
from .dtype import DType
1013
from .scalar import ScalarPyType
1114

@@ -23,6 +26,7 @@ class Expr:
2326
def __ge__(self, other: IntoExpr) -> Expr: ...
2427
def __and__(self, other: IntoExpr) -> Expr: ...
2528
def __or__(self, other: IntoExpr) -> Expr: ...
29+
def evaluate(self, array: IntoArray) -> Array: ...
2630

2731
def column(name: str) -> Expr: ...
2832
def root() -> Expr: ...

vortex-python/python/vortex/type_aliases.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright the Vortex contributors
3-
4-
from typing import TypeAlias
3+
from typing import TypeAlias, Union # pyright: ignore[reportDeprecated]
54

65
import pyarrow as pa
76

@@ -12,6 +11,7 @@
1211
# TypeAliases do not support __doc__.
1312
IntoProjection: TypeAlias = Expr | list[str] | None
1413
IntoArrayIterator: TypeAlias = Array | ArrayIterator | pa.Table | pa.RecordBatchReader
14+
IntoArray: TypeAlias = Union[Array, "pa.Array[pa.Scalar[pa.DataType]]", pa.Table] # pyright: ignore[reportDeprecated]
1515

1616
# If you make an intersphinx reference to pyarrow.RecordBatchReader in the return type of a function
1717
# *and also* use the IntoProjection type alias in a parameter type, Sphinx thinks the type alias
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use arrow_array::ffi_stream::ArrowArrayStreamReader;
5+
use arrow_array::{RecordBatchReader as _, make_array};
6+
use arrow_data::ArrayData;
7+
use pyo3::exceptions::PyTypeError;
8+
use pyo3::types::PyAnyMethods;
9+
use pyo3::{Bound, FromPyObject, PyAny, PyResult};
10+
use vortex::ArrayRef;
11+
use vortex::arrow::FromArrowArray as _;
12+
use vortex::dtype::DType;
13+
use vortex::dtype::arrow::FromArrowType as _;
14+
use vortex::error::VortexResult;
15+
use vortex::iter::{ArrayIteratorAdapter, ArrayIteratorExt};
16+
17+
use crate::PyVortex;
18+
use crate::arrays::PyArrayRef;
19+
use crate::arrays::native::PyNativeArray;
20+
use crate::arrays::py::PyPythonArray;
21+
use crate::arrow::FromPyArrow;
22+
23+
/// Argument type that converts a Python object (a Vortex array, a `pyarrow.Array`, or a
/// `pyarrow.Table`) into a Vortex [`ArrayRef`].
24+
pub struct PyIntoArray(PyArrayRef);
25+
26+
impl PyIntoArray {
27+
pub fn inner(&self) -> &ArrayRef {
28+
self.0.inner()
29+
}
30+
31+
#[allow(dead_code)]
32+
pub fn into_inner(self) -> ArrayRef {
33+
self.0.into_inner()
34+
}
35+
}
36+
37+
impl<'py> FromPyObject<'py> for PyIntoArray {
38+
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
39+
if ob.is_instance_of::<PyNativeArray>() || ob.is_instance_of::<PyPythonArray>() {
40+
return PyArrayRef::extract_bound(ob).map(PyIntoArray);
41+
}
42+
43+
let py = ob.py();
44+
let pa = py.import("pyarrow")?;
45+
46+
if ob.is_instance(&pa.getattr("Array")?)? {
47+
let arrow_array_data = ArrayData::from_pyarrow_bound(ob)?;
48+
return Ok(PyIntoArray(PyVortex(ArrayRef::from_arrow(
49+
make_array(arrow_array_data).as_ref(),
50+
false,
51+
))));
52+
}
53+
54+
if ob.is_instance(&pa.getattr("Table")?)? {
55+
let arrow_stream = ArrowArrayStreamReader::from_pyarrow_bound(ob)?;
56+
let dtype = DType::from_arrow(arrow_stream.schema());
57+
let vortex_iter = arrow_stream
58+
.into_iter()
59+
.map(|batch_result| -> VortexResult<_> {
60+
Ok(ArrayRef::from_arrow(batch_result?, false))
61+
});
62+
let array = ArrayIteratorAdapter::new(dtype, vortex_iter).read_all()?;
63+
return Ok(PyIntoArray(PyVortex(array)));
64+
}
65+
66+
Err(PyTypeError::new_err(
67+
"Expected an object that can be converted to a Vortex ArrayRef (vortex.Array, pyarrow.Array, or pyarrow.Table)",
68+
))
69+
}
70+
}

vortex-python/src/arrays/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pub(crate) mod builtins;
55
pub(crate) mod compressed;
66
pub(crate) mod fastlanes;
77
pub(crate) mod from_arrow;
8+
pub mod into_array;
89
mod native;
910
pub(crate) mod py;
1011
mod range_to_sequence;

vortex-python/src/expr/mod.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ use pyo3::types::*;
99
use vortex::dtype::{DType, Nullability, PType};
1010
use vortex::expr::{Binary, Expression, GetItem, Operator, VTableExt, and, lit, not};
1111

12+
use crate::arrays::PyArrayRef;
13+
use crate::arrays::into_array::PyIntoArray;
1214
use crate::dtype::PyDType;
1315
use crate::install_module;
1416
use crate::scalar::factory::scalar_helper;
@@ -164,6 +166,45 @@ impl PyExpr {
164166
fn __getitem__(self_: PyRef<'_, Self>, field: String) -> PyResult<PyExpr> {
165167
get_item(field, self_.clone())
166168
}
169+
170+
/// Evaluate this expression on an in-memory array.
171+
///
172+
/// Examples
173+
/// --------
174+
///
175+
/// Extract one column from a Vortex array:
176+
///
177+
/// ```python
178+
/// >>> import vortex.expr as ve
179+
/// >>> import vortex as vx
180+
/// >>> array = ve.column("a").evaluate(vx.array([{"a": 0, "b": "hello"}, {"a": 1, "b": "goodbye"}]))
181+
/// >>> array.to_arrow_array()
182+
/// <pyarrow.lib.Int64Array object at ...>
183+
/// [
184+
/// 0,
185+
/// 1
186+
/// ]
187+
/// ```
188+
///
189+
/// An Arrow array or table passed to `evaluate` is implicitly converted to a Vortex
190+
/// array before the expression is evaluated:
191+
///
192+
/// ```python
/// >>> import pyarrow as pa
193+
/// >>> array = ve.column("a").evaluate(pa.Table.from_arrays(
194+
/// ... [[0, 1, 2, 3]],
195+
/// ... names=['a'],
196+
/// ... ))
197+
/// >>> array
198+
/// <vortex.PrimitiveArray object at ...>
/// ```
199+
///
200+
/// See also
201+
/// --------
202+
/// vortex.open : Open an on-disk Vortex array for scanning with an expression.
203+
/// vortex.VortexFile : An on-disk Vortex array ready to scan with an expression.
204+
/// vortex.VortexFile.scan : Scan an on-disk Vortex array with an expression.
205+
fn evaluate(self_: PyRef<'_, Self>, array: PyIntoArray) -> PyResult<PyArrayRef> {
206+
Ok(PyArrayRef::from(self_.evaluate(array.inner())?))
207+
}
167208
}
168209

169210
/// Create an expression that represents a literal value.

0 commit comments

Comments
 (0)