Skip to content

Commit b524763

Browse files
authored
Cache python class imports (#7191)
## Summary While reading the arrow-rs changelog I ran into [this](apache/arrow-rs#9439) PR. It seems `PyOnceLock` was built for exactly this use case, and they have some very promising benchmarks [there](https://github.com/apache/arrow-rs/pull/9439/changes#r2955818325). I've also wrapped all static strings that are passed into `pyo3` with the `intern` macro, which prevents allocating a new `PyString` on every call. --------- Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 66db19a commit b524763

File tree

12 files changed

+139
-57
lines changed

12 files changed

+139
-57
lines changed

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-python/build.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
fn main() {
5-
let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
6-
7-
if target_os == "macos" {
5+
#[cfg(target_os = "macos")]
6+
{
87
// For pyo3 to successfully link on macOS.
98
// See https://stackoverflow.com/a/77382609
109
println!("cargo:rustc-link-arg=-undefined");

vortex-python/src/arrays/from_arrow.rs

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use arrow_data::ArrayData as ArrowArrayData;
88
use arrow_schema::DataType;
99
use arrow_schema::Field;
1010
use pyo3::exceptions::PyValueError;
11+
use pyo3::intern;
1112
use pyo3::prelude::*;
1213
use vortex::array::ArrayRef;
1314
use vortex::array::IntoArray;
@@ -20,23 +21,26 @@ use vortex::error::VortexResult;
2021

2122
use crate::arrays::PyArrayRef;
2223
use crate::arrow::FromPyArrow;
24+
use crate::classes::array_class;
25+
use crate::classes::chunked_array_class;
26+
use crate::classes::table_class;
2327
use crate::error::PyVortexError;
2428
use crate::error::PyVortexResult;
2529

2630
/// Convert an Arrow object to a Vortex array.
2731
pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> PyVortexResult<PyArrayRef> {
28-
let pa = obj.py().import("pyarrow")?;
29-
let pa_array = pa.getattr("Array")?;
30-
let chunked_array = pa.getattr("ChunkedArray")?;
31-
let table = pa.getattr("Table")?;
32+
let py = obj.py();
33+
let pa_array = array_class(py)?;
34+
let chunked_array = chunked_array_class(py)?;
35+
let table = table_class(py)?;
3236

33-
if obj.is_instance(&pa_array)? {
37+
if obj.is_instance(pa_array)? {
3438
let arrow_array = ArrowArrayData::from_pyarrow(&obj.as_borrowed()).map(make_array)?;
3539
let is_nullable = arrow_array.is_nullable();
3640
let enc_array = ArrayRef::from_arrow(arrow_array.as_ref(), is_nullable)?;
3741
Ok(PyArrayRef::from(enc_array))
38-
} else if obj.is_instance(&chunked_array)? {
39-
let chunks: Vec<Bound<PyAny>> = obj.getattr("chunks")?.extract()?;
42+
} else if obj.is_instance(chunked_array)? {
43+
let chunks: Vec<Bound<PyAny>> = obj.getattr(intern!(py, "chunks"))?.extract()?;
4044
let encoded_chunks = chunks
4145
.iter()
4246
.map(|a| {
@@ -45,13 +49,13 @@ pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> PyVortexResult<PyArra
4549
})
4650
.collect::<PyVortexResult<Vec<_>>>()?;
4751
let dtype: DType = obj
48-
.getattr("type")
52+
.getattr(intern!(py, "type"))
4953
.and_then(|v| DataType::from_pyarrow(&v.as_borrowed()))
5054
.map(|dt| DType::from_arrow(&Field::new("_", dt, false)))?;
5155
Ok(PyArrayRef::from(
5256
ChunkedArray::try_new(encoded_chunks, dtype)?.into_array(),
5357
))
54-
} else if obj.is_instance(&table)? {
58+
} else if obj.is_instance(table)? {
5559
let array_stream = ArrowArrayStreamReader::from_pyarrow(&obj.as_borrowed())?;
5660
let dtype = DType::from_arrow(array_stream.schema());
5761
let chunks = array_stream

vortex-python/src/arrays/mod.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use pyo3::IntoPyObjectExt;
1515
use pyo3::exceptions::PyIndexError;
1616
use pyo3::exceptions::PyTypeError;
1717
use pyo3::exceptions::PyValueError;
18+
use pyo3::intern;
1819
use pyo3::prelude::*;
1920
use pyo3::types::PyDict;
2021
use pyo3::types::PyList;
@@ -742,7 +743,7 @@ impl PyArray {
742743
let dtype_buffers: Vec<Vec<u8>> = dtype_buffers.iter().map(|b| b.to_vec()).collect();
743744

744745
let vortex_module = PyModule::import(py, "vortex")?;
745-
let unpickle_fn = vortex_module.getattr("_unpickle_array")?;
746+
let unpickle_fn = vortex_module.getattr(intern!(py, "_unpickle_array"))?;
746747

747748
let args = (array_buffers, dtype_buffers).into_pyobject(py)?;
748749
Ok((unpickle_fn, args.into_any()))
@@ -769,7 +770,7 @@ impl PyArray {
769770
let dtype_buffers = encoder.encode(EncoderMessage::DType(array.dtype()))?;
770771

771772
let pickle_module = PyModule::import(py, "pickle")?;
772-
let pickle_buffer_class = pickle_module.getattr("PickleBuffer")?;
773+
let pickle_buffer_class = pickle_module.getattr(intern!(py, "PickleBuffer"))?;
773774

774775
let mut pickle_buffers = Vec::new();
775776
for buf in array_buffers.into_iter() {
@@ -788,7 +789,7 @@ impl PyArray {
788789
}
789790

790791
let vortex_module = PyModule::import(py, "vortex")?;
791-
let unpickle_fn = vortex_module.getattr("_unpickle_array")?;
792+
let unpickle_fn = vortex_module.getattr(intern!(py, "_unpickle_array"))?;
792793

793794
let args = (pickle_buffers, dtype_pickle_buffers).into_pyobject(py)?;
794795
Ok((unpickle_fn, args.into_any()))

vortex-python/src/arrays/py/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub(crate) use array::*;
99
use pyo3::Bound;
1010
use pyo3::PyAny;
1111
use pyo3::exceptions::PyValueError;
12+
use pyo3::intern;
1213
use pyo3::prelude::PyAnyMethods;
1314
pub(crate) use python::*;
1415
use vortex::array::vtable::ArrayId;
@@ -19,7 +20,7 @@ use crate::error::PyVortexResult;
1920
/// Extract the array id from a Python class `id` attribute.
2021
pub fn id_from_obj(cls: &Bound<PyAny>) -> PyVortexResult<ArrayId> {
2122
Ok(ArrayId::new_arc(
22-
cls.getattr("id")
23+
cls.getattr(intern!(cls.py(), "id"))
2324
.map_err(|_| {
2425
PyValueError::new_err(format!(
2526
"PyEncoding subclass {cls:?} must have an 'id' attribute"

vortex-python/src/arrays/py/vtable.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ impl VTable for PythonVTable {
118118
}
119119

120120
let bytes = obj
121-
.call_method("__vx_metadata__", (), None)
121+
.call_method(intern!(py, "__vx_metadata__"), (), None)
122122
.map_err(|e| vortex_err!("{}", e))?
123123
.cast::<PyBytes>()
124124
.map_err(|_| vortex_err!("Expected array metadata to be Python bytes"))?

vortex-python/src/arrow.rs

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,17 @@ use pyo3::exceptions::PyValueError;
3131
use pyo3::ffi::Py_uintptr_t;
3232
use pyo3::ffi::c_str;
3333
use pyo3::import_exception;
34+
use pyo3::intern;
3435
use pyo3::prelude::*;
3536
use pyo3::types::PyCapsule;
3637
use pyo3::types::PyTuple;
3738

39+
use crate::classes::array_class;
40+
use crate::classes::data_type_class;
41+
use crate::classes::field_class;
42+
use crate::classes::record_batch_reader_class;
43+
use crate::classes::schema_class;
44+
3845
const SCHEMA_NAME: &CStr = c_str!("arrow_schema");
3946
const ARRAY_NAME: &CStr = c_str!("arrow_array");
4047
const ARRAY_STREAM_NAME: &CStr = c_str!("arrow_array_stream");
@@ -69,13 +76,14 @@ pub trait IntoPyArrow {
6976

7077
impl<'py> FromPyArrow<'_, 'py> for DataType {
7178
fn from_pyarrow(value: &Borrowed<'_, 'py, PyAny>) -> PyResult<Self> {
72-
if !value.hasattr("__arrow_c_schema__")? {
79+
let py = value.py();
80+
if !value.hasattr(intern!(py, "__arrow_c_schema__"))? {
7381
return Err(PyValueError::new_err(
7482
"Expected __arrow_c_schema__ attribute to be set.",
7583
));
7684
}
7785

78-
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
86+
let capsule = value.getattr(intern!(py, "__arrow_c_schema__"))?.call0()?;
7987
let capsule = capsule.cast::<PyCapsule>()?;
8088

8189
let schema_ptr = unsafe {
@@ -92,22 +100,24 @@ impl<'py> FromPyArrow<'_, 'py> for DataType {
92100
impl ToPyArrow for DataType {
93101
fn to_pyarrow(&self, py: Python) -> PyResult<Py<PyAny>> {
94102
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
95-
let module = py.import("pyarrow")?;
96-
let class = module.getattr("DataType")?;
97-
let dtype = class.call_method1("_import_from_c", (&raw const c_schema as Py_uintptr_t,))?;
103+
let dtype = data_type_class(py)?.call_method1(
104+
intern!(py, "_import_from_c"),
105+
(&raw const c_schema as Py_uintptr_t,),
106+
)?;
98107
Ok(dtype.into())
99108
}
100109
}
101110

102111
impl<'py> FromPyArrow<'_, 'py> for Field {
103112
fn from_pyarrow(value: &Borrowed<'_, 'py, PyAny>) -> PyResult<Self> {
104-
if !value.hasattr("__arrow_c_schema__")? {
113+
let py = value.py();
114+
if !value.hasattr(intern!(py, "__arrow_c_schema__"))? {
105115
return Err(PyValueError::new_err(
106116
"Expected __arrow_c_schema__ attribute to be set.",
107117
));
108118
}
109119

110-
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
120+
let capsule = value.getattr(intern!(py, "__arrow_c_schema__"))?.call0()?;
111121
let capsule = capsule.cast::<PyCapsule>()?;
112122

113123
let schema_ptr = unsafe {
@@ -124,22 +134,24 @@ impl<'py> FromPyArrow<'_, 'py> for Field {
124134
impl ToPyArrow for Field {
125135
fn to_pyarrow(&self, py: Python) -> PyResult<Py<PyAny>> {
126136
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
127-
let module = py.import("pyarrow")?;
128-
let class = module.getattr("Field")?;
129-
let dtype = class.call_method1("_import_from_c", (&raw const c_schema as Py_uintptr_t,))?;
137+
let dtype = field_class(py)?.call_method1(
138+
intern!(py, "_import_from_c"),
139+
(&raw const c_schema as Py_uintptr_t,),
140+
)?;
130141
Ok(dtype.into())
131142
}
132143
}
133144

134145
impl<'py> FromPyArrow<'_, 'py> for Schema {
135146
fn from_pyarrow(value: &Borrowed<'_, 'py, PyAny>) -> PyResult<Self> {
136-
if !value.hasattr("__arrow_c_schema__")? {
147+
let py = value.py();
148+
if !value.hasattr(intern!(py, "__arrow_c_schema__"))? {
137149
return Err(PyValueError::new_err(
138150
"Expected __arrow_c_schema__ attribute to be set.",
139151
));
140152
}
141153

142-
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
154+
let capsule = value.getattr(intern!(py, "__arrow_c_schema__"))?.call0()?;
143155
let capsule = capsule.cast::<PyCapsule>()?;
144156

145157
let schema_ptr = unsafe {
@@ -157,23 +169,24 @@ impl<'py> FromPyArrow<'_, 'py> for Schema {
157169
impl ToPyArrow for Schema {
158170
fn to_pyarrow(&self, py: Python) -> PyResult<Py<PyAny>> {
159171
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
160-
let module = py.import("pyarrow")?;
161-
let class = module.getattr("Schema")?;
162-
let schema =
163-
class.call_method1("_import_from_c", (&raw const c_schema as Py_uintptr_t,))?;
172+
let schema = schema_class(py)?.call_method1(
173+
intern!(py, "_import_from_c"),
174+
(&raw const c_schema as Py_uintptr_t,),
175+
)?;
164176
Ok(schema.into())
165177
}
166178
}
167179

168180
impl<'py> FromPyArrow<'_, 'py> for ArrayData {
169181
fn from_pyarrow(value: &Borrowed<'_, 'py, PyAny>) -> PyResult<Self> {
170-
if !value.hasattr("__arrow_c_array__")? {
182+
let py = value.py();
183+
if !value.hasattr(intern!(py, "__arrow_c_array__"))? {
171184
return Err(PyValueError::new_err(
172185
"Expected __arrow_c_array__ attribute to be set.",
173186
));
174187
}
175188

176-
let tuple = value.getattr("__arrow_c_array__")?.call0()?;
189+
let tuple = value.getattr(intern!(py, "__arrow_c_array__"))?.call0()?;
177190

178191
if !tuple.is_instance_of::<PyTuple>() {
179192
return Err(PyTypeError::new_err(
@@ -207,10 +220,8 @@ impl ToPyArrow for ArrayData {
207220
let array = FFI_ArrowArray::new(self);
208221
let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;
209222

210-
let module = py.import("pyarrow")?;
211-
let class = module.getattr("Array")?;
212-
let array = class.call_method1(
213-
"_import_from_c",
223+
let array = array_class(py)?.call_method1(
224+
intern!(py, "_import_from_c"),
214225
(
215226
addr_of!(array) as Py_uintptr_t,
216227
addr_of!(schema) as Py_uintptr_t,
@@ -222,13 +233,14 @@ impl ToPyArrow for ArrayData {
222233

223234
impl<'py> FromPyArrow<'_, 'py> for RecordBatch {
224235
fn from_pyarrow(value: &Borrowed<'_, 'py, PyAny>) -> PyResult<Self> {
225-
if !value.hasattr("__arrow_c_array__")? {
236+
let py = value.py();
237+
if !value.hasattr(intern!(py, "__arrow_c_array__"))? {
226238
return Err(PyValueError::new_err(
227239
"Expected __arrow_c_array__ attribute to be set.",
228240
));
229241
}
230242

231-
let tuple = value.getattr("__arrow_c_array__")?.call0()?;
243+
let tuple = value.getattr(intern!(py, "__arrow_c_array__"))?.call0()?;
232244

233245
if !tuple.is_instance_of::<PyTuple>() {
234246
return Err(PyTypeError::new_err(
@@ -286,20 +298,21 @@ impl ToPyArrow for RecordBatch {
286298
let reader = RecordBatchIterator::new(vec![Ok(self.clone())], self.schema());
287299
let reader: Box<dyn RecordBatchReader + Send> = Box::new(reader);
288300
let py_reader = reader.into_pyarrow(py)?;
289-
py_reader.call_method0(py, "read_next_batch")
301+
py_reader.call_method0(py, intern!(py, "read_next_batch"))
290302
}
291303
}
292304

293305
/// Supports conversion from `pyarrow.RecordBatchReader` to [ArrowArrayStreamReader].
294306
impl<'py> FromPyArrow<'_, 'py> for ArrowArrayStreamReader {
295307
fn from_pyarrow(value: &Borrowed<'_, 'py, PyAny>) -> PyResult<Self> {
296-
if !value.hasattr("__arrow_c_stream__")? {
308+
let py = value.py();
309+
if !value.hasattr(intern!(py, "__arrow_c_stream__"))? {
297310
return Err(PyValueError::new_err(
298311
"Expected __arrow_c_stream__ attribute to be set.",
299312
));
300313
}
301314

302-
let capsule = value.getattr("__arrow_c_stream__")?.call0()?;
315+
let capsule = value.getattr(intern!(py, "__arrow_c_stream__"))?.call0()?;
303316
let capsule = capsule.cast::<PyCapsule>()?;
304317

305318
let array_ptr = capsule
@@ -323,10 +336,9 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
323336
fn into_pyarrow(self, py: Python) -> PyResult<Py<PyAny>> {
324337
let mut stream = FFI_ArrowArrayStream::new(self);
325338

326-
let module = py.import("pyarrow")?;
327-
let class = module.getattr("RecordBatchReader")?;
328339
let args = PyTuple::new(py, [&raw mut stream as Py_uintptr_t])?;
329-
let reader = class.call_method1("_import_from_c", args)?;
340+
let reader =
341+
record_batch_reader_class(py)?.call_method1(intern!(py, "_import_from_c"), args)?;
330342

331343
Ok(Py::from(reader))
332344
}

vortex-python/src/classes.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Caches frequently used Python classes that are accessed across the C ABI
5+
6+
use pyo3::Bound;
7+
use pyo3::Py;
8+
use pyo3::PyResult;
9+
use pyo3::Python;
10+
use pyo3::sync::PyOnceLock;
11+
use pyo3::types::PyType;
12+
13+
/// Returns the pyarrow.DataType class
14+
pub fn data_type_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> {
15+
static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
16+
TYPE.import(py, "pyarrow", "DataType")
17+
}
18+
19+
/// Returns the pyarrow.Field class
20+
pub fn field_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> {
21+
static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
22+
TYPE.import(py, "pyarrow", "Field")
23+
}
24+
25+
/// Returns the pyarrow.Schema class
26+
pub fn schema_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> {
27+
static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
28+
TYPE.import(py, "pyarrow", "Schema")
29+
}
30+
31+
/// Returns the pyarrow.Array class
32+
pub fn array_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> {
33+
static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
34+
TYPE.import(py, "pyarrow", "Array")
35+
}
36+
37+
/// Returns the pyarrow.ChunkedArray class
38+
pub fn chunked_array_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> {
39+
static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
40+
TYPE.import(py, "pyarrow", "ChunkedArray")
41+
}
42+
43+
/// Returns the pyarrow.RecordBatchReader class
44+
pub fn record_batch_reader_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> {
45+
static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
46+
TYPE.import(py, "pyarrow", "RecordBatchReader")
47+
}
48+
49+
/// Returns the pyarrow.Table class
50+
pub fn table_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> {
51+
static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
52+
TYPE.import(py, "pyarrow", "Table")
53+
}
54+
55+
/// Returns the decimal.Decimal class
56+
pub fn decimal_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> {
57+
static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
58+
TYPE.import(py, "decimal", "Decimal")
59+
}

0 commit comments

Comments
 (0)