From a1bf651643a5548316dbfa388ebfa4bf29fed555 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:13:25 +0100 Subject: [PATCH 1/3] fix: use fastpath for PyCapsule export when starting from pyarrow-backed Series, respect requested_schema --- pandas/core/series.py | 16 +++++++++++++-- pandas/tests/series/test_arrow_interface.py | 22 +++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4f79e30f48f3c..579227609f387 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -77,6 +77,7 @@ validate_all_hashable, ) from pandas.core.dtypes.dtypes import ( + ArrowDtype, CategoricalDtype, ExtensionDtype, SparseDtype, @@ -580,8 +581,19 @@ def __arrow_c_stream__(self, requested_schema=None): PyCapsule """ pa = import_optional_dependency("pyarrow", min_version="16.0.0") - ca = pa.chunked_array([pa.Array.from_pandas(self, type=requested_schema)]) - return ca.__arrow_c_stream__(requested_schema) + type = ( + pa.DataType._import_from_c_capsule(requested_schema) + if requested_schema is not None + else None + ) + if isinstance(self.dtype, ArrowDtype): + # fastpath! + ca = self.values._pa_array + if type is not None: + ca = ca.cast(type) + else: + ca = pa.chunked_array([pa.Array.from_pandas(self, type=type)]) + return ca.__arrow_c_stream__() # ---------------------------------------------------------------------- diff --git a/pandas/tests/series/test_arrow_interface.py b/pandas/tests/series/test_arrow_interface.py index 34a2a638e4185..9f1f0403d23a7 100644 --- a/pandas/tests/series/test_arrow_interface.py +++ b/pandas/tests/series/test_arrow_interface.py @@ -21,3 +21,25 @@ def test_series_arrow_interface(): ca = pa.chunked_array(s) expected = pa.chunked_array([[1, 4, 2]]) assert ca.equals(expected) + ca = pa.chunked_array(s, type=pa.int32()) + expected = pa.chunked_array([[1, 4, 2]], type=pa.int32()) + assert ca.equals(expected) + + +def test_series_arrow_interface_arrow_dtypes(): + s = pd.Series([1, 4, 2], dtype="Int64[pyarrow]") + + capsule = s.__arrow_c_stream__() + assert ( + ctypes.pythonapi.PyCapsule_IsValid( + ctypes.py_object(capsule), b"arrow_array_stream" + ) + == 1 + ) + + ca = pa.chunked_array(s) + expected = pa.chunked_array([[1, 4, 2]]) + assert ca.equals(expected) + ca = pa.chunked_array(s, type=pa.int32()) + expected = pa.chunked_array([[1, 4, 2]], type=pa.int32()) + assert ca.equals(expected) From 04871d4e87bbc2c418747c8ed45cd75d29bb49af Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 6 Sep 2024 14:10:14 +0100 Subject: [PATCH 2/3] simplify --- pandas/core/series.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 579227609f387..0c26ce27c680c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -77,7 +77,6 @@ validate_all_hashable, ) from pandas.core.dtypes.dtypes import ( - ArrowDtype, CategoricalDtype, ExtensionDtype, SparseDtype, @@ -586,13 +585,9 @@ def __arrow_c_stream__(self, requested_schema=None): if requested_schema is not None else None ) - if isinstance(self.dtype, ArrowDtype): - # fastpath! - ca = self.values._pa_array - if type is not None: - ca = ca.cast(type) - else: - ca = pa.chunked_array([pa.Array.from_pandas(self, type=type)]) + ca = pa.array(self, type=type) + if not isinstance(ca, pa.ChunkedArray): + ca = pa.chunked_array([ca]) return ca.__arrow_c_stream__() # ---------------------------------------------------------------------- From bf33163a39edf914ed482d02702ed3c23a24bc3f Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 6 Sep 2024 14:13:49 +0100 Subject: [PATCH 3/3] stringdtype test --- pandas/tests/series/test_arrow_interface.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/series/test_arrow_interface.py b/pandas/tests/series/test_arrow_interface.py index 9f1f0403d23a7..e73cf9bee6aeb 100644 --- a/pandas/tests/series/test_arrow_interface.py +++ b/pandas/tests/series/test_arrow_interface.py @@ -43,3 +43,19 @@ def test_series_arrow_interface_arrow_dtypes(): ca = pa.chunked_array(s, type=pa.int32()) expected = pa.chunked_array([[1, 4, 2]], type=pa.int32()) assert ca.equals(expected) + + +def test_series_arrow_interface_stringdtype(): + s = pd.Series(["foo", "bar"], dtype="string[pyarrow]") + + capsule = s.__arrow_c_stream__() + assert ( + ctypes.pythonapi.PyCapsule_IsValid( + ctypes.py_object(capsule), b"arrow_array_stream" + ) + == 1 + ) + + ca = pa.chunked_array(s) + expected = pa.chunked_array([["foo", "bar"]], type=pa.large_string()) + assert ca.equals(expected)