Skip to content

Commit ebbd7fa

Browse files
authored
chore: Bump pyarrow min version to 13.0.0 (#2825)
1 parent ef7c46c commit ebbd7fa

24 files changed, with 29 additions (+29) and 160 deletions (-160).

.github/workflows/extremes.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
cache-suffix: min-versions-${{ matrix.python-version }}
2626
cache-dependency-glob: "pyproject.toml"
2727
- name: install-minimum-versions
28-
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==11.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
28+
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==13.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
2929
- name: install-reqs
3030
run: |
3131
uv pip install -e . --group tests --system
@@ -37,7 +37,7 @@ jobs:
3737
echo "$DEPS" | grep 'pandas==1.1.3'
3838
echo "$DEPS" | grep 'polars==0.20.4'
3939
echo "$DEPS" | grep 'numpy==1.19.3'
40-
echo "$DEPS" | grep 'pyarrow==11.0.0'
40+
echo "$DEPS" | grep 'pyarrow==13.0.0'
4141
echo "$DEPS" | grep 'scipy==1.6.0'
4242
echo "$DEPS" | grep 'scikit-learn==1.1.0'
4343
echo "$DEPS" | grep 'duckdb==1.0'
@@ -62,7 +62,7 @@ jobs:
6262
cache-suffix: pretty-old-versions-${{ matrix.python-version }}
6363
cache-dependency-glob: "pyproject.toml"
6464
- name: install-pretty-old-versions
65-
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.4 numpy==1.19.3 pyarrow==11.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
65+
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.4 numpy==1.19.3 pyarrow==14.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
6666
- name: install-reqs
6767
run: uv pip install -e . --group tests --system
6868
- name: show-deps
@@ -75,7 +75,7 @@ jobs:
7575
echo "$DEPS" | grep 'pandas==1.1.5'
7676
echo "$DEPS" | grep 'polars==0.20.4'
7777
echo "$DEPS" | grep 'numpy==1.19.3'
78-
echo "$DEPS" | grep 'pyarrow==11.0.0'
78+
echo "$DEPS" | grep 'pyarrow==14.0.0'
7979
echo "$DEPS" | grep 'scipy==1.6.0'
8080
echo "$DEPS" | grep 'scikit-learn==1.1.0'
8181
echo "$DEPS" | grep 'duckdb==1.0'

narwhals/_arrow/group_by.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,6 @@ def agg(self, *exprs: ArrowExpr) -> ArrowDataFrame:
122122
]
123123
new_column_names = [new_column_names[i] for i in index_map]
124124
result_simple = result_simple.rename_columns(new_column_names)
125-
if self.compliant._backend_version < (12, 0, 0):
126-
columns = result_simple.column_names
127-
result_simple = result_simple.select(
128-
[*self._keys, *[col for col in columns if col not in self._keys]]
129-
)
130-
131125
return self.compliant._with_native(result_simple).rename(
132126
dict(zip(self._keys, self._output_key_names))
133127
)

narwhals/_arrow/series.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
generate_temporary_column_name,
3030
is_list_of,
3131
not_implemented,
32-
requires,
3332
)
3433
from narwhals.dependencies import is_numpy_array_1d
3534
from narwhals.exceptions import InvalidOperationError, ShapeError
@@ -156,7 +155,7 @@ def from_iterable(
156155
)
157156

158157
def _from_scalar(self, value: Any) -> Self:
159-
if self._backend_version < (13,) and hasattr(value, "as_py"):
158+
if hasattr(value, "as_py"):
160159
value = value.as_py()
161160
return super()._from_scalar(value)
162161

@@ -866,7 +865,6 @@ def cum_count(self, *, reverse: bool) -> Self:
866865
dtypes = self._version.dtypes
867866
return (~self.is_null()).cast(dtypes.UInt32()).cum_sum(reverse=reverse)
868867

869-
@requires.backend_version((13,))
870868
def cum_min(self, *, reverse: bool) -> Self:
871869
result = (
872870
pc.cumulative_min(self.native, skip_nulls=True)
@@ -875,7 +873,6 @@ def cum_min(self, *, reverse: bool) -> Self:
875873
)
876874
return self._with_native(result)
877875

878-
@requires.backend_version((13,))
879876
def cum_max(self, *, reverse: bool) -> Self:
880877
result = (
881878
pc.cumulative_max(self.native, skip_nulls=True)
@@ -884,7 +881,6 @@ def cum_max(self, *, reverse: bool) -> Self:
884881
)
885882
return self._with_native(result)
886883

887-
@requires.backend_version((13,))
888884
def cum_prod(self, *, reverse: bool) -> Self:
889885
result = (
890886
pc.cumulative_prod(self.native, skip_nulls=True)
@@ -1023,7 +1019,6 @@ def rank(self, method: RankMethod, *, descending: bool) -> Self:
10231019
result = pc.if_else(null_mask, lit(None, rank.type), rank)
10241020
return self._with_native(result)
10251021

1026-
@requires.backend_version((13,))
10271022
def hist( # noqa: C901, PLR0912, PLR0915
10281023
self,
10291024
bins: list[float | int] | None,

narwhals/_arrow/series_str.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,9 @@ def zfill(self, width: int) -> ArrowSeries:
7070
binary_join: Incomplete = pc.binary_join_element_wise
7171
native = self.native
7272
hyphen, plus = lit("-"), lit("+")
73-
74-
_slice_length: int | None = (
75-
self.len_chars().max() if self.backend_version < (13, 0) else None
76-
)
7773
first_char, remaining_chars = (
7874
self.slice(0, 1).native,
79-
self.slice(1, _slice_length).native,
75+
self.slice(1, None).native,
8076
)
8177

8278
# Conditions

narwhals/_arrow/utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -295,9 +295,8 @@ def cast_for_truediv(
295295
if pa.types.is_integer(arrow_array.type) and pa.types.is_integer(pa_object.type):
296296
# GH: 56645. # noqa: ERA001
297297
# https://github.com/apache/arrow/issues/35563
298-
# NOTE: `pyarrow==11.*` doesn't allow keywords in `Array.cast`
299-
return pc.cast(arrow_array, pa.float64(), safe=False), pc.cast(
300-
pa_object, pa.float64(), safe=False
298+
return arrow_array.cast(pa.float64(), safe=False), pa_object.cast(
299+
pa.float64(), safe=False
301300
)
302301

303302
return arrow_array, pa_object

narwhals/_pandas_like/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ def narwhals_to_native_dtype( # noqa: C901, PLR0912, PLR0915
508508
try:
509509
import pyarrow as pa # ignore-banned-import
510510
except ModuleNotFoundError: # pragma: no cover
511-
msg = "'pyarrow>=11.0.0' is required for `Date` dtype."
511+
msg = "'pyarrow>=13.0.0' is required for `Date` dtype."
512512
return "date32[pyarrow]"
513513
if isinstance_or_issubclass(dtype, dtypes.Enum):
514514
if version is Version.V1:

narwhals/_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ def _backend_version(self) -> tuple[int, ...]:
595595
Implementation.PANDAS: (1, 1, 3),
596596
Implementation.MODIN: (0, 8, 2),
597597
Implementation.CUDF: (24, 10),
598-
Implementation.PYARROW: (11,),
598+
Implementation.PYARROW: (13,),
599599
Implementation.PYSPARK: (3, 5),
600600
Implementation.PYSPARK_CONNECT: (3, 5),
601601
Implementation.POLARS: (0, 20, 4),

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Repository = "https://github.com/narwhals-dev/narwhals"
3737
pandas = ["pandas>=1.1.3"]
3838
modin = ["modin"]
3939
cudf = ["cudf>=24.10.0"]
40-
pyarrow = ["pyarrow>=11.0.0"]
40+
pyarrow = ["pyarrow>=13.0.0"]
4141
pyspark = ["pyspark>=3.5.0"]
4242
pyspark-connect = ["pyspark[connect]>=3.5.0"]
4343
polars = ["polars>=0.20.4"]

tests/expr_and_series/cum_max_test.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
DUCKDB_VERSION,
88
PANDAS_VERSION,
99
POLARS_VERSION,
10-
PYARROW_VERSION,
1110
Constructor,
1211
ConstructorEager,
1312
assert_equal_data,
@@ -22,12 +21,7 @@
2221
def test_cum_max_expr(
2322
request: pytest.FixtureRequest, constructor_eager: ConstructorEager, *, reverse: bool
2423
) -> None:
25-
if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager):
26-
request.applymarker(pytest.mark.xfail)
27-
28-
if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
29-
constructor_eager
30-
):
24+
if (PANDAS_VERSION < (2, 1)) and "pandas_pyarrow" in str(constructor_eager):
3125
request.applymarker(pytest.mark.xfail)
3226

3327
name = "reverse_cum_max" if reverse else "cum_max"
@@ -141,8 +135,6 @@ def test_lazy_cum_max_ungrouped(
141135
"duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)
142136
):
143137
pytest.skip(reason="too old version")
144-
if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor):
145-
request.applymarker(pytest.mark.xfail)
146138

147139
df = nw.from_native(
148140
constructor({"a": [2, 3, 1], "b": [0, 2, 1], "i": [1, 2, 0]})
@@ -174,8 +166,6 @@ def test_lazy_cum_max_ungrouped_ordered_by_nulls(
174166
"duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)
175167
):
176168
pytest.skip(reason="too old version")
177-
if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor):
178-
request.applymarker(pytest.mark.xfail)
179169

180170
df = nw.from_native(
181171
constructor(
@@ -200,12 +190,7 @@ def test_lazy_cum_max_ungrouped_ordered_by_nulls(
200190
def test_cum_max_series(
201191
request: pytest.FixtureRequest, constructor_eager: ConstructorEager
202192
) -> None:
203-
if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager):
204-
request.applymarker(pytest.mark.xfail)
205-
206-
if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
207-
constructor_eager
208-
):
193+
if (PANDAS_VERSION < (2, 1)) and "pandas_pyarrow" in str(constructor_eager):
209194
request.applymarker(pytest.mark.xfail)
210195

211196
df = nw.from_native(constructor_eager(data), eager_only=True)

tests/expr_and_series/cum_min_test.py

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
DUCKDB_VERSION,
88
PANDAS_VERSION,
99
POLARS_VERSION,
10-
PYARROW_VERSION,
1110
Constructor,
1211
ConstructorEager,
1312
assert_equal_data,
@@ -22,12 +21,7 @@
2221
def test_cum_min_expr(
2322
request: pytest.FixtureRequest, constructor_eager: ConstructorEager, *, reverse: bool
2423
) -> None:
25-
if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager):
26-
request.applymarker(pytest.mark.xfail)
27-
28-
if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
29-
constructor_eager
30-
):
24+
if (PANDAS_VERSION < (2, 1)) and "pandas_pyarrow" in str(constructor_eager):
3125
request.applymarker(pytest.mark.xfail)
3226

3327
name = "reverse_cum_min" if reverse else "cum_min"
@@ -141,9 +135,6 @@ def test_lazy_cum_min_ungrouped(
141135
"duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)
142136
):
143137
pytest.skip(reason="too old version")
144-
if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor):
145-
request.applymarker(pytest.mark.xfail)
146-
147138
df = nw.from_native(
148139
constructor({"a": [2, 3, 1], "b": [0, 2, 1], "i": [1, 2, 0]})
149140
).sort("i")
@@ -174,9 +165,6 @@ def test_lazy_cum_min_ungrouped_ordered_by_nulls(
174165
"duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)
175166
):
176167
pytest.skip(reason="too old version")
177-
if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor):
178-
request.applymarker(pytest.mark.xfail)
179-
180168
df = nw.from_native(
181169
constructor(
182170
{
@@ -200,12 +188,7 @@ def test_lazy_cum_min_ungrouped_ordered_by_nulls(
200188
def test_cum_min_series(
201189
request: pytest.FixtureRequest, constructor_eager: ConstructorEager
202190
) -> None:
203-
if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager):
204-
request.applymarker(pytest.mark.xfail)
205-
206-
if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
207-
constructor_eager
208-
):
191+
if (PANDAS_VERSION < (2, 1)) and "pandas_pyarrow" in str(constructor_eager):
209192
request.applymarker(pytest.mark.xfail)
210193

211194
df = nw.from_native(constructor_eager(data), eager_only=True)

0 commit comments

Comments
 (0)