Skip to content

Commit 46ff2c1

Browse files
committed
Change logic & whatsnew
1 parent ed895b9 commit 46ff2c1

File tree

3 files changed

+21
-28
lines changed

3 files changed

+21
-28
lines changed

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ Other enhancements
3535
- The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. called
3636
when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been
3737
updated to work correctly with NumPy >= 2 (:issue:`57739`)
38+
- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` accumulations are now implemented for ``StringDtype`` columns when backed by PyArrow (:issue:`60633`)
3839
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
39-
-
4040

4141
.. ---------------------------------------------------------------------------
4242
.. _whatsnew_230.notable_bug_fixes:

pandas/core/arrays/arrow/array.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,9 +1670,9 @@ def _str_accumulate(
16701670
msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
16711671
raise TypeError(msg)
16721672

1673-
# We may need to strip out leading / trailing NA values
1674-
head: pa.array | None = None
1673+
# We may need to strip out trailing NA values
16751674
tail: pa.array | None = None
1675+
na_mask: pa.array | None = None
16761676
pa_array = self._pa_array
16771677
np_func = {
16781678
"cumsum": np.cumsum,
@@ -1681,37 +1681,30 @@ def _str_accumulate(
16811681
}[name]
16821682

16831683
if self._hasna:
1684+
na_mask = pc.is_null(pa_array)
1685+
if pc.all(na_mask) == pa.scalar(True):
1686+
return type(self)(pa_array)
16841687
if skipna:
16851688
if name == "cumsum":
16861689
pa_array = pc.fill_null(pa_array, "")
16871690
else:
1688-
# After the first non-NA value we can retain the running min/max
1689-
# by forward filling.
1691+
# We can retain the running min/max by forward/backward filling.
16901692
pa_array = pc.fill_null_forward(pa_array)
1691-
# But any leading NA values should result in "".
1692-
nulls = pc.is_null(pa_array)
1693-
idx = pc.index(nulls, False).as_py()
1694-
if idx == -1:
1695-
idx = len(pa_array)
1696-
if idx > 0:
1697-
head = pa.array([""] * idx, type=pa_array.type)
1698-
pa_array = pa_array[idx:].combine_chunks()
1693+
pa_array = pc.fill_null_backward(pa_array)
16991694
else:
17001695
# When not skipping NA values, the result should be null from
17011696
# the first NA value onward.
1702-
nulls = pc.is_null(pa_array)
1703-
idx = pc.index(nulls, True).as_py()
1697+
idx = pc.index(na_mask, True).as_py()
17041698
tail = pa.nulls(len(pa_array) - idx, type=pa_array.type)
17051699
pa_array = pa_array[:idx].combine_chunks()
17061700

17071701
# error: Cannot call function of unknown type
17081702
pa_result = pa.array(np_func(pa_array), type=pa_array.type) # type: ignore[operator]
17091703

1710-
assert head is None or tail is None
1711-
if head is not None:
1712-
pa_result = pa.concat_arrays([head, pa_result])
1713-
elif tail is not None:
1704+
if tail is not None:
17141705
pa_result = pa.concat_arrays([pa_result, tail])
1706+
elif na_mask is not None:
1707+
pa_result = pc.if_else(na_mask, None, pa_result)
17151708

17161709
result = type(self)(pa_result)
17171710
return result

pandas/tests/series/test_cumulative.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -237,31 +237,31 @@ def test_cumprod_timedelta(self):
237237
([], "cumsum", False, []),
238238
(["x", "z", "y"], "cumsum", True, ["x", "xz", "xzy"]),
239239
(["x", "z", "y"], "cumsum", False, ["x", "xz", "xzy"]),
240-
(["x", pd.NA, "y"], "cumsum", True, ["x", "x", "xy"]),
240+
(["x", pd.NA, "y"], "cumsum", True, ["x", pd.NA, "xy"]),
241241
(["x", pd.NA, "y"], "cumsum", False, ["x", pd.NA, pd.NA]),
242-
([pd.NA, "x", "y"], "cumsum", True, ["", "x", "xy"]),
242+
([pd.NA, "x", "y"], "cumsum", True, [pd.NA, "x", "xy"]),
243243
([pd.NA, "x", "y"], "cumsum", False, [pd.NA, pd.NA, pd.NA]),
244-
([pd.NA, pd.NA, pd.NA], "cumsum", True, ["", "", ""]),
244+
([pd.NA, pd.NA, pd.NA], "cumsum", True, [pd.NA, pd.NA, pd.NA]),
245245
([pd.NA, pd.NA, pd.NA], "cumsum", False, [pd.NA, pd.NA, pd.NA]),
246246
([], "cummin", True, []),
247247
([], "cummin", False, []),
248248
(["y", "z", "x"], "cummin", True, ["y", "y", "x"]),
249249
(["y", "z", "x"], "cummin", False, ["y", "y", "x"]),
250-
(["y", pd.NA, "x"], "cummin", True, ["y", "y", "x"]),
250+
(["y", pd.NA, "x"], "cummin", True, ["y", pd.NA, "x"]),
251251
(["y", pd.NA, "x"], "cummin", False, ["y", pd.NA, pd.NA]),
252-
([pd.NA, "y", "x"], "cummin", True, ["", "y", "x"]),
252+
([pd.NA, "y", "x"], "cummin", True, [pd.NA, "y", "x"]),
253253
([pd.NA, "y", "x"], "cummin", False, [pd.NA, pd.NA, pd.NA]),
254-
([pd.NA, pd.NA, pd.NA], "cummin", True, ["", "", ""]),
254+
([pd.NA, pd.NA, pd.NA], "cummin", True, [pd.NA, pd.NA, pd.NA]),
255255
([pd.NA, pd.NA, pd.NA], "cummin", False, [pd.NA, pd.NA, pd.NA]),
256256
([], "cummax", True, []),
257257
([], "cummax", False, []),
258258
(["x", "z", "y"], "cummax", True, ["x", "z", "z"]),
259259
(["x", "z", "y"], "cummax", False, ["x", "z", "z"]),
260-
(["x", pd.NA, "y"], "cummax", True, ["x", "x", "y"]),
260+
(["x", pd.NA, "y"], "cummax", True, ["x", pd.NA, "y"]),
261261
(["x", pd.NA, "y"], "cummax", False, ["x", pd.NA, pd.NA]),
262-
([pd.NA, "x", "y"], "cummax", True, ["", "x", "y"]),
262+
([pd.NA, "x", "y"], "cummax", True, [pd.NA, "x", "y"]),
263263
([pd.NA, "x", "y"], "cummax", False, [pd.NA, pd.NA, pd.NA]),
264-
([pd.NA, pd.NA, pd.NA], "cummax", True, ["", "", ""]),
264+
([pd.NA, pd.NA, pd.NA], "cummax", True, [pd.NA, pd.NA, pd.NA]),
265265
([pd.NA, pd.NA, pd.NA], "cummax", False, [pd.NA, pd.NA, pd.NA]),
266266
],
267267
)

0 commit comments

Comments
 (0)