
Commit f829533

Merge remote-tracking branch 'upstream/main' into stringmethods-get-dummies
2 parents: 151316d + fa5c255

File tree: 25 files changed (+442 lines, -62 lines)


doc/source/reference/window.rst

Lines changed: 2 additions & 0 deletions
@@ -35,6 +35,7 @@ Rolling window functions
    Rolling.skew
    Rolling.kurt
    Rolling.apply
+   Rolling.pipe
    Rolling.aggregate
    Rolling.quantile
    Rolling.sem
@@ -76,6 +77,7 @@ Expanding window functions
    Expanding.skew
    Expanding.kurt
    Expanding.apply
+   Expanding.pipe
    Expanding.aggregate
    Expanding.quantile
    Expanding.sem
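
The new entries document ``pipe`` on the windowing classes. Presumably it follows the same contract as :meth:`DataFrameGroupBy.pipe`: the callable receives the Rolling/Expanding object itself. A minimal usage sketch (data and variable names are illustrative, not from this commit):

    import pandas as pd

    df = pd.DataFrame({"x": [1.0, 2.0, 3.0, 4.0]})

    # pipe hands the Rolling object to the callable, so windowed
    # aggregations compose in a single method chain.
    result = df.rolling(window=2).pipe(lambda r: r.mean() + 1)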

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 1 deletion
@@ -35,8 +35,8 @@ Other enhancements
 - The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. called
   when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been
   updated to work correctly with NumPy >= 2 (:issue:`57739`)
+- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns when backed by PyArrow (:issue:`60633`)
 - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
-

 .. ---------------------------------------------------------------------------
 .. _whatsnew_230.notable_bug_fixes:
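
A sketch of what the new string accumulations look like in practice, assuming pyarrow is installed (values illustrative):

    import pandas as pd

    s = pd.Series(["a", "b", None, "c"], dtype="string[pyarrow]")

    # cumsum on strings is a running concatenation; with the default
    # skipna=True the NA position stays NA but accumulation continues.
    s.cumsum()  # ["a", "ab", <NA>, "abc"]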

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
@@ -30,6 +30,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
 - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
+- :class:`pandas.api.typing.NoDefault` is available for typing ``no_default``
 - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
 - :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`)
 - :func:`read_spss` now supports kwargs to be passed to pyreadstat (:issue:`56356`)
@@ -44,6 +45,7 @@ Other enhancements
 - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
 - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
 - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
+- :class:`Rolling` and :class:`Expanding` now support ``pipe`` method (:issue:`57076`)
 - :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
 - :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`)
 - :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)

pandas/_libs/src/parser/tokenizer.c

Lines changed: 3 additions & 4 deletions
@@ -148,7 +148,7 @@ int parser_init(parser_t *self) {
   self->warn_msg = NULL;

   // token stream
-  self->stream = malloc(STREAM_INIT_SIZE * sizeof(char));
+  self->stream = malloc(STREAM_INIT_SIZE);
   if (self->stream == NULL) {
     parser_cleanup(self);
     return PARSER_OUT_OF_MEMORY;
@@ -221,9 +221,8 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
   char *orig_ptr = (void *)self->stream;
   TRACE(("\n\nmake_stream_space: nbytes = %zu. grow_buffer(self->stream...)\n",
          nbytes))
-  self->stream =
-      (char *)grow_buffer((void *)self->stream, self->stream_len,
-                          &self->stream_cap, nbytes * 2, sizeof(char), &status);
+  self->stream = (char *)grow_buffer((void *)self->stream, self->stream_len,
+                                     &self->stream_cap, nbytes * 2, 1, &status);
   TRACE(("make_stream_space: self->stream=%p, self->stream_len = %zu, "
          "self->stream_cap=%zu, status=%zu\n",
          self->stream, self->stream_len, self->stream_cap, status))
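
The rationale here (and in the objToJSON.c and period.pyx hunks below) is that the C standard defines ``sizeof(char)`` as exactly 1, so multiplying a byte count by it is redundant. A quick way to confirm the equivalent fact from Python:

    import ctypes

    # sizeof(char) is 1 by definition in C, so malloc(n * sizeof(char))
    # and malloc(n) request the same number of bytes.
    assert ctypes.sizeof(ctypes.c_char) == 1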

pandas/_libs/src/vendored/ujson/python/objToJSON.c

Lines changed: 11 additions & 11 deletions
@@ -984,7 +984,7 @@ static char *List_iterGetName(JSOBJ Py_UNUSED(obj),
 //=============================================================================
 static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
   GET_TC(tc)->index = 0;
-  GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char));
+  GET_TC(tc)->cStr = PyObject_Malloc(20);
   if (!GET_TC(tc)->cStr) {
     PyErr_NoMemory();
   }
@@ -998,10 +998,10 @@ static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
   const Py_ssize_t index = GET_TC(tc)->index;
   Py_XDECREF(GET_TC(tc)->itemValue);
   if (index == 0) {
-    memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5);
+    memcpy(GET_TC(tc)->cStr, "name", 5);
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
   } else if (index == 1) {
-    memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5);
+    memcpy(GET_TC(tc)->cStr, "data", 5);
     GET_TC(tc)->itemValue = get_values(obj);
     if (!GET_TC(tc)->itemValue) {
       return 0;
@@ -1033,7 +1033,7 @@ static char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
 static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
   PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
   GET_TC(tc)->index = 0;
-  GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char));
+  GET_TC(tc)->cStr = PyObject_Malloc(20);
   enc->outputFormat = VALUES; // for contained series
   if (!GET_TC(tc)->cStr) {
     PyErr_NoMemory();
@@ -1048,13 +1048,13 @@ static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
   const Py_ssize_t index = GET_TC(tc)->index;
   Py_XDECREF(GET_TC(tc)->itemValue);
   if (index == 0) {
-    memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5);
+    memcpy(GET_TC(tc)->cStr, "name", 5);
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
   } else if (index == 1) {
-    memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6);
+    memcpy(GET_TC(tc)->cStr, "index", 6);
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
   } else if (index == 2) {
-    memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5);
+    memcpy(GET_TC(tc)->cStr, "data", 5);
     GET_TC(tc)->itemValue = get_values(obj);
     if (!GET_TC(tc)->itemValue) {
       return 0;
@@ -1088,7 +1088,7 @@ static char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
 static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
   PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
   GET_TC(tc)->index = 0;
-  GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char));
+  GET_TC(tc)->cStr = PyObject_Malloc(20);
   enc->outputFormat = VALUES; // for contained series & index
   if (!GET_TC(tc)->cStr) {
     PyErr_NoMemory();
@@ -1103,13 +1103,13 @@ static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
   const Py_ssize_t index = GET_TC(tc)->index;
   Py_XDECREF(GET_TC(tc)->itemValue);
   if (index == 0) {
-    memcpy(GET_TC(tc)->cStr, "columns", sizeof(char) * 8);
+    memcpy(GET_TC(tc)->cStr, "columns", 8);
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
   } else if (index == 1) {
-    memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6);
+    memcpy(GET_TC(tc)->cStr, "index", 6);
     GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
   } else if (index == 2) {
-    memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5);
+    memcpy(GET_TC(tc)->cStr, "data", 5);
     Py_INCREF(obj);
     GET_TC(tc)->itemValue = obj;
   } else {

pandas/_libs/tslibs/period.pyx

Lines changed: 1 addition & 1 deletion
@@ -679,7 +679,7 @@ cdef char* c_strftime(npy_datetimestruct *dts, char *fmt):
     c_date.tm_yday = get_day_of_year(dts.year, dts.month, dts.day) - 1
     c_date.tm_isdst = -1

-    result = <char*>malloc(result_len * sizeof(char))
+    result = <char*>malloc(result_len)
     if result is NULL:
         raise MemoryError()

pandas/api/typing/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,7 @@
 """

 from pandas._libs import NaTType
+from pandas._libs.lib import NoDefault
 from pandas._libs.missing import NAType

 from pandas.core.groupby import (
@@ -44,6 +45,7 @@
     "JsonReader",
     "NAType",
     "NaTType",
+    "NoDefault",
     "PeriodIndexResamplerGroupby",
     "Resampler",
     "Rolling",

pandas/conftest.py

Lines changed: 16 additions & 0 deletions
@@ -1317,6 +1317,22 @@ def nullable_string_dtype(request):
     return request.param


+@pytest.fixture(
+    params=[
+        pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
+        pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
+    ]
+)
+def pyarrow_string_dtype(request):
+    """
+    Parametrized fixture for string dtypes backed by Pyarrow.
+
+    * 'str[pyarrow]'
+    * 'string[pyarrow]'
+    """
+    return pd.StringDtype(*request.param)
+
+
 @pytest.fixture(
     params=[
         "python",

pandas/core/algorithms.py

Lines changed: 1 addition & 1 deletion
@@ -1012,7 +1012,7 @@ def mode(
         return npresult, res_mask  # type: ignore[return-value]

     try:
-        npresult = np.sort(npresult)
+        npresult = safe_sort(npresult)
     except TypeError as err:
         warnings.warn(
             f"Unable to sort modes: {err}",

pandas/core/arrays/arrow/array.py

Lines changed: 55 additions & 0 deletions
@@ -41,6 +41,7 @@
     is_list_like,
     is_numeric_dtype,
     is_scalar,
+    is_string_dtype,
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
@@ -1619,6 +1620,9 @@ def _accumulate(
         ------
         NotImplementedError : subclass does not define accumulations
         """
+        if is_string_dtype(self):
+            return self._str_accumulate(name=name, skipna=skipna, **kwargs)
+
         pyarrow_name = {
             "cummax": "cumulative_max",
             "cummin": "cumulative_min",
@@ -1654,6 +1658,57 @@ def _accumulate(

         return type(self)(result)

+    def _str_accumulate(
+        self, name: str, *, skipna: bool = True, **kwargs
+    ) -> ArrowExtensionArray | ExtensionArray:
+        """
+        Accumulate implementation for strings, see `_accumulate` docstring for details.
+
+        pyarrow.compute does not implement these methods for strings.
+        """
+        if name == "cumprod":
+            msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
+            raise TypeError(msg)
+
+        # We may need to strip out trailing NA values
+        tail: pa.array | None = None
+        na_mask: pa.array | None = None
+        pa_array = self._pa_array
+        np_func = {
+            "cumsum": np.cumsum,
+            "cummin": np.minimum.accumulate,
+            "cummax": np.maximum.accumulate,
+        }[name]
+
+        if self._hasna:
+            na_mask = pc.is_null(pa_array)
+            if pc.all(na_mask) == pa.scalar(True):
+                return type(self)(pa_array)
+            if skipna:
+                if name == "cumsum":
+                    pa_array = pc.fill_null(pa_array, "")
+                else:
+                    # We can retain the running min/max by forward/backward filling.
+                    pa_array = pc.fill_null_forward(pa_array)
+                    pa_array = pc.fill_null_backward(pa_array)
+            else:
+                # When not skipping NA values, the result should be null from
+                # the first NA value onward.
+                idx = pc.index(na_mask, True).as_py()
+                tail = pa.nulls(len(pa_array) - idx, type=pa_array.type)
+                pa_array = pa_array[:idx]
+
+        # error: Cannot call function of unknown type
+        pa_result = pa.array(np_func(pa_array), type=pa_array.type)  # type: ignore[operator]
+
+        if tail is not None:
+            pa_result = pa.concat_arrays([pa_result, tail])
+        elif na_mask is not None:
+            pa_result = pc.if_else(na_mask, None, pa_result)
+
+        result = type(self)(pa_result)
+        return result
+
     def _reduce_pyarrow(self, name: str, *, skipna: bool = True, **kwargs) -> pa.Scalar:
         """
         Return a pyarrow scalar result of performing the reduction operation.
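
The NA handling above is the subtle part: under ``skipna=True`` nulls are filled just enough to keep the running result going and then restored via the mask, while ``skipna=False`` truncates at the first null and pads the tail with nulls. A behavior sketch derived from that logic (assuming pyarrow is installed):

    import pandas as pd

    s = pd.Series(["b", "a", None, "c"], dtype="string[pyarrow]")

    s.cummax()              # ["b", "b", <NA>, "c"]  (running max continues past the NA)
    s.cummax(skipna=False)  # ["b", "b", <NA>, <NA>] (null from the first NA onward)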
