pandas-dev
diff --git a/‎.github/workflows/wheels.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/wheels.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/reference/missing_value.rst‎
Lines changed: 0 additions & 2 deletions b/‎doc/source/reference/missing_value.rst‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎doc/source/whatsnew/v2.3.2.rst‎
Lines changed: 1 addition & 3 deletions b/‎doc/source/whatsnew/v2.3.2.rst‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎doc/source/whatsnew/v2.3.3.rst‎
Lines changed: 19 additions & 14 deletions b/‎doc/source/whatsnew/v2.3.3.rst‎
Lines changed: 19 additions & 14 deletions
diff --git a/‎doc/source/whatsnew/v3.0.0.rst‎
Lines changed: 6 additions & 2 deletions b/‎doc/source/whatsnew/v3.0.0.rst‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎pandas/_libs/missing.pyx‎
Lines changed: 1 addition & 0 deletions b/‎pandas/_libs/missing.pyx‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/_libs/tslibs/nattype.pyx‎
Lines changed: 2 additions & 0 deletions b/‎pandas/_libs/tslibs/nattype.pyx‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/core/arrays/arrow/array.py‎
Lines changed: 18 additions & 13 deletions b/‎pandas/core/arrays/arrow/array.py‎
Lines changed: 18 additions & 13 deletions
diff --git a/‎pandas/core/arrays/base.py‎
Lines changed: 41 additions & 4 deletions b/‎pandas/core/arrays/base.py‎
Lines changed: 41 additions & 4 deletions
diff --git a/‎pandas/core/arrays/datetimes.py‎
Lines changed: 7 additions & 6 deletions b/‎pandas/core/arrays/datetimes.py‎
Lines changed: 7 additions & 6 deletions
@@ -162,7 +162,7 @@ jobs:
         run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v3.1.4
+        uses: pypa/cibuildwheel@v3.2.0
         with:
          package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:
 
@@ -11,14 +11,12 @@ NA is the way to represent missing values for nullable dtypes (see below):
 
 .. autosummary::
    :toctree: api/
-   :template: autosummary/class_without_autosummary.rst
 
    NA
 
 NaT is the missing value for timedelta and datetime data (see below):
 
 .. autosummary::
    :toctree: api/
-   :template: autosummary/class_without_autosummary.rst
 
    NaT
@@ -22,8 +22,6 @@ become the default string dtype in pandas 3.0. See
 
 Bug fixes
 ^^^^^^^^^
-- Fix :meth:`~Series.str.isdigit` to correctly recognize unicode superscript
-  characters as digits for :class:`StringDtype` backed by PyArrow (:issue:`61466`)
 - Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the
   "string" type in the JSON Table Schema for :class:`StringDtype` columns
   (:issue:`61889`)
@@ -39,4 +37,4 @@ Bug fixes
 Contributors
 ~~~~~~~~~~~~
 
-.. contributors:: v2.3.1..v2.3.2|HEAD
+.. contributors:: v2.3.1..v2.3.2
@@ -1,14 +1,14 @@
 .. _whatsnew_233:
 
-What's new in 2.3.3 (September XX, 2025)
+What's new in 2.3.3 (September 29, 2025)
 ----------------------------------------
 
 These are the changes in pandas 2.3.3. See :ref:`release` for a full changelog
 including other versions of pandas.
 
 {{ header }}
 
-.. _whatsnew_220.py14_compat:
+.. _whatsnew_233.py14_compat:
 
 Pandas 2.3.3 is now compatible with Python 3.14
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -37,25 +37,22 @@ Improvements
   specifying ``include=["object"]`` for backwards compatibility. In a future
   release, this will be deprecated and code for pandas 3+ should be updated to
   do ``include=["str"]`` (:issue:`61916`)
-
+- Support the ``/`` operation between a ``pathlib.Path`` object and a :class:`StringDtype`
+  Series, similar to how it already works for object-dtype Series (:issue:`61940`)
 
 .. _whatsnew_233.string_fixes.bugs:
 
 Bug fixes
 ^^^^^^^^^
 - Fix bug in :meth:`Series.str.replace` using named capture groups (e.g., ``\g<name>``) with the Arrow-backed dtype would raise an error (:issue:`57636`)
-- Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
+- Fix regression in :meth:`Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
   with a compiled regex and custom flags (:issue:`62240`)
-- Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
-
-
-Improvements and fixes for Copy-on-Write
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Bug fixes
-^^^^^^^^^
-
-- The :meth:`DataFrame.iloc` now works correctly with ``copy_on_write`` option when assigning values after subsetting the columns of a homogeneous DataFrame (:issue:`60309`)
+- Fix :meth:`Series.str.match` and :meth:`~Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
+- Fix bug in :meth:`~DataFrame.groupby` with ``sum()`` and unobserved categories resulting in ``0`` instead of the empty string ``""`` (:issue:`61909`)
+- Fix :meth:`Series.str.isdigit` to correctly recognize unicode superscript
+  characters as digits for :class:`StringDtype` backed by PyArrow (:issue:`61466`)
+- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
+- Fix error being raised when using a numpy ufunc with a Python-backed string array (:issue:`40800`)
 
 Other changes
 ~~~~~~~~~~~~~
@@ -65,9 +62,17 @@ Other changes
   Resampling with a :class:`PeriodIndex` is supported again, but a subset of
   methods that return incorrect results will raise an error in pandas 3.0 (:issue:`57033`)
 
+Other bug fixes
+~~~~~~~~~~~~~~~~
+
+- Fix memory leak in :meth:`DataFrame.to_json` with datetime columns (:issue:`62204`)
+- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
+- :meth:`DataFrame.iloc` now works correctly with the ``copy_on_write`` option when assigning values after subsetting the columns of a homogeneous DataFrame (:issue:`60309`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_233.contributors:
 
 Contributors
 ~~~~~~~~~~~~
+
+.. contributors:: v2.3.2..v2.3.3|HEAD
@@ -981,7 +981,8 @@ Timezones
 ^^^^^^^^^
 - Bug in :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` changing timezone to UTC when merging two DatetimeIndex objects with the same timezone but different units (:issue:`60080`)
 - Bug in :meth:`Series.dt.tz_localize` with a timezone-aware :class:`ArrowDtype` incorrectly converting to UTC when ``tz=None`` (:issue:`61780`)
--
+- Fixed bug in :func:`date_range` where tz-aware endpoints with calendar offsets (e.g. ``"MS"``) failed on DST fall-back; these now respect the ``ambiguous`` and ``nonexistent`` arguments (:issue:`52908`)
+
 
 Numeric
 ^^^^^^^
@@ -1054,6 +1055,8 @@ MultiIndex
 I/O
 ^^^
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
+- Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits
+  ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
 - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
 - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
 - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
@@ -1217,10 +1220,11 @@ Other
 - Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
 - Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
 - Deprecated the keyword ``check_datetimelike_compat`` in :meth:`testing.assert_frame_equal` and :meth:`testing.assert_series_equal` (:issue:`55638`)
+- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`NA` values in a :class:`Float64Dtype` object with ``np.nan``; this now works with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`55127`)
+- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace ``np.nan`` values in an :class:`Int64Dtype` object with :class:`NA`; this is now a no-op with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`51237`)
 - Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
 - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
 - Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
--
 
 .. ***DO NOT USE THIS SECTION***
 
 
@@ -393,6 +393,7 @@ class NAType(C_NAType):
     >>> True | pd.NA
     True
     """
+    __module__ = "pandas"
 
     _instance = None
 
 
@@ -372,6 +372,8 @@ class NaTType(_NaT):
     1         NaT
     """
 
+    __module__ = "pandas"
+
     def __new__(cls):
         cdef _NaT base
 
 
@@ -883,22 +883,27 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
         ltype = self._pa_array.type
 
         if isinstance(other, (ExtensionArray, np.ndarray, list)):
-            boxed = self._box_pa(other)
-            rtype = boxed.type
-            if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
-                pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
-            ):
-                # GH#62157 match non-pyarrow behavior
-                result = ops.invalid_comparison(self, other, op)
-                result = pa.array(result, type=pa.bool_())
+            try:
+                boxed = self._box_pa(other)
+            except pa.lib.ArrowInvalid:
+                # e.g. GH#60228 [1, "b"] we have to operate pointwise
+                res_values = [op(x, y) for x, y in zip(self, other)]
+                result = pa.array(res_values, type=pa.bool_(), from_pandas=True)
             else:
-                try:
-                    result = pc_func(self._pa_array, boxed)
-                except pa.ArrowNotImplementedError:
-                    # TODO: could this be wrong if other is object dtype?
-                    #  in which case we need to operate pointwise?
+                rtype = boxed.type
+                if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
+                    pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
+                ):
+                    # GH#62157 match non-pyarrow behavior
                     result = ops.invalid_comparison(self, other, op)
                     result = pa.array(result, type=pa.bool_())
+                else:
+                    try:
+                        result = pc_func(self._pa_array, boxed)
+                    except pa.ArrowNotImplementedError:
+                        result = ops.invalid_comparison(self, other, op)
+                        result = pa.array(result, type=pa.bool_())
+
         elif is_scalar(other):
             if (isinstance(other, datetime) and pa.types.is_date(ltype)) or (
                 type(other) is date and pa.types.is_timestamp(ltype)
 
@@ -30,8 +30,6 @@
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import (
-    Appender,
-    Substitution,
     cache_readonly,
 )
 from pandas.util._validators import (
@@ -1669,9 +1667,48 @@ def factorize(
         Categories (3, str): ['a', 'b', 'c']
         """
 
-    @Substitution(klass="ExtensionArray")
-    @Appender(_extension_array_shared_docs["repeat"])
     def repeat(self, repeats: int | Sequence[int], axis: AxisInt | None = None) -> Self:
+        """
+        Repeat elements of an ExtensionArray.
+
+        Returns a new ExtensionArray where each element of the current ExtensionArray
+        is repeated consecutively a given number of times.
+
+        Parameters
+        ----------
+        repeats : int or array of ints
+            The number of repetitions for each element. This should be a
+            non-negative integer. Repeating 0 times will return an empty
+            ExtensionArray.
+        axis : None
+            Must be ``None``. Has no effect but is accepted for compatibility
+            with numpy.
+
+        Returns
+        -------
+        ExtensionArray
+            Newly created ExtensionArray with repeated elements.
+
+        See Also
+        --------
+        Series.repeat : Equivalent function for Series.
+        Index.repeat : Equivalent function for Index.
+        numpy.repeat : Similar method for :class:`numpy.ndarray`.
+        ExtensionArray.take : Take arbitrary positions.
+
+        Examples
+        --------
+        >>> cat = pd.Categorical(["a", "b", "c"])
+        >>> cat
+        ['a', 'b', 'c']
+        Categories (3, str): ['a', 'b', 'c']
+        >>> cat.repeat(2)
+        ['a', 'a', 'b', 'b', 'c', 'c']
+        Categories (3, str): ['a', 'b', 'c']
+        >>> cat.repeat([1, 2, 3])
+        ['a', 'b', 'b', 'c', 'c', 'c']
+        Categories (3, str): ['a', 'b', 'c']
+        """
         nv.validate_repeat((), {"axis": axis})
         ind = np.arange(len(self)).repeat(repeats)
         return self.take(ind)
 
@@ -456,13 +456,14 @@ def _generate_range(
             end = _maybe_localize_point(end, freq, tz, ambiguous, nonexistent)
 
         if freq is not None:
-            # We break Day arithmetic (fixed 24 hour) here and opt for
-            # Day to mean calendar day (23/24/25 hour). Therefore, strip
-            # tz info from start and day to avoid DST arithmetic
-            if isinstance(freq, Day):
-                if start is not None:
+            # Offset handling:
+            # Ticks (fixed-duration like hours/minutes): keep tz; do absolute-time math.
+            # Other calendar offsets: drop tz; do naive wall time; localize once later
+            # so `ambiguous`/`nonexistent` are applied correctly.
+            if not isinstance(freq, Tick):
+                if start is not None and start.tz is not None:
                     start = start.tz_localize(None)
-                if end is not None:
+                if end is not None and end.tz is not None:
                     end = end.tz_localize(None)
 
             if isinstance(freq, (Tick, Day)):