Skip to content

Commit 5e59c78

Browse files
Merge remote-tracking branch 'upstream/2.3.x' into backport-61446
2 parents 8d9ff0d + 4d42cd8 commit 5e59c78

File tree

9 files changed

+143
-60
lines changed

9 files changed

+143
-60
lines changed

.github/workflows/package-checks.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ jobs:
5353
runs-on: ubuntu-22.04
5454
strategy:
5555
matrix:
56-
python-version: ['3.10', '3.11']
56+
python-version: ['3.9', '3.10', '3.11']
5757
fail-fast: false
5858
name: Test Conda Forge Recipe - Python ${{ matrix.python-version }}
5959
concurrency:

.github/workflows/unit-tests.yml

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ jobs:
2727
strategy:
2828
matrix:
2929
platform: [ubuntu-22.04, ubuntu-24.04-arm]
30-
env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml]
30+
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
3131
# Prevent the include jobs from overriding other jobs
3232
pattern: [""]
3333
pandas_future_infer_string: ["0"]
@@ -38,7 +38,7 @@ jobs:
3838
pytest_target: "pandas/tests/test_downstream.py"
3939
platform: ubuntu-22.04
4040
- name: "Minimum Versions"
41-
env_file: actions-310-minimum_versions.yaml
41+
env_file: actions-39-minimum_versions.yaml
4242
pattern: "not slow and not network and not single_cpu"
4343
platform: ubuntu-22.04
4444
- name: "Locale: it_IT"
@@ -63,6 +63,11 @@ jobs:
6363
# It will be temporarily activated during tests with locale.setlocale
6464
extra_loc: "zh_CN"
6565
platform: ubuntu-22.04
66+
- name: "Copy-on-Write 3.9"
67+
env_file: actions-39.yaml
68+
pattern: "not slow and not network and not single_cpu"
69+
pandas_copy_on_write: "1"
70+
platform: ubuntu-22.04
6671
- name: "Copy-on-Write 3.10"
6772
env_file: actions-310.yaml
6873
pattern: "not slow and not network and not single_cpu"
@@ -88,6 +93,11 @@ jobs:
8893
pattern: "not slow and not network and not single_cpu"
8994
pandas_copy_on_write: "warn"
9095
platform: ubuntu-22.04
96+
- name: "Copy-on-Write 3.9 (warnings)"
97+
env_file: actions-39.yaml
98+
pattern: "not slow and not network and not single_cpu"
99+
pandas_copy_on_write: "warn"
100+
platform: ubuntu-22.04
91101
- name: "Future infer strings"
92102
env_file: actions-312.yaml
93103
pandas_future_infer_string: "1"
@@ -218,7 +228,7 @@ jobs:
218228
matrix:
219229
# Note: Don't use macOS latest since macos 14 appears to be arm64 only
220230
os: [macos-13, macos-14, windows-latest]
221-
env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml]
231+
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
222232
fail-fast: false
223233
runs-on: ${{ matrix.os }}
224234
name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }}

.github/workflows/wheels.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -95,12 +95,13 @@ jobs:
9595
- [ubuntu-22.04, manylinux_x86_64]
9696
- [ubuntu-22.04, musllinux_x86_64]
9797
- [ubuntu-24.04-arm, manylinux_aarch64]
98+
- [ubuntu-24.04-arm, musllinux_aarch64]
9899
- [macos-13, macosx_x86_64]
99100
# Note: M1 images on Github Actions start from macOS 14
100101
- [macos-14, macosx_arm64]
101102
- [windows-2022, win_amd64]
102103
# TODO: support PyPy?
103-
python: [["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"], ["cp313", "3.13"], ["cp313t", "3.13"]]
104+
python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"], ["cp313", "3.13"], ["cp313t", "3.13"]]
104105
# TODO: Build free-threaded wheels for Windows
105106
exclude:
106107
- buildplat: [windows-2022, win_amd64]

ci/deps/actions-311-downstream_compat.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,8 @@ dependencies:
5151
- python-calamine>=0.1.7
5252
- pyxlsb>=1.0.10
5353
- s3fs>=2022.11.0
54-
- scipy>=1.10.0
54+
# TEMP upper pin for scipy (https://github.com/statsmodels/statsmodels/issues/9584)
55+
- scipy>=1.10.0,<1.16
5556
- sqlalchemy>=2.0.0
5657
- tabulate>=0.9.0
5758
- xarray>=2022.12.0, <=2024.9.0

ci/deps/actions-310-minimum_versions.yaml renamed to ci/deps/actions-39-minimum_versions.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ name: pandas-dev
44
channels:
55
- conda-forge
66
dependencies:
7-
- python=3.10
7+
- python=3.9
88

99
# build dependencies
1010
- versioneer

ci/deps/actions-39.yaml

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
name: pandas-dev
2+
channels:
3+
- conda-forge
4+
dependencies:
5+
- python=3.9
6+
7+
# build dependencies
8+
- versioneer[toml]
9+
- cython>=0.29.33
10+
- meson[ninja]=1.2.1
11+
- meson-python=0.13.1
12+
13+
# test dependencies
14+
- pytest>=7.3.2
15+
- pytest-cov
16+
- pytest-xdist>=2.2.0
17+
- pytest-qt>=4.2.0
18+
- boto3
19+
20+
# required dependencies
21+
- python-dateutil
22+
- numpy
23+
# pytz 2024.2 timezones cause wrong results
24+
- pytz<2024.2
25+
26+
# optional dependencies
27+
- beautifulsoup4>=4.11.2
28+
- blosc>=1.21.3
29+
- bottleneck>=1.3.6
30+
- fastparquet>=2022.12.0
31+
- fsspec>=2022.11.0
32+
- html5lib>=1.1
33+
- hypothesis>=6.46.1
34+
- gcsfs>=2022.11.0
35+
- jinja2>=3.1.2
36+
- lxml>=4.9.2
37+
- matplotlib>=3.6.3
38+
- numba>=0.56.4
39+
- numexpr>=2.8.4
40+
- odfpy>=1.4.1
41+
- qtpy>=2.3.0
42+
- openpyxl>=3.1.0
43+
- psycopg2>=2.9.6
44+
- pyarrow>=10.0.1
45+
- pymysql>=1.0.2
46+
- pyqt>=5.15.9
47+
- pyreadstat>=1.2.0
48+
- pytables>=3.8.0
49+
- python-calamine>=0.1.7
50+
- pyxlsb>=1.0.10
51+
- s3fs>=2022.11.0
52+
- scipy>=1.10.0
53+
- sqlalchemy>=2.0.0
54+
- tabulate>=0.9.0
55+
- xarray>=2022.12.0
56+
- xlrd>=2.0.1
57+
- xlsxwriter>=3.0.5
58+
- zstandard>=0.19.0
59+
60+
- pip:
61+
- adbc-driver-postgresql>=0.8.0
62+
- adbc-driver-sqlite>=0.8.0
63+
- tzdata>=2022.7
64+
- pytest-localserver>=0.7.1

doc/source/whatsnew/v2.3.0.rst

Lines changed: 0 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -30,50 +30,6 @@ Other enhancements
3030
- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for :class:`StringDtype` columns (:issue:`60633`)
3131
- The :meth:`~Series.sum` reduction is now implemented for :class:`StringDtype` columns (:issue:`59853`)
3232

33-
.. ---------------------------------------------------------------------------
34-
.. _whatsnew_230.notable_bug_fixes:
35-
36-
Notable bug fixes
37-
~~~~~~~~~~~~~~~~~
38-
39-
These are bug fixes that might have notable behavior changes.
40-
41-
.. _whatsnew_230.notable_bug_fixes.string_comparisons:
42-
43-
Comparisons between different string dtypes
44-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
45-
46-
In previous versions, comparing Series of different string dtypes (e.g. ``pd.StringDtype("pyarrow", na_value=pd.NA)`` against ``pd.StringDtype("python", na_value=np.nan)``) would result in inconsistent resulting dtype or incorrectly raise. pandas will now use the hierarchy
47-
48-
object < (python, NaN) < (pyarrow, NaN) < (python, NA) < (pyarrow, NA)
49-
50-
in determining the result dtype when there are different string dtypes compared. Some examples:
51-
52-
- When ``pd.StringDtype("pyarrow", na_value=pd.NA)`` is compared against any other string dtype, the result will always be ``boolean[pyarrow]``.
53-
- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("pyarrow", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
54-
- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("python", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
55-
56-
In previous versions, comparing :class:`Series` of different string dtypes (e.g. ``pd.StringDtype("pyarrow", na_value=pd.NA)`` against ``pd.StringDtype("python", na_value=np.nan)``) would result in inconsistent resulting dtype or incorrectly raise. pandas will now use the hierarchy
57-
58-
Increased minimum version for Python
59-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
60-
61-
in determining the result dtype when there are different string dtypes compared. Some examples:
62-
63-
- When ``pd.StringDtype("pyarrow", na_value=pd.NA)`` is compared against any other string dtype, the result will always be ``boolean[pyarrow]``.
64-
- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("pyarrow", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
65-
- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("python", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
66-
67-
.. _whatsnew_230.api_changes:
68-
69-
API changes
70-
~~~~~~~~~~~
71-
72-
- When enabling the ``future.infer_string`` option, :class:`Index` set operations (like
73-
union or intersection) will now ignore the dtype of an empty :class:`RangeIndex` or
74-
empty :class:`Index` with ``object`` dtype when determining the dtype of the resulting
75-
Index (:issue:`60797`)
76-
7733
.. ---------------------------------------------------------------------------
7834
.. _whatsnew_230.deprecations:
7935

@@ -96,8 +52,6 @@ Numeric
9652

9753
Strings
9854
^^^^^^^
99-
- Bug in :meth:`.DataFrameGroupBy.min`, :meth:`.DataFrameGroupBy.max`, :meth:`.Resampler.min`, :meth:`.Resampler.max` where all NA values of string dtype would return float instead of string dtype (:issue:`60810`)
100-
- Bug in :meth:`DataFrame.sum` with ``axis=1``, :meth:`.DataFrameGroupBy.sum` or :meth:`.SeriesGroupBy.sum` with ``skipna=True``, and :meth:`.Resampler.sum` with all NA values of :class:`StringDtype` resulted in ``0`` instead of the empty string ``""`` (:issue:`60229`)
10155
- Bug in :meth:`Series.__pos__` and :meth:`DataFrame.__pos__` where an ``Exception`` was not raised for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`60710`)
10256
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` that incorrectly returned integer results with ``method="average"`` and raised an error if it would truncate results (:issue:`59768`)
10357
- Bug in :meth:`Series.replace` with :class:`StringDtype` when replacing with a non-string value was not upcasting to ``object`` dtype (:issue:`60282`)

doc/source/whatsnew/v2.3.1.rst

Lines changed: 51 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,11 +9,57 @@ including other versions of pandas.
99
{{ header }}
1010

1111
.. ---------------------------------------------------------------------------
12-
.. _whatsnew_231.enhancements:
12+
.. _whatsnew_231.string_fixes:
13+
14+
Improvements and fixes for the StringDtype
15+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16+
17+
.. _whatsnew_231.string_fixes.string_comparisons:
18+
19+
Comparisons between different string dtypes
20+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
21+
22+
In previous versions, comparing :class:`Series` of different string dtypes (e.g. ``pd.StringDtype("pyarrow", na_value=pd.NA)`` against ``pd.StringDtype("python", na_value=np.nan)``) would produce an inconsistent result dtype or incorrectly raise. pandas will now use the hierarchy
23+
24+
object < (python, NaN) < (pyarrow, NaN) < (python, NA) < (pyarrow, NA)
25+
26+
in determining the result dtype when different string dtypes are compared. Some examples:
27+
28+
- When ``pd.StringDtype("pyarrow", na_value=pd.NA)`` is compared against any other string dtype, the result will always be ``boolean[pyarrow]``.
29+
- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("pyarrow", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
30+
- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("python", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
31+
32+
.. _whatsnew_231.string_fixes.ignore_empty:
33+
34+
Index set operations ignore empty RangeIndex and object dtype Index
35+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
36+
37+
When enabling the ``future.infer_string`` option, :class:`Index` set operations (like
38+
union or intersection) will now ignore the dtype of an empty :class:`RangeIndex` or
39+
empty :class:`Index` with ``object`` dtype when determining the dtype of the resulting
40+
Index (:issue:`60797`).
41+
42+
This ensures that combining such an empty Index with strings will infer the string dtype
43+
correctly, rather than defaulting to ``object`` dtype. For example:
44+
45+
.. code-block:: python
46+
47+
>>> pd.options.future.infer_string = True
48+
>>> df = pd.DataFrame()
49+
>>> df.columns.dtype
50+
dtype('int64') # default RangeIndex for empty columns
51+
>>> df["a"] = [1, 2, 3]
52+
>>> df.columns.dtype
53+
<StringDtype(na_value=nan)> # new columns use string dtype instead of object dtype
54+
55+
.. _whatsnew_231.string_fixes.bugs:
56+
57+
Bug fixes
58+
^^^^^^^^^
59+
- Bug in :meth:`.DataFrameGroupBy.min`, :meth:`.DataFrameGroupBy.max`, :meth:`.Resampler.min`, :meth:`.Resampler.max` where all NA values of string dtype would return float instead of string dtype (:issue:`60810`)
60+
- Bug in :meth:`DataFrame.sum` with ``axis=1``, :meth:`.DataFrameGroupBy.sum` or :meth:`.SeriesGroupBy.sum` with ``skipna=True``, and :meth:`.Resampler.sum` where all NA values of :class:`StringDtype` resulted in ``0`` instead of the empty string ``""`` (:issue:`60229`)
61+
- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`61623`)
1362

14-
Enhancements
15-
~~~~~~~~~~~~
16-
-
1763

1864
.. _whatsnew_231.regressions:
1965

@@ -26,7 +72,7 @@ Fixed regressions
2672

2773
Bug fixes
2874
~~~~~~~~~
29-
- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`61623`)
75+
-
3076

3177
.. ---------------------------------------------------------------------------
3278
.. _whatsnew_231.other:

scripts/validate_docstrings.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -269,8 +269,15 @@ def pandas_validate(func_name: str):
269269
# Some objects are instances, e.g. IndexSlice, which numpydoc can't validate
270270
doc_obj = get_doc_object(func_obj, doc=func_obj.__doc__)
271271
doc = PandasDocstring(func_name, doc_obj)
272-
result = validate(doc_obj)
273-
272+
if func_obj.__doc__ is not None:
273+
result = validate(doc_obj)
274+
else:
275+
result = {
276+
"docstring": "",
277+
"file": None,
278+
"file_line": None,
279+
"errors": [("GL08", "The object does not have a docstring")],
280+
}
274281
mentioned_errs = doc.mentioned_private_classes
275282
if mentioned_errs:
276283
result["errors"].append(

0 commit comments

Comments
 (0)