Skip to content

Commit 7399ae5

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-alias
2 parents 61ec243 + 1272cb1 commit 7399ae5

File tree

21 files changed

+296
-67
lines changed

21 files changed

+296
-67
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
164164
-i "pandas.Series.str.center RT03,SA01" \
165165
-i "pandas.Series.str.decode PR07,RT03,SA01" \
166166
-i "pandas.Series.str.encode PR07,RT03,SA01" \
167-
-i "pandas.Series.str.fullmatch RT03" \
168167
-i "pandas.Series.str.index RT03" \
169168
-i "pandas.Series.str.ljust RT03,SA01" \
170169
-i "pandas.Series.str.lower RT03" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,7 @@ Groupby/resample/rolling
610610
- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
611611
- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
612612
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
613+
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where the ``numeric_only`` parameter was passed indirectly through kwargs instead of being passed directly. (:issue:`58811`)
613614
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
614615
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
615616
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)

pandas/_libs/lib.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2702,7 +2702,7 @@ def maybe_convert_objects(ndarray[object] objects,
27022702
if using_string_dtype() and is_string_array(objects, skipna=True):
27032703
from pandas.core.arrays.string_ import StringDtype
27042704

2705-
dtype = StringDtype(storage="pyarrow", na_value=np.nan)
2705+
dtype = StringDtype(na_value=np.nan)
27062706
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27072707

27082708
elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):

pandas/_testing/asserters.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,24 @@ def assert_extension_array_equal(
796796
left_na, right_na, obj=f"{obj} NA mask", index_values=index_values
797797
)
798798

799+
# Specifically for StringArrayNumpySemantics, validate here that we have a valid array
800+
if (
801+
isinstance(left.dtype, StringDtype)
802+
and left.dtype.storage == "python"
803+
and left.dtype.na_value is np.nan
804+
):
805+
assert np.all(
806+
[np.isnan(val) for val in left._ndarray[left_na]] # type: ignore[attr-defined]
807+
), "wrong missing value sentinels"
808+
if (
809+
isinstance(right.dtype, StringDtype)
810+
and right.dtype.storage == "python"
811+
and right.dtype.na_value is np.nan
812+
):
813+
assert np.all(
814+
[np.isnan(val) for val in right._ndarray[right_na]] # type: ignore[attr-defined]
815+
), "wrong missing value sentinels"
816+
799817
left_valid = left[~left_na].to_numpy(dtype=object)
800818
right_valid = right[~right_na].to_numpy(dtype=object)
801819
if check_exact:

pandas/compat/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
)
2626
from pandas.compat.numpy import is_numpy_dev
2727
from pandas.compat.pyarrow import (
28+
HAS_PYARROW,
2829
pa_version_under10p1,
2930
pa_version_under11p0,
3031
pa_version_under13p0,
@@ -156,6 +157,7 @@ def is_ci_environment() -> bool:
156157
"pa_version_under14p1",
157158
"pa_version_under16p0",
158159
"pa_version_under17p0",
160+
"HAS_PYARROW",
159161
"IS64",
160162
"ISMUSL",
161163
"PY311",

pandas/compat/pyarrow.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
pa_version_under15p0 = _palv < Version("15.0.0")
1818
pa_version_under16p0 = _palv < Version("16.0.0")
1919
pa_version_under17p0 = _palv < Version("17.0.0")
20+
HAS_PYARROW = True
2021
except ImportError:
2122
pa_version_under10p1 = True
2223
pa_version_under11p0 = True
@@ -27,3 +28,4 @@
2728
pa_version_under15p0 = True
2829
pa_version_under16p0 = True
2930
pa_version_under17p0 = True
31+
HAS_PYARROW = False

pandas/conftest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,6 +1313,7 @@ def string_storage(request):
13131313
("python", pd.NA),
13141314
pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
13151315
pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
1316+
("python", np.nan),
13161317
]
13171318
)
13181319
def string_dtype_arguments(request):
@@ -1374,12 +1375,14 @@ def object_dtype(request):
13741375
("python", pd.NA),
13751376
pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
13761377
pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
1378+
("python", np.nan),
13771379
],
13781380
ids=[
13791381
"string=object",
13801382
"string=string[python]",
13811383
"string=string[pyarrow]",
13821384
"string=str[pyarrow]",
1385+
"string=str[python]",
13831386
],
13841387
)
13851388
def any_string_dtype(request):
@@ -1389,6 +1392,7 @@ def any_string_dtype(request):
13891392
* 'string[python]' (NA variant)
13901393
* 'string[pyarrow]' (NA variant)
13911394
* 'str' (NaN variant, with pyarrow)
1395+
* 'str' (NaN variant, without pyarrow)
13921396
"""
13931397
if isinstance(request.param, np.dtype):
13941398
return request.param

0 commit comments

Comments
 (0)