Skip to content

Commit 347fc7f

Browse files
committed
Merge remote-tracking branch 'upstream/main' into fix-docstring
2 parents 5c543ca + 0d2505d commit 347fc7f

File tree

18 files changed

+179
-71
lines changed

18 files changed

+179
-71
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -151,14 +151,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
151151
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
152152
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
153153
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
154-
-i "pandas.core.groupby.DataFrameGroupBy.max SA01" \
155-
-i "pandas.core.groupby.DataFrameGroupBy.min SA01" \
156154
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
157155
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
158156
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
159157
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
160158
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
161-
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
162159
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
163160
-i "pandas.core.groupby.SeriesGroupBy.agg RT03" \
164161
-i "pandas.core.groupby.SeriesGroupBy.aggregate RT03" \
@@ -167,13 +164,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
167164
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
168165
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
169166
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
170-
-i "pandas.core.groupby.SeriesGroupBy.max SA01" \
171-
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
172167
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
173168
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
174169
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
175170
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
176-
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
177171
-i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
178172
-i "pandas.core.resample.Resampler.ffill RT03" \
179173
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ Conversion
102102

103103
Strings
104104
^^^^^^^
105+
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
105106
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
106107
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
107108
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,7 @@ I/O
627627
- Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
628628
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
629629
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
630+
- Bug in :meth:`read_csv` where the order of ``na_values`` produced inconsistent results when ``na_values`` is a list of non-string values. (:issue:`59303`)
630631
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
631632
- Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
632633
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)

pandas/_libs/lib.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,8 @@ def array_equivalent_object(ndarray left, ndarray right) -> bool:
600600
if not array_equivalent(x, y):
601601
return False
602602

603+
elif PyArray_Check(x) or PyArray_Check(y):
604+
return False
603605
elif (x is C_NA) ^ (y is C_NA):
604606
return False
605607
elif not (

pandas/conftest.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1338,7 +1338,13 @@ def string_storage(request):
13381338
pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
13391339
pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
13401340
("python", np.nan),
1341-
]
1341+
],
1342+
ids=[
1343+
"string=string[python]",
1344+
"string=string[pyarrow]",
1345+
"string=str[pyarrow]",
1346+
"string=str[python]",
1347+
],
13421348
)
13431349
def string_dtype_arguments(request):
13441350
"""
@@ -1369,6 +1375,7 @@ def dtype_backend(request):
13691375

13701376
# Alias so we can test with cartesian product of string_storage
13711377
string_storage2 = string_storage
1378+
string_dtype_arguments2 = string_dtype_arguments
13721379

13731380

13741381
@pytest.fixture(params=tm.BYTES_DTYPES)

pandas/core/arrays/arrow/array.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1999,7 +1999,7 @@ def _rank(
19991999
"""
20002000
See Series.rank.__doc__.
20012001
"""
2002-
return self._convert_int_result(
2002+
return self._convert_rank_result(
20032003
self._rank_calc(
20042004
axis=axis,
20052005
method=method,
@@ -2318,6 +2318,9 @@ def _convert_bool_result(self, result):
23182318
def _convert_int_result(self, result):
23192319
return type(self)(result)
23202320

2321+
def _convert_rank_result(self, result):
2322+
return type(self)(result)
2323+
23212324
def _str_count(self, pat: str, flags: int = 0) -> Self:
23222325
if flags:
23232326
raise NotImplementedError(f"count not implemented with {flags=}")

pandas/core/arrays/string_.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
nanops,
4747
ops,
4848
)
49+
from pandas.core.algorithms import isin
4950
from pandas.core.array_algos import masked_reductions
5051
from pandas.core.arrays.base import ExtensionArray
5152
from pandas.core.arrays.floating import (
@@ -65,6 +66,7 @@
6566
import pyarrow
6667

6768
from pandas._typing import (
69+
ArrayLike,
6870
AxisInt,
6971
Dtype,
7072
DtypeObj,
@@ -735,6 +737,24 @@ def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
735737
# base class implementation that uses __setitem__
736738
ExtensionArray._putmask(self, mask, value)
737739

740+
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
741+
if isinstance(values, BaseStringArray) or (
742+
isinstance(values, ExtensionArray) and is_string_dtype(values.dtype)
743+
):
744+
values = values.astype(self.dtype, copy=False)
745+
else:
746+
if not lib.is_string_array(np.asarray(values), skipna=True):
747+
values = np.array(
748+
[val for val in values if isinstance(val, str) or isna(val)],
749+
dtype=object,
750+
)
751+
if not len(values):
752+
return np.zeros(self.shape, dtype=bool)
753+
754+
values = self._from_sequence(values, dtype=self.dtype)
755+
756+
return isin(np.asarray(self), np.asarray(values))
757+
738758
def astype(self, dtype, copy: bool = True):
739759
dtype = pandas_dtype(dtype)
740760

pandas/core/arrays/string_arrow.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from pandas.core.arrays._arrow_string_mixins import ArrowStringArrayMixin
3030
from pandas.core.arrays.arrow import ArrowExtensionArray
3131
from pandas.core.arrays.boolean import BooleanDtype
32+
from pandas.core.arrays.floating import Float64Dtype
3233
from pandas.core.arrays.integer import Int64Dtype
3334
from pandas.core.arrays.numeric import NumericDtype
3435
from pandas.core.arrays.string_ import (
@@ -395,6 +396,16 @@ def _convert_int_result(self, result):
395396

396397
return Int64Dtype().__from_arrow__(result)
397398

399+
def _convert_rank_result(self, result):
400+
if self.dtype.na_value is np.nan:
401+
if isinstance(result, pa.Array):
402+
result = result.to_numpy(zero_copy_only=False)
403+
else:
404+
result = result.to_numpy()
405+
return result.astype("float64", copy=False)
406+
407+
return Float64Dtype().__from_arrow__(result)
408+
398409
def _reduce(
399410
self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
400411
):

pandas/core/groupby/generic.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,7 @@ def filter(self, func, dropna: bool = True, *args, **kwargs):
615615
616616
See Also
617617
--------
618+
Series.filter : Filter elements of ungrouped Series.
618619
DataFrameGroupBy.filter : Filter elements from groups based on a criterion.
619620
620621
Notes
@@ -1963,6 +1964,7 @@ def filter(self, func, dropna: bool = True, *args, **kwargs) -> DataFrame:
19631964
19641965
See Also
19651966
--------
1967+
DataFrame.filter : Filter elements of ungrouped DataFrame.
19661968
SeriesGroupBy.filter : Filter elements from groups based on a criterion.
19671969
19681970
Notes

pandas/core/groupby/groupby.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,15 @@ class providing the base-class of operations.
199199
Series or DataFrame
200200
Computed {fname} of values within each group.
201201
202+
See Also
203+
--------
204+
SeriesGroupBy.min : Return the min of the group values.
205+
DataFrameGroupBy.min : Return the min of the group values.
206+
SeriesGroupBy.max : Return the max of the group values.
207+
DataFrameGroupBy.max : Return the max of the group values.
208+
SeriesGroupBy.sum : Return the sum of the group values.
209+
DataFrameGroupBy.sum : Return the sum of the group values.
210+
202211
Examples
203212
--------
204213
{example}

0 commit comments

Comments
 (0)