Skip to content

Commit d634da2

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-astype-str
2 parents d413fc6 + 160b3eb commit d634da2

39 files changed

+531
-406
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7070
--format=actions \
7171
-i ES01 `# For now it is ok if docstrings are missing the extended summary` \
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
73-
-i "pandas.NA SA01" \
7473
-i "pandas.Period.freq GL08" \
7574
-i "pandas.Period.ordinal GL08" \
76-
-i "pandas.PeriodDtype.freq SA01" \
7775
-i "pandas.RangeIndex.from_range PR01,SA01" \
7876
-i "pandas.RangeIndex.step SA01" \
7977
-i "pandas.Series.cat.add_categories PR01,PR02" \
@@ -103,29 +101,24 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
103101
-i "pandas.Series.sparse.from_coo PR07,SA01" \
104102
-i "pandas.Series.sparse.npoints SA01" \
105103
-i "pandas.Series.sparse.sp_values SA01" \
106-
-i "pandas.Timedelta.components SA01" \
107104
-i "pandas.Timedelta.max PR02" \
108105
-i "pandas.Timedelta.min PR02" \
109106
-i "pandas.Timedelta.resolution PR02" \
110107
-i "pandas.Timedelta.to_timedelta64 SA01" \
111-
-i "pandas.Timedelta.total_seconds SA01" \
112108
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
113109
-i "pandas.Timestamp.max PR02" \
114110
-i "pandas.Timestamp.min PR02" \
115111
-i "pandas.Timestamp.nanosecond GL08" \
116112
-i "pandas.Timestamp.resolution PR02" \
117113
-i "pandas.Timestamp.tzinfo GL08" \
118114
-i "pandas.Timestamp.year GL08" \
119-
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
120115
-i "pandas.api.types.is_dict_like PR07,SA01" \
121-
-i "pandas.api.types.is_extension_array_dtype SA01" \
122116
-i "pandas.api.types.is_file_like PR07,SA01" \
123117
-i "pandas.api.types.is_float PR01,SA01" \
124118
-i "pandas.api.types.is_float_dtype SA01" \
125119
-i "pandas.api.types.is_hashable PR01,RT03,SA01" \
126120
-i "pandas.api.types.is_int64_dtype SA01" \
127121
-i "pandas.api.types.is_integer PR01,SA01" \
128-
-i "pandas.api.types.is_integer_dtype SA01" \
129122
-i "pandas.api.types.is_interval_dtype SA01" \
130123
-i "pandas.api.types.is_iterator PR07,SA01" \
131124
-i "pandas.api.types.is_list_like SA01" \
@@ -137,7 +130,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
137130
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
138131
-i "pandas.arrays.BooleanArray SA01" \
139132
-i "pandas.arrays.DatetimeArray SA01" \
140-
-i "pandas.arrays.FloatingArray SA01" \
141133
-i "pandas.arrays.IntegerArray SA01" \
142134
-i "pandas.arrays.IntervalArray.left SA01" \
143135
-i "pandas.arrays.IntervalArray.length SA01" \
@@ -154,14 +146,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
154146
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
155147
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
156148
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
157-
-i "pandas.core.groupby.DataFrameGroupBy.max SA01" \
158-
-i "pandas.core.groupby.DataFrameGroupBy.min SA01" \
159149
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
160150
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
161151
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
162152
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
163153
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
164-
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
165154
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
166155
-i "pandas.core.groupby.SeriesGroupBy.agg RT03" \
167156
-i "pandas.core.groupby.SeriesGroupBy.aggregate RT03" \
@@ -170,13 +159,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
170159
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
171160
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
172161
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
173-
-i "pandas.core.groupby.SeriesGroupBy.max SA01" \
174-
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
175162
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
176163
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
177164
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
178165
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
179-
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
180166
-i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
181167
-i "pandas.core.resample.Resampler.ffill RT03" \
182168
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
@@ -205,7 +191,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
205191
-i "pandas.errors.IntCastingNaNError SA01" \
206192
-i "pandas.errors.InvalidIndexError SA01" \
207193
-i "pandas.errors.InvalidVersion SA01" \
208-
-i "pandas.errors.MergeError SA01" \
209194
-i "pandas.errors.NullFrequencyError SA01" \
210195
-i "pandas.errors.NumExprClobberingError SA01" \
211196
-i "pandas.errors.NumbaUtilError SA01" \

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Version 2.2
3232
.. toctree::
3333
:maxdepth: 2
3434

35+
v2.2.3
3536
v2.2.2
3637
v2.2.1
3738
v2.2.0

doc/source/whatsnew/v2.2.3.rst

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
.. _whatsnew_223:
2+
3+
What's new in 2.2.3 (September XX, 2024)
4+
----------------------------------------
5+
6+
These are the changes in pandas 2.2.3. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
.. _whatsnew_223.regressions:
13+
14+
Fixed regressions
15+
~~~~~~~~~~~~~~~~~
16+
-
17+
18+
.. ---------------------------------------------------------------------------
19+
.. _whatsnew_223.bug_fixes:
20+
21+
Bug fixes
22+
~~~~~~~~~
23+
-
24+
25+
.. ---------------------------------------------------------------------------
26+
.. _whatsnew_223.other:
27+
28+
Other
29+
~~~~~
30+
-
31+
32+
.. ---------------------------------------------------------------------------
33+
.. _whatsnew_223.contributors:
34+
35+
Contributors
36+
~~~~~~~~~~~~

doc/source/whatsnew/v2.3.0.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,11 @@ Conversion
102102

103103
Strings
104104
^^^^^^^
105+
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
105106
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
107+
- Bug in :meth:`Series.str.slice` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
106108
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
107-
109+
-
108110

109111
Interval
110112
^^^^^^^^

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,7 @@ I/O
627627
- Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
628628
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
629629
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
630+
- Bug in :meth:`read_csv` where the order of the ``na_values`` caused inconsistent results when ``na_values`` is a list of non-string values. (:issue:`59303`)
630631
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
631632
- Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
632633
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)

pandas/_libs/lib.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,8 @@ def array_equivalent_object(ndarray left, ndarray right) -> bool:
600600
if not array_equivalent(x, y):
601601
return False
602602

603+
elif PyArray_Check(x) or PyArray_Check(y):
604+
return False
603605
elif (x is C_NA) ^ (y is C_NA):
604606
return False
605607
elif not (

pandas/_libs/missing.pyx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,14 @@ class NAType(C_NAType):
347347
The NA singleton is a missing value indicator defined by pandas. It is
348348
used in certain new extension dtypes (currently the "string" dtype).
349349
350+
See Also
351+
--------
352+
numpy.nan : Floating point representation of Not a Number (NaN) for numerical data.
353+
isna : Detect missing values for an array-like object.
354+
notna : Detect non-missing values for an array-like object.
355+
DataFrame.fillna : Fill missing values in a DataFrame.
356+
Series.fillna : Fill missing values in a Series.
357+
350358
Examples
351359
--------
352360
>>> pd.NA

pandas/_libs/tslibs/nattype.pyx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,14 @@ class NaTType(_NaT):
493493
"""
494494
Total seconds in the duration.
495495
496+
This method calculates the total duration in seconds by combining
497+
the days, seconds, and microseconds of the `Timedelta` object.
498+
499+
See Also
500+
--------
501+
to_timedelta : Convert argument to timedelta.
502+
Timedelta : Represents a duration, the difference between two dates or times.
503+
496504
Examples
497505
--------
498506
>>> td = pd.Timedelta('1min')

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,14 @@ cdef class _Timedelta(timedelta):
11891189
"""
11901190
Total seconds in the duration.
11911191

1192+
This method calculates the total duration in seconds by combining
1193+
the days, seconds, and microseconds of the `Timedelta` object.
1194+
1195+
See Also
1196+
--------
1197+
to_timedelta : Convert argument to timedelta.
1198+
Timedelta : Represents a duration, the difference between two dates or times.
1199+
11921200
Examples
11931201
--------
11941202
>>> td = pd.Timedelta('1min')
@@ -1493,6 +1501,17 @@ cdef class _Timedelta(timedelta):
14931501
"""
14941502
Return a namedtuple-like object of the components.
14951503
1504+
Each component represents a different time unit, allowing you to access the
1505+
breakdown of the total duration in terms of days, hours, minutes, seconds,
1506+
milliseconds, microseconds, and nanoseconds.
1507+
1508+
See Also
1509+
--------
1510+
Timedelta.total_seconds : Returns the total duration of the Timedelta in
1511+
seconds.
1512+
to_timedelta : Convert argument to Timedelta.
1513+
Timedelta : Represents a duration, the difference between two dates or times.
1514+
14961515
Examples
14971516
--------
14981517
>>> td = pd.Timedelta('2 day 4 min 3 us 42 ns')

pandas/conftest.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1338,7 +1338,13 @@ def string_storage(request):
13381338
pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
13391339
pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
13401340
("python", np.nan),
1341-
]
1341+
],
1342+
ids=[
1343+
"string=string[python]",
1344+
"string=string[pyarrow]",
1345+
"string=str[pyarrow]",
1346+
"string=str[python]",
1347+
],
13421348
)
13431349
def string_dtype_arguments(request):
13441350
"""
@@ -1369,6 +1375,7 @@ def dtype_backend(request):
13691375

13701376
# Alias so we can test with cartesian product of string_storage
13711377
string_storage2 = string_storage
1378+
string_dtype_arguments2 = string_dtype_arguments
13721379

13731380

13741381
@pytest.fixture(params=tm.BYTES_DTYPES)

0 commit comments

Comments
 (0)