Skip to content

Commit cda40ee

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-operations-error
2 parents 44c0090 + 1b7bfed commit cda40ee

File tree

20 files changed

+362
-161
lines changed

20 files changed

+362
-161
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
142142
-i "pandas.Series.sparse.sp_values SA01" \
143143
-i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \
144144
-i "pandas.Series.std PR01,RT03,SA01" \
145-
-i "pandas.Series.str.wrap RT03,SA01" \
146-
-i "pandas.Series.str.zfill RT03" \
147145
-i "pandas.Timedelta.asm8 SA01" \
148146
-i "pandas.Timedelta.ceil SA01" \
149147
-i "pandas.Timedelta.components SA01" \
@@ -175,14 +173,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
175173
-i "pandas.Timestamp.tzinfo GL08" \
176174
-i "pandas.Timestamp.value GL08" \
177175
-i "pandas.Timestamp.year GL08" \
178-
-i "pandas.api.extensions.ExtensionArray.duplicated RT03,SA01" \
179-
-i "pandas.api.extensions.ExtensionArray.fillna SA01" \
180-
-i "pandas.api.extensions.ExtensionArray.insert PR07,RT03,SA01" \
181176
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
182-
-i "pandas.api.extensions.ExtensionArray.isin PR07,RT03,SA01" \
183-
-i "pandas.api.extensions.ExtensionArray.tolist RT03,SA01" \
184-
-i "pandas.api.extensions.ExtensionArray.unique RT03,SA01" \
185-
-i "pandas.api.extensions.ExtensionArray.view SA01" \
186177
-i "pandas.api.interchange.from_dataframe RT03,SA01" \
187178
-i "pandas.api.types.is_bool PR01,SA01" \
188179
-i "pandas.api.types.is_categorical_dtype SA01" \
@@ -235,7 +226,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
235226
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
236227
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
237228
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
238-
-i "pandas.core.groupby.DataFrameGroupBy.prod SA01" \
239229
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
240230
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
241231
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
@@ -252,7 +242,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
252242
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
253243
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
254244
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
255-
-i "pandas.core.groupby.SeriesGroupBy.prod SA01" \
256245
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
257246
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
258247
-i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
@@ -305,34 +294,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
305294
-i "pandas.errors.UnsupportedFunctionCall SA01" \
306295
-i "pandas.errors.ValueLabelTypeMismatch SA01" \
307296
-i "pandas.infer_freq SA01" \
308-
-i "pandas.io.formats.style.Styler.apply RT03" \
309-
-i "pandas.io.formats.style.Styler.apply_index RT03" \
310-
-i "pandas.io.formats.style.Styler.background_gradient RT03" \
311-
-i "pandas.io.formats.style.Styler.bar RT03,SA01" \
312-
-i "pandas.io.formats.style.Styler.clear SA01" \
313-
-i "pandas.io.formats.style.Styler.concat RT03,SA01" \
314-
-i "pandas.io.formats.style.Styler.export RT03" \
315-
-i "pandas.io.formats.style.Styler.from_custom_template SA01" \
316-
-i "pandas.io.formats.style.Styler.hide RT03,SA01" \
317-
-i "pandas.io.formats.style.Styler.highlight_between RT03" \
318-
-i "pandas.io.formats.style.Styler.highlight_max RT03" \
319-
-i "pandas.io.formats.style.Styler.highlight_min RT03" \
320-
-i "pandas.io.formats.style.Styler.highlight_null RT03" \
321-
-i "pandas.io.formats.style.Styler.highlight_quantile RT03" \
322-
-i "pandas.io.formats.style.Styler.map RT03" \
323-
-i "pandas.io.formats.style.Styler.map_index RT03" \
324-
-i "pandas.io.formats.style.Styler.set_caption RT03,SA01" \
325-
-i "pandas.io.formats.style.Styler.set_properties RT03,SA01" \
326-
-i "pandas.io.formats.style.Styler.set_sticky RT03,SA01" \
327-
-i "pandas.io.formats.style.Styler.set_table_attributes PR07,RT03" \
328-
-i "pandas.io.formats.style.Styler.set_table_styles RT03" \
329-
-i "pandas.io.formats.style.Styler.set_td_classes RT03" \
330-
-i "pandas.io.formats.style.Styler.set_tooltips RT03,SA01" \
331-
-i "pandas.io.formats.style.Styler.set_uuid PR07,RT03,SA01" \
332-
-i "pandas.io.formats.style.Styler.text_gradient RT03" \
333-
-i "pandas.io.formats.style.Styler.to_excel PR01" \
334-
-i "pandas.io.formats.style.Styler.to_string SA01" \
335-
-i "pandas.io.formats.style.Styler.use RT03" \
336297
-i "pandas.io.json.build_table_schema PR07,RT03,SA01" \
337298
-i "pandas.io.stata.StataReader.data_label SA01" \
338299
-i "pandas.io.stata.StataReader.value_labels RT03,SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Other enhancements
4343
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
4444
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
4545
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
46+
- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
4647
- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
4748
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
4849
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
@@ -504,6 +505,7 @@ Performance improvements
504505
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
505506
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
506507
- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)
508+
- Performance improvement in :meth:`DataFrame.to_csv` when ``index=False`` (:issue:`59312`)
507509
- Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`)
508510
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
509511
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
@@ -553,6 +555,7 @@ Datetimelike
553555
- Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`)
554556
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
555557
- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`)
558+
- Bug in :meth:`to_datetime` reporting incorrect index in case of any failure scenario. (:issue:`58298`)
556559
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
557560

558561
Timedelta
@@ -658,6 +661,7 @@ Reshaping
658661
^^^^^^^^^
659662
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
660663
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
664+
- Bug in :meth:`DataFrame.join` where a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
661665
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
662666
- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
663667
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)

pandas/_libs/tslib.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ cpdef array_to_datetime(
439439
raise TypeError(f"{type(val)} is not convertible to datetime")
440440

441441
except (TypeError, OverflowError, ValueError) as ex:
442-
ex.args = (f"{ex}, at position {i}",)
442+
ex.args = (f"{ex}",)
443443
if is_coerce:
444444
iresult[i] = NPY_NAT
445445
continue

pandas/_libs/tslibs/strptime.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ def array_strptime(
536536

537537
except ValueError as ex:
538538
ex.args = (
539-
f"{str(ex)}, at position {i}. You might want to try:\n"
539+
f"{str(ex)}. You might want to try:\n"
540540
" - passing `format` if your strings have a consistent format;\n"
541541
" - passing `format='ISO8601'` if your strings are "
542542
"all ISO8601 but not necessarily in exactly the same format;\n"

pandas/core/arrays/base.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1137,6 +1137,13 @@ def fillna(
11371137
ExtensionArray
11381138
With NA/NaN filled.
11391139
1140+
See Also
1141+
--------
1142+
api.extensions.ExtensionArray.dropna : Return ExtensionArray without
1143+
NA values.
1144+
api.extensions.ExtensionArray.isna : A 1-D array indicating if
1145+
each value is missing.
1146+
11401147
Examples
11411148
--------
11421149
>>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
@@ -1220,6 +1227,15 @@ def duplicated(
12201227
Returns
12211228
-------
12221229
ndarray[bool]
1230+
With true in indices where elements are duplicated and false otherwise.
1231+
1232+
See Also
1233+
--------
1234+
DataFrame.duplicated : Return boolean Series denoting
1235+
duplicate rows.
1236+
Series.duplicated : Indicate duplicate Series values.
1237+
api.extensions.ExtensionArray.unique : Compute the ExtensionArray
1238+
of unique values.
12231239
12241240
Examples
12251241
--------
@@ -1303,6 +1319,13 @@ def unique(self) -> Self:
13031319
Returns
13041320
-------
13051321
pandas.api.extensions.ExtensionArray
1322+
With unique values from the input array.
1323+
1324+
See Also
1325+
--------
1326+
Index.unique: Return unique values in the index.
1327+
Series.unique: Return unique values of Series object.
1328+
unique: Return unique values based on a hash table.
13061329
13071330
Examples
13081331
--------
@@ -1436,10 +1459,18 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
14361459
Parameters
14371460
----------
14381461
values : np.ndarray or ExtensionArray
1462+
Values to compare every element in the array against.
14391463
14401464
Returns
14411465
-------
14421466
np.ndarray[bool]
1467+
With true at indices where value is in `values`.
1468+
1469+
See Also
1470+
--------
1471+
DataFrame.isin: Whether each element in the DataFrame is contained in values.
1472+
Index.isin: Return a boolean array where the index values are in values.
1473+
Series.isin: Whether elements in Series are contained in values.
14431474
14441475
Examples
14451476
--------
@@ -1743,6 +1774,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
17431774
ExtensionArray or np.ndarray
17441775
A view on the :class:`ExtensionArray`'s data.
17451776
1777+
See Also
1778+
--------
1779+
api.extensions.ExtensionArray.ravel: Return a flattened view on input array.
1780+
Index.view: Equivalent function for Index.
1781+
ndarray.view: New view of array with the same data.
1782+
17461783
Examples
17471784
--------
17481785
This gives view on the underlying data of an ``ExtensionArray`` and is not a
@@ -2201,6 +2238,12 @@ def tolist(self) -> list:
22012238
Returns
22022239
-------
22032240
list
2241+
Python list of values in array.
2242+
2243+
See Also
2244+
--------
2245+
Index.to_list: Return a list of the values in the Index.
2246+
Series.to_list: Return a list of the values in the Series.
22042247
22052248
Examples
22062249
--------
@@ -2223,11 +2266,18 @@ def insert(self, loc: int, item) -> Self:
22232266
Parameters
22242267
----------
22252268
loc : int
2269+
Index where the `item` needs to be inserted.
22262270
item : scalar-like
2271+
Value to be inserted.
22272272
22282273
Returns
22292274
-------
2230-
same type as self
2275+
ExtensionArray
2276+
With `item` inserted at `loc`.
2277+
2278+
See Also
2279+
--------
2280+
Index.insert: Make new Index inserting new item at location.
22312281
22322282
Notes
22332283
-----

pandas/core/generic.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2123,11 +2123,13 @@ def _repr_data_resource_(self):
21232123
klass="object",
21242124
storage_options=_shared_docs["storage_options"],
21252125
storage_options_versionadded="1.2.0",
2126+
encoding_parameter="",
2127+
verbose_parameter="",
21262128
extra_parameters=textwrap.dedent(
21272129
"""\
21282130
engine_kwargs : dict, optional
21292131
Arbitrary keyword arguments passed to excel engine.
2130-
"""
2132+
"""
21312133
),
21322134
)
21332135
def to_excel(
@@ -2196,9 +2198,11 @@ def to_excel(
21962198
21972199
merge_cells : bool, default True
21982200
Write MultiIndex and Hierarchical Rows as merged cells.
2201+
{encoding_parameter}
21992202
inf_rep : str, default 'inf'
22002203
Representation for infinity (there is no native representation for
22012204
infinity in Excel).
2205+
{verbose_parameter}
22022206
freeze_panes : tuple of int (length 2), optional
22032207
Specifies the one-based bottommost row and rightmost column that
22042208
is to be frozen.

pandas/core/groupby/categorical.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,8 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
4646
# In cases with c.ordered, this is equivalent to
4747
# return c.remove_unused_categories(), c
4848

49-
unique_codes = unique1d(c.codes) # type: ignore[no-untyped-call]
49+
take_codes = unique1d(c.codes[c.codes != -1]) # type: ignore[no-untyped-call]
5050

51-
take_codes = unique_codes[unique_codes != -1]
5251
if sort:
5352
take_codes = np.sort(take_codes)
5453

@@ -67,17 +66,18 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
6766

6867
# sort=False should order groups in as-encountered order (GH-8868)
6968

70-
# xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
71-
all_codes = np.arange(c.categories.nunique())
69+
# GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
7270
# GH 38140: exclude nan from indexer for categories
7371
unique_notnan_codes = unique1d(c.codes[c.codes != -1]) # type: ignore[no-untyped-call]
7472
if sort:
7573
unique_notnan_codes = np.sort(unique_notnan_codes)
76-
if len(all_codes) > len(unique_notnan_codes):
74+
if (num_cat := len(c.categories)) > len(unique_notnan_codes):
7775
# GH 13179: All categories need to be present, even if missing from the data
78-
missing_codes = np.setdiff1d(all_codes, unique_notnan_codes, assume_unique=True)
76+
missing_codes = np.setdiff1d(
77+
np.arange(num_cat), unique_notnan_codes, assume_unique=True
78+
)
7979
take_codes = np.concatenate((unique_notnan_codes, missing_codes))
8080
else:
8181
take_codes = unique_notnan_codes
8282

83-
return Categorical(c, c.unique().categories.take(take_codes))
83+
return Categorical(c, c.categories.take(take_codes))

0 commit comments

Comments
 (0)