Skip to content

Commit 3b0b779

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-astype-str
2 parents 4c775d1 + 715585d commit 3b0b779

File tree

88 files changed

+1337
-708
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+1337
-708
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ jobs:
380380
fetch-depth: 0
381381

382382
- name: Set up Python Free-threading Version
383-
uses: deadsnakes/action@v3.1.0
383+
uses: deadsnakes/action@v3.2.0
384384
with:
385385
python-version: 3.13-dev
386386
nogil: true

ci/code_checks.sh

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7373
-i "pandas.NA SA01" \
7474
-i "pandas.Period.freq GL08" \
7575
-i "pandas.Period.ordinal GL08" \
76-
-i "pandas.Period.to_timestamp SA01" \
7776
-i "pandas.PeriodDtype.freq SA01" \
7877
-i "pandas.RangeIndex.from_range PR01,SA01" \
79-
-i "pandas.RangeIndex.start SA01" \
8078
-i "pandas.RangeIndex.step SA01" \
81-
-i "pandas.RangeIndex.stop SA01" \
8279
-i "pandas.Series.cat.add_categories PR01,PR02" \
8380
-i "pandas.Series.cat.as_ordered PR01" \
8481
-i "pandas.Series.cat.as_unordered PR01" \
@@ -92,12 +89,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9289
-i "pandas.Series.dt.day_name PR01,PR02" \
9390
-i "pandas.Series.dt.floor PR01,PR02" \
9491
-i "pandas.Series.dt.freq GL08" \
95-
-i "pandas.Series.dt.microseconds SA01" \
9692
-i "pandas.Series.dt.month_name PR01,PR02" \
97-
-i "pandas.Series.dt.nanoseconds SA01" \
9893
-i "pandas.Series.dt.normalize PR01" \
9994
-i "pandas.Series.dt.round PR01,PR02" \
100-
-i "pandas.Series.dt.seconds SA01" \
10195
-i "pandas.Series.dt.strftime PR01,PR02" \
10296
-i "pandas.Series.dt.to_period PR01,PR02" \
10397
-i "pandas.Series.dt.total_seconds PR01" \
@@ -109,39 +103,20 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
109103
-i "pandas.Series.sparse.from_coo PR07,SA01" \
110104
-i "pandas.Series.sparse.npoints SA01" \
111105
-i "pandas.Series.sparse.sp_values SA01" \
112-
-i "pandas.Timedelta.asm8 SA01" \
113-
-i "pandas.Timedelta.ceil SA01" \
114106
-i "pandas.Timedelta.components SA01" \
115-
-i "pandas.Timedelta.floor SA01" \
116107
-i "pandas.Timedelta.max PR02" \
117108
-i "pandas.Timedelta.min PR02" \
118109
-i "pandas.Timedelta.resolution PR02" \
119-
-i "pandas.Timedelta.round SA01" \
120-
-i "pandas.Timedelta.to_numpy PR01" \
121110
-i "pandas.Timedelta.to_timedelta64 SA01" \
122111
-i "pandas.Timedelta.total_seconds SA01" \
123-
-i "pandas.Timedelta.view SA01" \
124-
-i "pandas.TimedeltaIndex.components SA01" \
125-
-i "pandas.TimedeltaIndex.microseconds SA01" \
126-
-i "pandas.TimedeltaIndex.nanoseconds SA01" \
127-
-i "pandas.TimedeltaIndex.seconds SA01" \
128112
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
129113
-i "pandas.Timestamp.max PR02" \
130114
-i "pandas.Timestamp.min PR02" \
131115
-i "pandas.Timestamp.nanosecond GL08" \
132116
-i "pandas.Timestamp.resolution PR02" \
133117
-i "pandas.Timestamp.tzinfo GL08" \
134-
-i "pandas.Timestamp.value GL08" \
135118
-i "pandas.Timestamp.year GL08" \
136119
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
137-
-i "pandas.api.interchange.from_dataframe RT03,SA01" \
138-
-i "pandas.api.types.is_bool PR01,SA01" \
139-
-i "pandas.api.types.is_categorical_dtype SA01" \
140-
-i "pandas.api.types.is_complex PR01,SA01" \
141-
-i "pandas.api.types.is_complex_dtype SA01" \
142-
-i "pandas.api.types.is_datetime64_dtype SA01" \
143-
-i "pandas.api.types.is_datetime64_ns_dtype SA01" \
144-
-i "pandas.api.types.is_datetime64tz_dtype SA01" \
145120
-i "pandas.api.types.is_dict_like PR07,SA01" \
146121
-i "pandas.api.types.is_extension_array_dtype SA01" \
147122
-i "pandas.api.types.is_file_like PR07,SA01" \
@@ -175,7 +150,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
175150
-i "pandas.core.groupby.DataFrameGroupBy.agg RT03" \
176151
-i "pandas.core.groupby.DataFrameGroupBy.aggregate RT03" \
177152
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
178-
-i "pandas.core.groupby.DataFrameGroupBy.filter SA01" \
179153
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
180154
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
181155
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
@@ -191,7 +165,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
191165
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
192166
-i "pandas.core.groupby.SeriesGroupBy.agg RT03" \
193167
-i "pandas.core.groupby.SeriesGroupBy.aggregate RT03" \
194-
-i "pandas.core.groupby.SeriesGroupBy.filter PR01,SA01" \
195168
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
196169
-i "pandas.core.groupby.SeriesGroupBy.groups SA01" \
197170
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,9 @@ Other enhancements
5353
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
5454
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5555
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
56+
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
5657
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
58+
- :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
5759
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
5860
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
5961
- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
@@ -503,6 +505,7 @@ Performance improvements
503505
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
504506
- :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`)
505507
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
508+
- Performance improvement in :class:`MultiIndex`: setting :attr:`MultiIndex.names` no longer invalidates all cached operations (:issue:`59578`)
506509
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
507510
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
508511
- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)
@@ -526,6 +529,7 @@ Performance improvements
526529
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
527530
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
528531
- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
532+
- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`)
529533
- Performance improvement in :meth:`to_hdf` by avoiding unnecessary reopenings of the HDF5 file, speeding up data addition to files with a very large number of groups (:issue:`58248`)
530534
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
531535
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
@@ -665,6 +669,7 @@ Reshaping
665669
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
666670
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
667671
- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
672+
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
668673
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating an empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
669674

670675
Sparse

pandas/_libs/lib.pyx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,10 +1132,21 @@ def is_bool(obj: object) -> bool:
11321132
"""
11331133
Return True if given object is boolean.
11341134

1135+
Parameters
1136+
----------
1137+
obj : object
1138+
Object to check.
1139+
11351140
Returns
11361141
-------
11371142
bool
11381143

1144+
See Also
1145+
--------
1146+
api.types.is_scalar : Check if the input is a scalar.
1147+
api.types.is_integer : Check if the input is an integer.
1148+
api.types.is_float : Check if the input is a float.
1149+
11391150
Examples
11401151
--------
11411152
>>> pd.api.types.is_bool(True)
@@ -1151,10 +1162,22 @@ def is_complex(obj: object) -> bool:
11511162
"""
11521163
Return True if given object is complex.
11531164

1165+
Parameters
1166+
----------
1167+
obj : object
1168+
Object to check.
1169+
11541170
Returns
11551171
-------
11561172
bool
11571173

1174+
See Also
1175+
--------
1176+
api.types.is_complex_dtype : Check whether the provided array or
1177+
dtype is of a complex dtype.
1178+
api.types.is_number : Check if the object is a number.
1179+
api.types.is_integer : Return True if given object is integer.
1180+
11581181
Examples
11591182
--------
11601183
>>> pd.api.types.is_complex(1 + 1j)

pandas/_libs/tslibs/period.pyx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2001,6 +2001,12 @@ cdef class _Period(PeriodMixin):
20012001
-------
20022002
Timestamp
20032003

2004+
See Also
2005+
--------
2006+
Timestamp : A class representing a single point in time.
2007+
Period : Represents a span of time with a fixed frequency.
2008+
PeriodIndex.to_timestamp : Convert a `PeriodIndex` to a `DatetimeIndex`.
2009+
20042010
Examples
20052011
--------
20062012
>>> period = pd.Period('2023-1-1', freq='D')

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,9 +1421,16 @@ cdef class _Timedelta(timedelta):
14211421
"""
14221422
Convert the Timedelta to a NumPy timedelta64.
14231423

1424-
This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
1425-
copy parameters are available here only for compatibility. Their values
1426-
will not affect the return value.
1424+
This is an alias method for `Timedelta.to_timedelta64()`.
1425+
1426+
Parameters
1427+
----------
1428+
dtype : NoneType
1429+
It is available here only for compatibility. Its value will not
1430+
affect the return value.
1431+
copy : bool, default False
1432+
It is available here only for compatibility. Its value will not
1433+
affect the return value.
14271434

14281435
Returns
14291436
-------
@@ -1451,11 +1458,26 @@ cdef class _Timedelta(timedelta):
14511458
"""
14521459
Array view compatibility.
14531460
1461+
This method allows you to reinterpret the underlying data of a Timedelta
1462+
object as a different dtype. The `view` method provides a way to reinterpret
1463+
the internal representation of the `Timedelta` object without modifying its
1464+
data. This is particularly useful when you need to work with the underlying
1465+
data directly, such as for performance optimizations or interfacing with
1466+
low-level APIs. The returned value is typically the duration expressed as
1467+
a number of nanoseconds, represented as an integer or another specified dtype.
1468+
14541469
Parameters
14551470
----------
14561471
dtype : str or dtype
14571472
The dtype to view the underlying data as.
14581473
1474+
See Also
1475+
--------
1476+
numpy.ndarray.view : Returns a view of an array with the same data.
1477+
Timedelta.to_numpy : Converts the Timedelta to a NumPy timedelta64.
1478+
Timedelta.total_seconds : Returns the total duration of the Timedelta
1479+
object in seconds.
1480+
14591481
Examples
14601482
--------
14611483
>>> td = pd.Timedelta('3D')
@@ -1498,6 +1520,12 @@ cdef class _Timedelta(timedelta):
14981520
numpy timedelta64 array scalar view
14991521
Array scalar view of the timedelta in nanoseconds.
15001522

1523+
See Also
1524+
--------
1525+
Timedelta.total_seconds : Return the total seconds in the duration.
1526+
Timedelta.components : Return a namedtuple of the Timedelta's components.
1527+
Timedelta.to_timedelta64 : Convert the Timedelta to a numpy.timedelta64.
1528+
15011529
Examples
15021530
--------
15031531
>>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
@@ -2061,6 +2089,12 @@ class Timedelta(_Timedelta):
20612089
------
20622090
ValueError if the freq cannot be converted
20632091
2092+
See Also
2093+
--------
2094+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2095+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2096+
Timestamp.ceil : Similar method for Timestamp objects.
2097+
20642098
Examples
20652099
--------
20662100
>>> td = pd.Timedelta('1001ms')
@@ -2081,6 +2115,16 @@ class Timedelta(_Timedelta):
20812115
Frequency string indicating the flooring resolution.
20822116
It uses the same units as class constructor :class:`~pandas.Timedelta`.
20832117
2118+
Returns
2119+
-------
2120+
Timedelta
2121+
A new Timedelta object floored to the specified resolution.
2122+
2123+
See Also
2124+
--------
2125+
Timestamp.ceil : Round the Timestamp up to the nearest specified resolution.
2126+
Timestamp.round : Round the Timestamp to the nearest specified resolution.
2127+
20842128
Examples
20852129
--------
20862130
>>> td = pd.Timedelta('1001ms')
@@ -2101,6 +2145,16 @@ class Timedelta(_Timedelta):
21012145
Frequency string indicating the ceiling resolution.
21022146
It uses the same units as class constructor :class:`~pandas.Timedelta`.
21032147
2148+
Returns
2149+
-------
2150+
Timedelta
2151+
A new Timedelta object ceiled to the specified resolution.
2152+
2153+
See Also
2154+
--------
2155+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2156+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2157+
21042158
Examples
21052159
--------
21062160
>>> td = pd.Timedelta('1001ms')

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,27 @@ cdef class _Timestamp(ABCTimestamp):
240240

241241
@property
242242
def value(self) -> int:
243+
"""
244+
Return the value of the Timestamp.
245+
246+
Returns
247+
-------
248+
int
249+
The integer representation of the Timestamp object in nanoseconds
250+
since the Unix epoch (1970-01-01 00:00:00 UTC).
251+
252+
See Also
253+
--------
254+
Timestamp.second : Return the second of the Timestamp.
255+
Timestamp.minute : Return the minute of the Timestamp.
256+
257+
Examples
258+
--------
259+
>>> ts = pd.Timestamp("2024-08-31 16:16:30")
260+
>>> ts.value
261+
1725120990000000000
262+
"""
263+
243264
try:
244265
return convert_reso(self._value, self._creso, NPY_FR_ns, False)
245266
except OverflowError:
@@ -1020,8 +1041,8 @@ cdef class _Timestamp(ABCTimestamp):
10201041

10211042
See Also
10221043
--------
1023-
Timestamp.day : Return the day of the year.
1024-
Timestamp.year : Return the year of the week.
1044+
Timestamp.day : Return the day of the Timestamp.
1045+
Timestamp.year : Return the year of the Timestamp.
10251046

10261047
Examples
10271048
--------

pandas/_testing/asserters.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def assert_index_equal(
188188
check_order: bool = True,
189189
rtol: float = 1.0e-5,
190190
atol: float = 1.0e-8,
191-
obj: str = "Index",
191+
obj: str | None = None,
192192
) -> None:
193193
"""
194194
Check that left and right Index are equal.
@@ -217,7 +217,7 @@ def assert_index_equal(
217217
Relative tolerance. Only used when check_exact is False.
218218
atol : float, default 1e-8
219219
Absolute tolerance. Only used when check_exact is False.
220-
obj : str, default 'Index'
220+
obj : str, default 'Index' or 'MultiIndex'
221221
Specify object name being compared, internally used to show appropriate
222222
assertion message.
223223
@@ -235,6 +235,9 @@ def assert_index_equal(
235235
"""
236236
__tracebackhide__ = True
237237

238+
if obj is None:
239+
obj = "MultiIndex" if isinstance(left, MultiIndex) else "Index"
240+
238241
def _check_types(left, right, obj: str = "Index") -> None:
239242
if not exact:
240243
return
@@ -283,7 +286,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
283286
right = cast(MultiIndex, right)
284287

285288
for level in range(left.nlevels):
286-
lobj = f"MultiIndex level [{level}]"
289+
lobj = f"{obj} level [{level}]"
287290
try:
288291
# try comparison on levels/codes to avoid densifying MultiIndex
289292
assert_index_equal(
@@ -314,7 +317,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
314317
obj=lobj,
315318
)
316319
# get_level_values may change dtype
317-
_check_types(left.levels[level], right.levels[level], obj=obj)
320+
_check_types(left.levels[level], right.levels[level], obj=lobj)
318321

319322
# skip exact index checking when `check_categorical` is False
320323
elif check_exact and check_categorical:
@@ -527,7 +530,7 @@ def assert_interval_array_equal(
527530
kwargs["check_freq"] = False
528531

529532
assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
530-
assert_equal(left._right, right._right, obj=f"{obj}.left", **kwargs)
533+
assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)
531534

532535
assert_attr_equal("closed", left, right, obj=obj)
533536

0 commit comments

Comments
 (0)