Skip to content

Commit 44c69f2

Browse files
committed
Merge branch 'main' of https://github.com/pandas-dev/pandas into timestamp-fix-docstring-validation
2 parents b5004da + bc9b1c3 commit 44c69f2

File tree

30 files changed

+343
-167
lines changed

30 files changed

+343
-167
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
109109
-i "pandas.Series.sparse.from_coo PR07,SA01" \
110110
-i "pandas.Series.sparse.npoints SA01" \
111111
-i "pandas.Series.sparse.sp_values SA01" \
112-
-i "pandas.Timedelta.asm8 SA01" \
113-
-i "pandas.Timedelta.ceil SA01" \
114112
-i "pandas.Timedelta.components SA01" \
115-
-i "pandas.Timedelta.floor SA01" \
116113
-i "pandas.Timedelta.max PR02" \
117114
-i "pandas.Timedelta.min PR02" \
118115
-i "pandas.Timedelta.resolution PR02" \
119-
-i "pandas.Timedelta.round SA01" \
120116
-i "pandas.Timedelta.to_numpy PR01" \
121117
-i "pandas.Timedelta.to_timedelta64 SA01" \
122118
-i "pandas.Timedelta.total_seconds SA01" \
@@ -131,7 +127,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
131127
-i "pandas.Timestamp.resolution PR02" \
132128
-i "pandas.Timestamp.tzinfo GL08" \
133129
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
134-
-i "pandas.api.interchange.from_dataframe RT03,SA01" \
135130
-i "pandas.api.types.is_bool PR01,SA01" \
136131
-i "pandas.api.types.is_categorical_dtype SA01" \
137132
-i "pandas.api.types.is_complex PR01,SA01" \

doc/source/whatsnew/v2.3.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ Conversion
103103
Strings
104104
^^^^^^^
105105
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
106-
-
106+
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the Python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
107+
107108

108109
Interval
109110
^^^^^^^^

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Other enhancements
5353
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
5454
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5555
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
56+
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
5657
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
5758
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
5859
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
@@ -503,6 +504,7 @@ Performance improvements
503504
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57542`)
504505
- :meth:`Series.str.partition` with :class:`ArrowDtype` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57768`)
505506
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
507+
- Performance improvement in :class:`MultiIndex`: setting :attr:`MultiIndex.names` no longer invalidates all cached operations (:issue:`59578`)
506508
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
507509
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
508510
- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,6 +1498,12 @@ cdef class _Timedelta(timedelta):
14981498
numpy timedelta64 array scalar view
14991499
Array scalar view of the timedelta in nanoseconds.
15001500

1501+
See Also
1502+
--------
1503+
Timedelta.total_seconds : Return the total seconds in the duration.
1504+
Timedelta.components : Return a namedtuple of the Timedelta's components.
1505+
Timedelta.to_timedelta64 : Convert the Timedelta to a numpy.timedelta64.
1506+
15011507
Examples
15021508
--------
15031509
>>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
@@ -2061,6 +2067,12 @@ class Timedelta(_Timedelta):
20612067
------
20622068
ValueError if the freq cannot be converted
20632069
2070+
See Also
2071+
--------
2072+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2073+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2074+
Timestamp.ceil : Similar method for Timestamp objects.
2075+
20642076
Examples
20652077
--------
20662078
>>> td = pd.Timedelta('1001ms')
@@ -2081,6 +2093,16 @@ class Timedelta(_Timedelta):
20812093
Frequency string indicating the flooring resolution.
20822094
It uses the same units as class constructor :class:`~pandas.Timedelta`.
20832095
2096+
Returns
2097+
-------
2098+
Timedelta
2099+
A new Timedelta object floored to the specified resolution.
2100+
2101+
See Also
2102+
--------
2103+
Timestamp.ceil : Round the Timestamp up to the nearest specified resolution.
2104+
Timestamp.round : Round the Timestamp to the nearest specified resolution.
2105+
20842106
Examples
20852107
--------
20862108
>>> td = pd.Timedelta('1001ms')
@@ -2101,6 +2123,16 @@ class Timedelta(_Timedelta):
21012123
Frequency string indicating the ceiling resolution.
21022124
It uses the same units as class constructor :class:`~pandas.Timedelta`.
21032125
2126+
Returns
2127+
-------
2128+
Timedelta
2129+
A new Timedelta object ceiled to the specified resolution.
2130+
2131+
See Also
2132+
--------
2133+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2134+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2135+
21042136
Examples
21052137
--------
21062138
>>> td = pd.Timedelta('1001ms')

pandas/_testing/asserters.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def assert_index_equal(
188188
check_order: bool = True,
189189
rtol: float = 1.0e-5,
190190
atol: float = 1.0e-8,
191-
obj: str = "Index",
191+
obj: str | None = None,
192192
) -> None:
193193
"""
194194
Check that left and right Index are equal.
@@ -217,7 +217,7 @@ def assert_index_equal(
217217
Relative tolerance. Only used when check_exact is False.
218218
atol : float, default 1e-8
219219
Absolute tolerance. Only used when check_exact is False.
220-
obj : str, default 'Index'
220+
obj : str, default 'Index' or 'MultiIndex'
221221
Specify object name being compared, internally used to show appropriate
222222
assertion message.
223223
@@ -235,6 +235,9 @@ def assert_index_equal(
235235
"""
236236
__tracebackhide__ = True
237237

238+
if obj is None:
239+
obj = "MultiIndex" if isinstance(left, MultiIndex) else "Index"
240+
238241
def _check_types(left, right, obj: str = "Index") -> None:
239242
if not exact:
240243
return
@@ -283,7 +286,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
283286
right = cast(MultiIndex, right)
284287

285288
for level in range(left.nlevels):
286-
lobj = f"MultiIndex level [{level}]"
289+
lobj = f"{obj} level [{level}]"
287290
try:
288291
# try comparison on levels/codes to avoid densifying MultiIndex
289292
assert_index_equal(
@@ -314,7 +317,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
314317
obj=lobj,
315318
)
316319
# get_level_values may change dtype
317-
_check_types(left.levels[level], right.levels[level], obj=obj)
320+
_check_types(left.levels[level], right.levels[level], obj=lobj)
318321

319322
# skip exact index checking when `check_categorical` is False
320323
elif check_exact and check_categorical:
@@ -527,7 +530,7 @@ def assert_interval_array_equal(
527530
kwargs["check_freq"] = False
528531

529532
assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
530-
assert_equal(left._right, right._right, obj=f"{obj}.left", **kwargs)
533+
assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)
531534

532535
assert_attr_equal("closed", left, right, obj=obj)
533536

pandas/core/array_algos/quantile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def quantile_with_mask(
9191
if is_empty:
9292
# create the array of na_values
9393
# 2d len(values) * len(qs)
94-
flat = np.array([fill_value] * len(qs))
94+
flat = np.full(len(qs), fill_value)
9595
result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
9696
else:
9797
result = _nanquantile(

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
from __future__ import annotations
22

3+
from functools import partial
34
from typing import (
45
TYPE_CHECKING,
56
Literal,
67
)
78

89
import numpy as np
910

10-
from pandas.compat import pa_version_under10p1
11+
from pandas.compat import (
12+
pa_version_under10p1,
13+
pa_version_under17p0,
14+
)
1115

1216
from pandas.core.dtypes.missing import isna
1317

@@ -49,7 +53,19 @@ def _str_pad(
4953
elif side == "right":
5054
pa_pad = pc.utf8_rpad
5155
elif side == "both":
52-
pa_pad = pc.utf8_center
56+
if pa_version_under17p0:
57+
# GH#59624 fall back to object dtype
58+
from pandas import array
59+
60+
obj_arr = self.astype(object, copy=False) # type: ignore[attr-defined]
61+
obj = array(obj_arr, dtype=object)
62+
result = obj._str_pad(width, side, fillchar) # type: ignore[attr-defined]
63+
return type(self)._from_sequence(result, dtype=self.dtype) # type: ignore[attr-defined]
64+
else:
65+
# GH#54792
66+
# https://github.com/apache/arrow/issues/15053#issuecomment-2317032347
67+
lean_left = (width % 2) == 0
68+
pa_pad = partial(pc.utf8_center, lean_left_on_odd_padding=lean_left)
5369
else:
5470
raise ValueError(
5571
f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"
@@ -138,3 +154,39 @@ def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
138154
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
139155
result = result.fill_null(na)
140156
return self._convert_bool_result(result)
157+
158+
def _str_isalnum(self):
159+
result = pc.utf8_is_alnum(self._pa_array)
160+
return self._convert_bool_result(result)
161+
162+
def _str_isalpha(self):
163+
result = pc.utf8_is_alpha(self._pa_array)
164+
return self._convert_bool_result(result)
165+
166+
def _str_isdecimal(self):
167+
result = pc.utf8_is_decimal(self._pa_array)
168+
return self._convert_bool_result(result)
169+
170+
def _str_isdigit(self):
171+
result = pc.utf8_is_digit(self._pa_array)
172+
return self._convert_bool_result(result)
173+
174+
def _str_islower(self):
175+
result = pc.utf8_is_lower(self._pa_array)
176+
return self._convert_bool_result(result)
177+
178+
def _str_isnumeric(self):
179+
result = pc.utf8_is_numeric(self._pa_array)
180+
return self._convert_bool_result(result)
181+
182+
def _str_isspace(self):
183+
result = pc.utf8_is_space(self._pa_array)
184+
return self._convert_bool_result(result)
185+
186+
def _str_istitle(self):
187+
result = pc.utf8_is_title(self._pa_array)
188+
return self._convert_bool_result(result)
189+
190+
def _str_isupper(self):
191+
result = pc.utf8_is_upper(self._pa_array)
192+
return self._convert_bool_result(result)

pandas/core/arrays/_ranges.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
iNaT,
1919
)
2020

21+
from pandas.core.construction import range_to_ndarray
22+
2123
if TYPE_CHECKING:
2224
from pandas._typing import npt
2325

@@ -82,17 +84,7 @@ def generate_regular_range(
8284
"at least 'start' or 'end' should be specified if a 'period' is given."
8385
)
8486

85-
with np.errstate(over="raise"):
86-
# If the range is sufficiently large, np.arange may overflow
87-
# and incorrectly return an empty array if not caught.
88-
try:
89-
values = np.arange(b, e, stride, dtype=np.int64)
90-
except FloatingPointError:
91-
xdr = [b]
92-
while xdr[-1] != e:
93-
xdr.append(xdr[-1] + stride)
94-
values = np.array(xdr[:-1], dtype=np.int64)
95-
return values
87+
return range_to_ndarray(range(b, e, stride))
9688

9789

9890
def _generate_range_overflow_safe(

pandas/core/arrays/arrow/array.py

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2442,33 +2442,6 @@ def _str_slice(
24422442
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
24432443
)
24442444

2445-
def _str_isalnum(self) -> Self:
2446-
return type(self)(pc.utf8_is_alnum(self._pa_array))
2447-
2448-
def _str_isalpha(self) -> Self:
2449-
return type(self)(pc.utf8_is_alpha(self._pa_array))
2450-
2451-
def _str_isdecimal(self) -> Self:
2452-
return type(self)(pc.utf8_is_decimal(self._pa_array))
2453-
2454-
def _str_isdigit(self) -> Self:
2455-
return type(self)(pc.utf8_is_digit(self._pa_array))
2456-
2457-
def _str_islower(self) -> Self:
2458-
return type(self)(pc.utf8_is_lower(self._pa_array))
2459-
2460-
def _str_isnumeric(self) -> Self:
2461-
return type(self)(pc.utf8_is_numeric(self._pa_array))
2462-
2463-
def _str_isspace(self) -> Self:
2464-
return type(self)(pc.utf8_is_space(self._pa_array))
2465-
2466-
def _str_istitle(self) -> Self:
2467-
return type(self)(pc.utf8_is_title(self._pa_array))
2468-
2469-
def _str_isupper(self) -> Self:
2470-
return type(self)(pc.utf8_is_upper(self._pa_array))
2471-
24722445
def _str_len(self) -> Self:
24732446
return type(self)(pc.utf8_length(self._pa_array))
24742447

pandas/core/arrays/string_arrow.py

Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -281,9 +281,20 @@ def astype(self, dtype, copy: bool = True):
281281
# ------------------------------------------------------------------------
282282
# String methods interface
283283

284+
_str_isalnum = ArrowStringArrayMixin._str_isalnum
285+
_str_isalpha = ArrowStringArrayMixin._str_isalpha
286+
_str_isdecimal = ArrowStringArrayMixin._str_isdecimal
287+
_str_isdigit = ArrowStringArrayMixin._str_isdigit
288+
_str_islower = ArrowStringArrayMixin._str_islower
289+
_str_isnumeric = ArrowStringArrayMixin._str_isnumeric
290+
_str_isspace = ArrowStringArrayMixin._str_isspace
291+
_str_istitle = ArrowStringArrayMixin._str_istitle
292+
_str_isupper = ArrowStringArrayMixin._str_isupper
293+
284294
_str_map = BaseStringArray._str_map
285295
_str_startswith = ArrowStringArrayMixin._str_startswith
286296
_str_endswith = ArrowStringArrayMixin._str_endswith
297+
_str_pad = ArrowStringArrayMixin._str_pad
287298

288299
def _str_contains(
289300
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
@@ -359,42 +370,6 @@ def _str_slice(
359370
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
360371
)
361372

362-
def _str_isalnum(self):
363-
result = pc.utf8_is_alnum(self._pa_array)
364-
return self._convert_bool_result(result)
365-
366-
def _str_isalpha(self):
367-
result = pc.utf8_is_alpha(self._pa_array)
368-
return self._convert_bool_result(result)
369-
370-
def _str_isdecimal(self):
371-
result = pc.utf8_is_decimal(self._pa_array)
372-
return self._convert_bool_result(result)
373-
374-
def _str_isdigit(self):
375-
result = pc.utf8_is_digit(self._pa_array)
376-
return self._convert_bool_result(result)
377-
378-
def _str_islower(self):
379-
result = pc.utf8_is_lower(self._pa_array)
380-
return self._convert_bool_result(result)
381-
382-
def _str_isnumeric(self):
383-
result = pc.utf8_is_numeric(self._pa_array)
384-
return self._convert_bool_result(result)
385-
386-
def _str_isspace(self):
387-
result = pc.utf8_is_space(self._pa_array)
388-
return self._convert_bool_result(result)
389-
390-
def _str_istitle(self):
391-
result = pc.utf8_is_title(self._pa_array)
392-
return self._convert_bool_result(result)
393-
394-
def _str_isupper(self):
395-
result = pc.utf8_is_upper(self._pa_array)
396-
return self._convert_bool_result(result)
397-
398373
def _str_len(self):
399374
result = pc.utf8_length(self._pa_array)
400375
return self._convert_int_result(result)
@@ -546,7 +521,6 @@ class ArrowStringArrayNumpySemantics(ArrowStringArray):
546521
_str_get = ArrowStringArrayMixin._str_get
547522
_str_removesuffix = ArrowStringArrayMixin._str_removesuffix
548523
_str_capitalize = ArrowStringArrayMixin._str_capitalize
549-
_str_pad = ArrowStringArrayMixin._str_pad
550524
_str_title = ArrowStringArrayMixin._str_title
551525
_str_swapcase = ArrowStringArrayMixin._str_swapcase
552526
_str_slice_replace = ArrowStringArrayMixin._str_slice_replace

0 commit comments

Comments
 (0)