Skip to content

Commit a13b8b1

Browse files
branch 'upstream/main' into bugfix--pprint-embedded-quotes
2 parents 5da2343 + 084b199 commit a13b8b1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+583
-243
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9090
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
9191
-i "pandas.arrays.IntegerArray SA01" \
9292
-i "pandas.arrays.IntervalArray.length SA01" \
93-
-i "pandas.arrays.IntervalArray.right SA01" \
9493
-i "pandas.arrays.NumpyExtensionArray SA01" \
95-
-i "pandas.arrays.SparseArray PR07,SA01" \
9694
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
9795
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
9896
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ MultiIndex
133133

134134
I/O
135135
^^^
136-
-
136+
- Bug in :meth:`DataFrame.to_excel` where decimals were stored as strings instead of numbers (:issue:`49598`)
137137
-
138138

139139
Period

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ Other Removals
481481
- Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`)
482482
- Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`)
483483
- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`)
484-
- Enforced deprecation of ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock`` (:issue:`58467`)
484+
- Enforced deprecation of ``core.internals`` member ``DatetimeTZBlock`` (:issue:`58467`)
485485
- Enforced deprecation of ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favour of ``date_format`` (:issue:`50601`)
486486
- Enforced deprecation of ``keep_date_col`` keyword in :func:`read_csv` (:issue:`55569`)
487487
- Enforced deprecation of ``quantile`` keyword in :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile`, renamed to ``q`` instead. (:issue:`52550`)
@@ -613,6 +613,7 @@ Categorical
613613
Datetimelike
614614
^^^^^^^^^^^^
615615
- Bug in :attr:`is_year_start` where a DatetimeIndex constructed via a date_range with frequency 'MS' wouldn't have the correct year or quarter start attributes (:issue:`57377`)
616+
- Bug in :class:`DataFrame` raising ``ValueError`` when ``dtype`` is ``timedelta64`` and ``data`` is a list containing ``None`` (:issue:`60064`)
616617
- Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`)
617618
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
618619
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`)
@@ -710,6 +711,7 @@ Period
710711
Plotting
711712
^^^^^^^^
712713
- Bug in :meth:`.DataFrameGroupBy.boxplot` failing when there were multiple groupings (:issue:`14701`)
714+
- Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`)
713715
- Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when both color and a ``dict`` style are set (:issue:`59461`)
714716
- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
715717
- Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)

pandas/_libs/tslibs/period.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ from pandas._libs.tslibs.offsets import (
114114
INVALID_FREQ_ERR_MSG,
115115
BDay,
116116
)
117+
from pandas.util._decorators import set_module
117118

118119
cdef:
119120
enum:
@@ -2830,6 +2831,7 @@ cdef class _Period(PeriodMixin):
28302831
return period_format(self.ordinal, base, fmt)
28312832

28322833

2834+
@set_module("pandas")
28332835
class Period(_Period):
28342836
"""
28352837
Represents a period of time.

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import collections
22
import warnings
33

4+
from pandas.util._decorators import set_module
45
from pandas.util._exceptions import find_stack_level
56

67
cimport cython
@@ -1854,7 +1855,7 @@ cdef class _Timedelta(timedelta):
18541855

18551856
# Python front end to C extension type _Timedelta
18561857
# This serves as the box for timedelta64
1857-
1858+
@set_module("pandas")
18581859
class Timedelta(_Timedelta):
18591860
"""
18601861
Represents a duration, the difference between two dates or times.
@@ -1916,7 +1917,7 @@ class Timedelta(_Timedelta):
19161917
--------
19171918
Here we initialize Timedelta object with both value and unit
19181919
1919-
>>> td = pd.Timedelta(1, "d")
1920+
>>> td = pd.Timedelta(1, "D")
19201921
>>> td
19211922
Timedelta('1 days 00:00:00')
19221923

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ import datetime as dt
5050
from pandas._libs.tslibs cimport ccalendar
5151
from pandas._libs.tslibs.base cimport ABCTimestamp
5252

53+
from pandas.util._decorators import set_module
5354
from pandas.util._exceptions import find_stack_level
5455

5556
from pandas._libs.tslibs.conversion cimport (
@@ -1648,7 +1649,7 @@ cdef class _Timestamp(ABCTimestamp):
16481649
# Python front end to C extension type _Timestamp
16491650
# This serves as the box for datetime64
16501651
1651-
1652+
@set_module("pandas")
16521653
class Timestamp(_Timestamp):
16531654
"""
16541655
Pandas replacement for python datetime.datetime object.
@@ -2926,7 +2927,7 @@ timedelta}, default 'raise'
29262927
--------
29272928
>>> ts = pd.Timestamp(1584226800, unit='s', tz='Europe/Stockholm')
29282929
>>> ts.tz
2929-
<DstTzInfo 'Europe/Stockholm' CET+1:00:00 STD>
2930+
zoneinfo.ZoneInfo(key='Europe/Stockholm')
29302931
"""
29312932
return self.tzinfo
29322933

pandas/_testing/__init__.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from typing import (
88
TYPE_CHECKING,
99
ContextManager,
10-
cast,
1110
)
1211

1312
import numpy as np
@@ -21,8 +20,6 @@
2120

2221
from pandas.compat import pa_version_under10p1
2322

24-
from pandas.core.dtypes.common import is_string_dtype
25-
2623
import pandas as pd
2724
from pandas import (
2825
ArrowDtype,
@@ -77,8 +74,8 @@
7774
with_csv_dialect,
7875
)
7976
from pandas.core.arrays import (
77+
ArrowExtensionArray,
8078
BaseMaskedArray,
81-
ExtensionArray,
8279
NumpyExtensionArray,
8380
)
8481
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
@@ -92,7 +89,6 @@
9289
NpDtype,
9390
)
9491

95-
from pandas.core.arrays import ArrowExtensionArray
9692

9793
UNSIGNED_INT_NUMPY_DTYPES: list[NpDtype] = ["uint8", "uint16", "uint32", "uint64"]
9894
UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
@@ -512,24 +508,18 @@ def shares_memory(left, right) -> bool:
512508
if isinstance(left, pd.core.arrays.IntervalArray):
513509
return shares_memory(left._left, right) or shares_memory(left._right, right)
514510

515-
if (
516-
isinstance(left, ExtensionArray)
517-
and is_string_dtype(left.dtype)
518-
and left.dtype.storage == "pyarrow" # type: ignore[attr-defined]
519-
):
520-
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
521-
left = cast("ArrowExtensionArray", left)
522-
if (
523-
isinstance(right, ExtensionArray)
524-
and is_string_dtype(right.dtype)
525-
and right.dtype.storage == "pyarrow" # type: ignore[attr-defined]
526-
):
527-
right = cast("ArrowExtensionArray", right)
511+
if isinstance(left, ArrowExtensionArray):
512+
if isinstance(right, ArrowExtensionArray):
513+
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
528514
left_pa_data = left._pa_array
529515
right_pa_data = right._pa_array
530516
left_buf1 = left_pa_data.chunk(0).buffers()[1]
531517
right_buf1 = right_pa_data.chunk(0).buffers()[1]
532-
return left_buf1 == right_buf1
518+
return left_buf1.address == right_buf1.address
519+
else:
520+
# if we have one ArrowExtensionArray and one other array, assume
521+
# they can only share memory if they share the same numpy buffer
522+
return np.shares_memory(left, right)
533523

534524
if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
535525
# By convention, we'll say these share memory if they share *either*

pandas/core/arrays/arrow/array.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ def fillna(
11451145
try:
11461146
fill_value = self._box_pa(value, pa_type=self._pa_array.type)
11471147
except pa.ArrowTypeError as err:
1148-
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
1148+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
11491149
raise TypeError(msg) from err
11501150

11511151
try:
@@ -2136,7 +2136,7 @@ def _maybe_convert_setitem_value(self, value):
21362136
try:
21372137
value = self._box_pa(value, self._pa_array.type)
21382138
except pa.ArrowTypeError as err:
2139-
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
2139+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
21402140
raise TypeError(msg) from err
21412141
return value
21422142

@@ -2312,6 +2312,20 @@ def _groupby_op(
23122312
**kwargs,
23132313
):
23142314
if isinstance(self.dtype, StringDtype):
2315+
if how in [
2316+
"prod",
2317+
"mean",
2318+
"median",
2319+
"cumsum",
2320+
"cumprod",
2321+
"std",
2322+
"sem",
2323+
"var",
2324+
"skew",
2325+
]:
2326+
raise TypeError(
2327+
f"dtype '{self.dtype}' does not support operation '{how}'"
2328+
)
23152329
return super()._groupby_op(
23162330
how=how,
23172331
has_dropped_na=has_dropped_na,

pandas/core/arrays/base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2608,6 +2608,20 @@ def _groupby_op(
26082608
# GH#43682
26092609
if isinstance(self.dtype, StringDtype):
26102610
# StringArray
2611+
if op.how in [
2612+
"prod",
2613+
"mean",
2614+
"median",
2615+
"cumsum",
2616+
"cumprod",
2617+
"std",
2618+
"sem",
2619+
"var",
2620+
"skew",
2621+
]:
2622+
raise TypeError(
2623+
f"dtype '{self.dtype}' does not support operation '{how}'"
2624+
)
26112625
if op.how not in ["any", "all"]:
26122626
# Fail early to avoid conversion to object
26132627
op._get_cython_function(op.kind, op.how, np.dtype(object), False)

pandas/core/arrays/interval.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,21 @@ def right(self) -> Index:
12691269
"""
12701270
Return the right endpoints of each Interval in the IntervalArray as an Index.
12711271
1272+
This property extracts the right endpoints from each interval contained within
1273+
the IntervalArray. This can be helpful in use cases where you need to work
1274+
with or compare only the upper bounds of intervals, such as when performing
1275+
range-based filtering, determining interval overlaps, or visualizing the end
1276+
boundaries of data segments.
1277+
1278+
See Also
1279+
--------
1280+
arrays.IntervalArray.left : Return the left endpoints of each Interval in
1281+
the IntervalArray as an Index.
1282+
arrays.IntervalArray.mid : Return the midpoint of each Interval in the
1283+
IntervalArray as an Index.
1284+
arrays.IntervalArray.contains : Check elementwise if the Intervals contain
1285+
the value.
1286+
12721287
Examples
12731288
--------
12741289

0 commit comments

Comments
 (0)