Skip to content

Commit c51cb6a

Browse files
authored
Merge branch 'main' into bug
2 parents 3d6abaa + 084b199 commit c51cb6a

39 files changed

+356
-127
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9090
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
9191
-i "pandas.arrays.IntegerArray SA01" \
9292
-i "pandas.arrays.IntervalArray.length SA01" \
93-
-i "pandas.arrays.IntervalArray.right SA01" \
9493
-i "pandas.arrays.NumpyExtensionArray SA01" \
95-
-i "pandas.arrays.SparseArray PR07,SA01" \
9694
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
9795
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
9896
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ Other Removals
481481
- Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`)
482482
- Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`)
483483
- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`)
484-
- Enforced deprecation of ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock`` (:issue:`58467`)
484+
- Enforced deprecation of ``core.internals`` member ``DatetimeTZBlock`` (:issue:`58467`)
485485
- Enforced deprecation of ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favour of ``date_format`` (:issue:`50601`)
486486
- Enforced deprecation of ``keep_date_col`` keyword in :func:`read_csv` (:issue:`55569`)
487487
- Enforced deprecation of ``quantile`` keyword in :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile`, renamed to ``q`` instead. (:issue:`52550`)

pandas/_testing/__init__.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from typing import (
88
TYPE_CHECKING,
99
ContextManager,
10-
cast,
1110
)
1211

1312
import numpy as np
@@ -21,8 +20,6 @@
2120

2221
from pandas.compat import pa_version_under10p1
2322

24-
from pandas.core.dtypes.common import is_string_dtype
25-
2623
import pandas as pd
2724
from pandas import (
2825
ArrowDtype,
@@ -77,8 +74,8 @@
7774
with_csv_dialect,
7875
)
7976
from pandas.core.arrays import (
77+
ArrowExtensionArray,
8078
BaseMaskedArray,
81-
ExtensionArray,
8279
NumpyExtensionArray,
8380
)
8481
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
@@ -92,7 +89,6 @@
9289
NpDtype,
9390
)
9491

95-
from pandas.core.arrays import ArrowExtensionArray
9692

9793
UNSIGNED_INT_NUMPY_DTYPES: list[NpDtype] = ["uint8", "uint16", "uint32", "uint64"]
9894
UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
@@ -512,24 +508,18 @@ def shares_memory(left, right) -> bool:
512508
if isinstance(left, pd.core.arrays.IntervalArray):
513509
return shares_memory(left._left, right) or shares_memory(left._right, right)
514510

515-
if (
516-
isinstance(left, ExtensionArray)
517-
and is_string_dtype(left.dtype)
518-
and left.dtype.storage == "pyarrow" # type: ignore[attr-defined]
519-
):
520-
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
521-
left = cast("ArrowExtensionArray", left)
522-
if (
523-
isinstance(right, ExtensionArray)
524-
and is_string_dtype(right.dtype)
525-
and right.dtype.storage == "pyarrow" # type: ignore[attr-defined]
526-
):
527-
right = cast("ArrowExtensionArray", right)
511+
if isinstance(left, ArrowExtensionArray):
512+
if isinstance(right, ArrowExtensionArray):
513+
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
528514
left_pa_data = left._pa_array
529515
right_pa_data = right._pa_array
530516
left_buf1 = left_pa_data.chunk(0).buffers()[1]
531517
right_buf1 = right_pa_data.chunk(0).buffers()[1]
532-
return left_buf1 == right_buf1
518+
return left_buf1.address == right_buf1.address
519+
else:
520+
# if we have one ArrowExtensionArray and one other array, assume
521+
# they can only share memory if they share the same numpy buffer
522+
return np.shares_memory(left, right)
533523

534524
if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
535525
# By convention, we'll say these share memory if they share *either*

pandas/core/arrays/arrow/array.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2312,6 +2312,20 @@ def _groupby_op(
23122312
**kwargs,
23132313
):
23142314
if isinstance(self.dtype, StringDtype):
2315+
if how in [
2316+
"prod",
2317+
"mean",
2318+
"median",
2319+
"cumsum",
2320+
"cumprod",
2321+
"std",
2322+
"sem",
2323+
"var",
2324+
"skew",
2325+
]:
2326+
raise TypeError(
2327+
f"dtype '{self.dtype}' does not support operation '{how}'"
2328+
)
23152329
return super()._groupby_op(
23162330
how=how,
23172331
has_dropped_na=has_dropped_na,

pandas/core/arrays/base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2608,6 +2608,20 @@ def _groupby_op(
26082608
# GH#43682
26092609
if isinstance(self.dtype, StringDtype):
26102610
# StringArray
2611+
if op.how in [
2612+
"prod",
2613+
"mean",
2614+
"median",
2615+
"cumsum",
2616+
"cumprod",
2617+
"std",
2618+
"sem",
2619+
"var",
2620+
"skew",
2621+
]:
2622+
raise TypeError(
2623+
f"dtype '{self.dtype}' does not support operation '{how}'"
2624+
)
26112625
if op.how not in ["any", "all"]:
26122626
# Fail early to avoid conversion to object
26132627
op._get_cython_function(op.kind, op.how, np.dtype(object), False)

pandas/core/arrays/interval.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,21 @@ def right(self) -> Index:
12691269
"""
12701270
Return the right endpoints of each Interval in the IntervalArray as an Index.
12711271
1272+
This property extracts the right endpoints from each interval contained within
1273+
the IntervalArray. This can be helpful in use cases where you need to work
1274+
with or compare only the upper bounds of intervals, such as when performing
1275+
range-based filtering, determining interval overlaps, or visualizing the end
1276+
boundaries of data segments.
1277+
1278+
See Also
1279+
--------
1280+
arrays.IntervalArray.left : Return the left endpoints of each Interval in
1281+
the IntervalArray as an Index.
1282+
arrays.IntervalArray.mid : Return the midpoint of each Interval in the
1283+
IntervalArray as an Index.
1284+
arrays.IntervalArray.contains : Check elementwise if the Intervals contain
1285+
the value.
1286+
12721287
Examples
12731288
--------
12741289

pandas/core/arrays/sparse/array.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,12 +289,18 @@ class SparseArray(OpsMixin, PandasObject, ExtensionArray):
289289
"""
290290
An ExtensionArray for storing sparse data.
291291
292+
SparseArray efficiently stores data with a high frequency of a
293+
specific fill value (e.g., zeros), saving memory by only retaining
294+
non-fill elements and their indices. This class is particularly
295+
useful for large datasets where most values are redundant.
296+
292297
Parameters
293298
----------
294299
data : array-like or scalar
295300
A dense array of values to store in the SparseArray. This may contain
296301
`fill_value`.
297302
sparse_index : SparseIndex, optional
303+
Index indicating the locations of sparse elements.
298304
fill_value : scalar, optional
299305
Elements in data that are ``fill_value`` are not stored in the
300306
SparseArray. For memory savings, this should be the most common value
@@ -345,6 +351,10 @@ class SparseArray(OpsMixin, PandasObject, ExtensionArray):
345351
-------
346352
None
347353
354+
See Also
355+
--------
356+
SparseDtype : Dtype for sparse data.
357+
348358
Examples
349359
--------
350360
>>> from pandas.arrays import SparseArray

pandas/core/arrays/string_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -846,7 +846,7 @@ def _reduce(
846846
else:
847847
return nanops.nanall(self._ndarray, skipna=skipna)
848848

849-
if name in ["min", "max", "sum"]:
849+
if name in ["min", "max", "argmin", "argmax", "sum"]:
850850
result = getattr(self, name)(skipna=skipna, axis=axis, **kwargs)
851851
if keepdims:
852852
return self._from_sequence([result], dtype=self.dtype)

pandas/core/dtypes/dtypes.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from pandas._libs.tslibs.offsets import BDay
4949
from pandas.compat import pa_version_under10p1
5050
from pandas.errors import PerformanceWarning
51+
from pandas.util._decorators import set_module
5152
from pandas.util._exceptions import find_stack_level
5253

5354
from pandas.core.dtypes.base import (
@@ -155,6 +156,7 @@ class CategoricalDtypeType(type):
155156

156157

157158
@register_extension_dtype
159+
@set_module("pandas")
158160
class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
159161
"""
160162
Type for categorical data with the categories and orderedness.
@@ -706,6 +708,7 @@ def index_class(self) -> type_t[CategoricalIndex]:
706708

707709

708710
@register_extension_dtype
711+
@set_module("pandas")
709712
class DatetimeTZDtype(PandasExtensionDtype):
710713
"""
711714
An ExtensionDtype for timezone-aware datetime data.
@@ -974,6 +977,7 @@ def index_class(self) -> type_t[DatetimeIndex]:
974977

975978

976979
@register_extension_dtype
980+
@set_module("pandas")
977981
class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype):
978982
"""
979983
An ExtensionDtype for Period data.
@@ -1215,6 +1219,7 @@ def index_class(self) -> type_t[PeriodIndex]:
12151219

12161220

12171221
@register_extension_dtype
1222+
@set_module("pandas")
12181223
class IntervalDtype(PandasExtensionDtype):
12191224
"""
12201225
An ExtensionDtype for Interval data.
@@ -1691,6 +1696,7 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
16911696

16921697

16931698
@register_extension_dtype
1699+
@set_module("pandas")
16941700
class SparseDtype(ExtensionDtype):
16951701
"""
16961702
Dtype for data stored in :class:`SparseArray`.
@@ -2130,6 +2136,7 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
21302136

21312137

21322138
@register_extension_dtype
2139+
@set_module("pandas")
21332140
class ArrowDtype(StorageExtensionDtype):
21342141
"""
21352142
An ExtensionDtype for PyArrow data types.

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4162,9 +4162,9 @@ def quantile(
41624162
starts, ends = lib.generate_slices(splitter._slabels, splitter.ngroups)
41634163

41644164
def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
4165-
if is_object_dtype(vals.dtype):
4165+
if isinstance(vals.dtype, StringDtype) or is_object_dtype(vals.dtype):
41664166
raise TypeError(
4167-
"'quantile' cannot be performed against 'object' dtypes!"
4167+
f"dtype '{vals.dtype}' does not support operation 'quantile'"
41684168
)
41694169

41704170
inference: DtypeObj | None = None

0 commit comments

Comments
 (0)