Skip to content

Commit de9caf6

Browse files
authored
Merge branch 'main' into BUG-56994/pyarrow-assignment-unexpected-dtypes
2 parents 89f0429 + eacf032 commit de9caf6

30 files changed

+288
-95
lines changed

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.6.9
22+
rev: v0.7.2
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -74,7 +74,7 @@ repos:
7474
hooks:
7575
- id: isort
7676
- repo: https://github.com/asottile/pyupgrade
77-
rev: v3.17.0
77+
rev: v3.19.0
7878
hooks:
7979
- id: pyupgrade
8080
args: [--py310-plus]
@@ -95,7 +95,7 @@ repos:
9595
- id: sphinx-lint
9696
args: ["--enable", "all", "--disable", "line-too-long"]
9797
- repo: https://github.com/pre-commit/mirrors-clang-format
98-
rev: v19.1.1
98+
rev: v19.1.3
9999
hooks:
100100
- id: clang-format
101101
files: ^pandas/_libs/src|^pandas/_libs/include

doc/source/getting_started/comparison/comparison_with_r.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ In Python, this list would be a list of tuples, so
405405
a = list(enumerate(list(range(1, 5)) + [np.NAN]))
406406
pd.DataFrame(a)
407407
408-
For more details and examples see :ref:`the Into to Data Structures
408+
For more details and examples see :ref:`the Intro to Data Structures
409409
documentation <dsintro>`.
410410

411411
meltdf

doc/source/whatsnew/v2.3.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ enhancement1
3232
Other enhancements
3333
^^^^^^^^^^^^^^^^^^
3434

35+
- The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. the methods called
36+
when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been
37+
updated to work correctly with NumPy >= 2 (:issue:`57739`)
3538
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
3639
-
3740

pandas/core/arrays/arrow/array.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,16 @@ def __array__(
668668
self, dtype: NpDtype | None = None, copy: bool | None = None
669669
) -> np.ndarray:
670670
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
671-
return self.to_numpy(dtype=dtype)
671+
if copy is False:
672+
# TODO: By using `zero_copy_only` it may be possible to implement this
673+
raise ValueError(
674+
"Unable to avoid copy while creating an array as requested."
675+
)
676+
elif copy is None:
677+
# `to_numpy(copy=False)` has the meaning of NumPy `copy=None`.
678+
copy = False
679+
680+
return self.to_numpy(dtype=dtype, copy=copy)
672681

673682
def __invert__(self) -> Self:
674683
# This is a bit wise op for integer types
@@ -734,7 +743,7 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
734743
try:
735744
result[valid] = op(np_array[valid], other)
736745
except TypeError:
737-
result = ops.invalid_comparison(np_array, other, op)
746+
result = ops.invalid_comparison(self, other, op)
738747
result = pa.array(result, type=pa.bool_())
739748
result = pc.if_else(valid, result, None)
740749
else:

pandas/core/arrays/categorical.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -579,11 +579,12 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
579579
raise ValueError("Cannot convert float NaN to integer")
580580

581581
elif len(self.codes) == 0 or len(self.categories) == 0:
582-
result = np.array(
583-
self,
584-
dtype=dtype,
585-
copy=copy,
586-
)
582+
# For NumPy 1.x compatibility we cannot use copy=None. And
583+
# `copy=False` has the meaning of `copy=None` here:
584+
if not copy:
585+
result = np.asarray(self, dtype=dtype)
586+
else:
587+
result = np.array(self, dtype=dtype)
587588

588589
else:
589590
# GH8628 (PERF): astype category codes instead of astyping array
@@ -1663,7 +1664,7 @@ def __array__(
16631664
Specifies the dtype for the array.
16641665
16651666
copy : bool or None, optional
1666-
Unused.
1667+
See :func:`numpy.asarray`.
16671668
16681669
Returns
16691670
-------
@@ -1686,13 +1687,18 @@ def __array__(
16861687
>>> np.asarray(cat)
16871688
array(['a', 'b'], dtype=object)
16881689
"""
1690+
if copy is False:
1691+
raise ValueError(
1692+
"Unable to avoid copy while creating an array as requested."
1693+
)
1694+
16891695
ret = take_nd(self.categories._values, self._codes)
1690-
if dtype and np.dtype(dtype) != self.categories.dtype:
1691-
return np.asarray(ret, dtype)
16921696
# When we're a Categorical[ExtensionArray], like Interval,
16931697
# we need to ensure __array__ gets all the way to an
16941698
# ndarray.
1695-
return np.asarray(ret)
1699+
1700+
# `take_nd` should already make a copy, so don't force again.
1701+
return np.asarray(ret, dtype=dtype)
16961702

16971703
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
16981704
# for binary ops, use our custom dunder methods

pandas/core/arrays/datetimelike.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,14 @@ def __array__(
359359
) -> np.ndarray:
360360
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
361361
if is_object_dtype(dtype):
362+
if copy is False:
363+
raise ValueError(
364+
"Unable to avoid copy while creating an array as requested."
365+
)
362366
return np.array(list(self), dtype=object)
367+
368+
if copy is True:
369+
return np.array(self._ndarray, dtype=dtype)
363370
return self._ndarray
364371

365372
@overload

pandas/core/arrays/interval.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,6 +1622,11 @@ def __array__(
16221622
Return the IntervalArray's data as a numpy array of Interval
16231623
objects (with dtype='object')
16241624
"""
1625+
if copy is False:
1626+
raise ValueError(
1627+
"Unable to avoid copy while creating an array as requested."
1628+
)
1629+
16251630
left = self._left
16261631
right = self._right
16271632
mask = self.isna()

pandas/core/arrays/masked.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,17 @@ def __array__(
581581
the array interface, return my values
582582
We return an object array here to preserve our scalar values
583583
"""
584-
return self.to_numpy(dtype=dtype)
584+
if copy is False:
585+
if not self._hasna:
586+
# special case, here we can simply return the underlying data
587+
return np.array(self._data, dtype=dtype, copy=copy)
588+
raise ValueError(
589+
"Unable to avoid copy while creating an array as requested."
590+
)
591+
592+
if copy is None:
593+
copy = False # The NumPy copy=False meaning is different here.
594+
return self.to_numpy(dtype=dtype, copy=copy)
585595

586596
_HANDLED_TYPES: tuple[type, ...]
587597

pandas/core/arrays/numpy_.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ def dtype(self) -> NumpyEADtype:
150150
def __array__(
151151
self, dtype: NpDtype | None = None, copy: bool | None = None
152152
) -> np.ndarray:
153+
if copy is not None:
154+
# Note: branch avoids `copy=None` for NumPy 1.x support
155+
return np.array(self._ndarray, dtype=dtype, copy=copy)
153156
return np.asarray(self._ndarray, dtype=dtype)
154157

155158
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

pandas/core/arrays/period.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,19 @@ def __array__(
390390
self, dtype: NpDtype | None = None, copy: bool | None = None
391391
) -> np.ndarray:
392392
if dtype == "i8":
393-
return self.asi8
394-
elif dtype == bool:
393+
# For NumPy 1.x compatibility we cannot use copy=None. And
394+
# `copy=False` has the meaning of `copy=None` here:
395+
if not copy:
396+
return np.asarray(self.asi8, dtype=dtype)
397+
else:
398+
return np.array(self.asi8, dtype=dtype)
399+
400+
if copy is False:
401+
raise ValueError(
402+
"Unable to avoid copy while creating an array as requested."
403+
)
404+
405+
if dtype == bool:
395406
return ~self._isnan
396407

397408
# This will raise TypeError for non-object dtypes

0 commit comments

Comments
 (0)