Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,6 @@ Numeric

Conversion
^^^^^^^^^^
- Bug in :meth:`BaseMaskedArray._cast_pointwise_result` where all-NA results were returned with ``object`` dtype instead of preserving the original dtype (:issue:`62344`)
- Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`)
- Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
- Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def maybe_convert_objects(
convert_numeric: bool = ...,
convert_non_numeric: Literal[False] = ...,
convert_to_nullable_dtype: Literal[False] = ...,
dtype_if_all_na: DtypeObj | None = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> npt.NDArray[np.object_ | np.number]: ...
@overload
Expand All @@ -105,6 +106,7 @@ def maybe_convert_objects(
convert_numeric: bool = ...,
convert_non_numeric: bool = ...,
convert_to_nullable_dtype: Literal[True] = ...,
dtype_if_all_na: DtypeObj | None = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...
@overload
Expand All @@ -116,6 +118,7 @@ def maybe_convert_objects(
convert_numeric: bool = ...,
convert_non_numeric: bool = ...,
convert_to_nullable_dtype: bool = ...,
dtype_if_all_na: DtypeObj | None = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...
@overload
Expand Down
13 changes: 13 additions & 0 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2545,6 +2545,7 @@ def maybe_convert_objects(ndarray[object] objects,
bint convert_numeric=True, # NB: different default!
bint convert_to_nullable_dtype=False,
bint convert_non_numeric=False,
object dtype_if_all_na=None,
object dtype_if_all_nat=None) -> "ArrayLike":
"""
Type inference function-- convert object array to proper dtype
Expand All @@ -2566,6 +2567,8 @@ def maybe_convert_objects(ndarray[object] objects,
encountered, whether to convert and return a Boolean/IntegerArray.
convert_non_numeric : bool, default False
Whether to convert datetime, timedelta, period, interval types.
dtype_if_all_na : np.dtype, ExtensionDtype, or None, default None
Dtype to cast to if we have all-NA or all-None.
dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this replace dtype_if_all_nat instead of adding a new arg?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind, I now see that isn't viable.

Copy link
Contributor Author

@heoh heoh Sep 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it does not seem easy; the details are in my comment above: #62352 (comment)

There are pros and cons either way, but I think the logic of _cast_pointwise_result is clearer in the current version (d5c6cfb).

Dtype to cast to if we have all-NaT.

Expand Down Expand Up @@ -2838,6 +2841,16 @@ def maybe_convert_objects(ndarray[object] objects,
else:
seen.object_ = True

elif seen.null_:
if not seen.object_ and not seen.numeric_ and not seen.bool_:
# all NaT, None, or nan (at least one NA or None)
dtype = dtype_if_all_na
if dtype is not None:
cls = dtype.construct_array_type()
obj = cls._from_sequence([], dtype=dtype)
taker = -np.ones((<object>objects).shape, dtype=np.intp)
return obj.take(taker, allow_fill=True)

if not convert_numeric:
# Note: we count "bool" as numeric here. This is because
# np.array(list_of_items) will convert bools just like it will numeric
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,10 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
return cls(values, mask)

def _cast_pointwise_result(self, values) -> ArrayLike:
if isna(values).all():
return type(self)._from_sequence(values, dtype=self.dtype)
values = np.asarray(values, dtype=object)
result = lib.maybe_convert_objects(values, convert_to_nullable_dtype=True)
result = lib.maybe_convert_objects(
values, convert_to_nullable_dtype=True, dtype_if_all_na=self.dtype
)
lkind = self.dtype.kind
rkind = result.dtype.kind
if (lkind in "iu" and rkind in "iu") or (lkind == rkind == "f"):
Expand Down
26 changes: 10 additions & 16 deletions pandas/tests/extension/test_masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,19 +364,13 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)

@pytest.mark.parametrize(
"arr, values",
[
(pd.array([True, False]), [pd.NA, pd.NA]),
(pd.array([1, 2]), [pd.NA, pd.NA]),
],
)
def test_cast_pointwise_result_all_na_respects_dtype(self, arr, values):
"""
GH#62344
Ensure that _cast_pointwise_result respects the original dtype
even when the result consists entirely of NA values.
"""
result = arr._cast_pointwise_result(values)
assert result.dtype == arr.dtype
assert all(x is pd.NA for x in result)

@pytest.mark.parametrize(
"arr", [pd.array([True, False]), pd.array([1, 2]), pd.array([1.0, 2.0])]
)
def test_cast_pointwise_result_all_na_respects_original_dtype(arr):
# GH#62344
values = [pd.NA, pd.NA]
result = arr._cast_pointwise_result(values)
assert result.dtype == arr.dtype
assert all(x is pd.NA for x in result)
Loading