Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1217,10 +1217,11 @@ Other
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
- Deprecated the keyword ``check_datetimelike_compat`` in :meth:`testing.assert_frame_equal` and :meth:`testing.assert_series_equal` (:issue:`55638`)
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`NA` values in a :class:`Float64Dtype` object with ``np.nan``; this now works with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`55127`)
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace ``np.nan`` values in a :class:`Int64Dtype` object with :class:`NA`; this is now a no-op with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`51237`)
- Fixed bug in :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
-

.. ***DO NOT USE THIS SECTION***

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,9 @@ def __setitem__(self, key, value) -> None:
key = check_array_indexer(self, key)

if is_scalar(value):
if is_valid_na_for_dtype(value, self.dtype):
if is_valid_na_for_dtype(value, self.dtype) and not (
lib.is_float(value) and not is_nan_na()
):
self._mask[key] = True
else:
value = self._validate_setitem_value(value)
Expand Down
33 changes: 32 additions & 1 deletion pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

import numpy as np

from pandas._config import is_nan_na

from pandas._libs import (
NaT,
algos,
Expand All @@ -37,7 +39,11 @@
is_object_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.dtypes import (
ArrowDtype,
BaseMaskedDtype,
DatetimeTZDtype,
)
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
Expand Down Expand Up @@ -86,6 +92,31 @@ def mask_missing(arr: ArrayLike, value) -> npt.NDArray[np.bool_]:
"""
dtype, value = infer_dtype_from(value)

if (
isinstance(arr.dtype, (BaseMaskedDtype, ArrowDtype))
and lib.is_float(value)
and np.isnan(value)
and not is_nan_na()
):
# TODO: this should be done in an EA method?
if arr.dtype.kind == "f":
# GH#55127
if isinstance(arr.dtype, BaseMaskedDtype):
# error: "ExtensionArray" has no attribute "_data" [attr-defined]
mask = np.isnan(arr._data) & ~arr.isna() # type: ignore[attr-defined,operator]
return mask
else:
# error: "ExtensionArray" has no attribute "_pa_array" [attr-defined]
import pyarrow.compute as pc

mask = pc.is_nan(arr._pa_array).fill_null(False).to_numpy() # type: ignore[attr-defined]
return mask

elif arr.dtype.kind in "iu":
# GH#51237
mask = np.zeros(arr.shape, dtype=bool)
return mask

if isna(value):
return isna(arr)

Expand Down
46 changes: 46 additions & 0 deletions pandas/tests/arrays/masked/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd


Expand Down Expand Up @@ -58,3 +60,47 @@ def test_setitem_validation_scalar_int(self, invalid, any_int_ea_dtype):
def test_setitem_validation_scalar_float(self, invalid, float_ea_dtype):
arr = pd.array([1, 2, None], dtype=float_ea_dtype)
self._check_setitem_invalid(arr, invalid)


@pytest.mark.parametrize(
    "dtype",
    [
        "Float64",
        pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
@pytest.mark.parametrize("indexer", [1, [1], [False, True, False]])
def test_setitem_nan_in_float64_array(dtype, indexer, using_nan_is_na):
    # Assign NaN over the NA slot (position 1) of a nullable float array via
    # a scalar, list, or boolean-mask indexer, then check what was stored.
    # NOTE(review): ``using_nan_is_na`` is a fixture defined elsewhere;
    # presumably it mirrors the ``mode.nan_is_na`` option -- confirm.
    arr = pd.array([0, pd.NA, 1], dtype=dtype)
    arr[indexer] = np.nan

    stored = arr[1]
    if using_nan_is_na:
        # NaN is treated as missing, so the slot reads back as NA.
        assert stored is pd.NA
    else:
        # NaN is kept as a real float value, distinct from NA.
        assert np.isnan(stored)


@pytest.mark.parametrize(
    "dtype",
    [
        "Int64",
        pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
@pytest.mark.parametrize("indexer", [1, [1], [False, True, False]])
def test_setitem_nan_in_int64_array(dtype, indexer, using_nan_is_na):
    # Assign NaN into position 1 of a nullable integer array via a scalar,
    # list, or boolean-mask indexer.
    arr = pd.array([0, 1, 2], dtype=dtype)

    if using_nan_is_na:
        # NaN is treated as missing, so the assignment stores NA.
        arr[indexer] = np.nan
        assert arr[1] is pd.NA
        return

    # Otherwise NaN is a float that an integer array must reject; the
    # exception type and message depend on the backing implementation.
    if dtype == "int64[pyarrow]":
        import pyarrow as pa

        err = pa.lib.ArrowInvalid
        msg = "Could not convert nan with type float"
    else:
        err = TypeError
        msg = "Invalid value 'nan' for dtype 'Int64'"

    with pytest.raises(err, match=msg):
        arr[indexer] = np.nan
    # The failed assignment must leave the original value in place.
    assert arr[1] == 1
45 changes: 45 additions & 0 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -1430,6 +1432,49 @@ def test_replace_with_nil_na(self):
result = ser.replace("nil", "anything else")
tm.assert_frame_equal(expected, result)

@pytest.mark.parametrize(
    "dtype",
    [
        "Float64",
        pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
def test_replace_na_to_nan_nullable_floats(self, dtype, using_nan_is_na):
    # GH#55127 replacing NA with NaN in a nullable float column
    df = DataFrame({0: [1, np.nan, 1], 1: Series([0, pd.NA, 1], dtype=dtype)})
    result = df.replace(pd.NA, np.nan)

    if not using_nan_is_na:
        # NaN is distinct from NA: the nullable column must now hold a
        # real NaN at row 1.
        nullable_col = Series([0, np.nan, 1], dtype=dtype)
        expected = DataFrame({0: [1, np.nan, 1], 1: nullable_col})
        # Guard against the expected frame silently holding NA instead.
        assert np.isnan(expected.loc[1, 1])
    else:
        # NOTE(review): expected is the result itself here, so the
        # comparison below only checks that replace() did not raise.
        expected = result

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
    "dtype",
    [
        "Int64",
        pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
def test_replace_nan_nullable_ints(self, dtype, using_nan_is_na):
    # GH#51237 with nan_is_na=False, replacing NaN should be a no-op here
    ser = Series([1, 2, None], dtype=dtype)
    result = ser.replace(np.nan, -1)

    # When np.nan is equivalent to pd.NA the missing entry is replaced;
    # otherwise there is no NaN to match and the Series is unchanged.
    expected = Series([1, 2, -1], dtype=dtype) if using_nan_is_na else ser
    tm.assert_series_equal(result, expected)


class TestDataFrameReplaceRegex:
@pytest.mark.parametrize(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def test_convert_dtypes(
with pytest.raises(TypeError, match="Invalid value"):
result[result.notna()] = np.nan
else:
result[result.notna()] = np.nan
result[result.notna()] = pd.NA

# Make sure original not changed
tm.assert_series_equal(series, copy)
Expand Down
Loading