Skip to content
Open
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
5e88fde
BUG: read_csv with engine=pyarrow and numpy-nullable dtype
jbrockmendel Aug 6, 2025
eae6f64
mypy fixup, error message compat for 32bit builds
jbrockmendel Aug 6, 2025
2861b16
minimum version compat
jbrockmendel Aug 6, 2025
5369afa
not-infer-string compat
jbrockmendel Aug 6, 2025
db35a9c
mypy fixup
jbrockmendel Aug 6, 2025
505bfb6
update usage
jbrockmendel Aug 11, 2025
febe83c
CLN: remove redundant check
jbrockmendel Aug 11, 2025
c81cbec
Use Matts idea
jbrockmendel Aug 11, 2025
26a3049
re-xfail
jbrockmendel Aug 12, 2025
a70b429
API: rank with nullable dtypes preserve NA
jbrockmendel Aug 4, 2025
99a71b7
API: improve dtype in df.where with EA other
jbrockmendel Aug 3, 2025
c86747d
GH refs
jbrockmendel Aug 3, 2025
9d222d8
doc fixup
jbrockmendel Aug 3, 2025
6f800b3
BUG: Decimal(NaN) incorrectly allowed in ArrowEA constructor with tim…
jbrockmendel Jul 3, 2025
514a56f
GH ref
jbrockmendel Jul 3, 2025
fca3c7c
BUG: ArrowEA constructor with timestamp type
jbrockmendel Jul 4, 2025
f20758a
POC: consistent NaN treatment for pyarrow dtypes
jbrockmendel Jun 28, 2025
cc416fa
comment
jbrockmendel Jun 28, 2025
7094d85
Down to 40 failing tests
jbrockmendel Jul 5, 2025
eeb0d32
Fix rank, json tests
jbrockmendel Jul 6, 2025
814d001
CLN: remove outdated
jbrockmendel Jul 6, 2025
5db5e4b
Fix where kludge
jbrockmendel Jul 6, 2025
87536a7
update tests
jbrockmendel Jul 6, 2025
64f4271
Fix remaining tests
jbrockmendel Jul 6, 2025
26d1177
mypy fixup
jbrockmendel Jul 7, 2025
bcb2506
old-numpy compat
jbrockmendel Jul 7, 2025
8f99d05
simplify
jbrockmendel Jul 7, 2025
5abd585
Better option name, fixture
jbrockmendel Jul 31, 2025
70830f7
default True
jbrockmendel Jul 31, 2025
58b3c4f
Patch ops
jbrockmendel Jul 31, 2025
cd7ec33
mypy fixup
jbrockmendel Jul 31, 2025
cf7b229
Test for setitem/construction
jbrockmendel Jul 31, 2025
eb12ea1
update ufunc test
jbrockmendel Jul 31, 2025
f0262ef
Improve rank test skips
jbrockmendel Jul 31, 2025
544faf1
ENH: mode.nan_is_na for numpy-nullable dtypes
jbrockmendel Aug 4, 2025
6c4b68f
update style test
jbrockmendel Aug 4, 2025
90d3a28
update asvs, mypy ignores
jbrockmendel Aug 4, 2025
408aa06
pre-commit fixup
jbrockmendel Aug 4, 2025
9e5ebec
doc fixup
jbrockmendel Aug 4, 2025
0fd2e2d
Remove special-casing
jbrockmendel Aug 4, 2025
7de9f40
comment
jbrockmendel Aug 4, 2025
2f61a58
ruff format
jbrockmendel Aug 5, 2025
36143ad
Set default to True
jbrockmendel Aug 6, 2025
b7ea9ae
whatsnew
jbrockmendel Aug 12, 2025
a625190
Merge branch 'main' into api-nan-vs-na
jbrockmendel Aug 20, 2025
d471aa8
update _cast_pointwise_result
jbrockmendel Aug 20, 2025
27cd097
update cast_pointwise_result
jbrockmendel Aug 20, 2025
1bb0a4e
Merge branch 'main' into api-nan-vs-na
jbrockmendel Aug 20, 2025
7cc3b41
Merge branch 'main' into api-nan-vs-na
jbrockmendel Aug 23, 2025
5f76e19
Merge branch 'main' into api-nan-vs-na
jbrockmendel Aug 26, 2025
b2a64bb
remove unnecessary import
jbrockmendel Aug 26, 2025
1024ac5
Merge branch 'main' into api-nan-vs-na
jbrockmendel Sep 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ class SortIntegerArray:
params = [10**3, 10**5]

def setup(self, N):
data = np.arange(N, dtype=float)
data[40] = np.nan
data = np.arange(N, dtype=float).astype(object)
data[40] = pd.NA
self.array = pd.array(data, dtype="Int64")

def time_argsort(self, N):
Expand Down
3 changes: 3 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np

from pandas import (
NA,
DataFrame,
Index,
MultiIndex,
Expand Down Expand Up @@ -445,6 +446,8 @@ def setup(self, inplace, dtype):
values[::2] = np.nan
if dtype == "Int64":
values = values.round()
values = values.astype(object)
values[::2] = NA
self.df = DataFrame(values, dtype=dtype)
self.fill_values = self.df.iloc[self.df.first_valid_index()].to_dict()

Expand Down
4 changes: 4 additions & 0 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,10 @@ def setup(self, dtype, method, with_nans):
null_vals = vals.astype(float, copy=True)
null_vals[::2, :] = np.nan
null_vals[::3, :] = np.nan
if dtype in ["Int64", "Float64"]:
null_vals = null_vals.astype(object)
null_vals[::2, :] = NA
null_vals[::3, :] = NA
df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
df["key"] = keys
self.df = df
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/text.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ or convert from existing pandas data:

.. ipython:: python

s1 = pd.Series([1, 2, np.nan], dtype="Int64")
s1 = pd.Series([1, 2, pd.NA], dtype="Int64")
s1
s2 = s1.astype("string")
s2
Expand Down
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ marker of ``np.nan`` will infer to integer dtype. The display of the ``Series``

.. ipython:: python

s = pd.Series([1, 2, np.nan], dtype='Int64')
s = pd.Series([1, 2, pd.NA], dtype='Int64')
s


Expand Down Expand Up @@ -166,7 +166,7 @@ See the :ref:`dtypes docs <basics.dtypes>` for more on extension arrays.

.. ipython:: python

pd.array([1, 2, np.nan], dtype='Int64')
pd.array([1, 2, pd.NA], dtype='Int64')
pd.array(['a', 'b', 'c'], dtype='category')

Passing data for which there isn't dedicated extension type (e.g. float, integer, etc.)
Expand Down
49 changes: 49 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,55 @@ small behavior differences as collateral:
- Adding or subtracting a :class:`Day` with a :class:`Timedelta` is no longer supported.
- Adding or subtracting a :class:`Day` offset to a timezone-aware :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise.

.. _whatsnew_300.api_breaking.nan_vs_na:

Changed treatment of NaN values in pyarrow and numpy-nullable floating dtypes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Previously, when dealing with a nullable dtype (e.g. ``Float64Dtype`` or ``int64[pyarrow]``), ``NaN`` was treated as interchangeable with :class:`NA` in some circumstances but not others. This was done to make adoption easier, but caused some confusion (:issue:`32265`). In 3.0, an option ``"mode.nan_is_na"`` (default ``True``) controls whether to treat ``NaN`` as equivalent to :class:`NA`.

With ``pd.set_option("mode.nan_is_na", True)`` (again, this is the default), ``NaN`` can be passed to constructors, ``__setitem__``, ``__contains__`` and be treated the same as :class:`NA`. The only change users will see is that arithmetic and ``np.ufunc`` operations that previously introduced ``NaN`` entries produce :class:`NA` entries instead:

*Old behavior:*

.. code-block:: ipython

In [2]: ser = pd.Series([0, None], dtype=pd.Float64Dtype())
In [3]: ser / 0
Out[3]:
0 NaN
1 <NA>
dtype: Float64

*New behavior:*

.. ipython:: python

ser = pd.Series([0, None], dtype=pd.Float64Dtype())
ser / 0

By contrast, with ``pd.set_option("mode.nan_is_na", False)``, ``NaN`` is always treated as a distinct floating-point value rather than as a stand-in for :class:`NA`, and therefore cannot be used with integer dtypes:

*Old behavior:*

.. code-block:: ipython

In [2]: ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype())
In [3]: ser[1]
Out[3]: <NA>

*New behavior:*

.. ipython:: python

pd.set_option("mode.nan_is_na", False)
ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype())
ser[1]

If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in the latter example, this would raise, as a float ``NaN`` cannot be held by an integer dtype.

With ``"mode.nan_is_na"`` set to ``False``, ``ser.to_numpy()`` (and ``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if :class:`NA` entries are present, where before they would coerce to ``NaN``. To retain a float numpy dtype, explicitly pass ``na_value=np.nan`` to :meth:`Series.to_numpy`.

.. _whatsnew_300.api_breaking.deps:

Increased minimum version for Python
Expand Down
5 changes: 5 additions & 0 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,8 @@
def using_string_dtype() -> bool:
    """Return whether the ``future.infer_string`` option is enabled."""
    return _global_config["future"]["infer_string"]


def is_nan_na() -> bool:
    """Return whether the ``mode.nan_is_na`` option is enabled."""
    return _global_config["mode"]["nan_is_na"]
1 change: 1 addition & 0 deletions pandas/_libs/missing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ def isneginf_scalar(val: object) -> bool: ...
def checknull(val: object) -> bool: ...
def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
def is_pdna_or_none(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
18 changes: 18 additions & 0 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,24 @@ cdef bint checknull_with_nat_and_na(object obj):
return checknull_with_nat(obj) or obj is C_NA


@cython.wraparound(False)
@cython.boundscheck(False)
def is_pdna_or_none(values: ndarray) -> ndarray:
    """
    Build a boolean mask of entries that are exactly ``pd.NA`` or ``None``.

    Parameters
    ----------
    values : ndarray
        Object-dtype array to scan (one-dimensional; indexed by a single
        ``Py_ssize_t`` position).

    Returns
    -------
    ndarray
        Boolean array of the same length, True where the entry is
        ``None`` or the NA singleton.

    Notes
    -----
    Unlike ``checknull``, float ``NaN`` and ``NaT`` are deliberately NOT
    treated as missing here — only identity matches against ``None`` and
    the ``C_NA`` singleton count.
    """
    cdef:
        ndarray[uint8_t] result
        Py_ssize_t i, N
        object val

    N = len(values)
    # uint8 buffer because Cython cannot type a bool ndarray directly;
    # reinterpreted as bool on return.
    result = np.zeros(N, dtype=np.uint8)

    for i in range(N):
        val = values[i]
        # Identity checks: NA is a singleton, so `is` is both correct and fast.
        if val is None or val is C_NA:
            result[i] = True
    return result.view(bool)


@cython.wraparound(False)
@cython.boundscheck(False)
def is_numeric_na(values: ndarray) -> ndarray:
Expand Down
5 changes: 3 additions & 2 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ from csv import (
)
import warnings

from pandas._config import is_nan_na

from pandas.util._exceptions import find_stack_level

from pandas import StringDtype
Expand Down Expand Up @@ -43,7 +45,6 @@ from libc.string cimport (
strncpy,
)


import numpy as np

cimport numpy as cnp
Expand Down Expand Up @@ -1461,7 +1462,7 @@ def _maybe_upcast(
if isinstance(arr, IntegerArray) and arr.isna().all():
# use null instead of int64 in pyarrow
arr = arr.to_numpy(na_value=None)
arr = ArrowExtensionArray(pa.array(arr, from_pandas=True))
arr = ArrowExtensionArray(pa.array(arr, from_pandas=is_nan_na()))

return arr

Expand Down
7 changes: 7 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2116,3 +2116,10 @@ def temp_file(tmp_path):
def monkeysession():
with pytest.MonkeyPatch.context() as mp:
yield mp


@pytest.fixture(params=[True, False])
def using_nan_is_na(request):
    # Parametrized fixture: each consuming test runs twice, once with the
    # "mode.nan_is_na" option enabled and once disabled.  The option is set
    # only for the duration of the test via option_context, and the active
    # value is yielded so tests can branch on it.
    opt = request.param
    with pd.option_context("mode.nan_is_na", opt):
        yield opt
14 changes: 12 additions & 2 deletions pandas/core/arrays/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@

import numpy as np

from pandas._config import is_nan_na

from pandas._libs import lib
from pandas._libs.missing import NA
from pandas.errors import LossySetitemError

from pandas.core.dtypes.cast import np_can_hold_element
Expand All @@ -21,7 +24,10 @@


def to_numpy_dtype_inference(
arr: ArrayLike, dtype: npt.DTypeLike | None, na_value, hasna: bool
arr: ArrayLike,
dtype: npt.DTypeLike | None,
na_value,
hasna: bool,
) -> tuple[npt.DTypeLike, Any]:
if dtype is None and is_numeric_dtype(arr.dtype):
dtype_given = False
Expand All @@ -34,7 +40,11 @@ def to_numpy_dtype_inference(
else:
dtype = arr.dtype.numpy_dtype # type: ignore[union-attr]
if na_value is lib.no_default:
na_value = np.nan
if not is_nan_na():
na_value = NA
dtype = np.dtype(object)
else:
na_value = np.nan
else:
dtype = arr.dtype.numpy_dtype # type: ignore[union-attr]
elif dtype is not None:
Expand Down
Loading
Loading