pandas-dev
diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
Lines changed: 3 additions & 2 deletions
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
Lines changed: 10 additions & 1 deletion
diff --git a/pandas/core/arrays/_utils.py b/pandas/core/arrays/_utils.py
Lines changed: 1 addition & 2 deletions
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
Lines changed: 10 additions & 11 deletions
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
Lines changed: 11 additions & 4 deletions
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
Lines changed: 22 additions & 3 deletions
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
Lines changed: 4 additions & 4 deletions
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
Lines changed: 12 additions & 3 deletions
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
Lines changed: 10 additions & 0 deletions
@@ -75,7 +75,7 @@ or convert from existing pandas data:
 
 .. ipython:: python
 
-   s1 = pd.Series([1, 2, np.nan], dtype="Int64")
+   s1 = pd.Series([1, 2, pd.NA], dtype="Int64")
    s1
    s2 = s1.astype("string")
    s2
 
@@ -8,6 +8,8 @@ from csv import (
 )
 import warnings
 
+from pandas._config import is_nan_na
+
 from pandas.util._exceptions import find_stack_level
 
 from pandas import StringDtype
@@ -43,7 +45,6 @@ from libc.string cimport (
     strncpy,
 )
 
-
 import numpy as np
 
 cimport numpy as cnp
@@ -1461,7 +1462,7 @@ def _maybe_upcast(
         if isinstance(arr, IntegerArray) and arr.isna().all():
             # use null instead of int64 in pyarrow
             arr = arr.to_numpy(na_value=None)
-        arr = ArrowExtensionArray(pa.array(arr))
+        arr = ArrowExtensionArray(pa.array(arr, from_pandas=is_nan_na()))
 
     return arr
 
 
@@ -1065,7 +1065,16 @@ def rank(
         (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
     """
     is_datetimelike = needs_i8_conversion(values.dtype)
-    values = _ensure_data(values)
+    if (
+        isinstance(values.dtype, BaseMaskedDtype)
+        and values._hasna
+        and values.dtype.kind in "iuf"
+    ):
+        # e.g. test_rank_ea_small_values
+        # TODO: bug in the object-dtype path that we would get without this special casting.
+        values = values.to_numpy(dtype=np.float64, na_value=np.nan)
+    else:
+        values = _ensure_data(values)
 
     if values.ndim == 1:
         ranks = algos.rank_1d(
 
@@ -28,7 +28,6 @@ def to_numpy_dtype_inference(
     dtype: npt.DTypeLike | None,
     na_value,
     hasna: bool,
-    is_pyarrow: bool = True,
 ) -> tuple[npt.DTypeLike, Any]:
     if dtype is None and is_numeric_dtype(arr.dtype):
         dtype_given = False
@@ -41,7 +40,7 @@ def to_numpy_dtype_inference(
                 else:
                     dtype = arr.dtype.numpy_dtype  # type: ignore[union-attr]
                 if na_value is lib.no_default:
-                    if is_pyarrow and not is_nan_na():
+                    if not is_nan_na():
                         na_value = NA
                         dtype = np.dtype(object)
                     else:
 
@@ -352,9 +352,7 @@ def _from_sequence_of_strings(
             from pandas.core.tools.datetimes import to_datetime
 
             scalars = to_datetime(strings, errors="raise").date
-
-            scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
-
+            scalars = pa.array(scalars, type=pa_type, mask=mask)
         elif pa.types.is_duration(pa_type):
             from pandas.core.tools.timedeltas import to_timedelta
 
@@ -965,7 +963,10 @@ def __len__(self) -> int:
     def __contains__(self, key) -> bool:
         # https://github.com/pandas-dev/pandas/pull/51307#issuecomment-1426372604
         if isna(key) and key is not self.dtype.na_value:
-            if self.dtype.kind == "f" and lib.is_float(key):
+            if lib.is_float(key) and is_nan_na():
+                return self.dtype.na_value in self
+            elif self.dtype.kind == "f" and lib.is_float(key):
+                # Check specifically for NaN
                 return pc.any(pc.is_nan(self._pa_array)).as_py()
 
             # e.g. date or timestamp types we do not allow None here to match pd.NA
@@ -1512,9 +1513,7 @@ def to_numpy(
         na_value: object = lib.no_default,
     ) -> np.ndarray:
         original_na_value = na_value
-        dtype, na_value = to_numpy_dtype_inference(
-            self, dtype, na_value, self._hasna, is_pyarrow=True
-        )
+        dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, self._hasna)
         pa_type = self._pa_array.type
         if not self._hasna or isna(na_value) or pa.types.is_null(pa_type):
             data = self
@@ -2073,7 +2072,7 @@ def __setitem__(self, key, value) -> None:
                 raise ValueError("Length of indexer and values mismatch")
             chunks = [
                 *self._pa_array[:key].chunks,
-                pa.array([value], type=self._pa_array.type),
+                pa.array([value], type=self._pa_array.type, from_pandas=is_nan_na()),
                 *self._pa_array[key + 1 :].chunks,
             ]
             data = pa.chunked_array(chunks).combine_chunks()
@@ -2127,7 +2126,7 @@ def _rank_calc(
                 pa_type = pa.float64()
             else:
                 pa_type = pa.uint64()
-            result = pa.array(ranked, type=pa_type)
+            result = pa.array(ranked, type=pa_type, from_pandas=is_nan_na())
             return result
 
         data = self._pa_array.combine_chunks()
@@ -2379,7 +2378,7 @@ def _to_numpy_and_type(value) -> tuple[np.ndarray, pa.DataType | None]:
         right, right_type = _to_numpy_and_type(right)
         pa_type = left_type or right_type
         result = np.where(cond, left, right)
-        return pa.array(result, type=pa_type)
+        return pa.array(result, type=pa_type, from_pandas=is_nan_na())
 
     @classmethod
     def _replace_with_mask(
@@ -2423,7 +2422,7 @@ def _replace_with_mask(
 
         result = np.array(values, dtype=object)
         result[mask] = replacements
-        return pa.array(result, type=values.type)
+        return pa.array(result, type=values.type, from_pandas=is_nan_na())
 
     # ------------------------------------------------------------------
     # GroupBy Methods
 
@@ -11,6 +11,8 @@
 
 import numpy as np
 
+from pandas._config import is_nan_na
+
 from pandas._libs import (
     lib,
     missing as libmissing,
@@ -309,7 +311,9 @@ def __setitem__(self, key, value) -> None:
     def __contains__(self, key) -> bool:
         if isna(key) and key is not self.dtype.na_value:
             # GH#52840
-            if self._data.dtype.kind == "f" and lib.is_float(key):
+            if lib.is_float(key) and is_nan_na():
+                key = self.dtype.na_value
+            elif self._data.dtype.kind == "f" and lib.is_float(key):
                 return bool((np.isnan(self._data) & ~self._mask).any())
 
         return bool(super().__contains__(key))
@@ -496,9 +500,7 @@ def to_numpy(
         array([ True, False, False])
         """
         hasna = self._hasna
-        dtype, na_value = to_numpy_dtype_inference(
-            self, dtype, na_value, hasna, is_pyarrow=False
-        )
+        dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna)
         if dtype is None:
             dtype = object
 
@@ -669,6 +671,8 @@ def reconstruct(x: np.ndarray):
                     # reached in e.g. np.sqrt on BooleanArray
                     # we don't support float16
                     x = x.astype(np.float32)
+                if is_nan_na():
+                    m[np.isnan(x)] = True
                 return FloatingArray(x, m)
             else:
                 x[mask] = np.nan
@@ -874,6 +878,9 @@ def _maybe_mask_result(
         if result.dtype.kind == "f":
             from pandas.core.arrays import FloatingArray
 
+            if is_nan_na():
+                mask[np.isnan(result)] = True
+
             return FloatingArray(result, mask, copy=False)
 
         elif result.dtype.kind == "b":
 
@@ -8,6 +8,8 @@
 
 import numpy as np
 
+from pandas._config import is_nan_na
+
 from pandas._libs import (
     lib,
     missing as libmissing,
@@ -101,6 +103,8 @@ def __from_arrow__(
                 array = array.combine_chunks()
 
         data, mask = pyarrow_array_to_numpy_and_mask(array, dtype=self.numpy_dtype)
+        if data.dtype.kind == "f" and is_nan_na():
+            mask[np.isnan(data)] = False
         return array_class(data.copy(), ~mask, copy=False)
 
     @classmethod
@@ -195,9 +199,21 @@ def _coerce_to_data_and_mask(
         elif values.dtype.kind == "f":
             # np.isnan is faster than is_numeric_na() for floats
             # github issue: #60066
-            mask = np.isnan(values)
+            if is_nan_na():
+                mask = np.isnan(values)
+            else:
+                mask = np.zeros(len(values), dtype=np.bool_)
+                if dtype_cls.__name__.strip("_").startswith(("I", "U")):
+                    wrong = np.isnan(values)
+                    if wrong.any():
+                        raise ValueError("Cannot cast NaN value to Integer dtype.")
         else:
-            mask = libmissing.is_numeric_na(values)
+            if is_nan_na():
+                mask = libmissing.is_numeric_na(values)
+            else:
+                # is_numeric_na will raise on non-numeric NAs
+                libmissing.is_numeric_na(values)
+                mask = libmissing.is_pdna_or_none(values)
     else:
         assert len(mask) == len(values)
 
@@ -236,7 +252,6 @@ def _coerce_to_data_and_mask(
         values = values.astype(dtype, copy=copy)
     else:
         values = dtype_cls._safe_cast(values, dtype, copy=False)
-
     return values, mask, dtype, inferred_type
 
 
@@ -265,6 +280,10 @@ def __init__(
             # If we don't raise here, then accessing self.dtype would raise
             raise TypeError("FloatingArray does not support np.float16 dtype.")
 
+        # NB: when is_nan_na() is True, the caller is responsible for
+        #  ensuring that every NaN entry in `values` is also masked, i.e.
+        #  that mask[np.isnan(values)].all() holds.
+
         super().__init__(values, mask, copy=copy)
 
     @cache_readonly
 
@@ -427,12 +427,12 @@ def is_terminal() -> bool:
         validator=is_one_of_factory([True, False, "warn"]),
     )
 
-with cf.config_prefix("mode"):
     cf.register_option(
         "nan_is_na",
-        True,
-        "Whether to make ArrowDtype arrays consistently treat NaN as "
-        "interchangeable with pd.NA",
+        os.environ.get("PANDAS_NAN_IS_NA", 0) == "1",
+        "Whether to treat NaN entries as interchangeable with pd.NA in "
+        "numpy-nullable and pyarrow float dtypes. See discussion in "
+        "https://github.com/pandas-dev/pandas/issues/32265",
         validator=is_one_of_factory([True, False]),
     )
 
 
@@ -18,7 +18,10 @@
 
 import numpy as np
 
-from pandas._config import using_string_dtype
+from pandas._config import (
+    is_nan_na,
+    using_string_dtype,
+)
 
 from pandas._libs import (
     Interval,
@@ -1053,7 +1056,10 @@ def convert_dtypes(
             elif input_array.dtype.kind in "fcb":
                 # TODO: de-dup with maybe_cast_to_integer_array?
                 arr = input_array[notna(input_array)]
-                if (arr.astype(int) == arr).all():
+                if len(arr) < len(input_array) and not is_nan_na():
+                    # In the presence of NaNs, we cannot convert to IntegerDtype
+                    pass
+                elif (arr.astype(int) == arr).all():
                     inferred_dtype = target_int_dtype
                 else:
                     inferred_dtype = input_array.dtype
@@ -1077,7 +1083,10 @@ def convert_dtypes(
                 if convert_integer:
                     # TODO: de-dup with maybe_cast_to_integer_array?
                     arr = input_array[notna(input_array)]
-                    if (arr.astype(int) == arr).all():
+                    if len(arr) < len(input_array) and not is_nan_na():
+                        # In the presence of NaNs, we can't convert to IntegerDtype
+                        inferred_dtype = inferred_float_dtype
+                    elif (arr.astype(int) == arr).all():
                         inferred_dtype = pandas_dtype_func("Int64")
                     else:
                         inferred_dtype = inferred_float_dtype
 
@@ -21,6 +21,7 @@
 
 from pandas._config import (
     get_option,
+    is_nan_na,
     using_string_dtype,
 )
 
@@ -161,6 +162,7 @@
     ExtensionArray,
     TimedeltaArray,
 )
+from pandas.core.arrays.floating import FloatingDtype
 from pandas.core.arrays.string_ import (
     StringArray,
     StringDtype,
@@ -6575,6 +6577,14 @@ def _maybe_cast_indexer(self, key):
         If we have a float key and are not a floating index, then try to cast
         to an int if equivalent.
         """
+        if (
+            is_float(key)
+            and np.isnan(key)
+            and isinstance(self.dtype, FloatingDtype)
+            and is_nan_na()
+        ):
+            # TODO: better place to do this?
+            key = self.dtype.na_value
         return key
 
     def _maybe_cast_listlike_indexer(self, target) -> Index: