diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index 42edb1f511391..7ed4da69f5a99 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -2,6 +2,7 @@ import pytest from pandas import ( + NA, Categorical, CategoricalDtype, CategoricalIndex, @@ -34,7 +35,7 @@ def test_astype_nan_to_int(self, cls, values): array([0, 0], dtype="timedelta64[ns]"), array([Period("2019"), Period("2020")], dtype="period[Y-DEC]"), array([Interval(0, 1), Interval(1, 2)], dtype="interval"), - array([1, np.nan], dtype="Int64"), + array([1, NA], dtype="Int64"), ], ) def test_astype_category_to_extension_dtype(self, expected): diff --git a/pandas/tests/arrays/floating/conftest.py b/pandas/tests/arrays/floating/conftest.py index 5e971c66029d5..fc29e3bfd9962 100644 --- a/pandas/tests/arrays/floating/conftest.py +++ b/pandas/tests/arrays/floating/conftest.py @@ -33,7 +33,7 @@ def data_missing(dtype): Fixture returning array with missing data according to parametrized float 'dtype'. """ - return pd.array([np.nan, 0.1], dtype=dtype) + return pd.array([pd.NA, 0.1], dtype=dtype) @pytest.fixture(params=["data", "data_missing"]) diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index 009fac4c2f5ed..777099e76fc73 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -104,9 +104,9 @@ def test_pow_array(dtype): def test_rpow_one_to_na(): # https://github.com/pandas-dev/pandas/issues/22022 # https://github.com/pandas-dev/pandas/issues/29997 - arr = pd.array([np.nan, np.nan], dtype="Float64") + arr = pd.array([pd.NA, pd.NA], dtype="Float64") result = np.array([1.0, 2.0]) ** arr - expected = pd.array([1.0, np.nan], dtype="Float64") + expected = pd.array([1.0, pd.NA], dtype="Float64") tm.assert_extension_array_equal(result, expected) @@ -187,14 +187,14 @@ def test_error_invalid_values(data, all_arithmetic_operators): def test_cross_type_arithmetic(): df = pd.DataFrame( { - "A": pd.array([1, 2, np.nan], dtype="Float64"), - "B": pd.array([1, np.nan, 3], dtype="Float32"), + "A": pd.array([1, 2, pd.NA], dtype="Float64"), + "B": pd.array([1, pd.NA, 3], dtype="Float32"), "C": np.array([1, 2, 3], dtype="float64"), } ) result = df.A + df.C - expected = pd.Series([2, 4, np.nan], dtype="Float64") + expected = pd.Series([2, 4, pd.NA], dtype="Float64") tm.assert_series_equal(result, expected) result = (df.A + df.C) * 3 == 12 @@ -202,7 +202,7 @@ def test_cross_type_arithmetic(): tm.assert_series_equal(result, expected) result = df.A + df.B - expected = pd.Series([2, np.nan, np.nan], dtype="Float64") + expected = pd.Series([2, pd.NA, pd.NA], dtype="Float64") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 4007ee6b415c9..e1d237205a753 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -20,7 +20,7 @@ def test_floating_array_constructor(): mask = np.array([False, False, False, True], dtype="bool") result = FloatingArray(values, mask) - expected = pd.array([1, 2, 3, np.nan], dtype="Float64") + expected = pd.array([1, 2, 3, pd.NA], dtype="Float64") tm.assert_extension_array_equal(result, expected) tm.assert_numpy_array_equal(result._data, values) tm.assert_numpy_array_equal(result._mask, mask) diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index 9ea48bfb2413f..dffb2a1f6e1f5 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -11,7 +11,7 @@ # np.sign emits a warning with nans, @pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning") def test_ufuncs_single(ufunc): - a = pd.array([1, 2, -3, np.nan], dtype="Float64") + a = pd.array([1, 2, -3, pd.NA], dtype="Float64") result = ufunc(a) expected = pd.array(ufunc(a.astype(float)), dtype="Float64") tm.assert_extension_array_equal(result, expected) @@ -24,7 +24,7 @@ def test_ufuncs_single(ufunc): @pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt]) def test_ufuncs_single_float(ufunc): - a = pd.array([1.0, 0.2, 3.0, np.nan], dtype="Float64") + a = pd.array([1.0, 0.2, 3.0, pd.NA], dtype="Float64") with np.errstate(invalid="ignore"): result = ufunc(a) expected = pd.array(ufunc(a.astype(float)), dtype="Float64") @@ -40,7 +40,7 @@ def test_ufuncs_single_float(ufunc): @pytest.mark.parametrize("ufunc", [np.add, np.subtract]) def test_ufuncs_binary_float(ufunc): # two FloatingArrays - a = pd.array([1, 0.2, -3, np.nan], dtype="Float64") + a = pd.array([1, 0.2, -3, pd.NA], dtype="Float64") result = ufunc(a, a) expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Float64") tm.assert_extension_array_equal(result, expected) @@ -88,7 +88,7 @@ def test_ufunc_reduce_raises(values): ], ) def test_stat_method(pandasmethname, kwargs): - s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, np.nan, np.nan], dtype="Float64") + s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, pd.NA, pd.NA], dtype="Float64") pandasmeth = getattr(s, pandasmethname) result = pandasmeth(**kwargs) s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64") diff --git a/pandas/tests/arrays/integer/conftest.py b/pandas/tests/arrays/integer/conftest.py index f73400dfe689e..79275522d49f5 100644 --- a/pandas/tests/arrays/integer/conftest.py +++ b/pandas/tests/arrays/integer/conftest.py @@ -1,4 +1,3 @@ -import numpy as np import pytest import pandas as pd @@ -40,7 +39,7 @@ def data(dtype): Used to test dtype conversion with and without missing values. """ return pd.array( - list(range(8)) + [np.nan] + list(range(10, 98)) + [np.nan] + [99, 100], + list(range(8)) + [pd.NA] + list(range(10, 98)) + [pd.NA] + [99, 100], dtype=dtype, ) @@ -53,7 +52,7 @@ def data_missing(dtype): Used to test dtype conversion with and without missing values. """ - return pd.array([np.nan, 1], dtype=dtype) + return pd.array([pd.NA, 1], dtype=dtype) @pytest.fixture(params=["data", "data_missing"]) diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index 9fbea2022c87b..aeceb9b8a3cb1 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -154,9 +154,9 @@ def test_pow_array(): def test_rpow_one_to_na(): # https://github.com/pandas-dev/pandas/issues/22022 # https://github.com/pandas-dev/pandas/issues/29997 - arr = pd.array([np.nan, np.nan], dtype="Int64") + arr = pd.array([pd.NA, pd.NA], dtype="Int64") result = np.array([1.0, 2.0]) ** arr - expected = pd.array([1.0, np.nan], dtype="Float64") + expected = pd.array([1.0, pd.NA], dtype="Float64") tm.assert_extension_array_equal(result, expected) @@ -243,14 +243,14 @@ def test_arithmetic_conversion(all_arithmetic_operators, other): def test_cross_type_arithmetic(): df = pd.DataFrame( { - "A": pd.Series([1, 2, np.nan], dtype="Int64"), - "B": pd.Series([1, np.nan, 3], dtype="UInt8"), + "A": pd.Series([1, 2, pd.NA], dtype="Int64"), + "B": pd.Series([1, pd.NA, 3], dtype="UInt8"), "C": [1, 2, 3], } ) result = df.A + df.C - expected = pd.Series([2, 4, np.nan], dtype="Int64") + expected = pd.Series([2, 4, pd.NA], dtype="Int64") tm.assert_series_equal(result, expected) result = (df.A + df.C) * 3 == 12 @@ -258,7 +258,7 @@ def test_cross_type_arithmetic(): tm.assert_series_equal(result, expected) result = df.A + df.B - expected = pd.Series([2, np.nan, np.nan], dtype="Int64") + expected = pd.Series([2, pd.NA, pd.NA], dtype="Int64") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index 64fe40e53a9d2..8eaa9ace027c9 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -77,7 +77,7 @@ def test_integer_array_constructor(): mask = np.array([False, False, False, True], dtype="bool") result = IntegerArray(values, mask) - expected = pd.array([1, 2, 3, np.nan], dtype="Int64") + expected = pd.array([1, 2, 3, pd.NA], dtype="Int64") tm.assert_extension_array_equal(result, expected) msg = r".* should be .* numpy array. Use the 'pd.array' function instead" @@ -191,7 +191,7 @@ def test_to_integer_array_float(): def test_to_integer_array_str(): result = IntegerArray._from_sequence(["1", "2", None], dtype="Int64") - expected = pd.array([1, 2, np.nan], dtype="Int64") + expected = pd.array([1, 2, pd.NA], dtype="Int64") tm.assert_extension_array_equal(result, expected) with pytest.raises( diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 7972ba7b9fb0f..1b4f070d47e4e 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -52,7 +52,7 @@ def test_preserve_dtypes(op): def test_astype_nansafe(): # see gh-22343 - arr = pd.array([np.nan, 1, 2], dtype="Int8") + arr = pd.array([pd.NA, 1, 2], dtype="Int8") msg = "cannot convert NA to integer" with pytest.raises(ValueError, match=msg): @@ -230,7 +230,7 @@ def test_construct_cast_invalid(dtype): with pytest.raises(TypeError, match=msg): pd.Series(arr).astype(dtype) - arr = [1.2, 2.3, 3.7, np.nan] + arr = [1.2, 2.3, 3.7, pd.NA] with pytest.raises(TypeError, match=msg): pd.array(arr, dtype=dtype) diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 33300fff925f6..77a0dd12534cc 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -10,7 +10,7 @@ # np.sign emits a warning with nans, @pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning") def test_ufuncs_single_int(ufunc): - a = pd.array([1, 2, -3, np.nan]) + a = pd.array([1, 2, -3, pd.NA], dtype="Int64") result = ufunc(a) expected = pd.array(ufunc(a.astype(float)), dtype="Int64") tm.assert_extension_array_equal(result, expected) @@ -23,7 +23,7 @@ def test_ufuncs_single_int(ufunc): @pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt]) def test_ufuncs_single_float(ufunc): - a = pd.array([1, 2, -3, np.nan]) + a = pd.array([1, 2, -3, pd.NA], dtype="Int64") with np.errstate(invalid="ignore"): result = ufunc(a) expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask) @@ -39,7 +39,7 @@ def test_ufuncs_single_float(ufunc): @pytest.mark.parametrize("ufunc", [np.add, np.subtract]) def test_ufuncs_binary_int(ufunc): # two IntegerArrays - a = pd.array([1, 2, -3, np.nan]) + a = pd.array([1, 2, -3, pd.NA], dtype="Int64") result = ufunc(a, a) expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Int64") tm.assert_extension_array_equal(result, expected) @@ -99,7 +99,7 @@ def test_ufunc_reduce_raises(values): ], ) def test_stat_method(pandasmethname, kwargs): - s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") + s = pd.Series(data=[1, 2, 3, 4, 5, 6, pd.NA, pd.NA], dtype="Int64") pandasmeth = getattr(s, pandasmethname) result = pandasmeth(**kwargs) s2 = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64") diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 3c0ef1e4d928b..fa939a390a66f 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -384,7 +384,7 @@ def test_array_copy(): ([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")), ([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")), ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")), - ([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")), + ([1, np.nan], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")), # float ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")), ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")), diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index e3a821519c638..821f51ee95ad3 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -257,7 +257,7 @@ def test_numpy_array_all_dtypes(any_numpy_dtype): [ (pd.Categorical(["a", "b"]), "_codes"), (PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]"), "_ndarray"), - (pd.array([0, np.nan], dtype="Int64"), "_data"), + (pd.array([0, pd.NA], dtype="Int64"), "_data"), (IntervalArray.from_breaks([0, 1]), "_left"), (SparseArray([0, 1]), "_sparse_values"), ( @@ -305,7 +305,7 @@ def test_array_multiindex_raises(): np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]), False, ), - (pd.array([0, np.nan], dtype="Int64"), np.array([0, np.nan]), False), + (pd.array([0, pd.NA], dtype="Int64"), np.array([0, np.nan]), False), ( IntervalArray.from_breaks([0, 1, 2]), np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object), diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 4d766d6664218..c3e1d33ec93df 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2868,7 +2868,7 @@ def test_dt_components(): ) result = ser.dt.components expected = pd.DataFrame( - [[1, 0, 0, 2, 0, 3, 4], [None, None, None, None, None, None, None]], + [[1, 0, 0, 2, 0, 3, 4], [pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA]], columns=[ "days", "hours", @@ -2893,7 +2893,10 @@ def test_dt_components_large_values(): ) result = ser.dt.components expected = pd.DataFrame( - [[365, 23, 59, 59, 999, 0, 0], [None, None, None, None, None, None, None]], + [ + [365, 23, 59, 59, 999, 0, 0], + [pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA], + ], columns=[ "days", "hours", diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index c7fe9e99ec6e5..0e9ffce07bf98 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -380,32 +380,19 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool): ) if op_name == "cumsum": - result = getattr(ser, op_name)(skipna=skipna) - expected = pd.Series( - pd.array( - getattr(ser.astype("float64"), op_name)(skipna=skipna), - dtype=expected_dtype, - ) - ) - tm.assert_series_equal(result, expected) + pass elif op_name in ["cummax", "cummin"]: - result = getattr(ser, op_name)(skipna=skipna) - expected = pd.Series( - pd.array( - getattr(ser.astype("float64"), op_name)(skipna=skipna), - dtype=ser.dtype, - ) - ) - tm.assert_series_equal(result, expected) + expected_dtype = ser.dtype # type: ignore[assignment] elif op_name == "cumprod": - result = getattr(ser[:12], op_name)(skipna=skipna) - expected = pd.Series( - pd.array( - getattr(ser[:12].astype("float64"), op_name)(skipna=skipna), - dtype=expected_dtype, - ) - ) - tm.assert_series_equal(result, expected) - + ser = ser[:12] else: raise NotImplementedError(f"{op_name} not supported") + + result = getattr(ser, op_name)(skipna=skipna) + expected = pd.Series( + pd.array( + getattr(ser.astype("float64"), op_name)(skipna=skipna), + dtype=expected_dtype, + ) + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index bef18dbaf8a8a..d846d7e5b77b9 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -249,10 +249,10 @@ def test_diff_sparse(self): 0, DataFrame( { - "a": [np.nan, 0, 1, 0, np.nan, np.nan, np.nan, 0], - "b": [np.nan, 1, np.nan, np.nan, -2, 1, np.nan, np.nan], - "c": np.repeat(np.nan, 8), - "d": [np.nan, 3, 5, 7, 9, 11, 13, 15], + "a": [pd.NA, 0, 1, 0, pd.NA, pd.NA, pd.NA, 0], + "b": [pd.NA, 1, pd.NA, pd.NA, -2, 1, pd.NA, pd.NA], + "c": np.repeat(pd.NA, 8), # type: ignore[call-overload] + "d": [pd.NA, 3, 5, 7, 9, 11, 13, 15], }, dtype="Int64", ), @@ -261,10 +261,10 @@ def test_diff_sparse(self): 1, DataFrame( { - "a": np.repeat(np.nan, 8), - "b": [0, 1, np.nan, 1, np.nan, np.nan, np.nan, 0], - "c": np.repeat(np.nan, 8), - "d": np.repeat(np.nan, 8), + "a": np.repeat(pd.NA, 8), # type: ignore[call-overload] + "b": [0, 1, pd.NA, 1, pd.NA, pd.NA, pd.NA, 0], + "c": np.repeat(pd.NA, 8), # type: ignore[call-overload] + "d": np.repeat(pd.NA, 8), # type: ignore[call-overload] }, dtype="Int64", ), @@ -275,9 +275,9 @@ def test_diff_integer_na(self, axis, expected): # GH#24171 IntegerNA Support for DataFrame.diff() df = DataFrame( { - "a": np.repeat([0, 1, np.nan, 2], 2), - "b": np.tile([0, 1, np.nan, 2], 2), - "c": np.repeat(np.nan, 8), + "a": np.repeat([0, 1, pd.NA, 2], 2), + "b": np.tile([0, 1, pd.NA, 2], 2), + "c": np.repeat(pd.NA, 8), "d": np.arange(1, 9) ** 2, }, dtype="Int64", diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py index 6d097e75f6703..666fcb1b5143b 100644 --- a/pandas/tests/frame/methods/test_get_numeric_data.py +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -93,9 +93,9 @@ def test_get_numeric_data_extension_dtype(self): # GH#22290 df = DataFrame( { - "A": pd.array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"), + "A": pd.array([-10, pd.NA, 0, 10, 20, 30], dtype="Int64"), "B": Categorical(list("abcabc")), - "C": pd.array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"), + "C": pd.array([0, 1, 2, 3, pd.NA, 5], dtype="UInt8"), "D": IntervalArray.from_breaks(range(7)), } ) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 22fdfd3a01408..196129d40d5c0 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1348,8 +1348,10 @@ def test_unstack_sort_false(frame_or_series, dtype): ) else: expected_columns = ["b", "a"] + + item = pd.NA if dtype == "Float64" else np.nan expected = DataFrame( - [[1.0, np.nan], [np.nan, 2.0], [3.0, np.nan], [np.nan, 4.0]], + [[1.0, item], [item, 2.0], [3.0, item], [item, 4.0]], columns=expected_columns, index=MultiIndex.from_tuples( [("two", "z"), ("two", "y"), ("one", "z"), ("one", "y")] diff --git a/pandas/tests/groupby/methods/test_kurt.py b/pandas/tests/groupby/methods/test_kurt.py index 21b7c50c3c5aa..7aac23c2147fb 100644 --- a/pandas/tests/groupby/methods/test_kurt.py +++ b/pandas/tests/groupby/methods/test_kurt.py @@ -43,7 +43,7 @@ def test_groupby_kurt_arrow_float64(dtype): # Test groupby.kurt() with float64[pyarrow] and Float64 dtypes df = pd.DataFrame( { - "x": [1.0, np.nan, 3.2, 4.8, 2.3, 1.9, 8.9], + "x": [1.0, pd.NA, 3.2, 4.8, 2.3, 1.9, 8.9], "y": [1.6, 3.3, 3.2, 6.8, 1.3, 2.9, 9.0], }, dtype=dtype, diff --git a/pandas/tests/groupby/test_cumulative.py b/pandas/tests/groupby/test_cumulative.py index b0a0414c1feb2..a1ace9e6a4c0f 100644 --- a/pandas/tests/groupby/test_cumulative.py +++ b/pandas/tests/groupby/test_cumulative.py @@ -162,11 +162,12 @@ def test_cummin_getattr_series(): @pytest.mark.parametrize("method", ["cummin", "cummax"]) @pytest.mark.parametrize("dtype", ["UInt64", "Int64", "Float64", "float", "boolean"]) def test_cummin_max_all_nan_column(method, dtype): - base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) + item = np.nan if dtype == "float" else pd.NA + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [item] * 8}) base_df["B"] = base_df["B"].astype(dtype) grouped = base_df.groupby("A") - expected = DataFrame({"B": [np.nan] * 8}, dtype=dtype) + expected = DataFrame({"B": [item] * 8}, dtype=dtype) result = getattr(grouped, method)() tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 724ee0489f0a0..a64ed42ecccf8 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -432,6 +432,8 @@ def test_no_sort_keep_na(sequence_index, dtype, test_series, as_index): uniques = {"x": "x", "y": "y", "z": pd.NA} elif dtype in ("datetime64[ns]", "period[D]"): uniques = {"x": "2016-01-01", "y": "2017-01-01", "z": pd.NA} + elif dtype is not None and dtype.startswith(("I", "U", "F")): + uniques = {"x": 1, "y": 2, "z": pd.NA} else: uniques = {"x": 1, "y": 2, "z": np.nan} diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 014558bbf4bba..2b18fd175bb10 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -427,8 +427,8 @@ def test_mean_on_timedelta(): "values, dtype, result_dtype", [ ([0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], "float64", "float64"), - ([0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], "Float64", "Float64"), - ([0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], "Int64", "Float64"), + ([0, 1, pd.NA, 3, 4, 5, 6, 7, 8, 9], "Float64", "Float64"), + ([0, 1, pd.NA, 3, 4, 5, 6, 7, 8, 9], "Int64", "Float64"), ([0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], "timedelta64[ns]", "timedelta64[ns]"), ( pd.to_datetime( @@ -473,8 +473,8 @@ def test_mean_skipna(values, dtype, result_dtype, skipna): "values, dtype", [ ([0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], "float64"), - ([0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], "Float64"), - ([0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], "Int64"), + ([0, 1, pd.NA, 3, 4, 5, 6, 7, 8, 9], "Float64"), + ([0, 1, pd.NA, 3, 4, 5, 6, 7, 8, 9], "Int64"), ([0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], "timedelta64[ns]"), ], ) @@ -519,32 +519,32 @@ def test_sum_skipna_object(skipna): "func, values, dtype, result_dtype", [ ("prod", [0, 1, 3, np.nan, 4, 5, 6, 7, -8, 9], "float64", "float64"), - ("prod", [0, -1, 3, 4, 5, np.nan, 6, 7, 8, 9], "Float64", "Float64"), - ("prod", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Int64", "Int64"), + ("prod", [0, -1, 3, 4, 5, pd.NA, 6, 7, 8, 9], "Float64", "Float64"), + ("prod", [0, 1, 3, -4, 5, 6, 7, -8, pd.NA, 9], "Int64", "Int64"), ("prod", [np.nan] * 10, "float64", "float64"), - ("prod", [np.nan] * 10, "Float64", "Float64"), - ("prod", [np.nan] * 10, "Int64", "Int64"), + ("prod", [pd.NA] * 10, "Float64", "Float64"), + ("prod", [pd.NA] * 10, "Int64", "Int64"), ("var", [0, -1, 3, 4, np.nan, 5, 6, 7, 8, 9], "float64", "float64"), - ("var", [0, 1, 3, -4, 5, 6, 7, -8, 9, np.nan], "Float64", "Float64"), - ("var", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "Int64", "Float64"), + ("var", [0, 1, 3, -4, 5, 6, 7, -8, 9, pd.NA], "Float64", "Float64"), + ("var", [0, -1, 3, 4, 5, -6, 7, pd.NA, 8, 9], "Int64", "Float64"), ("var", [np.nan] * 10, "float64", "float64"), - ("var", [np.nan] * 10, "Float64", "Float64"), - ("var", [np.nan] * 10, "Int64", "Float64"), + ("var", [pd.NA] * 10, "Float64", "Float64"), + ("var", [pd.NA] * 10, "Int64", "Float64"), ("std", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "float64", "float64"), - ("std", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "Float64", "Float64"), - ("std", [0, 1, 3, -4, 5, 6, 7, -8, 9, np.nan], "Int64", "Float64"), + ("std", [0, -1, 3, 4, 5, -6, 7, pd.NA, 8, 9], "Float64", "Float64"), + ("std", [0, 1, 3, -4, 5, 6, 7, -8, 9, pd.NA], "Int64", "Float64"), ("std", [np.nan] * 10, "float64", "float64"), - ("std", [np.nan] * 10, "Float64", "Float64"), - ("std", [np.nan] * 10, "Int64", "Float64"), + ("std", [pd.NA] * 10, "Float64", "Float64"), + ("std", [pd.NA] * 10, "Int64", "Float64"), ("sem", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), - ("sem", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), - ("sem", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Float64"), + ("sem", [0, 1, 3, -4, 5, 6, 7, -8, pd.NA, 9], "Float64", "Float64"), + ("sem", [0, -1, 3, 4, 5, -6, 7, 8, 9, pd.NA], "Int64", "Float64"), ("sem", [np.nan] * 10, "float64", "float64"), - ("sem", [np.nan] * 10, "Float64", "Float64"), - ("sem", [np.nan] * 10, "Int64", "Float64"), + ("sem", [pd.NA] * 10, "Float64", "Float64"), + ("sem", [pd.NA] * 10, "Int64", "Float64"), ("min", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), - ("min", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), - ("min", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Int64"), + ("min", [0, 1, 3, -4, 5, 6, 7, -8, pd.NA, 9], "Float64", "Float64"), + ("min", [0, -1, 3, 4, 5, -6, 7, 8, 9, pd.NA], "Int64", "Int64"), ( "min", [0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], @@ -571,11 +571,11 @@ def test_sum_skipna_object(skipna): "datetime64[ns]", ), ("min", [np.nan] * 10, "float64", "float64"), - ("min", [np.nan] * 10, "Float64", "Float64"), - ("min", [np.nan] * 10, "Int64", "Int64"), + ("min", [pd.NA] * 10, "Float64", "Float64"), + ("min", [pd.NA] * 10, "Int64", "Int64"), ("max", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), - ("max", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), - ("max", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Int64"), + ("max", [0, 1, 3, -4, 5, 6, 7, -8, pd.NA, 9], "Float64", "Float64"), + ("max", [0, -1, 3, 4, 5, -6, 7, 8, 9, pd.NA], "Int64", "Int64"), ( "max", [0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], @@ -602,11 +602,11 @@ def test_sum_skipna_object(skipna): "datetime64[ns]", ), ("max", [np.nan] * 10, "float64", "float64"), - ("max", [np.nan] * 10, "Float64", "Float64"), - ("max", [np.nan] * 10, "Int64", "Int64"), + ("max", [pd.NA] * 10, "Float64", "Float64"), + ("max", [pd.NA] * 10, "Int64", "Int64"), ("median", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), - ("median", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), - ("median", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Float64"), + ("median", [0, 1, 3, -4, 5, 6, 7, -8, pd.NA, 9], "Float64", "Float64"), + ("median", [0, -1, 3, 4, 5, -6, 7, 8, 9, pd.NA], "Int64", "Float64"), ( "median", [0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], @@ -633,8 +633,8 @@ def test_sum_skipna_object(skipna): "datetime64[ns]", ), ("median", [np.nan] * 10, "float64", "float64"), - ("median", [np.nan] * 10, "Float64", "Float64"), - ("median", [np.nan] * 10, "Int64", "Float64"), + ("median", [pd.NA] * 10, "Float64", "Float64"), + ("median", [pd.NA] * 10, "Int64", "Float64"), ], ) def test_multifunc_skipna(func, values, dtype, result_dtype, skipna): diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 92827cf154394..cf5fc2977a28f 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -665,14 +665,14 @@ def test_from_frame_missing_values_multiIndex(): df = pd.DataFrame( { "a": Series([1, 2, None], dtype="Int64"), - "b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])), + "b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, None, None])), } ) multi_indexed = MultiIndex.from_frame(df) expected = MultiIndex.from_arrays( [ Series([1, 2, None]).astype("Int64"), - pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])), + pd.Float64Dtype().__from_arrow__(pa.array([0.2, None, None])), ], names=["a", "b"], ) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index dd2d85c4755af..52f521d0d36eb 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -622,7 +622,7 @@ def test_to_csv_zip_infer_name(self, tmp_path, filename, expected_arcname): @pytest.mark.parametrize("df_new_type", ["Int64"]) def test_to_csv_na_rep_long_string(self, df_new_type): # see gh-25099 - df = DataFrame({"c": [float("nan")] * 3}) + df = DataFrame({"c": [pd.NA] * 3}) df = df.astype(df_new_type) expected_rows = ["c", "mynull", "mynull", "mynull"] expected = tm.convert_rows_list_to_csv_str(expected_rows) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d895fd6e6770c..8762188c86235 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2136,9 +2136,9 @@ def test_read_json_dtype_backend( # GH#50750 df = DataFrame( { - "a": Series([1, np.nan, 3], dtype="Int64"), + "a": Series([1, NA, 3], dtype="Int64"), "b": Series([1, 2, 3], dtype="Int64"), - "c": Series([1.5, np.nan, 2.5], dtype="Float64"), + "c": Series([1.5, NA, 2.5], dtype="Float64"), "d": Series([1.5, 2.0, 2.5], dtype="Float64"), "e": [True, False, None], "f": [True, False, True], @@ -2161,9 +2161,9 @@ def test_read_json_dtype_backend( expected = DataFrame( { - "a": Series([1, np.nan, 3], dtype="Int64"), + "a": Series([1, NA, 3], dtype="Int64"), "b": Series([1, 2, 3], dtype="Int64"), - "c": Series([1.5, np.nan, 2.5], dtype="Float64"), + "c": Series([1.5, NA, 2.5], dtype="Float64"), "d": Series([1.5, 2.0, 2.5], dtype="Float64"), "e": Series([True, False, NA], dtype="boolean"), "f": Series([True, False, True], dtype="boolean"), @@ -2218,7 +2218,7 @@ def test_read_json_pyarrow_with_dtype(self): def test_read_json_nullable_series(self, string_storage, dtype_backend, orient): # GH#50750 pa = pytest.importorskip("pyarrow") - ser = Series([1, np.nan, 3], dtype="Int64") + ser = Series([1, NA, 3], dtype="Int64") out = ser.to_json(orient=orient) with pd.option_context("mode.string_storage", string_storage): @@ -2226,7 +2226,7 @@ def test_read_json_nullable_series(self, string_storage, dtype_backend, orient): StringIO(out), dtype_backend=dtype_backend, orient=orient, typ="series" ) - expected = Series([1, np.nan, 3], dtype="Int64") + expected = Series([1, NA, 3], dtype="Int64") if dtype_backend == "pyarrow": from pandas.arrays import ArrowExtensionArray diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index e778193c147c1..904c3a047bab2 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -173,9 +173,9 @@ def test_read_feather_dtype_backend( # GH#50765 df = pd.DataFrame( { - "a": pd.Series([1, np.nan, 3], dtype="Int64"), + "a": pd.Series([1, pd.NA, 3], dtype="Int64"), "b": pd.Series([1, 2, 3], dtype="Int64"), - "c": pd.Series([1.5, np.nan, 2.5], dtype="Float64"), + "c": pd.Series([1.5, pd.NA, 2.5], dtype="Float64"), "d": pd.Series([1.5, 2.0, 2.5], dtype="Float64"), "e": [True, False, None], "f": [True, False, True], @@ -200,9 +200,9 @@ def test_read_feather_dtype_backend( expected = pd.DataFrame( { - "a": pd.Series([1, np.nan, 3], dtype="Int64"), + "a": pd.Series([1, pd.NA, 3], dtype="Int64"), "b": pd.Series([1, 2, 3], dtype="Int64"), - "c": pd.Series([1.5, np.nan, 2.5], dtype="Float64"), + "c": pd.Series([1.5, pd.NA, 2.5], dtype="Float64"), "d": pd.Series([1.5, 2.0, 2.5], dtype="Float64"), "e": pd.Series([True, False, pd.NA], dtype="boolean"), "f": pd.Series([True, False, True], dtype="boolean"), diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index bef28c4f027da..bad92f677c7ea 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -161,9 +161,9 @@ def test_dtype_backend(self, string_storage, dtype_backend, flavor_read_html): # GH#50286 df = DataFrame( { - "a": Series([1, np.nan, 3], dtype="Int64"), + "a": Series([1, NA, 3], dtype="Int64"), "b": Series([1, 2, 3], dtype="Int64"), - "c": Series([1.5, np.nan, 2.5], dtype="Float64"), + "c": Series([1.5, NA, 2.5], dtype="Float64"), "d": Series([1.5, 2.0, 2.5], dtype="Float64"), "e": [True, False, None], "f": [True, False, True], @@ -184,9 +184,9 @@ def test_dtype_backend(self, string_storage, dtype_backend, flavor_read_html): expected = DataFrame( { - "a": Series([1, np.nan, 3], dtype="Int64"), + "a": Series([1, NA, 3], dtype="Int64"), "b": Series([1, 2, 3], dtype="Int64"), - "c": Series([1.5, np.nan, 2.5], dtype="Float64"), + "c": Series([1.5, NA, 2.5], dtype="Float64"), "d": Series([1.5, 2.0, 2.5], dtype="Float64"), "e": Series([True, False, NA], dtype="boolean"), "f": Series([True, False, True], dtype="boolean"), diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f8638ea104933..1b9ae5d8e7209 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -3692,9 +3692,9 @@ def test_read_sql_invalid_dtype_backend_table(conn, request, func, dtype_backend def dtype_backend_data() -> DataFrame: return DataFrame( { - "a": Series([1, np.nan, 3], dtype="Int64"), + "a": Series([1, pd.NA, 3], dtype="Int64"), "b": Series([1, 2, 3], dtype="Int64"), - "c": Series([1.5, np.nan, 2.5], dtype="Float64"), + "c": Series([1.5, pd.NA, 2.5], dtype="Float64"), "d": Series([1.5, 2.0, 2.5], dtype="Float64"), "e": [True, False, None], "f": [True, False, True], @@ -3716,9 +3716,9 @@ def func(string_storage, dtype_backend, conn_name) -> DataFrame: df = DataFrame( { - "a": Series([1, np.nan, 3], dtype="Int64"), + "a": Series([1, pd.NA, 3], dtype="Int64"), "b": Series([1, 2, 3], dtype="Int64"), - "c": Series([1.5, np.nan, 2.5], dtype="Float64"), + "c": Series([1.5, pd.NA, 2.5], dtype="Float64"), "d": Series([1.5, 2.0, 2.5], dtype="Float64"), "e": Series([True, False, pd.NA], dtype="boolean"), "f": Series([True, False, True], dtype="boolean"), diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 90fda2c10962b..3ebf4416f7289 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -2056,9 +2056,10 @@ def test_writer_118_exceptions(self, temp_file): ["numpy_nullable", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))], ) def test_read_write_ea_dtypes(self, dtype_backend, temp_file, tmp_path): + dtype = "Int64" if dtype_backend == "numpy_nullable" else "int64[pyarrow]" df = DataFrame( { - "a": [1, 2, None], + "a": pd.array([1, 2, None], dtype=dtype), "b": ["a", "b", "c"], "c": [True, False, None], "d": [1.5, 2.5, 3.5], diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py index 96ef50f9d7149..62cd515366bb9 100644 --- a/pandas/tests/io/xml/test_xml_dtypes.py +++ b/pandas/tests/io/xml/test_xml_dtypes.py @@ -8,6 +8,7 @@ import pandas.util._test_decorators as td from pandas import ( + NA, DataFrame, DatetimeIndex, Series, @@ -145,8 +146,8 @@ def test_dtypes_with_names(parser): df_expected = DataFrame( { "Col1": ["square", "circle", "triangle"], - "Col2": Series(["00360", "00360", "00180"]).astype("string"), - "Col3": Series([4.0, float("nan"), 3.0]).astype("Int64"), + "Col2": Series(["00360", "00360", "00180"], dtype="string"), + "Col3": Series([4.0, NA, 3.0], dtype="Int64"), "Col4": DatetimeIndex( ["2020-01-01", "2021-01-01", "2022-01-01"], dtype="M8[ns]" ), @@ -170,7 +171,7 @@ def test_dtype_nullable_int(parser): { "shape": ["square", "circle", "triangle"], "degrees": [360, 360, 180], - "sides": Series([4.0, float("nan"), 3.0]).astype("Int64"), + "sides": Series([4.0, NA, 3.0], dtype="Int64"), } ) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 485b50f65736e..807cf19269c85 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -219,7 +219,7 @@ def test_nan_int_timedelta_sum(self): df = DataFrame( { "A": Series([1, 2, NaT], dtype="timedelta64[ns]"), - "B": Series([1, 2, np.nan], dtype="Int64"), + "B": Series([1, 2, pd.NA], dtype="Int64"), } ) expected = Series({"A": Timedelta(3), "B": 3}) @@ -587,6 +587,7 @@ def test_sum_inf(self): @pytest.mark.parametrize("use_bottleneck", [True, False]) @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)]) def test_empty(self, method, unit, use_bottleneck, dtype): + item = pd.NA if dtype in ["Float32", "Int64"] else np.nan with pd.option_context("use_bottleneck", use_bottleneck): # GH#9422 / GH#18921 # Entirely empty @@ -620,7 +621,7 @@ def test_empty(self, method, unit, use_bottleneck, dtype): assert isna(result) # All-NA - s = Series([np.nan], dtype=dtype) + s = Series([item], dtype=dtype) # NA by default result = getattr(s, method)() assert result == unit @@ -644,7 +645,7 @@ def test_empty(self, method, unit, use_bottleneck, dtype): assert isna(result) # Mix of valid, empty - s = Series([np.nan, 1], dtype=dtype) + s = Series([item, 1], dtype=dtype) # Default result = getattr(s, method)() assert result == 1.0 @@ -674,11 +675,11 @@ def test_empty(self, method, unit, use_bottleneck, dtype): result = getattr(s, method)(skipna=False, min_count=2) assert isna(result) - s = Series([np.nan], dtype=dtype) + s = Series([item], dtype=dtype) result = getattr(s, method)(min_count=2) assert isna(result) - s = Series([np.nan, 1], dtype=dtype) + s = Series([item, 1], dtype=dtype) result = getattr(s, method)(min_count=2) assert isna(result) @@ -694,7 +695,7 @@ def test_ops_consistency_on_empty_nullable(self, method, dtype): assert result is pd.NA # ALL-NA series - nser = Series([np.nan], dtype=dtype) + nser = Series([pd.NA], dtype=dtype) result = getattr(nser, method)() assert result is pd.NA diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index f3418ad047afe..a8e29ef03acc2 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -581,7 +581,7 @@ def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2): @pytest.mark.parametrize( "series_of_dtype_all_na", [ - Series([np.nan], dtype="Int64"), + Series([pd.NA], dtype="Int64"), Series([np.nan], dtype="float"), Series([np.nan], dtype="object"), Series([pd.NaT]), @@ -2174,10 +2174,10 @@ def test_merging_with_bool_or_int_cateorical_column( def test_merge_on_int_array(self): # GH 23020 - df = DataFrame({"A": Series([1, 2, np.nan], dtype="Int64"), "B": 1}) + df = DataFrame({"A": Series([1, 2, pd.NA], dtype="Int64"), "B": 1}) result = merge(df, df, on="A") expected = DataFrame( - {"A": Series([1, 2, np.nan], dtype="Int64"), "B_x": 1, "B_y": 1} + {"A": Series([1, 2, pd.NA], dtype="Int64"), "B_x": 1, "B_y": 1} ) tm.assert_frame_equal(result, expected) @@ -2781,14 +2781,15 @@ def test_merge_on_left_categoricalindex(): @pytest.mark.parametrize("dtype", [None, "Int64"]) def test_merge_outer_with_NaN(dtype): # GH#43550 + item = np.nan if dtype is None else pd.NA left = DataFrame({"key": [1, 2], "col1": [1, 2]}, dtype=dtype) - right = DataFrame({"key": [np.nan, np.nan], "col2": [3, 4]}, dtype=dtype) + right = DataFrame({"key": [item, item], "col2": [3, 4]}, dtype=dtype) result = merge(left, right, on="key", how="outer") expected = DataFrame( { - "key": [1, 2, np.nan, np.nan], - "col1": [1, 2, np.nan, np.nan], - "col2": [np.nan, np.nan, 3, 4], + "key": [1, 2, item, item], + "col1": [1, 2, item, item], + "col2": [item, item, 3, 4], }, dtype=dtype, ) @@ -2798,9 +2799,9 @@ def test_merge_outer_with_NaN(dtype): result = merge(right, left, on="key", how="outer") expected = DataFrame( { - "key": [1, 2, np.nan, np.nan], - "col2": [np.nan, np.nan, 3, 4], - "col1": [1, 2, np.nan, np.nan], + "key": [1, 2, item, item], + "col2": [item, item, 3, 4], + "col1": [1, 2, item, item], }, dtype=dtype, ) @@ -2974,7 +2975,7 @@ def test_merge_combinations( def test_merge_ea_int_and_float_numpy(): # GH#46178 - df1 = DataFrame([1.0, np.nan], dtype=pd.Int64Dtype()) + df1 = DataFrame([1.0, pd.NA], dtype=pd.Int64Dtype()) df2 = DataFrame([1.5]) expected = DataFrame(columns=[0], dtype="Int64") diff --git a/pandas/tests/series/methods/test_case_when.py b/pandas/tests/series/methods/test_case_when.py index 7cb60a11644a3..acfc58bea728e 100644 --- a/pandas/tests/series/methods/test_case_when.py +++ b/pandas/tests/series/methods/test_case_when.py @@ -2,6 +2,7 @@ import pytest from pandas import ( + NA, DataFrame, Series, array as pd_array, @@ -99,7 +100,7 @@ def test_case_when_multiple_conditions_replacement_extension_dtype(df): (df["a"].gt(1) & df["b"].eq(5), pd_array([1, 2, 3], dtype="Int64")), ], ) - expected = Series([1, 2, np.nan], dtype="Float64") + expected = Series([1, 2, NA], dtype="Float64") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py index 7c6a7893ba3a0..81af7ae2aea45 100644 --- a/pandas/tests/series/methods/test_rank.py +++ b/pandas/tests/series/methods/test_rank.py @@ -254,8 +254,7 @@ def test_rank_categorical(self): def test_rank_nullable_integer(self): # GH 56976 - exp = Series([np.nan, 2, np.nan, 3, 3, 2, 3, 1]) - exp = exp.astype("Int64") + exp = Series([None, 2, None, 3, 3, 2, 3, 1], dtype="Int64") result = exp.rank(na_option="keep") expected = Series([np.nan, 2.5, np.nan, 5.0, 5.0, 2.5, 5.0, 1.0]) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 442d73cadfe47..e45e3f76b457f 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -421,7 +421,7 @@ def test_reindexing_with_float64_NA_log(): tm.assert_numpy_array_equal(result, expected) with tm.assert_produces_warning(None): result_log = np.log(s_reindex) - expected_log = Series([0, np.nan, np.nan], dtype=Float64Dtype()) + expected_log = Series([0, NA, NA], dtype=Float64Dtype()) tm.assert_series_equal(result_log, expected_log) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f82451a2be84d..6d991235958af 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2066,7 +2066,7 @@ def test_series_constructor_overflow_uint_ea_with_na(self, val): def test_series_constructor_overflow_uint_with_nan(self): # GH#38798 max_val = np.iinfo(np.uint64).max - 1 - result = Series([max_val, np.nan], dtype="UInt64") + result = Series([max_val, pd.NA], dtype="UInt64") expected = Series( IntegerArray( np.array([max_val, 1], dtype="uint64"), @@ -2077,7 +2077,7 @@ def test_series_constructor_overflow_uint_with_nan(self): def test_series_constructor_ea_all_na(self): # GH#38798 - result = Series([np.nan, np.nan], dtype="UInt64") + result = Series([pd.NA, pd.NA], dtype="UInt64") expected = Series( IntegerArray( np.array([1, 1], dtype="uint64"), diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 30e6ebf0eed13..506f1526990bf 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -996,32 +996,35 @@ def test_find_nan(any_string_dtype): ser = Series( ["ABCDEFG", np.nan, "DEFGHIJEF", np.nan, "XXXX"], dtype=any_string_dtype ) - expected_dtype = ( - np.float64 if is_object_or_nan_string_dtype(any_string_dtype) else "Int64" - ) + if is_object_or_nan_string_dtype(any_string_dtype): + expected_dtype = np.float64 + item = np.nan + else: + expected_dtype = "Int64" + item = pd.NA result = ser.str.find("EF") - expected = Series([4, np.nan, 1, np.nan, -1], dtype=expected_dtype) + expected = Series([4, item, 1, item, -1], dtype=expected_dtype) tm.assert_series_equal(result, expected) result = ser.str.rfind("EF") - expected = Series([4, np.nan, 7, np.nan, -1], dtype=expected_dtype) + expected = Series([4, item, 7, item, -1], dtype=expected_dtype) tm.assert_series_equal(result, expected) result = ser.str.find("EF", 3) - expected = Series([4, np.nan, 7, np.nan, -1], dtype=expected_dtype) + expected = Series([4, item, 7, item, -1], dtype=expected_dtype) tm.assert_series_equal(result, expected) result = ser.str.rfind("EF", 3) - expected = Series([4, np.nan, 7, np.nan, -1], dtype=expected_dtype) + expected = Series([4, item, 7, item, -1], dtype=expected_dtype) tm.assert_series_equal(result, expected) result = ser.str.find("EF", 3, 6) - expected = Series([4, np.nan, -1, np.nan, -1], dtype=expected_dtype) + expected = Series([4, item, -1, item, -1], dtype=expected_dtype) tm.assert_series_equal(result, expected) result = ser.str.rfind("EF", 3, 6) - expected = Series([4, np.nan, -1, np.nan, -1], dtype=expected_dtype) + expected = Series([4, item, -1, item, -1], dtype=expected_dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 7083489ce7c8c..fb3a3b8d60b6b 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -7,6 +7,7 @@ import pytest from pandas import ( + NA, DataFrame, Index, MultiIndex, @@ -42,10 +43,14 @@ def test_iter_raises(): def test_count(any_string_dtype): ser = Series(["foo", "foofoo", np.nan, "foooofooofommmfoo"], dtype=any_string_dtype) result = ser.str.count("f[o]+") - expected_dtype = ( - np.float64 if is_object_or_nan_string_dtype(any_string_dtype) else "Int64" - ) - expected = Series([1, 2, np.nan, 4], dtype=expected_dtype) + if is_object_or_nan_string_dtype(any_string_dtype): + expected_dtype = np.float64 + item = np.nan + else: + expected_dtype = "Int64" + item = NA + + expected = Series([1, 2, item, 4], dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -313,10 +318,13 @@ def test_len(any_string_dtype): dtype=any_string_dtype, ) result = ser.str.len() - expected_dtype = ( - "float64" if is_object_or_nan_string_dtype(any_string_dtype) else "Int64" - ) - expected = Series([3, 4, 6, np.nan, 8, 4, 1], dtype=expected_dtype) + if is_object_or_nan_string_dtype(any_string_dtype): + expected_dtype = "float64" + item = np.nan + else: + expected_dtype = "Int64" + item = NA + expected = Series([3, 4, 6, item, 8, 4, 1], dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -387,12 +395,15 @@ def test_index_wrong_type_raises(index_or_series, any_string_dtype, method): ) def test_index_missing(any_string_dtype, method, exp): ser = Series(["abcb", "ab", "bcbe", np.nan], dtype=any_string_dtype) - expected_dtype = ( - np.float64 if is_object_or_nan_string_dtype(any_string_dtype) else "Int64" - ) + if is_object_or_nan_string_dtype(any_string_dtype): + expected_dtype = np.float64 + item = np.nan + else: + expected_dtype = "Int64" + item = NA result = getattr(ser.str, method)("b") - expected = Series(exp + [np.nan], dtype=expected_dtype) + expected = Series(exp + [item], dtype=expected_dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 7fb421e27bb40..1525f648e2d5a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -435,7 +435,7 @@ def test_parametrized_factorize_na_value(self, data, na_value): np.array(["b", "a"], dtype=object), ), ( - pd.array([2, 1, np.nan, 2], dtype="Int64"), + pd.array([2, 1, pd.NA, 2], dtype="Int64"), pd.array([2, 1], dtype="Int64"), ), ], diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 869d41efa6c28..bf49afddbf09b 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -450,7 +450,7 @@ def test_exceptions(self, arg, codes, err, msg): safe_sort(values=arg, codes=codes) @pytest.mark.parametrize( - "arg, exp", [[[1, 3, 2], [1, 2, 3]], [[1, 3, np.nan, 2], [1, 2, 3, np.nan]]] + "arg, exp", [[[1, 3, 2], [1, 2, 3]], [[1, 3, NA, 2], [1, 2, 3, NA]]] ) def test_extension_array(self, arg, exp): a = array(arg, dtype="Int64") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b02fab70fb825..9bc88a7e0a824 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3365,8 +3365,7 @@ def test_no_slicing_errors_in_should_cache(self, listlike): def test_nullable_integer_to_datetime(): # Test for #30050 - ser = Series([1, 2, None, 2**61, None]) - ser = ser.astype("Int64") + ser = Series([1, 2, None, 2**61, None], dtype="Int64") ser_copy = ser.copy() res = to_datetime(ser, unit="ns") diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 893f526fb3eb0..12e6be18244e1 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -703,11 +703,11 @@ def test_precision_float_conversion(strrep): @pytest.mark.parametrize( "values, expected", [ - (["1", "2", None], Series([1, 2, np.nan], dtype="Int64")), + (["1", "2", None], Series([1, 2, pd.NA], dtype="Int64")), (["1", "2", "3"], Series([1, 2, 3], dtype="Int64")), (["1", "2", 3], Series([1, 2, 3], dtype="Int64")), (["1", "2", 3.5], Series([1, 2, 3.5], dtype="Float64")), - (["1", None, 3.5], Series([1, np.nan, 3.5], dtype="Float64")), + (["1", None, 3.5], Series([1, pd.NA, 3.5], dtype="Float64")), (["1", "2", "3.5"], Series([1, 2, 3.5], dtype="Float64")), ], ) @@ -898,7 +898,7 @@ def test_to_numeric_dtype_backend_error(dtype_backend): dtype = "double[pyarrow]" else: dtype = "Float64" - expected = Series([np.nan, np.nan, np.nan], dtype=dtype) + expected = Series([pd.NA, pd.NA, pd.NA], dtype=dtype) tm.assert_series_equal(result, expected)