Skip to content

Commit 0327507

Browse files
committed
Better option name, fixture
1 parent 76bc3d2 commit 0327507

File tree

8 files changed

+50
-32
lines changed

8 files changed

+50
-32
lines changed

pandas/_config/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,6 @@ def using_string_dtype() -> bool:
3535
return _mode_options["infer_string"]
3636

3737

38-
def using_pyarrow_strict_nans() -> bool:
38+
def is_nan_na() -> bool:
3939
_mode_options = _global_config["mode"]
40-
return _mode_options["pyarrow_strict_nans"]
40+
return _mode_options["nan_is_na"]

pandas/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2122,3 +2122,10 @@ def temp_file(tmp_path):
21222122
def monkeysession():
21232123
with pytest.MonkeyPatch.context() as mp:
21242124
yield mp
2125+
2126+
2127+
@pytest.fixture(params=[True, False])
2128+
def using_nan_is_na(request):
2129+
opt = request.param
2130+
with pd.option_context("mode.nan_is_na", opt):
2131+
yield opt

pandas/core/arrays/_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
import numpy as np
99

10-
from pandas._config import using_pyarrow_strict_nans
10+
from pandas._config import is_nan_na
1111

1212
from pandas._libs import lib
1313
from pandas._libs.missing import NA
@@ -41,7 +41,7 @@ def to_numpy_dtype_inference(
4141
else:
4242
dtype = arr.dtype.numpy_dtype # type: ignore[union-attr]
4343
if na_value is lib.no_default:
44-
if is_pyarrow and using_pyarrow_strict_nans():
44+
if is_pyarrow and not is_nan_na():
4545
na_value = NA
4646
dtype = np.dtype(object)
4747
else:

pandas/core/arrays/arrow/array.py

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
import numpy as np
1818

19-
from pandas._config import using_pyarrow_strict_nans
19+
from pandas._config import is_nan_na
2020

2121
from pandas._libs import lib
2222
from pandas._libs.missing import is_pdna_or_none
@@ -35,6 +35,7 @@
3535

3636
from pandas.core.dtypes.cast import (
3737
can_hold_element,
38+
construct_1d_object_array_from_listlike,
3839
infer_dtype_from_scalar,
3940
)
4041
from pandas.core.dtypes.common import (
@@ -555,7 +556,22 @@ def _box_pa_array(
555556
return pa_array
556557

557558
mask = None
558-
if getattr(value, "dtype", None) is None or value.dtype.kind not in "iumMf":
559+
if is_nan_na():
560+
try:
561+
arr_value = np.asarray(value)
562+
if arr_value.ndim > 1:
563+
# e.g. test_fixed_size_list we have list data. ndim > 1
564+
# means there were no scalar (NA) entries.
565+
mask = np.zeros(len(value), dtype=np.bool_)
566+
else:
567+
mask = isna(arr_value)
568+
except ValueError:
569+
# Ragged data that numpy raises on
570+
arr_value = construct_1d_object_array_from_listlike(value)
571+
mask = isna(arr_value)
572+
elif (
573+
getattr(value, "dtype", None) is None or value.dtype.kind not in "iumMf"
574+
):
559575
arr_value = np.asarray(value, dtype=object)
560576
# similar to isna(value) but exclude NaN, NaT, nat-like, nan-like
561577
mask = is_pdna_or_none(arr_value)
@@ -1490,7 +1506,9 @@ def to_numpy(
14901506
na_value: object = lib.no_default,
14911507
) -> np.ndarray:
14921508
original_na_value = na_value
1493-
dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, self._hasna)
1509+
dtype, na_value = to_numpy_dtype_inference(
1510+
self, dtype, na_value, self._hasna, is_pyarrow=True
1511+
)
14941512
pa_type = self._pa_array.type
14951513
if not self._hasna or isna(na_value) or pa.types.is_null(pa_type):
14961514
data = self
@@ -1522,7 +1540,7 @@ def to_numpy(
15221540
or (
15231541
original_na_value is lib.no_default
15241542
and is_float_dtype(dtype)
1525-
and not using_pyarrow_strict_nans()
1543+
and is_nan_na()
15261544
)
15271545
)
15281546
):

pandas/core/config_init.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -429,10 +429,11 @@ def is_terminal() -> bool:
429429

430430
with cf.config_prefix("mode"):
431431
cf.register_option(
432-
"pyarrow_strict_nans",
433-
True,
432+
"nan_is_na",
433+
False,
434434
# TODO: Change this to False before merging
435-
"Whether to make ArrowDtype arrays consistently treat NaN as distinct from NA",
435+
"Whether to make ArrowDtype arrays consistently treat NaN as "
436+
"interchangeable with pd.NA",
436437
validator=is_one_of_factory([True, False]),
437438
)
438439

pandas/tests/extension/test_arrow.py

Lines changed: 10 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,6 @@
3232
import numpy as np
3333
import pytest
3434

35-
from pandas._config import using_pyarrow_strict_nans
36-
3735
from pandas._libs import lib
3836
from pandas._libs.tslibs import timezones
3937
from pandas.compat import (
@@ -278,17 +276,14 @@ def test_compare_scalar(self, data, comparison_op):
278276
self._compare_other(ser, data, comparison_op, data[0])
279277

280278
@pytest.mark.parametrize("na_action", [None, "ignore"])
281-
def test_map(self, data_missing, na_action):
279+
def test_map(self, data_missing, na_action, using_nan_is_na):
282280
if data_missing.dtype.kind in "mM":
283281
result = data_missing.map(lambda x: x, na_action=na_action)
284282
expected = data_missing.to_numpy(dtype=object)
285283
tm.assert_numpy_array_equal(result, expected)
286284
else:
287285
result = data_missing.map(lambda x: x, na_action=na_action)
288-
if (
289-
data_missing.dtype == "float32[pyarrow]"
290-
and not using_pyarrow_strict_nans()
291-
):
286+
if data_missing.dtype == "float32[pyarrow]" and using_nan_is_na:
292287
# map roundtrips through objects, which converts to float64
293288
expected = data_missing.to_numpy(dtype="float64", na_value=np.nan)
294289
else:
@@ -705,7 +700,7 @@ def test_setitem_preserves_views(self, data):
705700

706701
@pytest.mark.parametrize("dtype_backend", ["pyarrow", no_default])
707702
@pytest.mark.parametrize("engine", ["c", "python"])
708-
def test_EA_types(self, engine, data, dtype_backend, request):
703+
def test_EA_types(self, engine, data, dtype_backend, request, using_nan_is_na):
709704
pa_dtype = data.dtype.pyarrow_dtype
710705
if pa.types.is_decimal(pa_dtype):
711706
request.applymarker(
@@ -726,7 +721,7 @@ def test_EA_types(self, engine, data, dtype_backend, request):
726721
pytest.mark.xfail(reason="CSV parsers don't correctly handle binary")
727722
)
728723
df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
729-
if using_pyarrow_strict_nans():
724+
if not using_nan_is_na:
730725
csv_output = df.to_csv(index=False, na_rep="NA")
731726
else:
732727
csv_output = df.to_csv(index=False, na_rep=np.nan)
@@ -1543,7 +1538,7 @@ def test_astype_errors_ignore():
15431538
tm.assert_frame_equal(result, expected)
15441539

15451540

1546-
def test_to_numpy_with_defaults(data):
1541+
def test_to_numpy_with_defaults(data, using_nan_is_na):
15471542
# GH49973
15481543
result = data.to_numpy()
15491544

@@ -1555,21 +1550,19 @@ def test_to_numpy_with_defaults(data):
15551550
else:
15561551
expected = np.array(data._pa_array)
15571552

1558-
if data._hasna and (
1559-
not is_numeric_dtype(data.dtype) or using_pyarrow_strict_nans()
1560-
):
1553+
if data._hasna and (not is_numeric_dtype(data.dtype) or not using_nan_is_na):
15611554
expected = expected.astype(object)
15621555
expected[pd.isna(data)] = pd.NA
15631556

15641557
tm.assert_numpy_array_equal(result, expected)
15651558

15661559

1567-
def test_to_numpy_int_with_na():
1560+
def test_to_numpy_int_with_na(using_nan_is_na):
15681561
# GH51227: ensure to_numpy does not convert int to float
15691562
data = [1, None]
15701563
arr = pd.array(data, dtype="int64[pyarrow]")
15711564
result = arr.to_numpy()
1572-
if using_pyarrow_strict_nans():
1565+
if not using_nan_is_na:
15731566
expected = np.array([1, pd.NA], dtype=object)
15741567
else:
15751568
expected = np.array([1, np.nan])
@@ -3534,10 +3527,10 @@ def test_cast_dictionary_different_value_dtype(arrow_type):
35343527
assert result.dtypes.iloc[0] == data_type
35353528

35363529

3537-
def test_map_numeric_na_action():
3530+
def test_map_numeric_na_action(using_nan_is_na):
35383531
ser = pd.Series([32, 40, None], dtype="int64[pyarrow]")
35393532
result = ser.map(lambda x: 42, na_action="ignore")
3540-
if using_pyarrow_strict_nans():
3533+
if not using_nan_is_na:
35413534
expected = pd.Series([42.0, 42.0, pd.NA], dtype="object")
35423535
else:
35433536
expected = pd.Series([42.0, 42.0, np.nan], dtype="float64")

pandas/tests/frame/methods/test_convert_dtypes.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_pyarrow_strict_nans
7-
86
import pandas.util._test_decorators as td
97

108
import pandas as pd
@@ -61,7 +59,7 @@ def test_convert_dtypes_retain_column_names(self):
6159
tm.assert_index_equal(result.columns, df.columns)
6260
assert result.columns.name == "cols"
6361

64-
def test_pyarrow_dtype_backend(self):
62+
def test_pyarrow_dtype_backend(self, using_nan_is_na):
6563
pa = pytest.importorskip("pyarrow")
6664
df = pd.DataFrame(
6765
{
@@ -76,7 +74,7 @@ def test_pyarrow_dtype_backend(self):
7674
)
7775
result = df.convert_dtypes(dtype_backend="pyarrow")
7876

79-
item = None if not using_pyarrow_strict_nans() else np.nan
77+
item = None if using_nan_is_na else np.nan
8078
expected = pd.DataFrame(
8179
{
8280
"a": pd.arrays.ArrowExtensionArray(

pandas/tests/series/methods/test_rank.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,7 @@ def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
275275
dtype == "int64"
276276
or dtype == "int64[pyarrow]"
277277
or dtype == "uint64[pyarrow]"
278+
or dtype == "float64[pyarrow]"
278279
or (not using_infer_string and dtype == "str")
279280
):
280281
pytest.skip("int64/str does not support NaN")

0 commit comments

Comments
 (0)