20
20
)
21
21
22
22
23
- def na_val (dtype ):
24
- if dtype .storage == "pyarrow_numpy" :
25
- return np .nan
26
- else :
27
- return pd .NA
28
-
29
-
30
23
@pytest .fixture
31
24
def dtype (string_storage ):
32
25
"""Fixture giving StringDtype from parametrized 'string_storage'"""
@@ -41,22 +34,22 @@ def cls(dtype):
41
34
42
35
def test_repr (dtype ):
43
36
df = pd .DataFrame ({"A" : pd .array (["a" , pd .NA , "b" ], dtype = dtype )})
44
- if dtype .storage == "pyarrow_numpy" :
37
+ if dtype .na_value is np . nan :
45
38
expected = " A\n 0 a\n 1 NaN\n 2 b"
46
39
else :
47
40
expected = " A\n 0 a\n 1 <NA>\n 2 b"
48
41
assert repr (df ) == expected
49
42
50
- if dtype .storage == "pyarrow_numpy" :
43
+ if dtype .na_value is np . nan :
51
44
expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: string"
52
45
else :
53
46
expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
54
47
assert repr (df .A ) == expected
55
48
56
- if dtype .storage == "pyarrow" :
49
+ if dtype .storage == "pyarrow" and dtype . na_value is pd . NA :
57
50
arr_name = "ArrowStringArray"
58
51
expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
59
- elif dtype .storage == "pyarrow_numpy" :
52
+ elif dtype .storage == "pyarrow" and dtype . na_value is np . nan :
60
53
arr_name = "ArrowStringArrayNumpySemantics"
61
54
expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: string"
62
55
else :
@@ -68,7 +61,7 @@ def test_repr(dtype):
68
61
def test_none_to_nan (cls , dtype ):
69
62
a = cls ._from_sequence (["a" , None , "b" ], dtype = dtype )
70
63
assert a [1 ] is not None
71
- assert a [1 ] is na_val ( a .dtype )
64
+ assert a [1 ] is a .dtype . na_value
72
65
73
66
74
67
def test_setitem_validates (cls , dtype ):
@@ -225,7 +218,7 @@ def test_comparison_methods_scalar(comparison_op, dtype):
225
218
a = pd .array (["a" , None , "c" ], dtype = dtype )
226
219
other = "a"
227
220
result = getattr (a , op_name )(other )
228
- if dtype .storage == "pyarrow_numpy" :
221
+ if dtype .na_value is np . nan :
229
222
expected = np .array ([getattr (item , op_name )(other ) for item in a ])
230
223
if comparison_op == operator .ne :
231
224
expected [1 ] = True
@@ -244,7 +237,7 @@ def test_comparison_methods_scalar_pd_na(comparison_op, dtype):
244
237
a = pd .array (["a" , None , "c" ], dtype = dtype )
245
238
result = getattr (a , op_name )(pd .NA )
246
239
247
- if dtype .storage == "pyarrow_numpy" :
240
+ if dtype .na_value is np . nan :
248
241
if operator .ne == comparison_op :
249
242
expected = np .array ([True , True , True ])
250
243
else :
@@ -271,7 +264,7 @@ def test_comparison_methods_scalar_not_string(comparison_op, dtype):
271
264
272
265
result = getattr (a , op_name )(other )
273
266
274
- if dtype .storage == "pyarrow_numpy" :
267
+ if dtype .na_value is np . nan :
275
268
expected_data = {
276
269
"__eq__" : [False , False , False ],
277
270
"__ne__" : [True , True , True ],
@@ -293,7 +286,7 @@ def test_comparison_methods_array(comparison_op, dtype):
293
286
a = pd .array (["a" , None , "c" ], dtype = dtype )
294
287
other = [None , None , "c" ]
295
288
result = getattr (a , op_name )(other )
296
- if dtype .storage == "pyarrow_numpy" :
289
+ if dtype .na_value is np . nan :
297
290
if operator .ne == comparison_op :
298
291
expected = np .array ([True , True , False ])
299
292
else :
@@ -387,7 +380,7 @@ def test_astype_int(dtype):
387
380
tm .assert_numpy_array_equal (result , expected )
388
381
389
382
arr = pd .array (["1" , pd .NA , "3" ], dtype = dtype )
390
- if dtype .storage == "pyarrow_numpy" :
383
+ if dtype .na_value is np . nan :
391
384
err = ValueError
392
385
msg = "cannot convert float NaN to integer"
393
386
else :
@@ -441,7 +434,7 @@ def test_min_max(method, skipna, dtype):
441
434
expected = "a" if method == "min" else "c"
442
435
assert result == expected
443
436
else :
444
- assert result is na_val ( arr .dtype )
437
+ assert result is arr .dtype . na_value
445
438
446
439
447
440
@pytest .mark .parametrize ("method" , ["min" , "max" ])
@@ -522,7 +515,7 @@ def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
522
515
expected = df .astype (f"string[{ string_storage2 } ]" )
523
516
tm .assert_frame_equal (result , expected )
524
517
# ensure the missing value is represented by NA and not np.nan or None
525
- assert result .loc [2 , "a" ] is na_val ( result ["a" ].dtype )
518
+ assert result .loc [2 , "a" ] is result ["a" ].dtype . na_value
526
519
527
520
528
521
@pytest .mark .filterwarnings ("ignore:Passing a BlockManager:DeprecationWarning" )
@@ -556,10 +549,10 @@ def test_arrow_load_from_zero_chunks(
556
549
557
550
558
551
def test_value_counts_na (dtype ):
559
- if getattr (dtype , "storage" , "" ) == "pyarrow" :
560
- exp_dtype = "int64[pyarrow]"
561
- elif getattr (dtype , "storage" , "" ) == "pyarrow_numpy" :
552
+ if dtype .na_value is np .nan :
562
553
exp_dtype = "int64"
554
+ elif dtype .storage == "pyarrow" :
555
+ exp_dtype = "int64[pyarrow]"
563
556
else :
564
557
exp_dtype = "Int64"
565
558
arr = pd .array (["a" , "b" , "a" , pd .NA ], dtype = dtype )
@@ -573,10 +566,10 @@ def test_value_counts_na(dtype):
573
566
574
567
575
568
def test_value_counts_with_normalize (dtype ):
576
- if getattr (dtype , "storage" , "" ) == "pyarrow" :
577
- exp_dtype = "double[pyarrow]"
578
- elif getattr (dtype , "storage" , "" ) == "pyarrow_numpy" :
569
+ if dtype .na_value is np .nan :
579
570
exp_dtype = np .float64
571
+ elif dtype .storage == "pyarrow" :
572
+ exp_dtype = "double[pyarrow]"
580
573
else :
581
574
exp_dtype = "Float64"
582
575
ser = pd .Series (["a" , "b" , "a" , pd .NA ], dtype = dtype )
@@ -586,10 +579,10 @@ def test_value_counts_with_normalize(dtype):
586
579
587
580
588
581
def test_value_counts_sort_false (dtype ):
589
- if getattr (dtype , "storage" , "" ) == "pyarrow" :
590
- exp_dtype = "int64[pyarrow]"
591
- elif getattr (dtype , "storage" , "" ) == "pyarrow_numpy" :
582
+ if dtype .na_value is np .nan :
592
583
exp_dtype = "int64"
584
+ elif dtype .storage == "pyarrow" :
585
+ exp_dtype = "int64[pyarrow]"
593
586
else :
594
587
exp_dtype = "Int64"
595
588
ser = pd .Series (["a" , "b" , "c" , "b" ], dtype = dtype )
@@ -621,7 +614,7 @@ def test_astype_from_float_dtype(float_dtype, dtype):
621
614
def test_to_numpy_returns_pdna_default (dtype ):
622
615
arr = pd .array (["a" , pd .NA , "b" ], dtype = dtype )
623
616
result = np .array (arr )
624
- expected = np .array (["a" , na_val ( dtype ) , "b" ], dtype = object )
617
+ expected = np .array (["a" , dtype . na_value , "b" ], dtype = object )
625
618
tm .assert_numpy_array_equal (result , expected )
626
619
627
620
@@ -661,7 +654,7 @@ def test_setitem_scalar_with_mask_validation(dtype):
661
654
mask = np .array ([False , True , False ])
662
655
663
656
ser [mask ] = None
664
- assert ser .array [1 ] is na_val ( ser .dtype )
657
+ assert ser .array [1 ] is ser .dtype . na_value
665
658
666
659
# for other non-string we should also raise an error
667
660
ser = pd .Series (["a" , "b" , "c" ], dtype = dtype )
0 commit comments