Skip to content

Commit a12b345

Browse files
Fix a bunch of tests with future.infer_string
1 parent 1ea64cf commit a12b345

File tree

24 files changed

+78
-37
lines changed

24 files changed

+78
-37
lines changed

pandas/_testing/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
import numpy as np
1414

15+
from pandas._config import using_string_dtype
1516
from pandas._config.localization import (
1617
can_set_locale,
1718
get_locales,
@@ -106,7 +107,10 @@
106107
ALL_FLOAT_DTYPES: list[Dtype] = [*FLOAT_NUMPY_DTYPES, *FLOAT_EA_DTYPES]
107108

108109
COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
109-
STRING_DTYPES: list[Dtype] = [str, "str", "U"]
110+
if using_string_dtype():
111+
STRING_DTYPES: list[Dtype] = [str, "U"]
112+
else:
113+
STRING_DTYPES: list[Dtype] = [str, "str", "U"]
110114
COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]
111115

112116
DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]

pandas/tests/apply/test_numba.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def test_numba_unsupported_dtypes(apply_axis):
110110

111111
with pytest.raises(
112112
ValueError,
113-
match="Column b must have a numeric dtype. Found 'object|string' instead",
113+
match="Column b must have a numeric dtype. Found 'object|str' instead",
114114
):
115115
df.apply(f, engine="numba", axis=apply_axis)
116116

pandas/tests/arrays/boolean/test_astype.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pandas._testing as tm
66

77

8-
def test_astype():
8+
def test_astype(using_infer_string):
99
# with missing values
1010
arr = pd.array([True, False, None], dtype="boolean")
1111

@@ -20,8 +20,14 @@ def test_astype():
2020
tm.assert_numpy_array_equal(result, expected)
2121

2222
result = arr.astype("str")
23-
expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
24-
tm.assert_numpy_array_equal(result, expected)
23+
if using_infer_string:
24+
expected = pd.array(
25+
["True", "False", None], dtype=pd.StringDtype(na_value=np.nan)
26+
)
27+
tm.assert_extension_array_equal(result, expected)
28+
else:
29+
expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
30+
tm.assert_numpy_array_equal(result, expected)
2531

2632
# no missing values
2733
arr = pd.array([True, False, True], dtype="boolean")

pandas/tests/arrays/categorical/test_astype.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def test_astype(self, ordered):
8888
expected = np.array(cat)
8989
tm.assert_numpy_array_equal(result, expected)
9090

91-
msg = r"Cannot cast object|string dtype to float64"
91+
msg = r"Cannot cast object|str dtype to float64"
9292
with pytest.raises(ValueError, match=msg):
9393
cat.astype(float)
9494

pandas/tests/arrays/categorical/test_repr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def test_print(self, using_infer_string):
2222
if using_infer_string:
2323
expected = [
2424
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
25-
"Categories (3, string): [a < b < c]",
25+
"Categories (3, str): [a < b < c]",
2626
]
2727
else:
2828
expected = [

pandas/tests/arrays/floating/test_astype.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,21 @@ def test_astype_to_integer_array():
6363
tm.assert_extension_array_equal(result, expected)
6464

6565

66-
def test_astype_str():
66+
def test_astype_str(using_infer_string):
6767
a = pd.array([0.1, 0.2, None], dtype="Float64")
68-
expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
6968

70-
tm.assert_numpy_array_equal(a.astype(str), expected)
71-
tm.assert_numpy_array_equal(a.astype("str"), expected)
69+
if using_infer_string:
70+
expected = pd.array(["0.1", "0.2", None], dtype=pd.StringDtype(na_value=np.nan))
71+
tm.assert_extension_array_equal(a.astype("str"), expected)
72+
73+
# TODO(infer_string) this should also be a string array like above
74+
expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
75+
tm.assert_numpy_array_equal(a.astype(str), expected)
76+
else:
77+
expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
78+
79+
tm.assert_numpy_array_equal(a.astype(str), expected)
80+
tm.assert_numpy_array_equal(a.astype("str"), expected)
7281

7382

7483
def test_astype_copy():

pandas/tests/arrays/integer/test_dtypes.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -276,12 +276,21 @@ def test_to_numpy_na_raises(dtype):
276276
a.to_numpy(dtype=dtype)
277277

278278

279-
def test_astype_str():
279+
def test_astype_str(using_infer_string):
280280
a = pd.array([1, 2, None], dtype="Int64")
281-
expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
282281

283-
tm.assert_numpy_array_equal(a.astype(str), expected)
284-
tm.assert_numpy_array_equal(a.astype("str"), expected)
282+
if using_infer_string:
283+
expected = pd.array(["1", "2", None], dtype=pd.StringDtype(na_value=np.nan))
284+
tm.assert_extension_array_equal(a.astype("str"), expected)
285+
286+
# TODO(infer_string) this should also be a string array like above
287+
expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
288+
tm.assert_numpy_array_equal(a.astype(str), expected)
289+
else:
290+
expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
291+
292+
tm.assert_numpy_array_equal(a.astype(str), expected)
293+
tm.assert_numpy_array_equal(a.astype("str"), expected)
285294

286295

287296
def test_astype_boolean():

pandas/tests/arrays/test_datetimelike.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,9 @@ def test_searchsorted(self):
297297
assert result == 10
298298

299299
@pytest.mark.parametrize("box", [None, "index", "series"])
300-
def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
300+
def test_searchsorted_castable_strings(
301+
self, arr1d, box, string_storage, using_infer_string
302+
):
301303
arr = arr1d
302304
if box is None:
303305
pass
@@ -333,7 +335,8 @@ def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
333335
TypeError,
334336
match=re.escape(
335337
f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
336-
"or array of those. Got string array instead."
338+
"or array of those. Got "
339+
f"{'str' if using_infer_string else 'string'} array instead."
337340
),
338341
):
339342
arr.searchsorted([str(arr[1]), "baz"])

pandas/tests/dtypes/test_dtypes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ def test_str_vs_repr(self, ordered, using_infer_string):
10611061
c1 = CategoricalDtype(["a", "b"], ordered=ordered)
10621062
assert str(c1) == "category"
10631063
# Py2 will have unicode prefixes
1064-
dtype = "string" if using_infer_string else "object"
1064+
dtype = "str" if using_infer_string else "object"
10651065
pat = (
10661066
r"CategoricalDtype\(categories=\[.*\], ordered={ordered}, "
10671067
rf"categories_dtype={dtype}\)"

pandas/tests/frame/methods/test_astype.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def test_astype_str_float(self):
186186
tm.assert_frame_equal(result, expected)
187187

188188
@pytest.mark.parametrize("dtype_class", [dict, Series])
189-
def test_astype_dict_like(self, dtype_class):
189+
def test_astype_dict_like(self, dtype_class, using_infer_string):
190190
# GH7271 & GH16717
191191
a = Series(date_range("2010-01-04", periods=5))
192192
b = Series(range(5))
@@ -201,7 +201,10 @@ def test_astype_dict_like(self, dtype_class):
201201
expected = DataFrame(
202202
{
203203
"a": a,
204-
"b": Series(["0", "1", "2", "3", "4"], dtype="object"),
204+
"b": Series(
205+
["0", "1", "2", "3", "4"],
206+
dtype="str" if using_infer_string else "object",
207+
),
205208
"c": c,
206209
"d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float32"),
207210
}
@@ -262,9 +265,9 @@ def test_astype_duplicate_col(self):
262265
a2 = Series([0, 1, 2, 3, 4], name="a")
263266
df = concat([a1, b, a2], axis=1)
264267

265-
result = df.astype(str)
268+
result = df.astype("str")
266269
a1_str = Series(["1", "2", "3", "4", "5"], dtype="str", name="a")
267-
b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype=str, name="b")
270+
b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype="str", name="b")
268271
a2_str = Series(["0", "1", "2", "3", "4"], dtype="str", name="a")
269272
expected = concat([a1_str, b_str, a2_str], axis=1)
270273
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)