Skip to content

Commit d815947

Browse files
authored
TST: collect string arithmetic tests (#62507)
1 parent 62d06bf commit d815947

File tree

6 files changed

+147
-134
lines changed

6 files changed

+147
-134
lines changed
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
from pathlib import Path
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas.errors import Pandas4Warning
7+
8+
from pandas import (
9+
NA,
10+
ArrowDtype,
11+
Series,
12+
StringDtype,
13+
)
14+
import pandas._testing as tm
15+
16+
17+
def test_reversed_logical_ops(any_string_dtype):
    """
    Check ``|``, ``&`` and ``^`` with a bool Series on the left and a string
    Series on the right.

    Each result must equal the same operation applied after casting the
    string operand to bool. ``Pandas4Warning`` (matching the message below)
    is the expected warning for every dtype except ``object``; for ``object``
    ``None`` is passed as the expected warning instead.
    """
    # GH#60234
    expected_warning = Pandas4Warning
    if any_string_dtype == object:
        expected_warning = None

    bools = Series([True, False, False, True])
    strings = Series(["", "", "b", "c"], dtype=any_string_dtype)

    logical_ops = (
        lambda lhs, rhs: lhs | rhs,
        lambda lhs, rhs: lhs & rhs,
        lambda lhs, rhs: lhs ^ rhs,
    )
    for op in logical_ops:
        # Only the mixed bool/string operation sits under the warning check;
        # the reference value is computed outside of it.
        with tm.assert_produces_warning(
            expected_warning, match="operations between boolean dtype and"
        ):
            result = op(bools, strings)
        tm.assert_series_equal(result, op(bools, strings.astype(bool)))
39+
40+
41+
def test_pathlib_path_division(any_string_dtype, request):
    """
    Dividing a ``pathlib.Path`` by a string Series, and a string Series by a
    ``Path``, gives an object-dtype Series of joined paths, with the dtype's
    ``na_value`` in the position of the missing entry.

    For ``object`` dtype the test is marked xfail (see the reason text).
    """
    # GH#61940
    if any_string_dtype == object:
        request.applymarker(
            pytest.mark.xfail(
                reason="with NA present we go through _masked_arith_op which "
                "raises TypeError bc Path is not recognized by lib.is_scalar."
            )
        )

    base = Path("/Users/Irv/")
    names = Series(["A", "B", NA], dtype=any_string_dtype)

    # Path as the left operand. The division is evaluated before the expected
    # values are built, so a failure in the operation itself surfaces first.
    path_over_series = base / names
    tm.assert_series_equal(
        path_over_series,
        Series([base / "A", base / "B", names.dtype.na_value], dtype=object),
    )

    # Path as the right operand.
    series_over_path = names / base
    tm.assert_series_equal(
        series_over_path,
        Series(["A" / base, "B" / base, names.dtype.na_value], dtype=object),
    )
60+
61+
62+
def test_mixed_object_comparison(any_string_dtype):
    """
    ``==`` between a string Series and an object Series holding a non-string
    element compares element-wise to ``[False, True]``.

    The expected result dtype is plain ``bool``, except for NA-backed string
    dtypes: ``"boolean"`` for python storage and ``"bool[pyarrow]"`` for
    pyarrow storage.
    """
    # GH#60228
    strings = Series(["a", "b"], dtype=any_string_dtype)
    other = Series([1, "b"], dtype=object)

    result = strings == other

    expected = Series([False, True], dtype=bool)
    # object dtype has no storage/na_value to inspect, so it is excluded
    # before those attributes are read.
    if not (any_string_dtype == object) and any_string_dtype.na_value is NA:
        masked_result_dtype = {
            "python": "boolean",
            "pyarrow": "bool[pyarrow]",
        }.get(any_string_dtype.storage)
        if masked_result_dtype is not None:
            expected = expected.astype(masked_result_dtype)

    tm.assert_series_equal(result, expected)
79+
80+
81+
def test_pyarrow_numpy_string_invalid():
    """
    Comparing a bool Series with string Series: ``==`` is all-False, ``!=`` is
    all-True, and ``>`` raises ``TypeError`` ("Invalid comparison").

    Covered string operands are the NaN-backed ``StringDtype``,
    ``"string[pyarrow]"`` and ``ArrowDtype(pa.string())``. With the latter two
    on the left, the equality results are expected as ``"bool[pyarrow]"``.
    Skipped when pyarrow cannot be imported.
    """
    # GH#56008
    pa = pytest.importorskip("pyarrow")
    bools = Series([False, True])
    nan_strings = Series(["a", "b"], dtype=StringDtype(na_value=np.nan))
    all_false = Series(False, index=bools.index)
    all_true = Series(True, index=bools.index)

    tm.assert_series_equal(bools == nan_strings, all_false)
    tm.assert_series_equal(bools != nan_strings, all_true)
    with pytest.raises(TypeError, match="Invalid comparison"):
        bools > nan_strings

    # GH#59505
    for arrow_dtype in ("string[pyarrow]", ArrowDtype(pa.string())):
        arrow_strings = nan_strings.astype(arrow_dtype)

        # Here the string Series is the left operand of ==/!= ...
        tm.assert_series_equal(
            arrow_strings == bools, all_false.astype("bool[pyarrow]")
        )
        tm.assert_series_equal(
            arrow_strings != bools, all_true.astype("bool[pyarrow]")
        )

        # ... while the ordering comparison keeps the bool Series on the left.
        with pytest.raises(TypeError, match="Invalid comparison"):
            bools > arrow_strings

pandas/tests/extension/test_string.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -288,19 +288,3 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series):
288288
)
289289
with pytest.raises(ValueError, match=msg):
290290
arr.searchsorted(b)
291-
292-
293-
def test_mixed_object_comparison(dtype):
294-
# GH#60228
295-
ser = pd.Series(["a", "b"], dtype=dtype)
296-
297-
mixed = pd.Series([1, "b"], dtype=object)
298-
299-
result = ser == mixed
300-
expected = pd.Series([False, True], dtype=bool)
301-
if dtype.storage == "python" and dtype.na_value is pd.NA:
302-
expected = expected.astype("boolean")
303-
elif dtype.storage == "pyarrow" and dtype.na_value is pd.NA:
304-
expected = expected.astype("bool[pyarrow]")
305-
306-
tm.assert_series_equal(result, expected)

pandas/tests/io/formats/test_format.py

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1922,37 +1922,6 @@ def test_3d_array(self):
19221922
assert res[0] == " [[True, True], [False, False]]"
19231923
assert res[1] == " [[False, True], [True, False]]"
19241924

1925-
def test_2d_extension_type(self):
1926-
# GH 33770
1927-
1928-
# Define a stub extension type with just enough code to run Series.__repr__()
1929-
class DtypeStub(pd.api.extensions.ExtensionDtype):
1930-
@property
1931-
def type(self):
1932-
return np.ndarray
1933-
1934-
@property
1935-
def name(self):
1936-
return "DtypeStub"
1937-
1938-
class ExtTypeStub(pd.api.extensions.ExtensionArray):
1939-
def __len__(self) -> int:
1940-
return 2
1941-
1942-
def __getitem__(self, ix):
1943-
return [ix == 1, ix == 0]
1944-
1945-
@property
1946-
def dtype(self):
1947-
return DtypeStub()
1948-
1949-
series = Series(ExtTypeStub(), copy=False)
1950-
res = repr(series) # This line crashed before #33770 was fixed.
1951-
expected = "\n".join(
1952-
["0 [False True]", "1 [True False]", "dtype: DtypeStub"]
1953-
)
1954-
assert res == expected
1955-
19561925

19571926
def _three_digit_exp():
19581927
return f"{1.7e8:.4g}" == "1.7e+008"

pandas/tests/series/test_formats.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,37 @@ def test_different_null_objects(self):
280280
expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64"
281281
assert result == expected
282282

283+
def test_2d_extension_type(self):
    """
    ``repr`` of a Series built from a stub extension array whose items are
    two-element lists must equal the expected lines joined by newlines.
    """
    # GH#33770

    # Stub dtype/array pair defining only the members listed here, which is
    # just enough code to run Series.__repr__().
    class DtypeStub(pd.api.extensions.ExtensionDtype):
        @property
        def name(self):
            return "DtypeStub"

        @property
        def type(self):
            return np.ndarray

    class ExtTypeStub(pd.api.extensions.ExtensionArray):
        @property
        def dtype(self):
            return DtypeStub()

        def __getitem__(self, idx):
            return [idx == 1, idx == 0]

        def __len__(self) -> int:
            return 2

    # NOTE(review): the spacing inside these expected strings is copied as it
    # appears in this view; runs of spaces may have been collapsed during
    # extraction -- confirm against the repository before relying on them.
    expected_lines = ["0 [False True]", "1 [True False]", "dtype: DtypeStub"]

    ser = Series(ExtTypeStub(), copy=False)
    # The repr call below crashed before GH#33770 was fixed.
    assert repr(ser) == "\n".join(expected_lines)
313+
283314

284315
class TestCategoricalRepr:
285316
def test_categorical_repr_unicode(self):

pandas/tests/series/test_logical_ops.py

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,9 @@
55
import pytest
66

77
from pandas import (
8-
ArrowDtype,
98
DataFrame,
109
Index,
1110
Series,
12-
StringDtype,
1311
bdate_range,
1412
)
1513
import pandas._testing as tm
@@ -510,39 +508,3 @@ def test_int_dtype_different_index_not_bool(self):
510508

511509
result = ser1 ^ ser2
512510
tm.assert_series_equal(result, expected)
513-
514-
# TODO: this belongs in comparison tests
515-
def test_pyarrow_numpy_string_invalid(self):
516-
# GH#56008
517-
pa = pytest.importorskip("pyarrow")
518-
ser = Series([False, True])
519-
ser2 = Series(["a", "b"], dtype=StringDtype(na_value=np.nan))
520-
result = ser == ser2
521-
expected_eq = Series(False, index=ser.index)
522-
tm.assert_series_equal(result, expected_eq)
523-
524-
result = ser != ser2
525-
expected_ne = Series(True, index=ser.index)
526-
tm.assert_series_equal(result, expected_ne)
527-
528-
with pytest.raises(TypeError, match="Invalid comparison"):
529-
ser > ser2
530-
531-
# GH#59505
532-
ser3 = ser2.astype("string[pyarrow]")
533-
result3_eq = ser3 == ser
534-
tm.assert_series_equal(result3_eq, expected_eq.astype("bool[pyarrow]"))
535-
result3_ne = ser3 != ser
536-
tm.assert_series_equal(result3_ne, expected_ne.astype("bool[pyarrow]"))
537-
538-
with pytest.raises(TypeError, match="Invalid comparison"):
539-
ser > ser3
540-
541-
ser4 = ser2.astype(ArrowDtype(pa.string()))
542-
result4_eq = ser4 == ser
543-
tm.assert_series_equal(result4_eq, expected_eq.astype("bool[pyarrow]"))
544-
result4_ne = ser4 != ser
545-
tm.assert_series_equal(result4_ne, expected_ne.astype("bool[pyarrow]"))
546-
547-
with pytest.raises(TypeError, match="Invalid comparison"):
548-
ser > ser4

pandas/tests/strings/test_strings.py

Lines changed: 2 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,11 @@
22
datetime,
33
timedelta,
44
)
5-
from pathlib import Path
65

76
import numpy as np
87
import pytest
98

109
from pandas.compat import pa_version_under21p0
11-
from pandas.errors import Pandas4Warning
1210

1311
from pandas import (
1412
NA,
@@ -315,14 +313,14 @@ def test_isnumeric_unicode_missing(method, expected, any_string_dtype):
315313
tm.assert_series_equal(result, expected)
316314

317315

318-
def test_spilt_join_roundtrip(any_string_dtype):
316+
def test_split_join_roundtrip(any_string_dtype):
319317
ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype)
320318
result = ser.str.split("_").str.join("_")
321319
expected = ser.astype(object)
322320
tm.assert_series_equal(result, expected)
323321

324322

325-
def test_spilt_join_roundtrip_mixed_object():
323+
def test_split_join_roundtrip_mixed_object():
326324
ser = Series(
327325
["a_b", np.nan, "asdf_cas_asdf", True, datetime.today(), "foo", None, 1, 2.0]
328326
)
@@ -820,48 +818,3 @@ def test_decode_with_dtype_none():
820818
result = ser.str.decode("utf-8", dtype=None)
821819
expected = Series(["a", "b", "c"], dtype="str")
822820
tm.assert_series_equal(result, expected)
823-
824-
825-
def test_reversed_logical_ops(any_string_dtype):
826-
# GH#60234
827-
dtype = any_string_dtype
828-
warn = None if dtype == object else Pandas4Warning
829-
left = Series([True, False, False, True])
830-
right = Series(["", "", "b", "c"], dtype=dtype)
831-
832-
msg = "operations between boolean dtype and"
833-
with tm.assert_produces_warning(warn, match=msg):
834-
result = left | right
835-
expected = left | right.astype(bool)
836-
tm.assert_series_equal(result, expected)
837-
838-
with tm.assert_produces_warning(warn, match=msg):
839-
result = left & right
840-
expected = left & right.astype(bool)
841-
tm.assert_series_equal(result, expected)
842-
843-
with tm.assert_produces_warning(warn, match=msg):
844-
result = left ^ right
845-
expected = left ^ right.astype(bool)
846-
tm.assert_series_equal(result, expected)
847-
848-
849-
def test_pathlib_path_division(any_string_dtype, request):
850-
# GH#61940
851-
if any_string_dtype == object:
852-
mark = pytest.mark.xfail(
853-
reason="with NA present we go through _masked_arith_op which "
854-
"raises TypeError bc Path is not recognized by lib.is_scalar."
855-
)
856-
request.applymarker(mark)
857-
858-
item = Path("/Users/Irv/")
859-
ser = Series(["A", "B", NA], dtype=any_string_dtype)
860-
861-
result = item / ser
862-
expected = Series([item / "A", item / "B", ser.dtype.na_value], dtype=object)
863-
tm.assert_series_equal(result, expected)
864-
865-
result = ser / item
866-
expected = Series(["A" / item, "B" / item, ser.dtype.na_value], dtype=object)
867-
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)