diff --git a/pandas/tests/arithmetic/test_string.py b/pandas/tests/arithmetic/test_string.py new file mode 100644 index 0000000000000..3a038dc3a1fb0 --- /dev/null +++ b/pandas/tests/arithmetic/test_string.py @@ -0,0 +1,114 @@ +from pathlib import Path + +import numpy as np +import pytest + +from pandas.errors import Pandas4Warning + +from pandas import ( + NA, + ArrowDtype, + Series, + StringDtype, +) +import pandas._testing as tm + + +def test_reversed_logical_ops(any_string_dtype): + # GH#60234 + dtype = any_string_dtype + warn = None if dtype == object else Pandas4Warning + left = Series([True, False, False, True]) + right = Series(["", "", "b", "c"], dtype=dtype) + + msg = "operations between boolean dtype and" + with tm.assert_produces_warning(warn, match=msg): + result = left | right + expected = left | right.astype(bool) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(warn, match=msg): + result = left & right + expected = left & right.astype(bool) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(warn, match=msg): + result = left ^ right + expected = left ^ right.astype(bool) + tm.assert_series_equal(result, expected) + + +def test_pathlib_path_division(any_string_dtype, request): + # GH#61940 + if any_string_dtype == object: + mark = pytest.mark.xfail( + reason="with NA present we go through _masked_arith_op which " + "raises TypeError bc Path is not recognized by lib.is_scalar." + ) + request.applymarker(mark) + + item = Path("/Users/Irv/") + ser = Series(["A", "B", NA], dtype=any_string_dtype) + + result = item / ser + expected = Series([item / "A", item / "B", ser.dtype.na_value], dtype=object) + tm.assert_series_equal(result, expected) + + result = ser / item + expected = Series(["A" / item, "B" / item, ser.dtype.na_value], dtype=object) + tm.assert_series_equal(result, expected) + + +def test_mixed_object_comparison(any_string_dtype): + # GH#60228 + dtype = any_string_dtype + ser = Series(["a", "b"], dtype=dtype) + + mixed = Series([1, "b"], dtype=object) + + result = ser == mixed + expected = Series([False, True], dtype=bool) + if dtype == object: + pass + elif dtype.storage == "python" and dtype.na_value is NA: + expected = expected.astype("boolean") + elif dtype.storage == "pyarrow" and dtype.na_value is NA: + expected = expected.astype("bool[pyarrow]") + + tm.assert_series_equal(result, expected) + + +def test_pyarrow_numpy_string_invalid(): + # GH#56008 + pa = pytest.importorskip("pyarrow") + ser = Series([False, True]) + ser2 = Series(["a", "b"], dtype=StringDtype(na_value=np.nan)) + result = ser == ser2 + expected_eq = Series(False, index=ser.index) + tm.assert_series_equal(result, expected_eq) + + result = ser != ser2 + expected_ne = Series(True, index=ser.index) + tm.assert_series_equal(result, expected_ne) + + with pytest.raises(TypeError, match="Invalid comparison"): + ser > ser2 + + # GH#59505 + ser3 = ser2.astype("string[pyarrow]") + result3_eq = ser3 == ser + tm.assert_series_equal(result3_eq, expected_eq.astype("bool[pyarrow]")) + result3_ne = ser3 != ser + tm.assert_series_equal(result3_ne, expected_ne.astype("bool[pyarrow]")) + + with pytest.raises(TypeError, match="Invalid comparison"): + ser > ser3 + + ser4 = ser2.astype(ArrowDtype(pa.string())) + result4_eq = ser4 == ser + tm.assert_series_equal(result4_eq, expected_eq.astype("bool[pyarrow]")) + result4_ne = ser4 != ser + tm.assert_series_equal(result4_ne, expected_ne.astype("bool[pyarrow]")) + + with pytest.raises(TypeError, match="Invalid comparison"): + ser > ser4 diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 3793f06e4cca9..e373ff12c4086 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -288,19 +288,3 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series): ) with pytest.raises(ValueError, match=msg): arr.searchsorted(b) - - -def test_mixed_object_comparison(dtype): - # GH#60228 - ser = pd.Series(["a", "b"], dtype=dtype) - - mixed = pd.Series([1, "b"], dtype=object) - - result = ser == mixed - expected = pd.Series([False, True], dtype=bool) - if dtype.storage == "python" and dtype.na_value is pd.NA: - expected = expected.astype("boolean") - elif dtype.storage == "pyarrow" and dtype.na_value is pd.NA: - expected = expected.astype("bool[pyarrow]") - - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index a485578b139dc..d2da38be865a0 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1922,37 +1922,6 @@ def test_3d_array(self): assert res[0] == " [[True, True], [False, False]]" assert res[1] == " [[False, True], [True, False]]" - def test_2d_extension_type(self): - # GH 33770 - - # Define a stub extension type with just enough code to run Series.__repr__() - class DtypeStub(pd.api.extensions.ExtensionDtype): - @property - def type(self): - return np.ndarray - - @property - def name(self): - return "DtypeStub" - - class ExtTypeStub(pd.api.extensions.ExtensionArray): - def __len__(self) -> int: - return 2 - - def __getitem__(self, ix): - return [ix == 1, ix == 0] - - @property - def dtype(self): - return DtypeStub() - - series = Series(ExtTypeStub(), copy=False) - res = repr(series) # This line crashed before #33770 was fixed. - expected = "\n".join( - ["0 [False True]", "1 [True False]", "dtype: DtypeStub"] - ) - assert res == expected - def _three_digit_exp(): return f"{1.7e8:.4g}" == "1.7e+008" diff --git a/pandas/tests/series/test_formats.py b/pandas/tests/series/test_formats.py index 4242c57e05083..f38a12170c918 100644 --- a/pandas/tests/series/test_formats.py +++ b/pandas/tests/series/test_formats.py @@ -280,6 +280,37 @@ def test_different_null_objects(self): expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64" assert result == expected + def test_2d_extension_type(self): + # GH#33770 + + # Define a stub extension type with just enough code to run Series.__repr__() + class DtypeStub(pd.api.extensions.ExtensionDtype): + @property + def type(self): + return np.ndarray + + @property + def name(self): + return "DtypeStub" + + class ExtTypeStub(pd.api.extensions.ExtensionArray): + def __len__(self) -> int: + return 2 + + def __getitem__(self, ix): + return [ix == 1, ix == 0] + + @property + def dtype(self): + return DtypeStub() + + series = Series(ExtTypeStub(), copy=False) + res = repr(series) # This line crashed before GH#33770 was fixed. + expected = "\n".join( + ["0 [False True]", "1 [True False]", "dtype: DtypeStub"] + ) + assert res == expected + class TestCategoricalRepr: def test_categorical_repr_unicode(self): diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 8f63819b09238..2800bc7580644 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -5,11 +5,9 @@ import pytest from pandas import ( - ArrowDtype, DataFrame, Index, Series, - StringDtype, bdate_range, ) import pandas._testing as tm @@ -510,39 +508,3 @@ def test_int_dtype_different_index_not_bool(self): result = ser1 ^ ser2 tm.assert_series_equal(result, expected) - - # TODO: this belongs in comparison tests - def test_pyarrow_numpy_string_invalid(self): - # GH#56008 - pa = pytest.importorskip("pyarrow") - ser = Series([False, True]) - ser2 = Series(["a", "b"], dtype=StringDtype(na_value=np.nan)) - result = ser == ser2 - expected_eq = Series(False, index=ser.index) - tm.assert_series_equal(result, expected_eq) - - result = ser != ser2 - expected_ne = Series(True, index=ser.index) - tm.assert_series_equal(result, expected_ne) - - with pytest.raises(TypeError, match="Invalid comparison"): - ser > ser2 - - # GH#59505 - ser3 = ser2.astype("string[pyarrow]") - result3_eq = ser3 == ser - tm.assert_series_equal(result3_eq, expected_eq.astype("bool[pyarrow]")) - result3_ne = ser3 != ser - tm.assert_series_equal(result3_ne, expected_ne.astype("bool[pyarrow]")) - - with pytest.raises(TypeError, match="Invalid comparison"): - ser > ser3 - - ser4 = ser2.astype(ArrowDtype(pa.string())) - result4_eq = ser4 == ser - tm.assert_series_equal(result4_eq, expected_eq.astype("bool[pyarrow]")) - result4_ne = ser4 != ser - tm.assert_series_equal(result4_ne, expected_ne.astype("bool[pyarrow]")) - - with pytest.raises(TypeError, match="Invalid comparison"): - ser > ser4 diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index fc0dd23334706..283c7ad50c814 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -2,13 +2,11 @@ datetime, timedelta, ) -from pathlib import Path import numpy as np import pytest from pandas.compat import pa_version_under21p0 -from pandas.errors import Pandas4Warning from pandas import ( NA, @@ -315,14 +313,14 @@ def test_isnumeric_unicode_missing(method, expected, any_string_dtype): tm.assert_series_equal(result, expected) -def test_spilt_join_roundtrip(any_string_dtype): +def test_split_join_roundtrip(any_string_dtype): ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype) result = ser.str.split("_").str.join("_") expected = ser.astype(object) tm.assert_series_equal(result, expected) -def test_spilt_join_roundtrip_mixed_object(): +def test_split_join_roundtrip_mixed_object(): ser = Series( ["a_b", np.nan, "asdf_cas_asdf", True, datetime.today(), "foo", None, 1, 2.0] ) @@ -820,48 +818,3 @@ def test_decode_with_dtype_none(): result = ser.str.decode("utf-8", dtype=None) expected = Series(["a", "b", "c"], dtype="str") tm.assert_series_equal(result, expected) - - -def test_reversed_logical_ops(any_string_dtype): - # GH#60234 - dtype = any_string_dtype - warn = None if dtype == object else Pandas4Warning - left = Series([True, False, False, True]) - right = Series(["", "", "b", "c"], dtype=dtype) - - msg = "operations between boolean dtype and" - with tm.assert_produces_warning(warn, match=msg): - result = left | right - expected = left | right.astype(bool) - tm.assert_series_equal(result, expected) - - with tm.assert_produces_warning(warn, match=msg): - result = left & right - expected = left & right.astype(bool) - tm.assert_series_equal(result, expected) - - with tm.assert_produces_warning(warn, match=msg): - result = left ^ right - expected = left ^ right.astype(bool) - tm.assert_series_equal(result, expected) - - -def test_pathlib_path_division(any_string_dtype, request): - # GH#61940 - if any_string_dtype == object: - mark = pytest.mark.xfail( - reason="with NA present we go through _masked_arith_op which " - "raises TypeError bc Path is not recognized by lib.is_scalar." - ) - request.applymarker(mark) - - item = Path("/Users/Irv/") - ser = Series(["A", "B", NA], dtype=any_string_dtype) - - result = item / ser - expected = Series([item / "A", item / "B", ser.dtype.na_value], dtype=object) - tm.assert_series_equal(result, expected) - - result = ser / item - expected = Series(["A" / item, "B" / item, ser.dtype.na_value], dtype=object) - tm.assert_series_equal(result, expected)