Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions pandas/tests/arithmetic/test_string.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from pathlib import Path

import numpy as np
import pytest

from pandas.errors import Pandas4Warning

from pandas import (
NA,
ArrowDtype,
Series,
StringDtype,
)
import pandas._testing as tm


def test_reversed_logical_ops(any_string_dtype):
    """Check ``|``, ``&`` and ``^`` between a bool Series and a string Series.

    For object dtype no warning is expected; for every other string dtype
    each operation is expected to emit ``Pandas4Warning``.  In all cases the
    result must equal the same operation applied to the string operand cast
    to bool.
    """
    # GH#60234
    is_object = any_string_dtype == object
    expected_warning = None if is_object else Pandas4Warning

    bools = Series([True, False, False, True])
    strings = Series(["", "", "b", "c"], dtype=any_string_dtype)

    msg = "operations between boolean dtype and"
    logical_ops = (
        lambda lhs, rhs: lhs | rhs,
        lambda lhs, rhs: lhs & rhs,
        lambda lhs, rhs: lhs ^ rhs,
    )
    for op in logical_ops:
        with tm.assert_produces_warning(expected_warning, match=msg):
            result = op(bools, strings)
        # The reference value is computed outside the warning check.
        expected = op(bools, strings.astype(bool))
        tm.assert_series_equal(result, expected)


def test_pathlib_path_division(any_string_dtype, request):
    """Divide a ``pathlib.Path`` by a string Series, and the Series by the Path.

    Both directions are compared against an object-dtype Series of the
    element-wise results, with the Series dtype's ``na_value`` in the missing
    slot.  The object-dtype case is marked as an expected failure.
    """
    # GH#61940
    if any_string_dtype == object:
        request.applymarker(
            pytest.mark.xfail(
                reason="with NA present we go through _masked_arith_op which "
                "raises TypeError bc Path is not recognized by lib.is_scalar."
            )
        )

    base = Path("/Users/Irv/")
    strings = Series(["A", "B", NA], dtype=any_string_dtype)

    # Path on the left-hand side.
    tm.assert_series_equal(
        base / strings,
        Series([base / "A", base / "B", strings.dtype.na_value], dtype=object),
    )

    # Path on the right-hand side.
    tm.assert_series_equal(
        strings / base,
        Series(["A" / base, "B" / base, strings.dtype.na_value], dtype=object),
    )


def test_mixed_object_comparison(any_string_dtype):
    """Compare a string Series element-wise with a mixed object Series.

    The expected result is ``[False, True]``.  It stays plain ``bool`` for
    object dtype and for dtypes whose ``na_value`` is not ``NA``; for
    ``NA``-backed dtypes it is cast to ``"boolean"`` (python storage) or
    ``"bool[pyarrow]"`` (pyarrow storage).
    """
    # GH#60228
    strings = Series(["a", "b"], dtype=any_string_dtype)
    mixed = Series([1, "b"], dtype=object)

    expected = Series([False, True], dtype=bool)
    is_object = any_string_dtype == object
    if not is_object and any_string_dtype.na_value is NA:
        nullable_bool = {"python": "boolean", "pyarrow": "bool[pyarrow]"}
        target = nullable_bool.get(any_string_dtype.storage)
        if target is not None:
            expected = expected.astype(target)

    tm.assert_series_equal(strings == mixed, expected)


def test_pyarrow_numpy_string_invalid():
    """Compare a bool Series with string Series of several string dtypes.

    ``==`` is expected to be all-False and ``!=`` all-True, while ``>`` must
    raise ``TypeError`` matching "Invalid comparison".  The test is skipped
    when pyarrow cannot be imported.
    """
    # GH#56008
    pa = pytest.importorskip("pyarrow")
    bools = Series([False, True])
    nan_strings = Series(["a", "b"], dtype=StringDtype(na_value=np.nan))

    all_false = Series(False, index=bools.index)
    all_true = Series(True, index=bools.index)

    tm.assert_series_equal(bools == nan_strings, all_false)
    tm.assert_series_equal(bools != nan_strings, all_true)
    with pytest.raises(TypeError, match="Invalid comparison"):
        bools > nan_strings

    # GH#59505
    for arrow_dtype in ("string[pyarrow]", ArrowDtype(pa.string())):
        strings = nan_strings.astype(arrow_dtype)

        # With the string Series on the left the result is pyarrow-backed bool.
        tm.assert_series_equal(strings == bools, all_false.astype("bool[pyarrow]"))
        tm.assert_series_equal(strings != bools, all_true.astype("bool[pyarrow]"))

        with pytest.raises(TypeError, match="Invalid comparison"):
            bools > strings
16 changes: 0 additions & 16 deletions pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,19 +288,3 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series):
)
with pytest.raises(ValueError, match=msg):
arr.searchsorted(b)


def test_mixed_object_comparison(dtype):
    """Compare a string Series element-wise with a mixed object Series.

    The expected result is ``[False, True]``: plain ``bool`` unless the
    dtype's ``na_value`` is ``pd.NA``, in which case it is cast to
    ``"boolean"`` (python storage) or ``"bool[pyarrow]"`` (pyarrow storage).
    """
    # GH#60228
    strings = pd.Series(["a", "b"], dtype=dtype)
    mixed = pd.Series([1, "b"], dtype=object)

    expected = pd.Series([False, True], dtype=bool)
    if dtype.na_value is pd.NA:
        nullable_bool = {"python": "boolean", "pyarrow": "bool[pyarrow]"}
        target = nullable_bool.get(dtype.storage)
        if target is not None:
            expected = expected.astype(target)

    tm.assert_series_equal(strings == mixed, expected)
31 changes: 0 additions & 31 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1922,37 +1922,6 @@ def test_3d_array(self):
assert res[0] == " [[True, True], [False, False]]"
assert res[1] == " [[False, True], [True, False]]"

def test_2d_extension_type(self):
    """Check the repr of a Series backed by a stub extension array."""
    # GH#33770

    # Stub dtype with just enough code to run Series.__repr__().
    class DtypeStub(pd.api.extensions.ExtensionDtype):
        @property
        def name(self):
            return "DtypeStub"

        @property
        def type(self):
            return np.ndarray

    # Stub array of length 2 whose items are two-element lists.
    class ExtTypeStub(pd.api.extensions.ExtensionArray):
        @property
        def dtype(self):
            return DtypeStub()

        def __getitem__(self, ix):
            return [ix == 1, ix == 0]

        def __len__(self) -> int:
            return 2

    stub_series = Series(ExtTypeStub(), copy=False)
    # Calling repr() is the step that crashed before GH#33770 was fixed.
    rendered = repr(stub_series)

    # NOTE(review): the column padding inside these literals may have been
    # collapsed in this view - confirm against the real repr output.
    expected_lines = ("0 [False True]", "1 [True False]", "dtype: DtypeStub")
    assert rendered == "\n".join(expected_lines)


def _three_digit_exp():
    """Return whether ``1.7e8`` formatted with ``.4g`` gives ``"1.7e+008"``.

    That is, whether float formatting here uses a three-digit exponent.
    """
    return format(1.7e8, ".4g") == "1.7e+008"
Expand Down
31 changes: 31 additions & 0 deletions pandas/tests/series/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,37 @@ def test_different_null_objects(self):
expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64"
assert result == expected

def test_2d_extension_type(self):
    """Check the repr of a Series backed by a stub extension array."""
    # GH#33770

    # Stub dtype with just enough code to run Series.__repr__().
    class DtypeStub(pd.api.extensions.ExtensionDtype):
        @property
        def name(self):
            return "DtypeStub"

        @property
        def type(self):
            return np.ndarray

    # Stub array of length 2 whose items are two-element lists.
    class ExtTypeStub(pd.api.extensions.ExtensionArray):
        @property
        def dtype(self):
            return DtypeStub()

        def __getitem__(self, ix):
            return [ix == 1, ix == 0]

        def __len__(self) -> int:
            return 2

    stub_series = Series(ExtTypeStub(), copy=False)
    # Calling repr() is the step that crashed before GH#33770 was fixed.
    rendered = repr(stub_series)

    # NOTE(review): the column padding inside these literals may have been
    # collapsed in this view - confirm against the real repr output.
    expected_lines = ("0 [False True]", "1 [True False]", "dtype: DtypeStub")
    assert rendered == "\n".join(expected_lines)


class TestCategoricalRepr:
def test_categorical_repr_unicode(self):
Expand Down
38 changes: 0 additions & 38 deletions pandas/tests/series/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
import pytest

from pandas import (
ArrowDtype,
DataFrame,
Index,
Series,
StringDtype,
bdate_range,
)
import pandas._testing as tm
Expand Down Expand Up @@ -510,39 +508,3 @@ def test_int_dtype_different_index_not_bool(self):

result = ser1 ^ ser2
tm.assert_series_equal(result, expected)

# TODO: this belongs in comparison tests
def test_pyarrow_numpy_string_invalid(self):
    """Compare a bool Series with string Series of several string dtypes.

    ``==`` is expected to be all-False and ``!=`` all-True, while ``>`` must
    raise ``TypeError`` matching "Invalid comparison".  The test is skipped
    when pyarrow cannot be imported.
    """
    # GH#56008
    pa = pytest.importorskip("pyarrow")
    bools = Series([False, True])
    nan_strings = Series(["a", "b"], dtype=StringDtype(na_value=np.nan))

    all_false = Series(False, index=bools.index)
    all_true = Series(True, index=bools.index)

    tm.assert_series_equal(bools == nan_strings, all_false)
    tm.assert_series_equal(bools != nan_strings, all_true)
    with pytest.raises(TypeError, match="Invalid comparison"):
        bools > nan_strings

    # GH#59505
    for arrow_dtype in ("string[pyarrow]", ArrowDtype(pa.string())):
        strings = nan_strings.astype(arrow_dtype)

        # With the string Series on the left the result is pyarrow-backed bool.
        tm.assert_series_equal(strings == bools, all_false.astype("bool[pyarrow]"))
        tm.assert_series_equal(strings != bools, all_true.astype("bool[pyarrow]"))

        with pytest.raises(TypeError, match="Invalid comparison"):
            bools > strings
51 changes: 2 additions & 49 deletions pandas/tests/strings/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
datetime,
timedelta,
)
from pathlib import Path

import numpy as np
import pytest

from pandas.compat import pa_version_under21p0
from pandas.errors import Pandas4Warning

from pandas import (
NA,
Expand Down Expand Up @@ -315,14 +313,14 @@ def test_isnumeric_unicode_missing(method, expected, any_string_dtype):
tm.assert_series_equal(result, expected)


def test_spilt_join_roundtrip(any_string_dtype):
def test_split_join_roundtrip(any_string_dtype):
    """Split each string on ``"_"`` and join the pieces back with ``"_"``.

    The round-tripped Series is compared against the input cast to object
    dtype.
    """
    original = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype)
    pieces = original.str.split("_")
    rejoined = pieces.str.join("_")
    tm.assert_series_equal(rejoined, original.astype(object))


def test_spilt_join_roundtrip_mixed_object():
def test_split_join_roundtrip_mixed_object():
ser = Series(
["a_b", np.nan, "asdf_cas_asdf", True, datetime.today(), "foo", None, 1, 2.0]
)
Expand Down Expand Up @@ -820,48 +818,3 @@ def test_decode_with_dtype_none():
result = ser.str.decode("utf-8", dtype=None)
expected = Series(["a", "b", "c"], dtype="str")
tm.assert_series_equal(result, expected)


def test_reversed_logical_ops(any_string_dtype):
    """Check ``|``, ``&`` and ``^`` between a bool Series and a string Series.

    For object dtype no warning is expected; for every other string dtype
    each operation is expected to emit ``Pandas4Warning``.  In all cases the
    result must equal the same operation applied to the string operand cast
    to bool.
    """
    # GH#60234
    is_object = any_string_dtype == object
    expected_warning = None if is_object else Pandas4Warning

    bools = Series([True, False, False, True])
    strings = Series(["", "", "b", "c"], dtype=any_string_dtype)

    msg = "operations between boolean dtype and"
    logical_ops = (
        lambda lhs, rhs: lhs | rhs,
        lambda lhs, rhs: lhs & rhs,
        lambda lhs, rhs: lhs ^ rhs,
    )
    for op in logical_ops:
        with tm.assert_produces_warning(expected_warning, match=msg):
            result = op(bools, strings)
        # The reference value is computed outside the warning check.
        expected = op(bools, strings.astype(bool))
        tm.assert_series_equal(result, expected)


def test_pathlib_path_division(any_string_dtype, request):
    """Divide a ``pathlib.Path`` by a string Series, and the Series by the Path.

    Both directions are compared against an object-dtype Series of the
    element-wise results, with the Series dtype's ``na_value`` in the missing
    slot.  The object-dtype case is marked as an expected failure.
    """
    # GH#61940
    if any_string_dtype == object:
        request.applymarker(
            pytest.mark.xfail(
                reason="with NA present we go through _masked_arith_op which "
                "raises TypeError bc Path is not recognized by lib.is_scalar."
            )
        )

    base = Path("/Users/Irv/")
    strings = Series(["A", "B", NA], dtype=any_string_dtype)

    # Path on the left-hand side.
    tm.assert_series_equal(
        base / strings,
        Series([base / "A", base / "B", strings.dtype.na_value], dtype=object),
    )

    # Path on the right-hand side.
    tm.assert_series_equal(
        strings / base,
        Series(["A" / base, "B" / base, strings.dtype.na_value], dtype=object),
    )
Loading