Skip to content
Merged
13 changes: 4 additions & 9 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -444,13 +440,12 @@ def test_constructor_str_unknown(self):
with pytest.raises(ValueError, match="Unknown dtype"):
Categorical([1, 2], dtype="foo")

@pytest.mark.xfail(
using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings"
)
def test_constructor_np_strs(self):
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
assert all(isinstance(x, np.str_) for x in cat.categories)
# We can't pass all-strings because the constructor would cast
# those to StringDtype post-PDEP14
cat = Categorical(["1", "0", "1", 2], [np.str_("0"), np.str_("1"), 2])
assert all(isinstance(x, (np.str_, int)) for x in cat.categories)

def test_constructor_from_categorical_with_dtype(self):
dtype = CategoricalDtype(["a", "b", "c"], ordered=True)
Expand Down
27 changes: 19 additions & 8 deletions pandas/tests/arrays/categorical/test_repr.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
Categorical,
Expand Down Expand Up @@ -77,17 +74,19 @@ def test_print_none_width(self):
with option_context("display.width", None):
assert exp == repr(a)

@pytest.mark.skipif(
using_string_dtype(),
reason="Change once infer_string is set to True by default",
)
def test_unicode_print(self):
def test_unicode_print(self, using_infer_string):
c = Categorical(["aaaaa", "bb", "cccc"] * 20)
expected = """\
['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc']
Length: 60
Categories (3, object): ['aaaaa', 'bb', 'cccc']"""

if using_infer_string:
expected = expected.replace(
"(3, object): ['aaaaa', 'bb', 'cccc']",
"(3, str): [aaaaa, bb, cccc]",
)

assert repr(c) == expected

c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
Expand All @@ -96,6 +95,12 @@ def test_unicode_print(self):
Length: 60
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501

if using_infer_string:
expected = expected.replace(
"(3, object): ['ああああ', 'いいいいい', 'ううううううう']",
"(3, str): [ああああ, いいいいい, ううううううう]",
)

assert repr(c) == expected

# unicode option should not affect to Categorical, as it doesn't care
Expand All @@ -106,6 +111,12 @@ def test_unicode_print(self):
Length: 60
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501

if using_infer_string:
expected = expected.replace(
"(3, object): ['ああああ', 'いいいいい', 'ううううううう']",
"(3, str): [ああああ, いいいいい, ううううううう]",
)

assert repr(c) == expected

def test_categorical_repr(self):
Expand Down
12 changes: 2 additions & 10 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -745,10 +743,7 @@ def test_astype_tz_object_conversion(self, tz):
result = result.astype({"tz": "datetime64[ns, Europe/London]"})
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) GH#60639")
def test_astype_dt64_to_string(
self, frame_or_series, tz_naive_fixture, using_infer_string
):
def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture):
# GH#41409
tz = tz_naive_fixture

Expand All @@ -766,10 +761,7 @@ def test_astype_dt64_to_string(
item = result.iloc[0]
if frame_or_series is DataFrame:
item = item.iloc[0]
if using_infer_string:
assert item is np.nan
else:
assert item is pd.NA
assert item is pd.NA

# For non-NA values, we should match what we get for non-EA str
alt = obj.astype(str)
Expand Down
12 changes: 5 additions & 7 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -76,10 +74,7 @@ def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper):


class TestGroupBy:
# TODO(infer_string) resample sum introduces 0's
# https://github.com/pandas-dev/pandas/issues/60229
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_groupby_with_timegrouper(self):
def test_groupby_with_timegrouper(self, using_infer_string):
# GH 4161
# TimeGrouper requires a sorted index
# also verifies that the resultant index has the correct name
Expand Down Expand Up @@ -116,8 +111,11 @@ def test_groupby_with_timegrouper(self):
{"Buyer": 0, "Quantity": 0},
index=exp_dti,
)
# Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl"
# Cast to object/str to avoid implicit cast when setting
# entry to "CarlCarlCarl"
expected = expected.astype({"Buyer": object})
if using_infer_string:
expected = expected.astype({"Buyer": "str"})
expected.iloc[0, 0] = "CarlCarlCarl"
expected.iloc[6, 0] = "CarlCarl"
expected.iloc[18, 0] = "Joe"
Expand Down
17 changes: 11 additions & 6 deletions pandas/tests/indexes/base_class/test_formats.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype
import pandas._config.config as cf

from pandas import Index
Expand All @@ -16,7 +15,6 @@ def test_repr_is_valid_construction_code(self):
res = eval(repr(idx))
tm.assert_index_equal(res, idx)

@pytest.mark.xfail(using_string_dtype(), reason="repr different")
@pytest.mark.parametrize(
"index,expected",
[
Expand Down Expand Up @@ -77,11 +75,13 @@ def test_repr_is_valid_construction_code(self):
),
],
)
def test_string_index_repr(self, index, expected):
def test_string_index_repr(self, index, expected, using_infer_string):
result = repr(index)
if using_infer_string:
expected = expected.replace("dtype='object'", "dtype='str'")

assert result == expected

@pytest.mark.xfail(using_string_dtype(), reason="repr different")
@pytest.mark.parametrize(
"index,expected",
[
Expand Down Expand Up @@ -121,11 +121,16 @@ def test_string_index_repr(self, index, expected):
),
],
)
def test_string_index_repr_with_unicode_option(self, index, expected):
def test_string_index_repr_with_unicode_option(
self, index, expected, using_infer_string
):
# Enable Unicode option -----------------------------------------
with cf.option_context("display.unicode.east_asian_width", True):
result = repr(index)
assert result == expected

if using_infer_string:
expected = expected.replace("dtype='object'", "dtype='str'")
assert result == expected

def test_repr_summary(self):
with cf.option_context("display.max_seq_items", 10):
Expand Down
44 changes: 36 additions & 8 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -1395,8 +1393,7 @@ def test_unicode_name_in_footer(self):
sf = fmt.SeriesFormatter(s, name="\u05e2\u05d1\u05e8\u05d9\u05ea")
sf._get_footer() # should not raise exception

@pytest.mark.xfail(using_string_dtype(), reason="Fixup when arrow is default")
def test_east_asian_unicode_series(self):
def test_east_asian_unicode_series(self, using_infer_string):
# not aligned properly because of east asian width

# unicode index
Expand All @@ -1409,6 +1406,8 @@ def test_east_asian_unicode_series(self):
"ええええ D\ndtype: object",
]
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# unicode values
Expand All @@ -1422,7 +1421,8 @@ def test_east_asian_unicode_series(self):
"dtype: object",
]
)

if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# both
Expand All @@ -1439,7 +1439,8 @@ def test_east_asian_unicode_series(self):
"dtype: object",
]
)

if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# unicode footer
Expand All @@ -1452,6 +1453,8 @@ def test_east_asian_unicode_series(self):
"ああ あ\nいいいい いい\nう ううう\n"
"えええ ええええ\nName: おおおおおおお, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# MultiIndex
Expand Down Expand Up @@ -1495,6 +1498,8 @@ def test_east_asian_unicode_series(self):
"3 ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

s.index = ["ああ", "いいいい", "う", "えええ"]
Expand All @@ -1503,6 +1508,8 @@ def test_east_asian_unicode_series(self):
"えええ ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# Enable Unicode option -----------------------------------------
Expand All @@ -1516,6 +1523,8 @@ def test_east_asian_unicode_series(self):
"あ a\nいい bb\nううう CCC\n"
"ええええ D\ndtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# unicode values
Expand All @@ -1527,6 +1536,8 @@ def test_east_asian_unicode_series(self):
"a あ\nbb いい\nc ううう\n"
"ddd ええええ\ndtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected
# both
s = Series(
Expand All @@ -1539,6 +1550,8 @@ def test_east_asian_unicode_series(self):
"う ううう\n"
"えええ ええええ\ndtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# unicode footer
Expand All @@ -1554,6 +1567,8 @@ def test_east_asian_unicode_series(self):
"えええ ええええ\n"
"Name: おおおおおおお, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# MultiIndex
Expand Down Expand Up @@ -1599,6 +1614,8 @@ def test_east_asian_unicode_series(self):
"3 ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

s.index = ["ああ", "いいいい", "う", "えええ"]
Expand All @@ -1608,6 +1625,8 @@ def test_east_asian_unicode_series(self):
"えええ ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

# ambiguous unicode
Expand All @@ -1621,6 +1640,8 @@ def test_east_asian_unicode_series(self):
"¡¡ ううう\n"
"えええ ええええ\ndtype: object"
)
if using_infer_string:
expected = expected.replace("dtype: object", "dtype: str")
assert repr(s) == expected

def test_float_trim_zeros(self):
Expand Down Expand Up @@ -1770,27 +1791,34 @@ def chck_ncols(self, s):
ncolsizes = len({len(line.strip()) for line in lines})
assert ncolsizes == 1

@pytest.mark.xfail(using_string_dtype(), reason="change when arrow is default")
def test_format_explicit(self):
def test_format_explicit(self, using_infer_string):
test_sers = gen_series_formatting()
with option_context("display.max_rows", 4, "display.show_dimensions", False):
res = repr(test_sers["onel"])
exp = "0 a\n1 a\n ..\n98 a\n99 a\ndtype: object"
if using_infer_string:
exp = exp.replace("dtype: object", "dtype: str")
assert exp == res
res = repr(test_sers["twol"])
exp = "0 ab\n1 ab\n ..\n98 ab\n99 ab\ndtype: object"
if using_infer_string:
exp = exp.replace("dtype: object", "dtype: str")
assert exp == res
res = repr(test_sers["asc"])
exp = (
"0 a\n1 ab\n ... \n4 abcde\n5 "
"abcdef\ndtype: object"
)
if using_infer_string:
exp = exp.replace("dtype: object", "dtype: str")
assert exp == res
res = repr(test_sers["desc"])
exp = (
"5 abcdef\n4 abcde\n ... \n1 ab\n0 "
"a\ndtype: object"
)
if using_infer_string:
exp = exp.replace("dtype: object", "dtype: str")
assert exp == res

def test_ncols(self):
Expand Down
Loading