Skip to content

Commit 7801bb3

Browse files
TST (string dtype): duplicate pandas/tests/indexes/object tests specifically for string dtypes
1 parent 8d2ca0b commit 7801bb3

File tree

5 files changed: 146 additions and 82 deletions.

pandas/tests/indexes/object/test_astype.py

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3,25 +3,7 @@
33
from pandas import (
44
Index,
55
NaT,
6-
Series,
76
)
8-
import pandas._testing as tm
9-
10-
11-
def test_astype_str_from_bytes():
12-
# https://github.com/pandas-dev/pandas/issues/38607
13-
# GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
14-
# did a .decode() on the bytes object. In 2.0 we go through
15-
# ensure_string_array which does f"{val}"
16-
idx = Index(["あ", b"a"], dtype="object")
17-
result = idx.astype(str)
18-
expected = Index(["あ", "a"], dtype="str")
19-
tm.assert_index_equal(result, expected)
20-
21-
# while we're here, check that Series.astype behaves the same
22-
result = Series(idx).astype(str)
23-
expected = Series(expected, dtype="str")
24-
tm.assert_series_equal(result, expected)
257

268

279
def test_astype_invalid_nas_to_tdt64_raises():

pandas/tests/indexes/object/test_indexing.py

Lines changed: 7 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
is_matching_na,
99
)
1010

11-
import pandas as pd
1211
from pandas import Index
1312
import pandas._testing as tm
1413

@@ -23,13 +22,13 @@ class TestGetIndexer:
2322
)
2423
def test_get_indexer_strings(self, method, expected):
2524
expected = np.array(expected, dtype=np.intp)
26-
index = Index(["b", "c"])
25+
index = Index(["b", "c"], dtype=object)
2726
actual = index.get_indexer(["a", "b", "c", "d"], method=method)
2827

2928
tm.assert_numpy_array_equal(actual, expected)
3029

31-
def test_get_indexer_strings_raises(self, using_infer_string):
32-
index = Index(["b", "c"])
30+
def test_get_indexer_strings_raises(self):
31+
index = Index(["b", "c"], dtype=object)
3332

3433
msg = "|".join(
3534
[
@@ -74,7 +73,7 @@ def test_get_indexer_non_unique_nas(
7473
# even though this isn't non-unique, this should still work
7574
if using_infer_string and (nulls_fixture is None or nulls_fixture is NA):
7675
request.applymarker(pytest.mark.xfail(reason="NAs are cast to NaN"))
77-
index = Index(["a", "b", nulls_fixture])
76+
index = Index(["a", "b", nulls_fixture], dtype=object)
7877
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
7978

8079
expected_indexer = np.array([2], dtype=np.intp)
@@ -83,7 +82,7 @@ def test_get_indexer_non_unique_nas(
8382
tm.assert_numpy_array_equal(missing, expected_missing)
8483

8584
# actually non-unique
86-
index = Index(["a", nulls_fixture, "b", nulls_fixture])
85+
index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
8786
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
8887

8988
expected_indexer = np.array([1, 3], dtype=np.intp)
@@ -92,10 +91,10 @@ def test_get_indexer_non_unique_nas(
9291

9392
# matching-but-not-identical nans
9493
if is_matching_na(nulls_fixture, float("NaN")):
95-
index = Index(["a", float("NaN"), "b", float("NaN")])
94+
index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object)
9695
match_but_not_identical = True
9796
elif is_matching_na(nulls_fixture, Decimal("NaN")):
98-
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")])
97+
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object)
9998
match_but_not_identical = True
10099
else:
101100
match_but_not_identical = False
@@ -156,59 +155,3 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
156155
expected_indexer = np.array([1, 3], dtype=np.intp)
157156
tm.assert_numpy_array_equal(indexer, expected_indexer)
158157
tm.assert_numpy_array_equal(missing, expected_missing)
159-
160-
161-
class TestSliceLocs:
162-
@pytest.mark.parametrize(
163-
"in_slice,expected",
164-
[
165-
# error: Slice index must be an integer or None
166-
(pd.IndexSlice[::-1], "yxdcb"),
167-
(pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
168-
(pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
169-
(pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
170-
(pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
171-
(pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
172-
(pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
173-
# absent labels
174-
(pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
175-
(pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
176-
(pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
177-
(pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
178-
(pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
179-
(pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
180-
(pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
181-
(pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
182-
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
183-
],
184-
)
185-
def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
186-
index = Index(list("bcdxy"), dtype=any_string_dtype)
187-
188-
s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
189-
result = index[s_start : s_stop : in_slice.step]
190-
expected = Index(list(expected), dtype=any_string_dtype)
191-
tm.assert_index_equal(result, expected)
192-
193-
def test_slice_locs_negative_step_oob(self, any_string_dtype):
194-
index = Index(list("bcdxy"), dtype=any_string_dtype)
195-
196-
result = index[-10:5:1]
197-
tm.assert_index_equal(result, index)
198-
199-
result = index[4:-10:-1]
200-
expected = Index(list("yxdcb"), dtype=any_string_dtype)
201-
tm.assert_index_equal(result, expected)
202-
203-
def test_slice_locs_dup(self):
204-
index = Index(["a", "a", "b", "c", "d", "d"])
205-
assert index.slice_locs("a", "d") == (0, 6)
206-
assert index.slice_locs(end="d") == (0, 6)
207-
assert index.slice_locs("a", "c") == (0, 4)
208-
assert index.slice_locs("b", "d") == (2, 6)
209-
210-
index2 = index[::-1]
211-
assert index2.slice_locs("d", "a") == (0, 6)
212-
assert index2.slice_locs(end="a") == (0, 6)
213-
assert index2.slice_locs("d", "b") == (0, 4)
214-
assert index2.slice_locs("c", "a") == (2, 6)

pandas/tests/indexes/string/__init__.py

Whitespace-only changes.

pandas/tests/indexes/string/test_astype.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
from pandas import (
2+
Index,
3+
Series,
4+
)
5+
import pandas._testing as tm
6+
7+
8+
def test_astype_str_from_bytes():
9+
# https://github.com/pandas-dev/pandas/issues/38607
10+
# GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
11+
# did a .decode() on the bytes object. In 2.0 we go through
12+
# ensure_string_array which does f"{val}"
13+
idx = Index(["あ", b"a"], dtype="object")
14+
result = idx.astype(str)
15+
expected = Index(["あ", "a"], dtype="str")
16+
tm.assert_index_equal(result, expected)
17+
18+
# while we're here, check that Series.astype behaves the same
19+
result = Series(idx).astype(str)
20+
expected = Series(expected, dtype="str")
21+
tm.assert_series_equal(result, expected)

pandas/tests/indexes/string/test_indexing.py

Lines changed: 118 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,118 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas import Index
6+
import pandas._testing as tm
7+
8+
9+
class TestGetIndexer:
10+
@pytest.mark.parametrize(
11+
"method,expected",
12+
[
13+
("pad", [-1, 0, 1, 1]),
14+
("backfill", [0, 0, 1, -1]),
15+
],
16+
)
17+
def test_get_indexer_strings(self, any_string_dtype, method, expected):
18+
expected = np.array(expected, dtype=np.intp)
19+
index = Index(["b", "c"], dtype=any_string_dtype)
20+
actual = index.get_indexer(["a", "b", "c", "d"], method=method)
21+
22+
tm.assert_numpy_array_equal(actual, expected)
23+
24+
def test_get_indexer_strings_raises(self, any_string_dtype):
25+
index = Index(["b", "c"], dtype=any_string_dtype)
26+
27+
msg = "|".join(
28+
[
29+
"operation 'sub' not supported for dtype 'str",
30+
r"unsupported operand type\(s\) for -: 'str' and 'str'",
31+
]
32+
)
33+
with pytest.raises(TypeError, match=msg):
34+
index.get_indexer(["a", "b", "c", "d"], method="nearest")
35+
36+
with pytest.raises(TypeError, match=msg):
37+
index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
38+
39+
with pytest.raises(TypeError, match=msg):
40+
index.get_indexer(
41+
["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
42+
)
43+
44+
45+
class TestGetIndexerNonUnique:
46+
@pytest.mark.xfail(reason="TODO(infer_string)", strict=False)
47+
def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture):
48+
index = Index(["a", "b", None], dtype=any_string_dtype)
49+
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
50+
51+
expected_indexer = np.array([2], dtype=np.intp)
52+
expected_missing = np.array([], dtype=np.intp)
53+
tm.assert_numpy_array_equal(indexer, expected_indexer)
54+
tm.assert_numpy_array_equal(missing, expected_missing)
55+
56+
# actually non-unique
57+
index = Index(["a", None, "b", None], dtype=any_string_dtype)
58+
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
59+
60+
expected_indexer = np.array([1, 3], dtype=np.intp)
61+
tm.assert_numpy_array_equal(indexer, expected_indexer)
62+
tm.assert_numpy_array_equal(missing, expected_missing)
63+
64+
65+
class TestSliceLocs:
66+
@pytest.mark.parametrize(
67+
"in_slice,expected",
68+
[
69+
# error: Slice index must be an integer or None
70+
(pd.IndexSlice[::-1], "yxdcb"),
71+
(pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
72+
(pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
73+
(pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
74+
(pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
75+
(pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
76+
(pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
77+
# absent labels
78+
(pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
79+
(pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
80+
(pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
81+
(pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
82+
(pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
83+
(pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
84+
(pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
85+
(pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
86+
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
87+
],
88+
)
89+
def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
90+
index = Index(list("bcdxy"), dtype=any_string_dtype)
91+
92+
s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
93+
result = index[s_start : s_stop : in_slice.step]
94+
expected = Index(list(expected), dtype=any_string_dtype)
95+
tm.assert_index_equal(result, expected)
96+
97+
def test_slice_locs_negative_step_oob(self, any_string_dtype):
98+
index = Index(list("bcdxy"), dtype=any_string_dtype)
99+
100+
result = index[-10:5:1]
101+
tm.assert_index_equal(result, index)
102+
103+
result = index[4:-10:-1]
104+
expected = Index(list("yxdcb"), dtype=any_string_dtype)
105+
tm.assert_index_equal(result, expected)
106+
107+
def test_slice_locs_dup(self, any_string_dtype):
108+
index = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype)
109+
assert index.slice_locs("a", "d") == (0, 6)
110+
assert index.slice_locs(end="d") == (0, 6)
111+
assert index.slice_locs("a", "c") == (0, 4)
112+
assert index.slice_locs("b", "d") == (2, 6)
113+
114+
index2 = index[::-1]
115+
assert index2.slice_locs("d", "a") == (0, 6)
116+
assert index2.slice_locs(end="a") == (0, 6)
117+
assert index2.slice_locs("d", "b") == (0, 4)
118+
assert index2.slice_locs("c", "a") == (2, 6)

0 commit comments

Comments
 (0)