Skip to content

Commit bb148ba

Browse files
support get_indexer
1 parent e007299 commit bb148ba

File tree

2 files changed

+30
-6
lines changed

2 files changed

+30
-6
lines changed

pandas/core/indexes/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6222,6 +6222,10 @@ def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
62226222
# let's instead try with a straight Index
62236223
self = Index(self._values)
62246224

6225+
elif self.dtype == "string" and other.dtype == "object":
6226+
if lib.is_string_array(other._values, skipna=True):
6227+
return self, other.astype(self.dtype)
6228+
62256229
if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
62266230
# Reverse op so we don't need to re-implement on the subclasses
62276231
other, self = other._maybe_downcast_for_indexing(self)

pandas/tests/indexes/string/test_indexing.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,21 +72,41 @@ def test_get_indexer_strings_raises(self, any_string_dtype):
7272
["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
7373
)
7474

75+
@pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
76+
def test_get_indexer_missing(self, any_string_dtype, null):
77+
# NaT and Decimal("NaN") from nulls_fixture are not supported for string dtype
78+
index = Index(["a", "b", null], dtype=any_string_dtype)
79+
result = index.get_indexer(["a", null, "c"])
80+
expected = np.array([0, 2, -1], dtype=np.intp)
81+
tm.assert_numpy_array_equal(result, expected)
82+
7583

7684
class TestGetIndexerNonUnique:
77-
@pytest.mark.xfail(reason="TODO(infer_string)", strict=False)
78-
def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture):
79-
index = Index(["a", "b", None], dtype=any_string_dtype)
80-
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
85+
@pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
86+
def test_get_indexer_non_unique_nas(self, request, any_string_dtype, null):
87+
if (
88+
any_string_dtype == "string"
89+
and any_string_dtype.na_value is pd.NA
90+
and isinstance(null, float)
91+
):
92+
# TODO(infer_string)
93+
request.applymarker(
94+
pytest.mark.xfail(
95+
reason="NA-variant string dtype does not work with NaN"
96+
)
97+
)
98+
99+
index = Index(["a", "b", null], dtype=any_string_dtype)
100+
indexer, missing = index.get_indexer_non_unique([null])
81101

82102
expected_indexer = np.array([2], dtype=np.intp)
83103
expected_missing = np.array([], dtype=np.intp)
84104
tm.assert_numpy_array_equal(indexer, expected_indexer)
85105
tm.assert_numpy_array_equal(missing, expected_missing)
86106

87107
# actually non-unique
88-
index = Index(["a", None, "b", None], dtype=any_string_dtype)
89-
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
108+
index = Index(["a", null, "b", null], dtype=any_string_dtype)
109+
indexer, missing = index.get_indexer_non_unique([null])
90110

91111
expected_indexer = np.array([1, 3], dtype=np.intp)
92112
tm.assert_numpy_array_equal(indexer, expected_indexer)

0 commit comments

Comments
 (0)