Skip to content

Commit 43a3edf

Browse files
limit get_loc to exact match for now
1 parent 3c62a8d commit 43a3edf

File tree

6 files changed

+47
-44
lines changed

6 files changed

+47
-44
lines changed

pandas/_libs/index.pyx

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,15 +561,23 @@ cdef class StringObjectEngine(ObjectEngine):
561561

562562
cdef:
563563
object na_value
564+
bint uses_na
564565

565566
def __init__(self, ndarray values, na_value):
566567
super().__init__(values)
567568
self.na_value = na_value
569+
self.uses_na = na_value is C_NA
570+
571+
cdef bint _checknull(self, object val):
572+
if self.uses_na:
573+
return val is C_NA
574+
else:
575+
return util.is_nan(val)
568576

569577
cdef _check_type(self, object val):
570578
if isinstance(val, str):
571579
return val
572-
elif checknull(val):
580+
elif self._checknull(val):
573581
return self.na_value
574582
else:
575583
raise KeyError(val)

pandas/core/indexes/base.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5974,7 +5974,6 @@ def _should_fallback_to_positional(self) -> bool:
59745974
def get_indexer_non_unique(
59755975
self, target
59765976
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
5977-
target = ensure_index(target)
59785977
target = self._maybe_cast_listlike_indexer(target)
59795978

59805979
if not self._should_compare(target) and not self._should_partial_index(target):
@@ -6222,10 +6221,6 @@ def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
62226221
# let's instead try with a straight Index
62236222
self = Index(self._values)
62246223

6225-
elif self.dtype == "string" and other.dtype == "object":
6226-
if lib.is_string_array(other._values, skipna=True): # type: ignore[arg-type]
6227-
return self, other.astype(self.dtype)
6228-
62296224
if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
62306225
# Reverse op so we don't need to re-implement on the subclasses
62316226
other, self = other._maybe_downcast_for_indexing(self)

pandas/tests/frame/indexing/test_indexing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import numpy as np
1010
import pytest
1111

12+
from pandas._config import using_string_dtype
13+
1214
from pandas._libs import iNaT
1315
from pandas.errors import InvalidIndexError
1416

@@ -501,6 +503,7 @@ def test_setitem_ambig(self, using_infer_string):
501503
else:
502504
assert dm[2].dtype == np.object_
503505

506+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
504507
def test_setitem_None(self, float_frame):
505508
# GH #766
506509
float_frame[None] = float_frame["A"]

pandas/tests/frame/test_arithmetic.py

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import numpy as np
1212
import pytest
1313

14+
from pandas._config import using_string_dtype
15+
1416
import pandas as pd
1517
from pandas import (
1618
DataFrame,
@@ -2099,21 +2101,12 @@ def test_enum_column_equality():
20992101
tm.assert_series_equal(result, expected)
21002102

21012103

2102-
def test_mixed_col_index_dtype(any_string_dtype):
2104+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
2105+
def test_mixed_col_index_dtype():
21032106
# GH 47382
2104-
df1 = DataFrame(
2105-
columns=Index(list("abc"), dtype=any_string_dtype), data=1.0, index=[0]
2106-
)
2107-
df2 = DataFrame(columns=Index(list("abc"), dtype="object"), data=0.0, index=[0])
2108-
2107+
df1 = DataFrame(columns=list("abc"), data=1.0, index=[0])
2108+
df2 = DataFrame(columns=list("abc"), data=0.0, index=[0])
2109+
df1.columns = df2.columns.astype("string")
21092110
result = df1 + df2
2110-
expected = DataFrame(
2111-
columns=Index(list("abc"), dtype=any_string_dtype), data=1.0, index=[0]
2112-
)
2113-
tm.assert_frame_equal(result, expected)
2114-
2115-
result = df2 + df1
2116-
expected = DataFrame(
2117-
columns=Index(list("abc"), dtype="object"), data=1.0, index=[0]
2118-
)
2111+
expected = DataFrame(columns=list("abc"), data=1.0, index=[0])
21192112
tm.assert_frame_equal(result, expected)

pandas/tests/indexes/string/test_indexing.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66
import pandas._testing as tm
77

88

9+
def _isnan(val):
10+
try:
11+
return val is not pd.NA and np.isnan(val)
12+
except TypeError:
13+
return False
14+
15+
916
class TestGetLoc:
1017
def test_get_loc(self, any_string_dtype):
1118
index = Index(["a", "b", "c"], dtype=any_string_dtype)
@@ -34,7 +41,14 @@ def test_get_loc_non_missing(self, any_string_dtype, nulls_fixture):
3441

3542
def test_get_loc_missing(self, any_string_dtype, nulls_fixture):
3643
index = Index(["a", "b", nulls_fixture], dtype=any_string_dtype)
37-
assert index.get_loc(nulls_fixture) == 2
44+
if any_string_dtype == "string" and (
45+
(any_string_dtype.na_value is pd.NA and nulls_fixture is not pd.NA)
46+
or (_isnan(any_string_dtype.na_value) and not _isnan(nulls_fixture))
47+
):
48+
with pytest.raises(KeyError):
49+
index.get_loc(nulls_fixture)
50+
else:
51+
assert index.get_loc(nulls_fixture) == 2
3852

3953

4054
class TestGetIndexer:
@@ -83,32 +97,20 @@ def test_get_indexer_missing(self, any_string_dtype, null):
8397

8498
class TestGetIndexerNonUnique:
8599
@pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
86-
def test_get_indexer_non_unique_nas(self, request, any_string_dtype, null):
87-
if (
88-
any_string_dtype == "string"
89-
and any_string_dtype.na_value is pd.NA
90-
and isinstance(null, float)
91-
):
92-
# TODO(infer_string)
93-
request.applymarker(
94-
pytest.mark.xfail(
95-
reason="NA-variant string dtype does not work with NaN"
96-
)
97-
)
98-
100+
def test_get_indexer_non_unique_nas(self, any_string_dtype, null):
99101
index = Index(["a", "b", null], dtype=any_string_dtype)
100-
indexer, missing = index.get_indexer_non_unique([null])
102+
indexer, missing = index.get_indexer_non_unique(["a", null])
101103

102-
expected_indexer = np.array([2], dtype=np.intp)
104+
expected_indexer = np.array([0, 2], dtype=np.intp)
103105
expected_missing = np.array([], dtype=np.intp)
104106
tm.assert_numpy_array_equal(indexer, expected_indexer)
105107
tm.assert_numpy_array_equal(missing, expected_missing)
106108

107109
# actually non-unique
108110
index = Index(["a", null, "b", null], dtype=any_string_dtype)
109-
indexer, missing = index.get_indexer_non_unique([null])
111+
indexer, missing = index.get_indexer_non_unique(["a", null])
110112

111-
expected_indexer = np.array([1, 3], dtype=np.intp)
113+
expected_indexer = np.array([0, 1, 3], dtype=np.intp)
112114
tm.assert_numpy_array_equal(indexer, expected_indexer)
113115
tm.assert_numpy_array_equal(missing, expected_missing)
114116

pandas/tests/reshape/test_pivot.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2668,8 +2668,6 @@ def test_pivot_columns_not_given(self):
26682668
with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
26692669
df.pivot()
26702670

2671-
# this still fails because columns=None gets passed down to unstack as level=None
2672-
# while at that point None was converted to NaN
26732671
@pytest.mark.xfail(
26742672
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
26752673
)
@@ -2688,7 +2686,10 @@ def test_pivot_columns_is_none(self):
26882686
expected = DataFrame({1: 3}, index=Index([2], name="b"))
26892687
tm.assert_frame_equal(result, expected)
26902688

2691-
def test_pivot_index_is_none(self, using_infer_string):
2689+
@pytest.mark.xfail(
2690+
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
2691+
)
2692+
def test_pivot_index_is_none(self):
26922693
# GH#48293
26932694
df = DataFrame({None: [1], "b": 2, "c": 3})
26942695

@@ -2699,10 +2700,11 @@ def test_pivot_index_is_none(self, using_infer_string):
26992700

27002701
result = df.pivot(columns="b", index=None, values="c")
27012702
expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
2702-
if using_infer_string:
2703-
expected.index.name = np.nan
27042703
tm.assert_frame_equal(result, expected)
27052704

2705+
@pytest.mark.xfail(
2706+
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
2707+
)
27062708
def test_pivot_values_is_none(self):
27072709
# GH#48293
27082710
df = DataFrame({None: [1], "b": 2, "c": 3})

0 commit comments

Comments
 (0)