Skip to content

Commit 2f5221f

Browse files
committed
TST (string-dtype): Adjust indexing string tests
1 parent a7a1410 commit 2f5221f

File tree

4 files changed

+76
-56
lines changed

4 files changed

+76
-56
lines changed

pandas/core/arrays/string_.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,10 @@ def __setitem__(self, key, value) -> None:
688688
else:
689689
if not is_array_like(value):
690690
value = np.asarray(value, dtype=object)
691+
else:
692+
# cast categories and friends to arrays to see if values are
693+
# compatible, for compatibility with arrow-backed strings
694+
value = np.asarray(value)
691695
if len(value) and not lib.is_string_array(value, skipna=True):
692696
raise TypeError("Must provide strings.")
693697

pandas/tests/indexing/test_iloc.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
from pandas._config import using_string_dtype
10-
119
from pandas.errors import IndexingError
1210

1311
from pandas import (
@@ -1198,22 +1196,25 @@ def test_iloc_getitem_int_single_ea_block_view(self):
11981196
arr[2] = arr[-1]
11991197
assert ser[0] == arr[-1]
12001198

1201-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
1202-
def test_iloc_setitem_multicolumn_to_datetime(self):
1199+
def test_iloc_setitem_multicolumn_to_datetime(self, using_infer_string):
12031200
# GH#20511
12041201
df = DataFrame({"A": ["2022-01-01", "2022-01-02"], "B": ["2021", "2022"]})
12051202

1206-
df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
1207-
expected = DataFrame(
1208-
{
1209-
"A": [
1210-
Timestamp("2021-01-01 00:00:00"),
1211-
Timestamp("2022-01-01 00:00:00"),
1212-
],
1213-
"B": ["2021", "2022"],
1214-
}
1215-
)
1216-
tm.assert_frame_equal(df, expected, check_dtype=False)
1203+
if using_infer_string:
1204+
with pytest.raises(TypeError, match="Invalid value"):
1205+
df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
1206+
else:
1207+
df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
1208+
expected = DataFrame(
1209+
{
1210+
"A": [
1211+
Timestamp("2021-01-01 00:00:00"),
1212+
Timestamp("2022-01-01 00:00:00"),
1213+
],
1214+
"B": ["2021", "2022"],
1215+
}
1216+
)
1217+
tm.assert_frame_equal(df, expected, check_dtype=False)
12171218

12181219

12191220
class TestILocErrors:

pandas/tests/indexing/test_indexing.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas._config import using_string_dtype
12-
1311
from pandas.errors import IndexingError
1412

1513
from pandas.core.dtypes.common import (
@@ -528,7 +526,6 @@ def test_string_slice_empty(self):
528526
with pytest.raises(KeyError, match="^0$"):
529527
df.loc["2011", 0]
530528

531-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
532529
def test_astype_assignment(self, using_infer_string):
533530
# GH4312 (iloc)
534531
df_orig = DataFrame(
@@ -539,34 +536,44 @@ def test_astype_assignment(self, using_infer_string):
539536

540537
# with the enforcement of GH#45333 in 2.0, this setting is attempted inplace,
541538
# so object dtype is retained
542-
df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64)
543-
expected = DataFrame(
544-
[[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
545-
)
546-
if not using_infer_string:
539+
if using_infer_string:
540+
with pytest.raises(TypeError, match="Invalid value"):
541+
df.iloc[:, 0] = df.iloc[:, 0].astype(np.int64)
542+
else:
543+
df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64)
544+
expected = DataFrame(
545+
[[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
546+
)
547547
expected["A"] = expected["A"].astype(object)
548548
expected["B"] = expected["B"].astype(object)
549-
tm.assert_frame_equal(df, expected)
549+
tm.assert_frame_equal(df, expected)
550550

551551
# GH5702 (loc)
552552
df = df_orig.copy()
553-
df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64)
554-
expected = DataFrame(
555-
[[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
556-
)
557-
if not using_infer_string:
553+
if using_infer_string:
554+
with pytest.raises(TypeError, match="Invalid value"):
555+
df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64)
556+
else:
557+
df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64)
558+
expected = DataFrame(
559+
[[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
560+
)
558561
expected["A"] = expected["A"].astype(object)
559-
tm.assert_frame_equal(df, expected)
562+
tm.assert_frame_equal(df, expected)
560563

561564
df = df_orig.copy()
562-
df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
563-
expected = DataFrame(
564-
[["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
565-
)
566-
if not using_infer_string:
565+
566+
if using_infer_string:
567+
with pytest.raises(TypeError, match="Invalid value"):
568+
df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
569+
else:
570+
df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
571+
expected = DataFrame(
572+
[["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
573+
)
567574
expected["B"] = expected["B"].astype(object)
568575
expected["C"] = expected["C"].astype(object)
569-
tm.assert_frame_equal(df, expected)
576+
tm.assert_frame_equal(df, expected)
570577

571578
def test_astype_assignment_full_replacements(self):
572579
# full replacements / no nans

pandas/tests/indexing/test_loc.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""test label based indexing with loc"""
22

33
from collections import namedtuple
4+
import contextlib
45
from datetime import (
56
date,
67
datetime,
@@ -13,10 +14,7 @@
1314
import numpy as np
1415
import pytest
1516

16-
from pandas._config import using_string_dtype
17-
1817
from pandas._libs import index as libindex
19-
from pandas.compat import HAS_PYARROW
2018
from pandas.errors import IndexingError
2119

2220
import pandas as pd
@@ -615,8 +613,7 @@ def test_loc_setitem_consistency_empty(self):
615613
expected["x"] = expected["x"].astype(np.int64)
616614
tm.assert_frame_equal(df, expected)
617615

618-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
619-
def test_loc_setitem_consistency_slice_column_len(self):
616+
def test_loc_setitem_consistency_slice_column_len(self, using_infer_string):
620617
# .loc[:,column] setting with slice == len of the column
621618
# GH10408
622619
levels = [
@@ -640,12 +637,23 @@ def test_loc_setitem_consistency_slice_column_len(self):
640637
]
641638
df = DataFrame(values, index=mi, columns=cols)
642639

643-
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
644-
df.loc[:, ("Respondent", "StartDate")]
645-
)
646-
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
647-
df.loc[:, ("Respondent", "EndDate")]
648-
)
640+
ctx = contextlib.nullcontext()
641+
if using_infer_string:
642+
ctx = pytest.raises(TypeError, match="Invalid value")
643+
644+
with ctx:
645+
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
646+
df.loc[:, ("Respondent", "StartDate")]
647+
)
648+
with ctx:
649+
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
650+
df.loc[:, ("Respondent", "EndDate")]
651+
)
652+
653+
if using_infer_string:
654+
# the assignments above raised, so the columns still hold strings and
# infer_objects won't infer datetimes from them anymore
655+
return
656+
649657
df = df.infer_objects()
650658

651659
# Adding a new key
@@ -1211,20 +1219,23 @@ def test_loc_reverse_assignment(self):
12111219

12121220
tm.assert_series_equal(result, expected)
12131221

1214-
@pytest.mark.xfail(using_string_dtype(), reason="can't set int into string")
1215-
def test_loc_setitem_str_to_small_float_conversion_type(self):
1222+
def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string):
12161223
# GH#20388
12171224

12181225
col_data = [str(np.random.default_rng(2).random() * 1e-12) for _ in range(5)]
12191226
result = DataFrame(col_data, columns=["A"])
1220-
expected = DataFrame(col_data, columns=["A"], dtype=object)
1227+
expected = DataFrame(col_data, columns=["A"])
12211228
tm.assert_frame_equal(result, expected)
12221229

12231230
# assigning with loc/iloc attempts to set the values inplace, which
12241231
# in this case is successful
1225-
result.loc[result.index, "A"] = [float(x) for x in col_data]
1226-
expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
1227-
tm.assert_frame_equal(result, expected)
1232+
if using_infer_string:
1233+
with pytest.raises(TypeError, match="Scalar must"):
1234+
result.loc[result.index, "A"] = [float(x) for x in col_data]
1235+
else:
1236+
result.loc[result.index, "A"] = [float(x) for x in col_data]
1237+
expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
1238+
tm.assert_frame_equal(result, expected)
12281239

12291240
# assigning the entire column using __setitem__ swaps in the new array
12301241
# GH#???
@@ -1389,9 +1400,6 @@ def test_loc_setitem_categorical_values_partial_column_slice(self):
13891400
df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
13901401
df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
13911402

1392-
@pytest.mark.xfail(
1393-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
1394-
)
13951403
def test_loc_setitem_single_row_categorical(self, using_infer_string):
13961404
# GH#25495
13971405
df = DataFrame({"Alpha": ["a"], "Numeric": [0]})

0 commit comments

Comments
 (0)