Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,10 @@ def __setitem__(self, key, value) -> None:
else:
if not is_array_like(value):
value = np.asarray(value, dtype=object)
else:
# cast Categoricals and other array-likes to ndarray so the values can
# be checked for compatibility; this is consistent with the behavior of
# arrow-backed string arrays
value = np.asarray(value)
if len(value) and not lib.is_string_array(value, skipna=True):
raise TypeError("Must provide strings.")

Expand Down
31 changes: 16 additions & 15 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.errors import IndexingError

from pandas import (
Expand Down Expand Up @@ -1198,22 +1196,25 @@ def test_iloc_getitem_int_single_ea_block_view(self):
arr[2] = arr[-1]
assert ser[0] == arr[-1]

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_iloc_setitem_multicolumn_to_datetime(self):
def test_iloc_setitem_multicolumn_to_datetime(self, using_infer_string):
# GH#20511
df = DataFrame({"A": ["2022-01-01", "2022-01-02"], "B": ["2021", "2022"]})

df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
expected = DataFrame(
{
"A": [
Timestamp("2021-01-01 00:00:00"),
Timestamp("2022-01-01 00:00:00"),
],
"B": ["2021", "2022"],
}
)
tm.assert_frame_equal(df, expected, check_dtype=False)
if using_infer_string:
with pytest.raises(TypeError, match="Invalid value"):
df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
else:
df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
expected = DataFrame(
{
"A": [
Timestamp("2021-01-01 00:00:00"),
Timestamp("2022-01-01 00:00:00"),
],
"B": ["2021", "2022"],
}
)
tm.assert_frame_equal(df, expected, check_dtype=False)


class TestILocErrors:
Expand Down
18 changes: 7 additions & 11 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.errors import IndexingError

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -528,12 +526,12 @@ def test_string_slice_empty(self):
with pytest.raises(KeyError, match="^0$"):
df.loc["2011", 0]

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_astype_assignment(self, using_infer_string):
# GH4312 (iloc)
df_orig = DataFrame(
[["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
df_orig[list("ABCDG")] = df_orig[list("ABCDG")].astype(object)

df = df_orig.copy()

Expand All @@ -543,9 +541,9 @@ def test_astype_assignment(self, using_infer_string):
expected = DataFrame(
[[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
if not using_infer_string:
expected["A"] = expected["A"].astype(object)
expected["B"] = expected["B"].astype(object)
expected[list("CDG")] = expected[list("CDG")].astype(object)
expected["A"] = expected["A"].astype(object)
expected["B"] = expected["B"].astype(object)
tm.assert_frame_equal(df, expected)

# GH5702 (loc)
Expand All @@ -554,18 +552,16 @@ def test_astype_assignment(self, using_infer_string):
expected = DataFrame(
[[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
if not using_infer_string:
expected["A"] = expected["A"].astype(object)
expected[list("ABCDG")] = expected[list("ABCDG")].astype(object)
tm.assert_frame_equal(df, expected)

df = df_orig.copy()

df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
expected = DataFrame(
[["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
if not using_infer_string:
expected["B"] = expected["B"].astype(object)
expected["C"] = expected["C"].astype(object)
expected[list("ABCDG")] = expected[list("ABCDG")].astype(object)
tm.assert_frame_equal(df, expected)

def test_astype_assignment_full_replacements(self):
Expand Down
48 changes: 28 additions & 20 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""test label based indexing with loc"""

from collections import namedtuple
import contextlib
from datetime import (
date,
datetime,
Expand All @@ -13,10 +14,7 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas._libs import index as libindex
from pandas.compat import HAS_PYARROW
from pandas.errors import IndexingError

import pandas as pd
Expand Down Expand Up @@ -615,8 +613,7 @@ def test_loc_setitem_consistency_empty(self):
expected["x"] = expected["x"].astype(np.int64)
tm.assert_frame_equal(df, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_loc_setitem_consistency_slice_column_len(self):
def test_loc_setitem_consistency_slice_column_len(self, using_infer_string):
# .loc[:,column] setting with slice == len of the column
# GH10408
levels = [
Expand All @@ -640,12 +637,23 @@ def test_loc_setitem_consistency_slice_column_len(self):
]
df = DataFrame(values, index=mi, columns=cols)

df.loc[:, ("Respondent", "StartDate")] = to_datetime(
df.loc[:, ("Respondent", "StartDate")]
)
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
df.loc[:, ("Respondent", "EndDate")]
)
ctx = contextlib.nullcontext()
if using_infer_string:
ctx = pytest.raises(TypeError, match="Invalid value")

with ctx:
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
df.loc[:, ("Respondent", "StartDate")]
)
with ctx:
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
df.loc[:, ("Respondent", "EndDate")]
)

if using_infer_string:
# with the string dtype, infer_objects no longer infers anything here,
# so the remainder of this test does not apply
return

df = df.infer_objects()

# Adding a new key
Expand Down Expand Up @@ -1211,20 +1219,23 @@ def test_loc_reverse_assignment(self):

tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="can't set int into string")
def test_loc_setitem_str_to_small_float_conversion_type(self):
def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string):
# GH#20388

col_data = [str(np.random.default_rng(2).random() * 1e-12) for _ in range(5)]
result = DataFrame(col_data, columns=["A"])
expected = DataFrame(col_data, columns=["A"], dtype=object)
expected = DataFrame(col_data, columns=["A"])
tm.assert_frame_equal(result, expected)

# assigning with loc/iloc attempts to set the values inplace, which
# in this case is successful
result.loc[result.index, "A"] = [float(x) for x in col_data]
expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
tm.assert_frame_equal(result, expected)
if using_infer_string:
with pytest.raises(TypeError, match="Scalar must"):
result.loc[result.index, "A"] = [float(x) for x in col_data]
else:
result.loc[result.index, "A"] = [float(x) for x in col_data]
expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
tm.assert_frame_equal(result, expected)

# assigning the entire column using __setitem__ swaps in the new array
# GH#???
Expand Down Expand Up @@ -1389,9 +1400,6 @@ def test_loc_setitem_categorical_values_partial_column_slice(self):
df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])

@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_loc_setitem_single_row_categorical(self, using_infer_string):
# GH#25495
df = DataFrame({"Alpha": ["a"], "Numeric": [0]})
Expand Down
Loading