Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Fixed regressions
- Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`)
- Fixed regression in :meth:`DataFrame.loc` not updating the cache correctly after values were set (:issue:`47867`)
- Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`)
- Fixed regression in :meth:`DataFrame.loc` setting a length-1 array-like value to a single value in the DataFrame (:issue:`46268`)
- Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
-

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1854,8 +1854,10 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
# We get here in one case via .loc with an all-False mask
pass

elif self._is_scalar_access(indexer):
# We are setting nested data
elif self._is_scalar_access(indexer) and is_object_dtype(
self.obj.dtypes[ilocs[0]]
):
# We are setting nested data, only possible for object dtype data
self._setitem_single_column(indexer[1], value, pi)

elif len(ilocs) == len(value):
Expand Down
93 changes: 93 additions & 0 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" test fancy indexing & misc """

import array
from datetime import datetime
import re
import weakref
Expand Down Expand Up @@ -1019,3 +1020,95 @@ def test_ser_list_indexer_exceeds_dimensions(indexer_li):
res = indexer_li(ser)[[0, 0]]
exp = Series([10, 10], index=Index([0, 0]))
tm.assert_series_equal(res, exp)


@pytest.mark.parametrize(
    "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])]
)
def test_scalar_setitem_with_nested_value(value):
    # Setting a length-2 list-like at a single (row, column) position of a
    # DataFrame via .loc must raise, both when the target column does not exist
    # yet and when it is an existing object-dtype column.

    # For numeric data, we try to unpack and thus raise for mismatching length
    df = DataFrame({"A": [1, 2, 3]})
    # Either message is accepted; which one is raised is not pinned down here.
    msg = "|".join(
        [
            "Must have equal len keys and value",
            "setting an array element with a sequence",
        ]
    )
    with pytest.raises(ValueError, match=msg):
        df.loc[0, "B"] = value

    # TODO For object dtype this happens as well, but should we rather preserve
    # the nested data and set as such?
    # NOTE(review): in the PR thread a reviewer (Member) replied that preserving
    # the nested data sounds good.
    df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)})
    with pytest.raises(ValueError, match="Must have equal len keys and value"):
        df.loc[0, "B"] = value
    # Assertions to switch to if the TODO above is resolved in favour of
    # preserving the nested value:
    # if isinstance(value, np.ndarray):
    #     assert (df.loc[0, "B"] == value).all()
    # else:
    #     assert df.loc[0, "B"] == value


@pytest.mark.parametrize(
    "value", [(0, 1), [0, 1], np.array([0, 1]), array.array("b", [0, 1])]
)
def test_scalar_setitem_series_with_nested_value(value, indexer_sli):
    # Scalar-position setitem on a Series with a length-2 list-like value.

    # Numeric dtype: the value is unpacked, so the length mismatch raises
    numeric = Series([1, 2, 3])
    msg = "setting an array element with a sequence"
    with pytest.raises(ValueError, match=msg):
        indexer_sli(numeric)[0] = value

    # Object dtype: the list-like is kept as a single nested element
    nested = Series([1, "a", "b"], dtype=object)
    indexer_sli(nested)[0] = value

    same = nested.loc[0] == value
    if isinstance(value, np.ndarray):
        # ndarray comparison is elementwise; reduce it to a single truth value
        same = same.all()
    assert same


@pytest.mark.parametrize(
    "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])]
)
def test_scalar_setitem_with_nested_value_length1(value):
    # https://github.com/pandas-dev/pandas/issues/46268
    # Setting a length-1 list-like at a single (row, column) position of a
    # DataFrame via .loc: unpacked for a new column, kept nested for an
    # existing object-dtype column.

    # For numeric data, assigning length-1 array to scalar position gets unpacked
    df = DataFrame({"A": [1, 2, 3]})
    df.loc[0, "B"] = value
    # Rows 1 and 2 of the newly created column "B" are expected to be NaN.
    expected = DataFrame({"A": [1, 2, 3], "B": [0.0, np.nan, np.nan]})
    tm.assert_frame_equal(df, expected)

    # but for object dtype we preserve the nested data
    # NOTE(review): in the PR thread a reviewer (Member) considered this correct
    # "if we do the above" -- presumably preserving nested data for object
    # dtype in the length-2 test as well; confirm against the thread.
    df = DataFrame({"A": [1, 2, 3], "B": np.array([1, "a", "b"], dtype=object)})
    df.loc[0, "B"] = value
    if isinstance(value, np.ndarray):
        # ndarray comparison is elementwise, so reduce with .all()
        assert (df.loc[0, "B"] == value).all()
    else:
        assert df.loc[0, "B"] == value


@pytest.mark.parametrize(
    "value", [(0.0,), [0.0], np.array([0.0]), array.array("d", [0.0])]
)
def test_scalar_setitem_series_with_nested_value_length1(value, indexer_sli):
    # Scalar-position setitem on a Series with a length-1 list-like value.

    # For numeric data, assigning length-1 array to scalar position gets unpacked
    # TODO this only happens in case of ndarray, should we make this consistent
    # for all list-likes? (as happens for DataFrame.(i)loc, see test above)
    # NOTE(review): in the PR thread a reviewer (Member) answered yes -- this
    # should unpack for every list-like, as the DataFrame case already does.
    ser = Series([1.0, 2.0, 3.0])
    if isinstance(value, np.ndarray):
        indexer_sli(ser)[0] = value
        expected = Series([0.0, 2.0, 3.0])
        tm.assert_series_equal(ser, expected)
    else:
        # Non-ndarray list-likes are not unpacked and raise instead
        with pytest.raises(
            ValueError, match="setting an array element with a sequence"
        ):
            indexer_sli(ser)[0] = value

    # but for object dtype we preserve the nested data
    ser = Series([1, "a", "b"], dtype=object)
    indexer_sli(ser)[0] = value
    if isinstance(value, np.ndarray):
        # ndarray comparison is elementwise, so reduce with .all()
        assert (ser.loc[0] == value).all()
    else:
        assert ser.loc[0] == value