Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/source/whatsnew/v2.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ Bug fixes
- Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
with a compiled regex and custom flags (:issue:`62240`)


Improvements and fixes for Copy-on-Write
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Bug fixes
^^^^^^^^^

- :meth:`DataFrame.iloc` now works correctly with the ``copy_on_write`` option when assigning values after subsetting the columns of a homogeneous DataFrame (:issue:`60309`)


.. ---------------------------------------------------------------------------
.. _whatsnew_233.contributors:

Expand Down
23 changes: 21 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,8 +572,27 @@ def setitem(self, indexer, value) -> Self:
self._iset_split_block( # type: ignore[attr-defined]
0, blk_loc, values
)
# first block equals values
self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value)

indexer = list(indexer)
# first block equals values we are setting to -> set to all columns
if lib.is_integer(indexer[1]):
col_indexer = 0
elif len(blk_loc) > 1:
col_indexer = slice(None) # type: ignore[assignment]
else:
col_indexer = np.arange(len(blk_loc)) # type: ignore[assignment]
indexer[1] = col_indexer

row_indexer = indexer[0]
if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2:
# numpy cannot handle a 2d indexer in combo with a slice
row_indexer = np.squeeze(row_indexer, axis=1)
if isinstance(row_indexer, np.ndarray) and len(row_indexer) == 0:
# numpy does not like empty indexer combined with slice
# and we are setting nothing anyway
return self
indexer[0] = row_indexer
self.blocks[0].setitem(tuple(indexer), value)
return self
# No need to split if we either set all columns or on a single block
# manager
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1427,13 +1427,17 @@ def test_set_2d_casting_date_to_int(self, col, indexer):
)
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("has_ref", [True, False])
@pytest.mark.parametrize("col", [{}, {"name": "a"}])
def test_loc_setitem_reordering_with_all_true_indexer(self, col):
def test_loc_setitem_reordering_with_all_true_indexer(self, col, has_ref):
# GH#48701
n = 17
df = DataFrame({**col, "x": range(n), "y": range(n)})
value = df[["x", "y"]].copy()
expected = df.copy()
df.loc[n * [True], ["x", "y"]] = df[["x", "y"]]
if has_ref:
view = df[:] # noqa: F841
df.loc[n * [True], ["x", "y"]] = value
tm.assert_frame_equal(df, expected)

def test_loc_rhs_empty_warning(self):
Expand Down
69 changes: 69 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -1370,6 +1370,75 @@ def test_frame_setitem_empty_dataframe(self):
)
tm.assert_frame_equal(df, expected)

def test_iloc_setitem_view_2dblock(self):
# https://github.com/pandas-dev/pandas/issues/60309
df_parent = DataFrame(
{
"A": [1, 4, 1, 5],
"B": [2, 5, 2, 6],
"C": [3, 6, 1, 7],
"D": [8, 9, 10, 11],
}
)
df_orig = df_parent.copy()
df = df_parent[["B", "C"]]

# Perform the iloc operation
df.iloc[[1, 3], :] = [[2, 2], [2, 2]]

# Check that original DataFrame is unchanged
tm.assert_frame_equal(df_parent, df_orig)

# Check that df is modified correctly
expected = DataFrame({"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index)
tm.assert_frame_equal(df, expected)

# with setting to subset of columns
df = df_parent[["B", "C", "D"]]
df.iloc[[1, 3], 0:3:2] = [[2, 2], [2, 2]]
tm.assert_frame_equal(df_parent, df_orig)
expected = DataFrame(
{"B": [2, 2, 2, 2], "C": [3, 6, 1, 7], "D": [8, 2, 10, 2]}, index=df.index
)
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize(
"indexer, value",
[
(([0, 2], slice(None)), [[2, 2, 2, 2], [2, 2, 2, 2]]),
((slice(None), slice(None)), 2),
((0, [1, 3]), [2, 2]),
(([0], 1), [2]),
(([0], np.int64(1)), [2]),
((slice(None), np.int64(1)), [2, 2, 2]),
((slice(None, 2), np.int64(1)), [2, 2]),
(
(np.array([False, True, False]), np.array([False, True, False, True])),
[2, 2],
),
],
)
def test_setitem_2dblock_with_ref(self, indexer, value):
# https://github.com/pandas-dev/pandas/issues/60309
arr = np.arange(12).reshape(3, 4)

df_parent = DataFrame(arr.copy(), columns=list("ABCD"))
# the test is specifically for the case where the df is backed by a single
# block (taking the non-split path)
assert df_parent._mgr.is_single_block
df_orig = df_parent.copy()
df = df_parent[:]

df.iloc[indexer] = value

# Check that original DataFrame is unchanged
tm.assert_frame_equal(df_parent, df_orig)

# Check that df is modified correctly
arr[indexer] = value
expected = DataFrame(arr, columns=list("ABCD"))
tm.assert_frame_equal(df, expected)


def test_full_setter_loc_incompatible_dtype():
# https://github.com/pandas-dev/pandas/issues/55791
Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/frame/methods/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,15 @@ def test_update_with_different_dtype(self):
with pytest.raises(TypeError, match="Invalid value"):
df.update({"c": Series(["foo"], index=[0])})

def test_update_modify_view(self, using_infer_string):
@pytest.mark.parametrize("dtype", ["str", object])
def test_update_modify_view(self, dtype):
# GH#47188
df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]})
df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]})
df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}, dtype=dtype)
df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}, dtype=dtype)
df2_orig = df2.copy()
result_view = df2[:]
df2.update(df)
expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]})
expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}, dtype=dtype)
tm.assert_frame_equal(df2, expected)
tm.assert_frame_equal(result_view, df2_orig)

Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,21 @@ def frame_random_data_integer_multi_index():


class TestMultiIndexLoc:
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
@pytest.mark.parametrize("has_ref", [True, False])
def test_loc_setitem_frame_with_multiindex(
self, multiindex_dataframe_random_data, has_ref
):
frame = multiindex_dataframe_random_data
if has_ref:
view = frame[:]
frame.loc[("bar", "two"), "B"] = 5
assert frame.loc[("bar", "two"), "B"] == 5

# with integer labels
df = frame.copy()
df.columns = list(range(3))
if has_ref:
view = df[:] # noqa: F841
df.loc[("bar", "two"), 1] = 7
assert df.loc[("bar", "two"), 1] == 7

Expand Down
69 changes: 54 additions & 15 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,16 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key):
expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
tm.assert_frame_equal(df, expected)

def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array):
@pytest.mark.parametrize("has_ref", [True, False])
def test_iloc_setitem_ea_inplace(
self, frame_or_series, index_or_series_or_array, has_ref
):
# GH#38952 Case with not setting a full column
# IntegerArray without NAs
arr = array([1, 2, 3, 4])
obj = frame_or_series(arr.to_numpy("i8"))
if has_ref:
view = obj[:] # noqa: F841

if frame_or_series is Series:
values = obj.values
Expand All @@ -125,11 +130,12 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array
tm.assert_equal(obj, expected)

# Check that we are actually in-place
if frame_or_series is Series:
assert obj.values is not values
assert np.shares_memory(obj.values, values)
else:
assert np.shares_memory(obj[0].values, values)
if not has_ref:
if frame_or_series is Series:
assert obj.values is not values
assert np.shares_memory(obj.values, values)
else:
assert np.shares_memory(obj[0].values, values)

def test_is_scalar_access(self):
# GH#32085 index with duplicates doesn't matter for _is_scalar_access
Expand Down Expand Up @@ -426,12 +432,15 @@ def test_iloc_getitem_slice_dups(self):
tm.assert_frame_equal(df.iloc[10:, :2], df2)
tm.assert_frame_equal(df.iloc[10:, 2:], df1)

def test_iloc_setitem(self):
@pytest.mark.parametrize("has_ref", [True, False])
def test_iloc_setitem(sel, has_ref):
df = DataFrame(
np.random.default_rng(2).standard_normal((4, 4)),
index=np.arange(0, 8, 2),
columns=np.arange(0, 12, 3),
)
if has_ref:
view = df[:] # noqa: F841

df.iloc[1, 1] = 1
result = df.iloc[1, 1]
Expand All @@ -448,27 +457,35 @@ def test_iloc_setitem(self):
expected = Series([0, 1, 0], index=[4, 5, 6])
tm.assert_series_equal(s, expected)

def test_iloc_setitem_axis_argument(self):
@pytest.mark.parametrize("has_ref", [True, False])
def test_iloc_setitem_axis_argument(self, has_ref):
# GH45032
df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
df[1] = df[1].astype(object)
if has_ref:
view = df[:]
expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]])
expected[1] = expected[1].astype(object)
df.iloc(axis=0)[2] = 5
tm.assert_frame_equal(df, expected)

df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
df[1] = df[1].astype(object)
if has_ref:
view = df[:] # noqa: F841
expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]])
expected[1] = expected[1].astype(object)
df.iloc(axis=1)[2] = 5
tm.assert_frame_equal(df, expected)

def test_iloc_setitem_list(self):
@pytest.mark.parametrize("has_ref", [True, False])
def test_iloc_setitem_list(self, has_ref):
# setitem with an iloc list
df = DataFrame(
np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]
)
if has_ref:
view = df[:] # noqa: F841
df.iloc[[0, 1], [1, 2]]
df.iloc[[0, 1], [1, 2]] += 100

Expand Down Expand Up @@ -663,12 +680,15 @@ def test_iloc_getitem_doc_issue(self):
expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4])
tm.assert_frame_equal(result, expected)

def test_iloc_setitem_series(self):
@pytest.mark.parametrize("has_ref", [True, False])
def test_iloc_setitem_series(self, has_ref):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
index=list("abcdefghij"),
columns=list("ABCD"),
)
if has_ref:
view = df[:] # noqa: F841

df.iloc[1, 1] = 1
result = df.iloc[1, 1]
Expand Down Expand Up @@ -697,32 +717,40 @@ def test_iloc_setitem_series(self):
expected = Series([0, 1, 2, 3, 4, 5])
tm.assert_series_equal(result, expected)

def test_iloc_setitem_list_of_lists(self):
@pytest.mark.parametrize("has_ref", [True, False])
def test_iloc_setitem_list_of_lists(self, has_ref):
# GH 7551
# list-of-list is set incorrectly in mixed vs. single dtyped frames
df = DataFrame(
{"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")}
)
if has_ref:
view = df[:]
df.iloc[2:4] = [[10, 11], [12, 13]]
expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]})
tm.assert_frame_equal(df, expected)

df = DataFrame(
{"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")}
)
if has_ref:
view = df[:] # noqa: F841
df.iloc[2:4] = [["x", 11], ["y", 13]]
expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]})
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("has_ref", [True, False])
@pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])])
@pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
def test_iloc_setitem_with_scalar_index(self, indexer, value):
def test_iloc_setitem_with_scalar_index(self, has_ref, indexer, value):
# GH #19474
# assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
# elementwisely, not using "setter('A', ['Z'])".

# Set object type to avoid upcast when setting "Z"
df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]).astype({"A": object})
if has_ref:
view = df[:] # noqa: F841
df.iloc[0, indexer] = value
result = df.iloc[0, 0]

Expand Down Expand Up @@ -1048,25 +1076,33 @@ def test_iloc_setitem_bool_indexer(self, klass):
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("has_ref", [True, False])
@pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
def test_iloc_setitem_pure_position_based(self, indexer):
def test_iloc_setitem_pure_position_based(self, indexer, has_ref):
# GH#22046
df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
if has_ref:
view = df2[:] # noqa: F841
df2.iloc[:, indexer] = df1.iloc[:, [0]]
expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
tm.assert_frame_equal(df2, expected)

def test_iloc_setitem_dictionary_value(self):
@pytest.mark.parametrize("has_ref", [True, False])
def test_iloc_setitem_dictionary_value(self, has_ref):
# GH#37728
df = DataFrame({"x": [1, 2], "y": [2, 2]})
if has_ref:
view = df[:]
rhs = {"x": 9, "y": 99}
df.iloc[1] = rhs
expected = DataFrame({"x": [1, 9], "y": [2, 99]})
tm.assert_frame_equal(df, expected)

# GH#38335 same thing, mixed dtypes
df = DataFrame({"x": [1, 2], "y": [2.0, 2.0]})
if has_ref:
view = df[:] # noqa: F841
df.iloc[1] = rhs
expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]})
tm.assert_frame_equal(df, expected)
Expand Down Expand Up @@ -1272,10 +1308,13 @@ def test_iloc_float_raises(self, series_with_simple_index, frame_or_series):
with pytest.raises(IndexError, match=_slice_iloc_msg):
obj.iloc[3.0] = 0

def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
@pytest.mark.parametrize("has_ref", [True, False])
def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame, has_ref):
with pytest.raises(IndexingError, match="Too many indexers"):
float_frame.iloc[:, :, :]

if has_ref:
view = float_frame[:] # noqa: F841
with pytest.raises(IndexError, match="too many indices for array"):
# GH#32257 we let numpy do validation, get their exception
float_frame.iloc[:, :, :] = 1
Expand Down
Loading
Loading