Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/source/whatsnew/v2.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ Bug fixes
with a compiled regex and custom flags (:issue:`62240`)
- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)


Improvements and fixes for Copy-on-Write
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Bug fixes
^^^^^^^^^

- :meth:`DataFrame.iloc` now works correctly with the ``copy_on_write`` option when assigning values after subsetting the columns of a homogeneous DataFrame (:issue:`60309`)


.. ---------------------------------------------------------------------------
.. _whatsnew_233.contributors:

Expand Down
23 changes: 21 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,27 @@ def setitem(self, indexer, value, warn: bool = True) -> Self:
self._iset_split_block( # type: ignore[attr-defined]
0, blk_loc, values
)
# first block equals values
self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value)

indexer = list(indexer)
# first block equals values we are setting to -> set to all columns
if lib.is_integer(indexer[1]):
col_indexer = 0
elif len(blk_loc) > 1:
col_indexer = slice(None) # type: ignore[assignment]
else:
col_indexer = np.arange(len(blk_loc)) # type: ignore[assignment]
indexer[1] = col_indexer

row_indexer = indexer[0]
if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2:
# numpy cannot handle a 2d indexer in combo with a slice
row_indexer = np.squeeze(row_indexer, axis=1)
if isinstance(row_indexer, np.ndarray) and len(row_indexer) == 0:
# numpy does not like empty indexer combined with slice
# and we are setting nothing anyway
return self
indexer[0] = row_indexer
self.blocks[0].setitem(tuple(indexer), value)
return self
# No need to split if we either set all columns or on a single block
# manager
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1500,13 +1500,18 @@ def test_set_2d_casting_date_to_int(self, col, indexer):
)
tm.assert_frame_equal(df, expected)

@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
@pytest.mark.parametrize("has_ref", [True, False])
@pytest.mark.parametrize("col", [{}, {"name": "a"}])
def test_loc_setitem_reordering_with_all_true_indexer(self, col):
def test_loc_setitem_reordering_with_all_true_indexer(self, col, has_ref):
# GH#48701
n = 17
df = DataFrame({**col, "x": range(n), "y": range(n)})
value = df[["x", "y"]].copy()
expected = df.copy()
df.loc[n * [True], ["x", "y"]] = df[["x", "y"]]
if has_ref:
view = df[:] # noqa: F841
df.loc[n * [True], ["x", "y"]] = value
tm.assert_frame_equal(df, expected)

def test_loc_rhs_empty_warning(self):
Expand Down
89 changes: 89 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pytest

from pandas.errors import SettingWithCopyWarning
import pandas.util._test_decorators as td

from pandas.core.dtypes.base import _registry as ea_registry
Expand Down Expand Up @@ -1400,6 +1401,94 @@ def test_frame_setitem_empty_dataframe(self):
)
tm.assert_frame_equal(df, expected)

def test_iloc_setitem_view_2dblock(self, using_copy_on_write, warn_copy_on_write):
# https://github.com/pandas-dev/pandas/issues/60309
# Regression test: assign through ``.iloc`` on a frame obtained by selecting
# a subset of columns from a parent frame whose columns all hold integers,
# then check that the child is updated and (where asserted below) that the
# parent is left untouched.
df_parent = DataFrame(
{
"A": [1, 4, 1, 5],
"B": [2, 5, 2, 6],
"C": [3, 6, 1, 7],
"D": [8, 9, 10, 11],
}
)
# Pristine copy of the parent, used to detect unwanted write-through.
df_orig = df_parent.copy()
# Child frame: a column subset of the parent.
df = df_parent[["B", "C"]]

# Perform the iloc operation
# The same assignment (rows 1 and 3, all columns) is issued in each mode;
# only the surrounding warning/parent expectations differ.
if using_copy_on_write:
df.iloc[[1, 3], :] = [[2, 2], [2, 2]]

# Check that original DataFrame is unchanged
tm.assert_frame_equal(df_parent, df_orig)
elif warn_copy_on_write:
# TODO(COW): should this warn?
# with tm.assert_cow_warning(warn_copy_on_write):
# No warning is asserted in this mode for now (see the TODO above).
df.iloc[[1, 3], :] = [[2, 2], [2, 2]]
else:
# With ``chained_assignment`` set to "warn", the assignment is expected
# to emit SettingWithCopyWarning.
with pd.option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(SettingWithCopyWarning):
df.iloc[[1, 3], :] = [[2, 2], [2, 2]]

# Check that df is modified correctly
# Rows 1 and 3 of both "B" and "C" become 2; rows 0 and 2 keep their values.
expected = DataFrame({"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index)
tm.assert_frame_equal(df, expected)

# with setting to subset of columns
# ``0:3:2`` selects positional columns 0 and 2, i.e. "B" and "D" of the
# three-column child; "C" must stay unchanged.
df = df_parent[["B", "C", "D"]]
if using_copy_on_write or warn_copy_on_write:
df.iloc[[1, 3], 0:3:2] = [[2, 2], [2, 2]]
# Unlike the first part, here the parent is checked in both of these modes.
tm.assert_frame_equal(df_parent, df_orig)
else:
with pd.option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(SettingWithCopyWarning):
df.iloc[[1, 3], 0:3:2] = [[2, 2], [2, 2]]

expected = DataFrame(
{"B": [2, 2, 2, 2], "C": [3, 6, 1, 7], "D": [8, 2, 10, 2]}, index=df.index
)
tm.assert_frame_equal(df, expected)

# Each case is an (indexer, value) pair used as ``df.iloc[indexer] = value`` on
# a 3x4 frame. The row/column indexers mix lists, slices, Python ints,
# ``np.int64`` scalars and boolean masks.
@pytest.mark.parametrize(
"indexer, value",
[
(([0, 2], slice(None)), [[2, 2, 2, 2], [2, 2, 2, 2]]),
((slice(None), slice(None)), 2),
((0, [1, 3]), [2, 2]),
(([0], 1), [2]),
(([0], np.int64(1)), [2]),
((slice(None), np.int64(1)), [2, 2, 2]),
((slice(None, 2), np.int64(1)), [2, 2]),
(
(np.array([False, True, False]), np.array([False, True, False, True])),
[2, 2],
),
],
)
def test_setitem_2dblock_with_ref(
self, indexer, value, using_copy_on_write, warn_copy_on_write
):
# https://github.com/pandas-dev/pandas/issues/60309
# Assign through ``.iloc`` on a frame derived from a single-block parent and
# verify the result against the same assignment applied to a NumPy array.
arr = np.arange(12).reshape(3, 4)

# The parent gets its own copy of the data so ``arr`` stays pristine and
# can be used to build the expected result at the end.
df_parent = DataFrame(arr.copy(), columns=list("ABCD"))
# the test is specifically for the case where the df is backed by a single
# block (taking the non-split path)
assert df_parent._mgr.is_single_block
df_orig = df_parent.copy()
# Second frame obtained from the parent with a full slice.
# NOTE(review): presumably this shares the parent's data, which is the
# "ref" in the test name -- confirm.
df = df_parent[:]

# A warning is expected only when ``warn_copy_on_write`` is set.
with tm.assert_cow_warning(warn_copy_on_write):
df.iloc[indexer] = value

# Check that original DataFrame is unchanged
if using_copy_on_write:
tm.assert_frame_equal(df_parent, df_orig)

# Check that df is modified correctly
# Expected values come from applying the identical indexer/value to ``arr``.
arr[indexer] = value
expected = DataFrame(arr, columns=list("ABCD"))
tm.assert_frame_equal(df, expected)


def test_full_setter_loc_incompatible_dtype():
# https://github.com/pandas-dev/pandas/issues/55791
Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,22 @@ def frame_random_data_integer_multi_index():


class TestMultiIndexLoc:
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
@pytest.mark.parametrize("has_ref", [True, False])
def test_loc_setitem_frame_with_multiindex(
self, multiindex_dataframe_random_data, has_ref
):
frame = multiindex_dataframe_random_data
if has_ref:
view = frame[:]
frame.loc[("bar", "two"), "B"] = 5
assert frame.loc[("bar", "two"), "B"] == 5

# with integer labels
df = frame.copy()
df.columns = list(range(3))
if has_ref:
view = df[:] # noqa: F841
df.loc[("bar", "two"), 1] = 7
assert df.loc[("bar", "two"), 1] == 7

Expand Down
Loading
Loading