Skip to content

Commit fd2f7eb

Browse files
[backport 2.3.x] BUG: Fixed assign failure when with Copy-on-Write (#60941) (#62409)
Co-authored-by: ChiLin Chiu <[email protected]>
1 parent fd40f9a commit fd2f7eb

File tree

7 files changed

+225
-26
lines changed

7 files changed

+225
-26
lines changed

doc/source/whatsnew/v2.3.3.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@ Bug fixes
2727
with a compiled regex and custom flags (:issue:`62240`)
2828
- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
2929

30+
31+
Improvements and fixes for Copy-on-Write
32+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
33+
34+
Bug fixes
35+
^^^^^^^^^
36+
37+
- The :meth:`DataFrame.iloc` now works correctly with ``copy_on_write`` option when assigning values after subsetting the columns of a homogeneous DataFrame (:issue:`60309`)
38+
39+
3040
.. ---------------------------------------------------------------------------
3141
.. _whatsnew_233.contributors:
3242

pandas/core/internals/managers.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,27 @@ def setitem(self, indexer, value, warn: bool = True) -> Self:
405405
self._iset_split_block( # type: ignore[attr-defined]
406406
0, blk_loc, values
407407
)
408-
# first block equals values
409-
self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value)
408+
409+
indexer = list(indexer)
410+
# first block equals values we are setting to -> set to all columns
411+
if lib.is_integer(indexer[1]):
412+
col_indexer = 0
413+
elif len(blk_loc) > 1:
414+
col_indexer = slice(None) # type: ignore[assignment]
415+
else:
416+
col_indexer = np.arange(len(blk_loc)) # type: ignore[assignment]
417+
indexer[1] = col_indexer
418+
419+
row_indexer = indexer[0]
420+
if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2:
421+
# numpy cannot handle a 2d indexer in combo with a slice
422+
row_indexer = np.squeeze(row_indexer, axis=1)
423+
if isinstance(row_indexer, np.ndarray) and len(row_indexer) == 0:
424+
# numpy does not like empty indexer combined with slice
425+
# and we are setting nothing anyway
426+
return self
427+
indexer[0] = row_indexer
428+
self.blocks[0].setitem(tuple(indexer), value)
410429
return self
411430
# No need to split if we either set all columns or on a single block
412431
# manager

pandas/tests/frame/indexing/test_indexing.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1500,13 +1500,18 @@ def test_set_2d_casting_date_to_int(self, col, indexer):
15001500
)
15011501
tm.assert_frame_equal(df, expected)
15021502

1503+
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
1504+
@pytest.mark.parametrize("has_ref", [True, False])
15031505
@pytest.mark.parametrize("col", [{}, {"name": "a"}])
1504-
def test_loc_setitem_reordering_with_all_true_indexer(self, col):
1506+
def test_loc_setitem_reordering_with_all_true_indexer(self, col, has_ref):
15051507
# GH#48701
15061508
n = 17
15071509
df = DataFrame({**col, "x": range(n), "y": range(n)})
1510+
value = df[["x", "y"]].copy()
15081511
expected = df.copy()
1509-
df.loc[n * [True], ["x", "y"]] = df[["x", "y"]]
1512+
if has_ref:
1513+
view = df[:] # noqa: F841
1514+
df.loc[n * [True], ["x", "y"]] = value
15101515
tm.assert_frame_equal(df, expected)
15111516

15121517
def test_loc_rhs_empty_warning(self):

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.errors import SettingWithCopyWarning
67
import pandas.util._test_decorators as td
78

89
from pandas.core.dtypes.base import _registry as ea_registry
@@ -1400,6 +1401,94 @@ def test_frame_setitem_empty_dataframe(self):
14001401
)
14011402
tm.assert_frame_equal(df, expected)
14021403

1404+
def test_iloc_setitem_view_2dblock(self, using_copy_on_write, warn_copy_on_write):
1405+
# https://github.com/pandas-dev/pandas/issues/60309
1406+
df_parent = DataFrame(
1407+
{
1408+
"A": [1, 4, 1, 5],
1409+
"B": [2, 5, 2, 6],
1410+
"C": [3, 6, 1, 7],
1411+
"D": [8, 9, 10, 11],
1412+
}
1413+
)
1414+
df_orig = df_parent.copy()
1415+
df = df_parent[["B", "C"]]
1416+
1417+
# Perform the iloc operation
1418+
if using_copy_on_write:
1419+
df.iloc[[1, 3], :] = [[2, 2], [2, 2]]
1420+
1421+
# Check that original DataFrame is unchanged
1422+
tm.assert_frame_equal(df_parent, df_orig)
1423+
elif warn_copy_on_write:
1424+
# TODO(COW): should this warn?
1425+
# with tm.assert_cow_warning(warn_copy_on_write):
1426+
df.iloc[[1, 3], :] = [[2, 2], [2, 2]]
1427+
else:
1428+
with pd.option_context("chained_assignment", "warn"):
1429+
with tm.assert_produces_warning(SettingWithCopyWarning):
1430+
df.iloc[[1, 3], :] = [[2, 2], [2, 2]]
1431+
1432+
# Check that df is modified correctly
1433+
expected = DataFrame({"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index)
1434+
tm.assert_frame_equal(df, expected)
1435+
1436+
# with setting to subset of columns
1437+
df = df_parent[["B", "C", "D"]]
1438+
if using_copy_on_write or warn_copy_on_write:
1439+
df.iloc[[1, 3], 0:3:2] = [[2, 2], [2, 2]]
1440+
tm.assert_frame_equal(df_parent, df_orig)
1441+
else:
1442+
with pd.option_context("chained_assignment", "warn"):
1443+
with tm.assert_produces_warning(SettingWithCopyWarning):
1444+
df.iloc[[1, 3], 0:3:2] = [[2, 2], [2, 2]]
1445+
1446+
expected = DataFrame(
1447+
{"B": [2, 2, 2, 2], "C": [3, 6, 1, 7], "D": [8, 2, 10, 2]}, index=df.index
1448+
)
1449+
tm.assert_frame_equal(df, expected)
1450+
1451+
@pytest.mark.parametrize(
1452+
"indexer, value",
1453+
[
1454+
(([0, 2], slice(None)), [[2, 2, 2, 2], [2, 2, 2, 2]]),
1455+
((slice(None), slice(None)), 2),
1456+
((0, [1, 3]), [2, 2]),
1457+
(([0], 1), [2]),
1458+
(([0], np.int64(1)), [2]),
1459+
((slice(None), np.int64(1)), [2, 2, 2]),
1460+
((slice(None, 2), np.int64(1)), [2, 2]),
1461+
(
1462+
(np.array([False, True, False]), np.array([False, True, False, True])),
1463+
[2, 2],
1464+
),
1465+
],
1466+
)
1467+
def test_setitem_2dblock_with_ref(
1468+
self, indexer, value, using_copy_on_write, warn_copy_on_write
1469+
):
1470+
# https://github.com/pandas-dev/pandas/issues/60309
1471+
arr = np.arange(12).reshape(3, 4)
1472+
1473+
df_parent = DataFrame(arr.copy(), columns=list("ABCD"))
1474+
# the test is specifically for the case where the df is backed by a single
1475+
# block (taking the non-split path)
1476+
assert df_parent._mgr.is_single_block
1477+
df_orig = df_parent.copy()
1478+
df = df_parent[:]
1479+
1480+
with tm.assert_cow_warning(warn_copy_on_write):
1481+
df.iloc[indexer] = value
1482+
1483+
# Check that original DataFrame is unchanged
1484+
if using_copy_on_write:
1485+
tm.assert_frame_equal(df_parent, df_orig)
1486+
1487+
# Check that df is modified correctly
1488+
arr[indexer] = value
1489+
expected = DataFrame(arr, columns=list("ABCD"))
1490+
tm.assert_frame_equal(df, expected)
1491+
14031492

14041493
def test_full_setter_loc_incompatible_dtype():
14051494
# https://github.com/pandas-dev/pandas/issues/55791

pandas/tests/indexing/multiindex/test_loc.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,22 @@ def frame_random_data_integer_multi_index():
3333

3434

3535
class TestMultiIndexLoc:
36-
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
36+
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
37+
@pytest.mark.parametrize("has_ref", [True, False])
38+
def test_loc_setitem_frame_with_multiindex(
39+
self, multiindex_dataframe_random_data, has_ref
40+
):
3741
frame = multiindex_dataframe_random_data
42+
if has_ref:
43+
view = frame[:]
3844
frame.loc[("bar", "two"), "B"] = 5
3945
assert frame.loc[("bar", "two"), "B"] == 5
4046

4147
# with integer labels
4248
df = frame.copy()
4349
df.columns = list(range(3))
50+
if has_ref:
51+
view = df[:] # noqa: F841
4452
df.loc[("bar", "two"), 1] = 7
4553
assert df.loc[("bar", "two"), 1] == 7
4654

0 commit comments

Comments
 (0)