Skip to content

Commit e79f156

Browse files
BUG: Fixed assign failure when with Copy-on-Write (#60941)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 430a9a8 commit e79f156

File tree

8 files changed

+197
-31
lines changed

8 files changed

+197
-31
lines changed

doc/source/whatsnew/v2.3.3.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ Bug fixes
3737
with a compiled regex and custom flags (:issue:`62240`)
3838
- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
3939

40+
41+
Improvements and fixes for Copy-on-Write
42+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
43+
44+
Bug fixes
45+
^^^^^^^^^
46+
47+
- The :meth:`DataFrame.iloc` now works correctly with ``copy_on_write`` option when assigning values after subsetting the columns of a homogeneous DataFrame (:issue:`60309`)
48+
49+
4050
.. ---------------------------------------------------------------------------
4151
.. _whatsnew_233.contributors:
4252

pandas/core/internals/managers.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -572,8 +572,27 @@ def setitem(self, indexer, value) -> Self:
572572
self._iset_split_block( # type: ignore[attr-defined]
573573
0, blk_loc, values
574574
)
575-
# first block equals values
576-
self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value)
575+
576+
indexer = list(indexer)
577+
# first block equals values we are setting to -> set to all columns
578+
if lib.is_integer(indexer[1]):
579+
col_indexer = 0
580+
elif len(blk_loc) > 1:
581+
col_indexer = slice(None) # type: ignore[assignment]
582+
else:
583+
col_indexer = np.arange(len(blk_loc)) # type: ignore[assignment]
584+
indexer[1] = col_indexer
585+
586+
row_indexer = indexer[0]
587+
if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2:
588+
# numpy cannot handle a 2d indexer in combo with a slice
589+
row_indexer = np.squeeze(row_indexer, axis=1)
590+
if isinstance(row_indexer, np.ndarray) and len(row_indexer) == 0:
591+
# numpy does not like empty indexer combined with slice
592+
# and we are setting nothing anyway
593+
return self
594+
indexer[0] = row_indexer
595+
self.blocks[0].setitem(tuple(indexer), value)
577596
return self
578597
# No need to split if we either set all columns or on a single block
579598
# manager

pandas/tests/frame/indexing/test_indexing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,13 +1427,17 @@ def test_set_2d_casting_date_to_int(self, col, indexer):
14271427
)
14281428
tm.assert_frame_equal(df, expected)
14291429

1430+
@pytest.mark.parametrize("has_ref", [True, False])
14301431
@pytest.mark.parametrize("col", [{}, {"name": "a"}])
1431-
def test_loc_setitem_reordering_with_all_true_indexer(self, col):
1432+
def test_loc_setitem_reordering_with_all_true_indexer(self, col, has_ref):
14321433
# GH#48701
14331434
n = 17
14341435
df = DataFrame({**col, "x": range(n), "y": range(n)})
1436+
value = df[["x", "y"]].copy()
14351437
expected = df.copy()
1436-
df.loc[n * [True], ["x", "y"]] = df[["x", "y"]]
1438+
if has_ref:
1439+
view = df[:] # noqa: F841
1440+
df.loc[n * [True], ["x", "y"]] = value
14371441
tm.assert_frame_equal(df, expected)
14381442

14391443
def test_loc_rhs_empty_warning(self):

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1370,6 +1370,75 @@ def test_frame_setitem_empty_dataframe(self):
13701370
)
13711371
tm.assert_frame_equal(df, expected)
13721372

1373+
def test_iloc_setitem_view_2dblock(self):
1374+
# https://github.com/pandas-dev/pandas/issues/60309
1375+
df_parent = DataFrame(
1376+
{
1377+
"A": [1, 4, 1, 5],
1378+
"B": [2, 5, 2, 6],
1379+
"C": [3, 6, 1, 7],
1380+
"D": [8, 9, 10, 11],
1381+
}
1382+
)
1383+
df_orig = df_parent.copy()
1384+
df = df_parent[["B", "C"]]
1385+
1386+
# Perform the iloc operation
1387+
df.iloc[[1, 3], :] = [[2, 2], [2, 2]]
1388+
1389+
# Check that original DataFrame is unchanged
1390+
tm.assert_frame_equal(df_parent, df_orig)
1391+
1392+
# Check that df is modified correctly
1393+
expected = DataFrame({"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index)
1394+
tm.assert_frame_equal(df, expected)
1395+
1396+
# with setting to subset of columns
1397+
df = df_parent[["B", "C", "D"]]
1398+
df.iloc[[1, 3], 0:3:2] = [[2, 2], [2, 2]]
1399+
tm.assert_frame_equal(df_parent, df_orig)
1400+
expected = DataFrame(
1401+
{"B": [2, 2, 2, 2], "C": [3, 6, 1, 7], "D": [8, 2, 10, 2]}, index=df.index
1402+
)
1403+
tm.assert_frame_equal(df, expected)
1404+
1405+
@pytest.mark.parametrize(
1406+
"indexer, value",
1407+
[
1408+
(([0, 2], slice(None)), [[2, 2, 2, 2], [2, 2, 2, 2]]),
1409+
((slice(None), slice(None)), 2),
1410+
((0, [1, 3]), [2, 2]),
1411+
(([0], 1), [2]),
1412+
(([0], np.int64(1)), [2]),
1413+
((slice(None), np.int64(1)), [2, 2, 2]),
1414+
((slice(None, 2), np.int64(1)), [2, 2]),
1415+
(
1416+
(np.array([False, True, False]), np.array([False, True, False, True])),
1417+
[2, 2],
1418+
),
1419+
],
1420+
)
1421+
def test_setitem_2dblock_with_ref(self, indexer, value):
1422+
# https://github.com/pandas-dev/pandas/issues/60309
1423+
arr = np.arange(12).reshape(3, 4)
1424+
1425+
df_parent = DataFrame(arr.copy(), columns=list("ABCD"))
1426+
# the test is specifically for the case where the df is backed by a single
1427+
# block (taking the non-split path)
1428+
assert df_parent._mgr.is_single_block
1429+
df_orig = df_parent.copy()
1430+
df = df_parent[:]
1431+
1432+
df.iloc[indexer] = value
1433+
1434+
# Check that original DataFrame is unchanged
1435+
tm.assert_frame_equal(df_parent, df_orig)
1436+
1437+
# Check that df is modified correctly
1438+
arr[indexer] = value
1439+
expected = DataFrame(arr, columns=list("ABCD"))
1440+
tm.assert_frame_equal(df, expected)
1441+
13731442

13741443
def test_full_setter_loc_incompatible_dtype():
13751444
# https://github.com/pandas-dev/pandas/issues/55791

pandas/tests/frame/methods/test_update.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,14 +155,15 @@ def test_update_with_different_dtype(self):
155155
with pytest.raises(TypeError, match="Invalid value"):
156156
df.update({"c": Series(["foo"], index=[0])})
157157

158-
def test_update_modify_view(self, using_infer_string):
158+
@pytest.mark.parametrize("dtype", ["str", object])
159+
def test_update_modify_view(self, dtype):
159160
# GH#47188
160-
df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]})
161-
df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]})
161+
df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}, dtype=dtype)
162+
df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}, dtype=dtype)
162163
df2_orig = df2.copy()
163164
result_view = df2[:]
164165
df2.update(df)
165-
expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]})
166+
expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}, dtype=dtype)
166167
tm.assert_frame_equal(df2, expected)
167168
tm.assert_frame_equal(result_view, df2_orig)
168169

pandas/tests/indexing/multiindex/test_loc.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,21 @@ def frame_random_data_integer_multi_index():
2222

2323

2424
class TestMultiIndexLoc:
25-
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
25+
@pytest.mark.parametrize("has_ref", [True, False])
26+
def test_loc_setitem_frame_with_multiindex(
27+
self, multiindex_dataframe_random_data, has_ref
28+
):
2629
frame = multiindex_dataframe_random_data
30+
if has_ref:
31+
view = frame[:]
2732
frame.loc[("bar", "two"), "B"] = 5
2833
assert frame.loc[("bar", "two"), "B"] == 5
2934

3035
# with integer labels
3136
df = frame.copy()
3237
df.columns = list(range(3))
38+
if has_ref:
39+
view = df[:] # noqa: F841
3340
df.loc[("bar", "two"), 1] = 7
3441
assert df.loc[("bar", "two"), 1] == 7
3542

pandas/tests/indexing/test_iloc.py

Lines changed: 54 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,16 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key):
105105
expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
106106
tm.assert_frame_equal(df, expected)
107107

108-
def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array):
108+
@pytest.mark.parametrize("has_ref", [True, False])
109+
def test_iloc_setitem_ea_inplace(
110+
self, frame_or_series, index_or_series_or_array, has_ref
111+
):
109112
# GH#38952 Case with not setting a full column
110113
# IntegerArray without NAs
111114
arr = array([1, 2, 3, 4])
112115
obj = frame_or_series(arr.to_numpy("i8"))
116+
if has_ref:
117+
view = obj[:] # noqa: F841
113118

114119
if frame_or_series is Series:
115120
values = obj.values
@@ -125,11 +130,12 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array
125130
tm.assert_equal(obj, expected)
126131

127132
# Check that we are actually in-place
128-
if frame_or_series is Series:
129-
assert obj.values is not values
130-
assert np.shares_memory(obj.values, values)
131-
else:
132-
assert np.shares_memory(obj[0].values, values)
133+
if not has_ref:
134+
if frame_or_series is Series:
135+
assert obj.values is not values
136+
assert np.shares_memory(obj.values, values)
137+
else:
138+
assert np.shares_memory(obj[0].values, values)
133139

134140
def test_is_scalar_access(self):
135141
# GH#32085 index with duplicates doesn't matter for _is_scalar_access
@@ -426,12 +432,15 @@ def test_iloc_getitem_slice_dups(self):
426432
tm.assert_frame_equal(df.iloc[10:, :2], df2)
427433
tm.assert_frame_equal(df.iloc[10:, 2:], df1)
428434

429-
def test_iloc_setitem(self):
435+
@pytest.mark.parametrize("has_ref", [True, False])
436+
def test_iloc_setitem(sel, has_ref):
430437
df = DataFrame(
431438
np.random.default_rng(2).standard_normal((4, 4)),
432439
index=np.arange(0, 8, 2),
433440
columns=np.arange(0, 12, 3),
434441
)
442+
if has_ref:
443+
view = df[:] # noqa: F841
435444

436445
df.iloc[1, 1] = 1
437446
result = df.iloc[1, 1]
@@ -448,27 +457,35 @@ def test_iloc_setitem(self):
448457
expected = Series([0, 1, 0], index=[4, 5, 6])
449458
tm.assert_series_equal(s, expected)
450459

451-
def test_iloc_setitem_axis_argument(self):
460+
@pytest.mark.parametrize("has_ref", [True, False])
461+
def test_iloc_setitem_axis_argument(self, has_ref):
452462
# GH45032
453463
df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
454464
df[1] = df[1].astype(object)
465+
if has_ref:
466+
view = df[:]
455467
expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]])
456468
expected[1] = expected[1].astype(object)
457469
df.iloc(axis=0)[2] = 5
458470
tm.assert_frame_equal(df, expected)
459471

460472
df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
461473
df[1] = df[1].astype(object)
474+
if has_ref:
475+
view = df[:] # noqa: F841
462476
expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]])
463477
expected[1] = expected[1].astype(object)
464478
df.iloc(axis=1)[2] = 5
465479
tm.assert_frame_equal(df, expected)
466480

467-
def test_iloc_setitem_list(self):
481+
@pytest.mark.parametrize("has_ref", [True, False])
482+
def test_iloc_setitem_list(self, has_ref):
468483
# setitem with an iloc list
469484
df = DataFrame(
470485
np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]
471486
)
487+
if has_ref:
488+
view = df[:] # noqa: F841
472489
df.iloc[[0, 1], [1, 2]]
473490
df.iloc[[0, 1], [1, 2]] += 100
474491

@@ -663,12 +680,15 @@ def test_iloc_getitem_doc_issue(self):
663680
expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4])
664681
tm.assert_frame_equal(result, expected)
665682

666-
def test_iloc_setitem_series(self):
683+
@pytest.mark.parametrize("has_ref", [True, False])
684+
def test_iloc_setitem_series(self, has_ref):
667685
df = DataFrame(
668686
np.random.default_rng(2).standard_normal((10, 4)),
669687
index=list("abcdefghij"),
670688
columns=list("ABCD"),
671689
)
690+
if has_ref:
691+
view = df[:] # noqa: F841
672692

673693
df.iloc[1, 1] = 1
674694
result = df.iloc[1, 1]
@@ -697,32 +717,40 @@ def test_iloc_setitem_series(self):
697717
expected = Series([0, 1, 2, 3, 4, 5])
698718
tm.assert_series_equal(result, expected)
699719

700-
def test_iloc_setitem_list_of_lists(self):
720+
@pytest.mark.parametrize("has_ref", [True, False])
721+
def test_iloc_setitem_list_of_lists(self, has_ref):
701722
# GH 7551
702723
# list-of-list is set incorrectly in mixed vs. single dtyped frames
703724
df = DataFrame(
704725
{"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")}
705726
)
727+
if has_ref:
728+
view = df[:]
706729
df.iloc[2:4] = [[10, 11], [12, 13]]
707730
expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]})
708731
tm.assert_frame_equal(df, expected)
709732

710733
df = DataFrame(
711734
{"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")}
712735
)
736+
if has_ref:
737+
view = df[:] # noqa: F841
713738
df.iloc[2:4] = [["x", 11], ["y", 13]]
714739
expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]})
715740
tm.assert_frame_equal(df, expected)
716741

742+
@pytest.mark.parametrize("has_ref", [True, False])
717743
@pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])])
718744
@pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
719-
def test_iloc_setitem_with_scalar_index(self, indexer, value):
745+
def test_iloc_setitem_with_scalar_index(self, has_ref, indexer, value):
720746
# GH #19474
721747
# assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
722748
# elementwisely, not using "setter('A', ['Z'])".
723749

724750
# Set object type to avoid upcast when setting "Z"
725751
df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]).astype({"A": object})
752+
if has_ref:
753+
view = df[:] # noqa: F841
726754
df.iloc[0, indexer] = value
727755
result = df.iloc[0, 0]
728756

@@ -1048,25 +1076,33 @@ def test_iloc_setitem_bool_indexer(self, klass):
10481076
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
10491077
tm.assert_frame_equal(df, expected)
10501078

1079+
@pytest.mark.parametrize("has_ref", [True, False])
10511080
@pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
1052-
def test_iloc_setitem_pure_position_based(self, indexer):
1081+
def test_iloc_setitem_pure_position_based(self, indexer, has_ref):
10531082
# GH#22046
10541083
df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
10551084
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
1085+
if has_ref:
1086+
view = df2[:] # noqa: F841
10561087
df2.iloc[:, indexer] = df1.iloc[:, [0]]
10571088
expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
10581089
tm.assert_frame_equal(df2, expected)
10591090

1060-
def test_iloc_setitem_dictionary_value(self):
1091+
@pytest.mark.parametrize("has_ref", [True, False])
1092+
def test_iloc_setitem_dictionary_value(self, has_ref):
10611093
# GH#37728
10621094
df = DataFrame({"x": [1, 2], "y": [2, 2]})
1095+
if has_ref:
1096+
view = df[:]
10631097
rhs = {"x": 9, "y": 99}
10641098
df.iloc[1] = rhs
10651099
expected = DataFrame({"x": [1, 9], "y": [2, 99]})
10661100
tm.assert_frame_equal(df, expected)
10671101

10681102
# GH#38335 same thing, mixed dtypes
10691103
df = DataFrame({"x": [1, 2], "y": [2.0, 2.0]})
1104+
if has_ref:
1105+
view = df[:] # noqa: F841
10701106
df.iloc[1] = rhs
10711107
expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]})
10721108
tm.assert_frame_equal(df, expected)
@@ -1272,10 +1308,13 @@ def test_iloc_float_raises(self, series_with_simple_index, frame_or_series):
12721308
with pytest.raises(IndexError, match=_slice_iloc_msg):
12731309
obj.iloc[3.0] = 0
12741310

1275-
def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
1311+
@pytest.mark.parametrize("has_ref", [True, False])
1312+
def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame, has_ref):
12761313
with pytest.raises(IndexingError, match="Too many indexers"):
12771314
float_frame.iloc[:, :, :]
12781315

1316+
if has_ref:
1317+
view = float_frame[:] # noqa: F841
12791318
with pytest.raises(IndexError, match="too many indices for array"):
12801319
# GH#32257 we let numpy do validation, get their exception
12811320
float_frame.iloc[:, :, :] = 1

0 commit comments

Comments
 (0)