From 4f77590825a6671dcd9fe8277da56835e81bc64e Mon Sep 17 00:00:00 2001 From: Chilin Date: Sun, 16 Feb 2025 03:37:52 +0800 Subject: [PATCH 01/16] BUG: fix assign failure issue when Copy-on-Write --- pandas/core/internals/managers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a3738bb25f56c..92dfd64c3a86f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -572,7 +572,13 @@ def setitem(self, indexer, value) -> Self: 0, blk_loc, values ) # first block equals values - self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value) + is_full_column_selection = indexer[1] == slice(None) + col_indexer = ( + slice(None) + if is_full_column_selection + else np.arange(len(blk_loc)) + ) + self.blocks[0].setitem((indexer[0], col_indexer), value) return self # No need to split if we either set all columns or on a single block # manager From 6921ebbc8d64df8e3f69efbaabb473f36759eeb9 Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 16 Feb 2025 22:40:16 +0800 Subject: [PATCH 02/16] Check if data type is slice before comparing with slice(None) --- pandas/core/internals/managers.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 92dfd64c3a86f..289667c309d45 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -572,12 +572,10 @@ def setitem(self, indexer, value) -> Self: 0, blk_loc, values ) # first block equals values - is_full_column_selection = indexer[1] == slice(None) - col_indexer = ( - slice(None) - if is_full_column_selection - else np.arange(len(blk_loc)) - ) + if isinstance(indexer[1], slice) and indexer[1] == slice(None): + col_indexer = slice(None) + else: + col_indexer = np.arange(len(blk_loc)) self.blocks[0].setitem((indexer[0], col_indexer), value) return self # No need to split if we either set all columns or on a single block From 69da0209dd9e7eb6f161ad9e42bc231dd4611ac1 Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 16 Feb 2025 22:41:19 +0800 Subject: [PATCH 03/16] Add new testcase for assignment fails with enable Copy-on-Write --- pandas/tests/indexing/test_iloc.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 2f6998a85c80b..ef9f64e28ffa7 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -22,6 +22,7 @@ date_range, interval_range, isna, + option_context, to_datetime, ) import pandas._testing as tm @@ -1448,3 +1449,26 @@ def test_iloc_nullable_int64_size_1_nan(self): result = DataFrame({"a": ["test"], "b": [np.nan]}) with pytest.raises(TypeError, match="Invalid value"): result.loc[:, "b"] = result.loc[:, "b"].astype("Int64") + + def test_iloc_setitem_list_with_cow(self): + # GH#60309 + with option_context("mode.copy_on_write", True): + dftest = DataFrame( + {"A": [1, 4, 1, 5], "B": [2, 5, 2, 6], "C": [3, 6, 1, 7]} + ) + df = dftest[["B", "C"]] + + # Perform the iloc operation + df.iloc[[1, 3], :] = [[2, 2], [2, 2]] + + # Check that original DataFrame is unchanged + expected_orig = DataFrame( + {"A": [1, 4, 1, 5], "B": [2, 5, 2, 6], "C": [3, 6, 1, 7]} + ) + tm.assert_frame_equal(dftest, expected_orig) + + # Check that view is modified correctly + expected_view = DataFrame( + {"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index + ) + tm.assert_frame_equal(df, expected_view) From 41926d4ac2b4a44413aacb91ac380b322d16c96d Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 16 Feb 2025 22:44:09 +0800 Subject: [PATCH 04/16] Add new entry for whatsnew/v2.3.0.rst --- doc/source/whatsnew/v2.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 8bdddb5b7f85d..4c180fa33ccf8 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -37,6 +37,7 @@ Other enhancements updated to work correctly with NumPy >= 2 (:issue:`57739`) - :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`) - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype`` (:issue:`60663`) +- The :meth:`DataFrame.iloc` now works correctly with ``copy_on_write`` option (:issue:`60309`) - The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns when backed by PyArrow (:issue:`60633`) - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`) From 1f9215cdea1f77dbf5b89f33fb117b0b77bfe5f1 Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 16 Feb 2025 23:52:46 +0800 Subject: [PATCH 05/16] Add type hint --- pandas/core/internals/managers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 289667c309d45..2e6701916a8d4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -572,6 +572,7 @@ def setitem(self, indexer, value) -> Self: 0, blk_loc, values ) # first block equals values + col_indexer: slice | np.ndarray if isinstance(indexer[1], slice) and indexer[1] == slice(None): col_indexer = slice(None) else: From fcffbf22d51968ec2b63f8c13ae98003db13212c Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 2 Mar 2025 16:41:30 +0800 Subject: [PATCH 06/16] Rename view to df in test_iloc.py --- pandas/tests/indexing/test_iloc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index ef9f64e28ffa7..d38169c76d584 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1467,8 +1467,8 @@ def test_iloc_setitem_list_with_cow(self): ) tm.assert_frame_equal(dftest, expected_orig) - # Check that view is modified correctly - expected_view = DataFrame( + # Check that df is modified correctly + expected_df = DataFrame( {"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index ) - tm.assert_frame_equal(df, expected_view) + tm.assert_frame_equal(df, expected_df) From 09e6221e5a80a60180a63529f4125c6c4392689d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Sep 2025 16:23:33 +0200 Subject: [PATCH 07/16] fix --- pandas/core/internals/managers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9a85594a25e0e..68b5b14fa84b1 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -572,13 +572,13 @@ def setitem(self, indexer, value) -> Self: self._iset_split_block( # type: ignore[attr-defined] 0, blk_loc, values ) - # first block equals values - col_indexer: slice | np.ndarray - if isinstance(indexer[1], slice) and indexer[1] == slice(None): - col_indexer = slice(None) - else: - col_indexer = np.arange(len(blk_loc)) - self.blocks[0].setitem((indexer[0], col_indexer), value) + # first block equals values we are setting to -> select all columns + col_indexer = slice(None) + row_indexer = indexer[0] + if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2: + # numpy cannot handle a 2d indexer in combo with a slice + row_indexer = np.squeeze(row_indexer, axis=1) + self.blocks[0].setitem((row_indexer, col_indexer), value) return self # No need to split if we either set all columns or on a single block # manager From 2753e31576280f5dca94e3824e49673b40efb4cc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Sep 2025 16:28:59 +0200 Subject: [PATCH 08/16] move test --- pandas/tests/frame/indexing/test_setitem.py | 16 ++++++++++++++ pandas/tests/indexing/test_iloc.py | 24 --------------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 20dd7b0c4d3e7..25dbb70bb1cc9 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1370,6 +1370,22 @@ def test_frame_setitem_empty_dataframe(self): ) tm.assert_frame_equal(df, expected) + def test_iloc_setitem_view_2dblock(self): + # https://github.com/pandas-dev/pandas/issues/60309 + df = DataFrame({"A": [1, 4, 1, 5], "B": [2, 5, 2, 6], "C": [3, 6, 1, 7]}) + df_orig = df.copy() + df_sub = df[["B", "C"]] + + # Perform the iloc operation + df_sub.iloc[[1, 3], :] = [[2, 2], [2, 2]] + + # Check that original DataFrame is unchanged + tm.assert_frame_equal(df, df_orig) + + # Check that df is modified correctly + expected = DataFrame({"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index) + tm.assert_frame_equal(df_sub, expected) + def test_full_setter_loc_incompatible_dtype(): # https://github.com/pandas-dev/pandas/issues/55791 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index db7a3c5c6588a..c04ea129590bc 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -23,7 +23,6 @@ date_range, interval_range, isna, - option_context, to_datetime, ) import pandas._testing as tm @@ -1484,29 +1483,6 @@ def test_iloc_nullable_int64_size_1_nan(self): with pytest.raises(TypeError, match="Invalid value"): result.loc[:, "b"] = result.loc[:, "b"].astype("Int64") - def test_iloc_setitem_list_with_cow(self): - # GH#60309 - with option_context("mode.copy_on_write", True): - dftest = DataFrame( - {"A": [1, 4, 1, 5], "B": [2, 5, 2, 6], "C": [3, 6, 1, 7]} - ) - df = dftest[["B", "C"]] - - # Perform the iloc operation - df.iloc[[1, 3], :] = [[2, 2], [2, 2]] - - # Check that original DataFrame is unchanged - expected_orig = DataFrame( - {"A": [1, 4, 1, 5], "B": [2, 5, 2, 6], "C": [3, 6, 1, 7]} - ) - tm.assert_frame_equal(dftest, expected_orig) - - # Check that df is modified correctly - expected_df = DataFrame( - {"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index - ) - tm.assert_frame_equal(df, expected_df) - def test_iloc_arrow_extension_array(self): # GH#61311 pytest.importorskip("pyarrow") From a98de7f7c8dbe9d905db0b069d156259aa2966b7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Sep 2025 16:34:31 +0200 Subject: [PATCH 09/16] add additional test case from the comments --- pandas/core/internals/managers.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 28 ++++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 68b5b14fa84b1..e077c15288ac8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -572,7 +572,7 @@ def setitem(self, indexer, value) -> Self: self._iset_split_block( # type: ignore[attr-defined] 0, blk_loc, values ) - # first block equals values we are setting to -> select all columns + # first block equals values we are setting to -> set to all columns col_indexer = slice(None) row_indexer = indexer[0] if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2: diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 25dbb70bb1cc9..555b26bcd7b79 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1372,19 +1372,35 @@ def test_frame_setitem_empty_dataframe(self): def test_iloc_setitem_view_2dblock(self): # https://github.com/pandas-dev/pandas/issues/60309 - df = DataFrame({"A": [1, 4, 1, 5], "B": [2, 5, 2, 6], "C": [3, 6, 1, 7]}) - df_orig = df.copy() - df_sub = df[["B", "C"]] + df_parent = DataFrame( + { + "A": [1, 4, 1, 5], + "B": [2, 5, 2, 6], + "C": [3, 6, 1, 7], + "D": [8, 9, 10, 11], + } + ) + df_orig = df_parent.copy() + df = df_parent[["B", "C"]] # Perform the iloc operation - df_sub.iloc[[1, 3], :] = [[2, 2], [2, 2]] + df.iloc[[1, 3], :] = [[2, 2], [2, 2]] # Check that original DataFrame is unchanged - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df_parent, df_orig) # Check that df is modified correctly expected = DataFrame({"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index) - tm.assert_frame_equal(df_sub, expected) + tm.assert_frame_equal(df, expected) + + # with setting to subset of columns + df = df_parent[["B", "C", "D"]] + df.iloc[[1, 3], 0:3:2] = [[2, 2], [2, 2]] + tm.assert_frame_equal(df_parent, df_orig) + expected = DataFrame( + {"B": [2, 2, 2, 2], "C": [3, 6, 1, 7], "D": [8, 2, 10, 2]}, index=df.index + ) + tm.assert_frame_equal(df, expected) def test_full_setter_loc_incompatible_dtype(): From e1f70401ca442ae513662973eddcbd7bac8fedf4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Sep 2025 16:37:33 +0200 Subject: [PATCH 10/16] move whatsnew note to 2.3.3 --- doc/source/whatsnew/v2.3.0.rst | 1 - doc/source/whatsnew/v2.3.3.rst | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index f0b12403ca857..a174b3bc0bea2 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -125,7 +125,6 @@ Other enhancements - :meth:`Series.str.decode` result now has :class:`StringDtype` when ``future.infer_string`` is True (:issue:`60709`) - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with :class:`StringDtype` (:issue:`60663`) - Improved ``repr`` of :class:`.NumpyExtensionArray` to account for NEP51 (:issue:`61085`) -- The :meth:`DataFrame.iloc` now works correctly with ``copy_on_write`` option when assigning values after subsetting the columns of a DataFrame and using a slice (:issue:`60309`) - The :meth:`Series.str.decode` has gained the argument ``dtype`` to control the dtype of the result (:issue:`60940`) - The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for :class:`StringDtype` columns (:issue:`60633`) - The :meth:`~Series.sum` reduction is now implemented for :class:`StringDtype` columns (:issue:`59853`) diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst index cbde6f52d4472..2326f3d35ed5c 100644 --- a/doc/source/whatsnew/v2.3.3.rst +++ b/doc/source/whatsnew/v2.3.3.rst @@ -25,6 +25,16 @@ Bug fixes - Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch`` with a compiled regex and custom flags (:issue:`62240`) + +Improvements and fixes for Copy-on-Write +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Bug fixes +^^^^^^^^^ + +- The :meth:`DataFrame.iloc` now works correctly with ``copy_on_write`` option when assigning values after subsetting the columns of a homogeneous DataFrame (:issue:`60309`) + + .. --------------------------------------------------------------------------- .. _whatsnew_233.contributors: From 11b006cb2ac106a03da79fb2d21754724cdbf8d4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 20 Sep 2025 20:39:24 +0200 Subject: [PATCH 11/16] try fix --- pandas/core/internals/managers.py | 9 ++++++++- pandas/tests/frame/methods/test_update.py | 9 +++++---- pandas/tests/indexing/test_loc.py | 7 ++++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e077c15288ac8..812ab66d29658 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -573,11 +573,18 @@ def setitem(self, indexer, value) -> Self: 0, blk_loc, values ) # first block equals values we are setting to -> set to all columns - col_indexer = slice(None) + if len(blk_loc) > 1: + col_indexer = slice(None) + else: + col_indexer = np.arange(len(blk_loc)) row_indexer = indexer[0] if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2: # numpy cannot handle a 2d indexer in combo with a slice row_indexer = np.squeeze(row_indexer, axis=1) + if len(row_indexer) == 0: + # numpy does not like empty indexer combined with slice + # and we are setting nothing anyway + return self self.blocks[0].setitem((row_indexer, col_indexer), value) return self # No need to split if we either set all columns or on a single block diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index ea63b2264d4f6..f91238104ec89 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -155,14 +155,15 @@ def test_update_with_different_dtype(self): with pytest.raises(TypeError, match="Invalid value"): df.update({"c": Series(["foo"], index=[0])}) - def test_update_modify_view(self, using_infer_string): + @pytest.mark.parametrize("dtype", ["str", object]) + def test_update_modify_view(self, dtype): # GH#47188 - df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}) - df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}) + df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}, dtype=dtype) + df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}, dtype=dtype) df2_orig = df2.copy() result_view = df2[:] df2.update(df) - expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}) + expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}, dtype=dtype) tm.assert_frame_equal(df2, expected) tm.assert_frame_equal(result_view, df2_orig) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 05f7b9cc7c40e..0c0855fa8c509 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3339,11 +3339,12 @@ def test_loc_index_alignment_for_series(self): expected = DataFrame({"a": [999, 200], "b": [3, 4]}) tm.assert_frame_equal(expected, df) - def test_loc_reindexing_of_empty_index(self): + @pytest.mark.parametrize("dtype", ["str", object]) + def test_loc_reindexing_of_empty_index(self, dtype): # GH 57735 - df = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) + df = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"], dtype=dtype) df.loc[Series([False] * 4, index=df.index, name=0), 0] = df[0] - expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) + expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"], dtype=dtype) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( From c469abdf4a5a119eeff93f62f03b55f216b2c97f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 20 Sep 2025 21:47:22 +0200 Subject: [PATCH 12/16] fixup --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 812ab66d29658..121beb9b59889 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -581,7 +581,7 @@ def setitem(self, indexer, value) -> Self: if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2: # numpy cannot handle a 2d indexer in combo with a slice row_indexer = np.squeeze(row_indexer, axis=1) - if len(row_indexer) == 0: + if isinstance(row_indexer, np.ndarray) and len(row_indexer) == 0: # numpy does not like empty indexer combined with slice # and we are setting nothing anyway return self From 4ed65b5970890756541853a7cb9b4ecc7a9b14ae Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 21 Sep 2025 11:01:13 +0200 Subject: [PATCH 13/16] expand test coverage + more fixes --- pandas/core/internals/managers.py | 11 +++- pandas/tests/frame/indexing/test_indexing.py | 8 ++- pandas/tests/indexing/multiindex/test_loc.py | 9 ++- pandas/tests/indexing/test_iloc.py | 69 +++++++++++++++----- pandas/tests/indexing/test_loc.py | 24 +++++-- 5 files changed, 97 insertions(+), 24 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 121beb9b59889..6c6a601190381 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -572,11 +572,17 @@ def setitem(self, indexer, value) -> Self: self._iset_split_block( # type: ignore[attr-defined] 0, blk_loc, values ) + + indexer = list(indexer) # first block equals values we are setting to -> set to all columns - if len(blk_loc) > 1: + if isinstance(indexer[1], int): + col_indexer = 0 + elif len(blk_loc) > 1: col_indexer = slice(None) else: col_indexer = np.arange(len(blk_loc)) + indexer[1] = col_indexer + row_indexer = indexer[0] if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2: # numpy cannot handle a 2d indexer in combo with a slice @@ -585,7 +591,8 @@ def setitem(self, indexer, value) -> Self: # numpy does not like empty indexer combined with slice # and we are setting nothing anyway return self - self.blocks[0].setitem((row_indexer, col_indexer), value) + indexer[0] = row_indexer + self.blocks[0].setitem(tuple(indexer), value) return self # No need to split if we either set all columns or on a single block # manager diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 0c99b08cb30c4..2b36c1135d36d 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1427,13 +1427,17 @@ def test_set_2d_casting_date_to_int(self, col, indexer): ) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("has_ref", [True, False]) @pytest.mark.parametrize("col", [{}, {"name": "a"}]) - def test_loc_setitem_reordering_with_all_true_indexer(self, col): + def test_loc_setitem_reordering_with_all_true_indexer(self, col, has_ref): # GH#48701 n = 17 df = DataFrame({**col, "x": range(n), "y": range(n)}) + value = df[["x", "y"]].copy() expected = df.copy() - df.loc[n * [True], ["x", "y"]] = df[["x", "y"]] + if has_ref: + view = df[:] # noqa: F841 + df.loc[n * [True], ["x", "y"]] = value tm.assert_frame_equal(df, expected) def test_loc_rhs_empty_warning(self): diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 70d71de66d3cc..51c8d5b3569f5 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -22,14 +22,21 @@ def frame_random_data_integer_multi_index(): class TestMultiIndexLoc: - def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_loc_setitem_frame_with_multiindex( + self, multiindex_dataframe_random_data, has_ref + ): frame = multiindex_dataframe_random_data + if has_ref: + view = frame[:] frame.loc[("bar", "two"), "B"] = 5 assert frame.loc[("bar", "two"), "B"] == 5 # with integer labels df = frame.copy() df.columns = list(range(3)) + if has_ref: + view = df[:] # noqa: F841 df.loc[("bar", "two"), 1] = 7 assert df.loc[("bar", "two"), 1] == 7 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index c04ea129590bc..4c8a434462714 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -105,11 +105,16 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key): expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)}) tm.assert_frame_equal(df, expected) - def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_iloc_setitem_ea_inplace( + self, frame_or_series, index_or_series_or_array, has_ref + ): # GH#38952 Case with not setting a full column # IntegerArray without NAs arr = array([1, 2, 3, 4]) obj = frame_or_series(arr.to_numpy("i8")) + if has_ref: + view = obj[:] # noqa: F841 if frame_or_series is Series: values = obj.values @@ -125,11 +130,12 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array tm.assert_equal(obj, expected) # Check that we are actually in-place - if frame_or_series is Series: - assert obj.values is not values - assert np.shares_memory(obj.values, values) - else: - assert np.shares_memory(obj[0].values, values) + if not has_ref: + if frame_or_series is Series: + assert obj.values is not values + assert np.shares_memory(obj.values, values) + else: + assert np.shares_memory(obj[0].values, values) def test_is_scalar_access(self): # GH#32085 index with duplicates doesn't matter for _is_scalar_access @@ -426,12 +432,15 @@ def test_iloc_getitem_slice_dups(self): tm.assert_frame_equal(df.iloc[10:, :2], df2) tm.assert_frame_equal(df.iloc[10:, 2:], df1) - def test_iloc_setitem(self): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_iloc_setitem(sel, has_ref): df = DataFrame( np.random.default_rng(2).standard_normal((4, 4)), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3), ) + if has_ref: + view = df[:] # noqa: F841 df.iloc[1, 1] = 1 result = df.iloc[1, 1] @@ -448,10 +457,13 @@ def test_iloc_setitem(self): expected = Series([0, 1, 0], index=[4, 5, 6]) tm.assert_series_equal(s, expected) - def test_iloc_setitem_axis_argument(self): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_iloc_setitem_axis_argument(self, has_ref): # GH45032 df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]]) df[1] = df[1].astype(object) + if has_ref: + view = df[:] expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]]) expected[1] = expected[1].astype(object) df.iloc(axis=0)[2] = 5 @@ -459,16 +471,21 @@ def test_iloc_setitem_axis_argument(self): df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]]) df[1] = df[1].astype(object) + if has_ref: + view = df[:] # noqa: F841 expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]]) expected[1] = expected[1].astype(object) df.iloc(axis=1)[2] = 5 tm.assert_frame_equal(df, expected) - def test_iloc_setitem_list(self): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_iloc_setitem_list(self, has_ref): # setitem with an iloc list df = DataFrame( np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"] ) + if has_ref: + view = df[:] # noqa: F841 df.iloc[[0, 1], [1, 2]] df.iloc[[0, 1], [1, 2]] += 100 @@ -663,12 +680,15 @@ def test_iloc_getitem_doc_issue(self): expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4]) tm.assert_frame_equal(result, expected) - def test_iloc_setitem_series(self): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_iloc_setitem_series(self, has_ref): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), index=list("abcdefghij"), columns=list("ABCD"), ) + if has_ref: + view = df[:] # noqa: F841 df.iloc[1, 1] = 1 result = df.iloc[1, 1] @@ -697,12 +717,15 @@ def test_iloc_setitem_series(self): expected = Series([0, 1, 2, 3, 4, 5]) tm.assert_series_equal(result, expected) - def test_iloc_setitem_list_of_lists(self): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_iloc_setitem_list_of_lists(self, has_ref): # GH 7551 # list-of-list is set incorrectly in mixed vs. single dtyped frames df = DataFrame( {"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")} ) + if has_ref: + view = df[:] df.iloc[2:4] = [[10, 11], [12, 13]] expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]}) tm.assert_frame_equal(df, expected) @@ -710,19 +733,24 @@ def test_iloc_setitem_list_of_lists(self): df = DataFrame( {"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")} ) + if has_ref: + view = df[:] # noqa: F841 df.iloc[2:4] = [["x", 11], ["y", 13]] expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]}) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("has_ref", [True, False]) @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])]) @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) - def test_iloc_setitem_with_scalar_index(self, indexer, value): + def test_iloc_setitem_with_scalar_index(self, has_ref, indexer, value): # GH #19474 # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated # elementwisely, not using "setter('A', ['Z'])". # Set object type to avoid upcast when setting "Z" df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]).astype({"A": object}) + if has_ref: + view = df[:] # noqa: F841 df.iloc[0, indexer] = value result = df.iloc[0, 0] @@ -1048,18 +1076,24 @@ def test_iloc_setitem_bool_indexer(self, klass): expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("has_ref", [True, False]) @pytest.mark.parametrize("indexer", [[1], slice(1, 2)]) - def test_iloc_setitem_pure_position_based(self, indexer): + def test_iloc_setitem_pure_position_based(self, indexer, has_ref): # GH#22046 df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + if has_ref: + view = df2[:] # noqa: F841 df2.iloc[:, indexer] = df1.iloc[:, [0]] expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) tm.assert_frame_equal(df2, expected) - def test_iloc_setitem_dictionary_value(self): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_iloc_setitem_dictionary_value(self, has_ref): # GH#37728 df = DataFrame({"x": [1, 2], "y": [2, 2]}) + if has_ref: + view = df[:] rhs = {"x": 9, "y": 99} df.iloc[1] = rhs expected = DataFrame({"x": [1, 9], "y": [2, 99]}) @@ -1067,6 +1101,8 @@ def test_iloc_setitem_dictionary_value(self): # GH#38335 same thing, mixed dtypes df = DataFrame({"x": [1, 2], "y": [2.0, 2.0]}) + if has_ref: + view = df[:] # noqa: F841 df.iloc[1] = rhs expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]}) tm.assert_frame_equal(df, expected) @@ -1272,10 +1308,13 @@ def test_iloc_float_raises(self, series_with_simple_index, frame_or_series): with pytest.raises(IndexError, match=_slice_iloc_msg): obj.iloc[3.0] = 0 - def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame, has_ref): with pytest.raises(IndexingError, match="Too many indexers"): float_frame.iloc[:, :, :] + if has_ref: + view = float_frame[:] # noqa: F841 with pytest.raises(IndexError, match="too many indices for array"): # GH#32257 we let numpy do validation, get their exception float_frame.iloc[:, :, :] = 1 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 0c0855fa8c509..6aa1666f1b912 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -722,9 +722,12 @@ def test_loc_modify_datetime(self): tm.assert_frame_equal(df, expected) - def test_loc_setitem_frame_with_reindex(self): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_loc_setitem_frame_with_reindex(self, has_ref): # GH#6254 setting issue df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float) + if has_ref: + view = df[:] # noqa: F841 df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") # setting integer values into a float dataframe with loc is inplace, @@ -755,7 +758,8 @@ def test_loc_setitem_frame_with_inverted_slice(self): expected = DataFrame({"A": [3.0, 2.0, 1.0], "B": "string"}, index=[1, 2, 3]) tm.assert_frame_equal(df, expected) - def test_loc_setitem_empty_frame(self): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_loc_setitem_empty_frame(self, has_ref): # GH#6252 setting with an empty frame keys1 = ["@" + str(i) for i in range(5)] val1 = np.arange(5, dtype="int64") @@ -766,6 +770,8 @@ def test_loc_setitem_empty_frame(self): index = list(set(keys1).union(keys2)) df = DataFrame(index=index) df["A"] = np.nan + if has_ref: + view = df[:] # noqa: F841 df.loc[keys1, "A"] = val1 df["B"] = np.nan @@ -780,12 +786,15 @@ def test_loc_setitem_empty_frame(self): ) tm.assert_frame_equal(df, expected) - def test_loc_setitem_frame(self): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_loc_setitem_frame(self, has_ref): df = DataFrame( np.random.default_rng(2).standard_normal((4, 4)), index=list("abcd"), columns=list("ABCD"), ) + if has_ref: + view = df[:] # noqa: F841 result = df.iloc[0, 0] @@ -2118,7 +2127,8 @@ def test_loc_setitem_with_expansion_inf_upcast_empty(self): tm.assert_index_equal(result, expected) @pytest.mark.filterwarnings("ignore:indexing past lexsort depth") - def test_loc_setitem_with_expansion_nonunique_index(self, index): + @pytest.mark.parametrize("has_ref", [True, False]) + def test_loc_setitem_with_expansion_nonunique_index(self, index, has_ref): # GH#40096 if not len(index): pytest.skip("Not relevant for empty Index") @@ -2144,11 +2154,15 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index): # Add new row, but no new columns df = orig.copy() + if has_ref: + view = df[:] df.loc[key, 0] = N tm.assert_frame_equal(df, expected) # add new row on a Series ser = orig.copy()[0] + if has_ref: + view = ser[:] ser.loc[key] = N # the series machinery lets us preserve int dtype instead of float expected = expected[0].astype(np.int64) @@ -2156,6 +2170,8 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index): # add new row and new column df = orig.copy() + if has_ref: + view = df[:] # noqa: F841 df.loc[key, 1] = N expected = DataFrame( {0: list(arr) + [np.nan], 1: [np.nan] * N + [float(N)]}, From adab3745f9465c210cb5b07b335381ae0e5e9ac8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 21 Sep 2025 11:30:49 +0200 Subject: [PATCH 14/16] typing --- pandas/core/internals/managers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6c6a601190381..fabeebf27efb4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -578,9 +578,9 @@ def setitem(self, indexer, value) -> Self: if isinstance(indexer[1], int): col_indexer = 0 elif len(blk_loc) > 1: - col_indexer = slice(None) + col_indexer = slice(None) # type: ignore[assignment] else: - col_indexer = np.arange(len(blk_loc)) + col_indexer = np.arange(len(blk_loc)) # type: ignore[assignment] indexer[1] = col_indexer row_indexer = indexer[0] From e17269f0eb0f26a08e34fcce261dc0e4793df3ce Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 21 Sep 2025 11:52:55 +0200 Subject: [PATCH 15/16] add additional specific test --- pandas/tests/frame/indexing/test_setitem.py | 34 +++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 555b26bcd7b79..65936f44387fe 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1402,6 +1402,40 @@ def test_iloc_setitem_view_2dblock(self): ) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize( + "indexer, value", + [ + (([0, 2], slice(None)), [[2, 2, 2, 2], [2, 2, 2, 2]]), + ((slice(None), slice(None)), 2), + ((0, [1, 3]), [2, 2]), + (([0], 1), [2]), + ( + (np.array([False, True, False]), np.array([False, True, False, True])), + [2, 2], + ), + ], + ) + def test_setitem_2dblock_with_ref(self, indexer, value): + # https://github.com/pandas-dev/pandas/issues/60309 + arr = np.arange(12).reshape(3, 4) + + df_parent = DataFrame(arr.copy(), columns=list("ABCD")) + # the test is specifically for the case where the df is backed by a single + # block (taking the non-split path) + assert df_parent._mgr.is_single_block + df_orig = df_parent.copy() + df = df_parent[:] + + df.iloc[indexer] = value + + # Check that original DataFrame is unchanged + tm.assert_frame_equal(df_parent, df_orig) + + # Check that df is modified correctly + arr[indexer] = value + expected = DataFrame(arr, columns=list("ABCD")) + tm.assert_frame_equal(df, expected) + def test_full_setter_loc_incompatible_dtype(): # https://github.com/pandas-dev/pandas/issues/55791 From 574d5325caafde50eea619f3b6439c5d35cf4637 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 21 Sep 2025 15:28:37 +0200 Subject: [PATCH 16/16] use lib.is_integer + add test covering it --- pandas/core/internals/managers.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index fabeebf27efb4..c9d2d495d843e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -575,7 +575,7 @@ def setitem(self, indexer, value) -> Self: indexer = list(indexer) # first block equals values we are setting to -> set to all columns - if isinstance(indexer[1], int): + if lib.is_integer(indexer[1]): col_indexer = 0 elif len(blk_loc) > 1: col_indexer = slice(None) # type: ignore[assignment] diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 65936f44387fe..c0fead4889932 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1409,6 +1409,9 @@ def test_iloc_setitem_view_2dblock(self): ((slice(None), slice(None)), 2), ((0, [1, 3]), [2, 2]), (([0], 1), [2]), + (([0], np.int64(1)), [2]), + ((slice(None), np.int64(1)), [2, 2, 2]), + ((slice(None, 2), np.int64(1)), [2, 2]), ( (np.array([False, True, False]), np.array([False, True, False, True])), [2, 2],