From 011cf7cd1308c67e05a534f4bb44e341802d2f89 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 16 Mar 2023 16:22:51 +0100 Subject: [PATCH 1/4] BUG: Series constructor not respecting CoW when called with BlockManager --- pandas/core/series.py | 3 +++ pandas/tests/copy_view/test_constructors.py | 28 +++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index b0958869c67f3..139f3081054d1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -379,6 +379,9 @@ def __init__( copy: bool = False, fastpath: bool = False, ) -> None: + if isinstance(data, SingleBlockManager) and using_copy_on_write() and not copy: + data = data.copy(deep=False) + if ( isinstance(data, (SingleBlockManager, SingleArrayManager)) and index is None diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 6cf45c194707e..2efe457285eb4 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import pandas as pd from pandas import ( DataFrame, Series, @@ -82,6 +83,33 @@ def test_series_from_series_with_reindex(using_copy_on_write): assert not result._mgr.blocks[0].refs.has_reference() +@pytest.mark.parametrize("fastpath", [False, True]) +@pytest.mark.parametrize("dtype", [None, "int64"]) +@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)]) +def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath): + ser = Series([1, 2, 3], dtype="int64") + ser_orig = ser.copy() + ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath) + assert np.shares_memory(get_array(ser), get_array(ser2)) + if using_copy_on_write: + assert not ser2._mgr._has_no_reference(0) + + ser2.iloc[0] = 100 + if using_copy_on_write: + tm.assert_series_equal(ser, ser_orig) + else: + expected = Series([100, 2, 3]) + tm.assert_series_equal(ser, expected) + + +def test_series_from_block_manager_different_dtype(using_copy_on_write): + ser = Series([1, 2, 3], dtype="int64") + ser2 = Series(ser._mgr, dtype="int32") + assert not np.shares_memory(get_array(ser), get_array(ser2)) + if using_copy_on_write: + assert ser2._mgr._has_no_reference(0) + + @pytest.mark.parametrize("func", [lambda x: x, lambda x: x._mgr]) @pytest.mark.parametrize("columns", [None, ["a"]]) def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, func): From 222a913ed3acdac4e8926ddf342ac859dc5bd798 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 17 Mar 2023 16:43:44 +0100 Subject: [PATCH 2/4] Address review --- pandas/core/series.py | 10 +++++++--- pandas/tests/copy_view/test_constructors.py | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 139f3081054d1..febd7488ed424 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -379,15 +379,14 @@ def __init__( copy: bool = False, fastpath: bool = False, ) -> None: - if isinstance(data, SingleBlockManager) and using_copy_on_write() and not copy: - data = data.copy(deep=False) - if ( isinstance(data, (SingleBlockManager, SingleArrayManager)) and index is None and dtype is None and copy is False ): + if using_copy_on_write(): + data = data.copy(deep=False) # GH#33357 called with just the SingleBlockManager NDFrame.__init__(self, data) if fastpath: @@ -406,6 +405,8 @@ def __init__( data = SingleBlockManager.from_array(data, index) elif manager == "array": data = SingleArrayManager.from_array(data, index) + elif using_copy_on_write(): + data = data.copy(deep=False) if copy: data = data.copy() # skips validation of the name @@ -413,6 +414,9 @@ def __init__( NDFrame.__init__(self, data) return + if isinstance(data, SingleBlockManager) and using_copy_on_write() and not copy: + data = data.copy(deep=False) + name = ibase.maybe_extract_name(name, data, type(self)) if index is not None: diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 2efe457285eb4..34a93c0c142c4 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -87,9 +87,11 @@ def test_series_from_series_with_reindex(using_copy_on_write): @pytest.mark.parametrize("dtype", [None, "int64"]) @pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)]) def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath): + if idx is None: + fastpath = False ser = Series([1, 2, 3], dtype="int64") ser_orig = ser.copy() - ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath) + ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath, index=idx) assert np.shares_memory(get_array(ser), get_array(ser2)) if using_copy_on_write: assert not ser2._mgr._has_no_reference(0) From fcb9bc61b2448cad47159c7d8de815ee5dc467a2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 17 Mar 2023 16:43:57 +0100 Subject: [PATCH 3/4] Fix --- pandas/tests/copy_view/test_constructors.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 34a93c0c142c4..feb4750b7d539 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -87,8 +87,6 @@ def test_series_from_series_with_reindex(using_copy_on_write): @pytest.mark.parametrize("dtype", [None, "int64"]) @pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)]) def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath): - if idx is None: - fastpath = False ser = Series([1, 2, 3], dtype="int64") ser_orig = ser.copy() ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath, index=idx) From 63d41eee27d735402bfc28e0f9cce4e8c9d4601b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 17 Mar 2023 16:44:48 +0100 Subject: [PATCH 4/4] Fix --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index febd7488ed424..574a41f60dde2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -405,7 +405,7 @@ def __init__( data = SingleBlockManager.from_array(data, index) elif manager == "array": data = SingleArrayManager.from_array(data, index) - elif using_copy_on_write(): + elif using_copy_on_write() and not copy: data = data.copy(deep=False) if copy: data = data.copy()