diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 66188d9e91232..7f1ccc482f70f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -236,7 +236,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin): _internal_names: list[str] = [ "_mgr", - "_item_cache", "_cache", "_name", "_metadata", diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cb290fde7095c..67d7ffa80462a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1898,10 +1898,6 @@ def _consolidate_check(self) -> None: self._known_consolidated = True def _consolidate_inplace(self) -> None: - # In general, _consolidate_inplace should only be called via - # DataFrame._consolidate_inplace, otherwise we will fail to invalidate - # the DataFrame's _item_cache. The exception is for newly-created - # BlockManager objects not yet attached to a DataFrame. if not self.is_consolidated(): self.blocks = _consolidate(self.blocks) self._is_consolidated = True diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index b530cb98ef46c..761daf0e985cc 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -7,8 +7,6 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning - from pandas import ( DataFrame, Index, @@ -72,19 +70,6 @@ def test_insert_with_columns_dups(self): ) tm.assert_frame_equal(df, exp) - def test_insert_item_cache(self, performance_warning): - df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) - ser = df[0] - expected_warning = PerformanceWarning if performance_warning else None - - with tm.assert_produces_warning(expected_warning): - for n in range(100): - df[n + 3] = df[1] * n - - ser.iloc[0] = 99 - assert df.iloc[0, 0] == df[0][0] - assert df.iloc[0, 0] != 99 - def test_insert_EA_no_warning(self): # PerformanceWarning about fragmented frame should not be raised when # using EAs (https://github.com/pandas-dev/pandas/issues/44098) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 304638a3a7dcf..a5ed2e86283e9 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -207,20 +207,6 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method): expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"]) tm.assert_frame_equal(result, expected) - def test_corr_item_cache(self): - # Check that corr does not lead to incorrect entries in item_cache - - df = DataFrame({"A": range(10)}) - df["B"] = range(10)[::-1] - - ser = df["A"] # populate item_cache - assert len(df._mgr.blocks) == 2 - - _ = df.corr(numeric_only=True) - - ser.iloc[0] = 99 - assert df.loc[0, "A"] == 0 - @pytest.mark.parametrize("length", [2, 20, 200, 2000]) def test_corr_for_constant_columns(self, length): # GH: 37448 diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index d7baac7264a1d..631742d43263f 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -721,22 +721,6 @@ def test_quantile_empty_no_columns(self, interp_method): expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) - def test_quantile_item_cache(self, interp_method): - # previous behavior incorrect retained an invalid _item_cache entry - interpolation, method = interp_method - df = DataFrame( - np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"] - ) - df["D"] = df["A"] * 2 - ser = df["A"] - assert len(df._mgr.blocks) == 2 - - df.quantile(numeric_only=False, interpolation=interpolation, method=method) - - ser.iloc[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] != 99 - def test_invalid_method(self): with pytest.raises(ValueError, match="Invalid method: foo"): DataFrame(range(1)).quantile(0.5, method="foo") diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 9a628c2ee9f73..9abe0c97c3260 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -592,21 +592,6 @@ def test_sort_values_nat_na_position_default(self): result = expected.sort_values(["A", "date"]) tm.assert_frame_equal(result, expected) - def test_sort_values_item_cache(self): - # previous behavior incorrect retained an invalid _item_cache entry - df = DataFrame( - np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"] - ) - df["D"] = df["A"] * 2 - ser = df["A"] - assert len(df._mgr.blocks) == 2 - - df.sort_values(by="A") - - ser.iloc[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] != 99 - def test_sort_values_reshaping(self): # GH 39426 values = list(range(21)) diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 4f621b4643b70..a6b99a70d6ecd 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -1,14 +1,8 @@ -import numpy as np -import pytest - -from pandas._config import using_string_dtype - from pandas import ( DataFrame, MultiIndex, ) import pandas._testing as tm -from pandas.core.arrays import NumpyExtensionArray class TestToDictOfBlocks: @@ -27,22 +21,6 @@ def test_no_copy_blocks(self, float_frame): assert _last_df is not None and not _last_df[column].equals(df[column]) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_to_dict_of_blocks_item_cache(): - # Calling to_dict_of_blocks should not poison item_cache - df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) - df["c"] = NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object)) - mgr = df._mgr - assert len(mgr.blocks) == 3 # i.e. not consolidated - - ser = df["b"] # populations item_cache["b"] - - df._to_dict_of_blocks() - - with pytest.raises(ValueError, match="read-only"): - ser.values[0] = "foo" - - def test_set_change_dtype_slice(): # GH#8850 cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")]) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 6fdbfac8f4e0a..f084d16e387a8 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -381,30 +381,3 @@ def test_update_inplace_sets_valid_block_values(): # check we haven't put a Series into any block.values assert isinstance(df._mgr.blocks[0].values, Categorical) - - -def test_nonconsolidated_item_cache_take(): - # https://github.com/pandas-dev/pandas/issues/35521 - - # create non-consolidated dataframe with object dtype columns - df = DataFrame( - { - "col1": Series(["a"], dtype=object), - } - ) - df["col2"] = Series([0], dtype=object) - assert not df._mgr.is_consolidated() - - # access column (item cache) - df["col1"] == "A" - # take operation - # (regression was that this consolidated but didn't reset item cache, - # resulting in an invalid cache and the .at operation not working properly) - df[df["col2"] == 0] - - # now setting value should update actual dataframe - df.at[0, "col1"] = "A" - - expected = DataFrame({"col1": ["A"], "col2": [0]}, dtype=object) - tm.assert_frame_equal(df, expected) - assert df.at[0, "col1"] == "A" diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index e80acc230a320..d24d343332669 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -49,29 +49,6 @@ def test_selection_methods_of_assigned_col(): class TestAtSetItem: - def test_at_setitem_item_cache_cleared(self): - # GH#22372 Note the multi-step construction is necessary to trigger - # the original bug. pandas/issues/22372#issuecomment-413345309 - df = DataFrame(index=[0]) - df["x"] = 1 - df["cost"] = 2 - - # accessing df["cost"] adds "cost" to the _item_cache - df["cost"] - - # This loc[[0]] lookup used to call _consolidate_inplace at the - # BlockManager level, which failed to clear the _item_cache - df.loc[[0]] - - df.at[0, "x"] = 4 - df.at[0, "cost"] = 789 - - expected = DataFrame({"x": [4], "cost": 789}, index=[0]) - tm.assert_frame_equal(df, expected) - - # And in particular, check that the _item_cache has updated correctly. - tm.assert_series_equal(df["cost"], expected["cost"]) - def test_at_setitem_mixed_index_assignment(self): # GH#19860 ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 64d8068fa9291..266e35ac9088f 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -18,23 +18,6 @@ class TestCaching: - def test_slice_consolidate_invalidate_item_cache(self): - # this is chained assignment, but will 'work' - with option_context("chained_assignment", None): - # #3970 - df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5}) - - # Creates a second float block - df["cc"] = 0.0 - - # caches a reference to the 'bb' series - df["bb"] - - # Assignment to wrong series - with tm.raises_chained_assignment_error(): - df["bb"].iloc[0] = 0.17 - tm.assert_almost_equal(df["bb"][0], 2.2) - @pytest.mark.parametrize("do_ref", [True, False]) def test_setitem_cache_updating(self, do_ref): # GH 5424 @@ -89,18 +72,6 @@ def test_setitem_cache_updating_slices(self): tm.assert_frame_equal(out, expected) tm.assert_series_equal(out["A"], expected["A"]) - def test_altering_series_clears_parent_cache(self): - # GH #33675 - df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) - ser = df["A"] - - # Adding a new entry to ser swaps in a new array, so "A" needs to - # be removed from df._item_cache - ser["c"] = 5 - assert len(ser) == 3 - assert df["A"] is not ser - assert len(df["A"]) == 2 - class TestChaining: def test_setitem_chained_setfault(self): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index ac8ac0766f04d..11e6b99204aee 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -735,8 +735,6 @@ def test_reindex_items(self): mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2") reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0) - # reindex_axis does not consolidate_inplace, as that risks failing to - # invalidate _item_cache assert not reindexed.is_consolidated() tm.assert_index_equal(reindexed.items, Index(["g", "c", "a", "d"]))