CLN: remove and update outdated _item_cache references

chilin0525 · chilin0525 · commit 025b2b2ae085 · 2025-07-06T16:59:53.000+08:00
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -236,7 +236,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
 
     _internal_names: list[str] = [
         "_mgr",
-        "_item_cache",
         "_cache",
         "_name",
         "_metadata",
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1900,7 +1900,7 @@ def _consolidate_check(self) -> None:
     def _consolidate_inplace(self) -> None:
         # In general, _consolidate_inplace should only be called via
         #  DataFrame._consolidate_inplace, otherwise we will fail to invalidate
-        #  the DataFrame's _item_cache. The exception is for newly-created
+        #  the DataFrame's internal structures. The exception is for newly-created
         #  BlockManager objects not yet attached to a DataFrame.
         if not self.is_consolidated():
             self.blocks = _consolidate(self.blocks)
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
@@ -734,33 +734,6 @@ def test_column_as_series_set_with_upcast(backend):
             s[0] = "foo"
 
 
-@pytest.mark.parametrize(
-    "method",
-    [
-        lambda df: df["a"],
-        lambda df: df.loc[:, "a"],
-        lambda df: df.iloc[:, 0],
-    ],
-    ids=["getitem", "loc", "iloc"],
-)
-def test_column_as_series_no_item_cache(request, backend, method):
-    # Case: selecting a single column (which now also uses Copy-on-Write to protect
-    # the view) should always give a new object (i.e. not make use of a cache)
-    dtype_backend, DataFrame, _ = backend
-    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
-    df_orig = df.copy()
-
-    s1 = method(df)
-    s2 = method(df)
-
-    assert s1 is not s2
-
-    s1.iloc[0] = 0
-
-    tm.assert_series_equal(s2, df_orig["a"])
-    tm.assert_frame_equal(df, df_orig)
-
-
 # TODO add tests for other indexing methods on the Series
 
 
diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py
@@ -7,8 +7,6 @@
 import numpy as np
 import pytest
 
-from pandas.errors import PerformanceWarning
-
 from pandas import (
     DataFrame,
     Index,
@@ -72,19 +70,6 @@ def test_insert_with_columns_dups(self):
         )
         tm.assert_frame_equal(df, exp)
 
-    def test_insert_item_cache(self, performance_warning):
-        df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
-        ser = df[0]
-        expected_warning = PerformanceWarning if performance_warning else None
-
-        with tm.assert_produces_warning(expected_warning):
-            for n in range(100):
-                df[n + 3] = df[1] * n
-
-        ser.iloc[0] = 99
-        assert df.iloc[0, 0] == df[0][0]
-        assert df.iloc[0, 0] != 99
-
     def test_insert_EA_no_warning(self):
         # PerformanceWarning about fragmented frame should not be raised when
         # using EAs (https://github.com/pandas-dev/pandas/issues/44098)
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -207,20 +207,6 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method):
         expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
         tm.assert_frame_equal(result, expected)
 
-    def test_corr_item_cache(self):
-        # Check that corr does not lead to incorrect entries in item_cache
-
-        df = DataFrame({"A": range(10)})
-        df["B"] = range(10)[::-1]
-
-        ser = df["A"]  # populate item_cache
-        assert len(df._mgr.blocks) == 2
-
-        _ = df.corr(numeric_only=True)
-
-        ser.iloc[0] = 99
-        assert df.loc[0, "A"] == 0
-
     @pytest.mark.parametrize("length", [2, 20, 200, 2000])
     def test_corr_for_constant_columns(self, length):
         # GH: 37448
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
@@ -721,22 +721,6 @@ def test_quantile_empty_no_columns(self, interp_method):
         expected.columns.name = "captain tightpants"
         tm.assert_frame_equal(result, expected)
 
-    def test_quantile_item_cache(self, interp_method):
-        # previous behavior incorrect retained an invalid _item_cache entry
-        interpolation, method = interp_method
-        df = DataFrame(
-            np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"]
-        )
-        df["D"] = df["A"] * 2
-        ser = df["A"]
-        assert len(df._mgr.blocks) == 2
-
-        df.quantile(numeric_only=False, interpolation=interpolation, method=method)
-
-        ser.iloc[0] = 99
-        assert df.iloc[0, 0] == df["A"][0]
-        assert df.iloc[0, 0] != 99
-
     def test_invalid_method(self):
         with pytest.raises(ValueError, match="Invalid method: foo"):
             DataFrame(range(1)).quantile(0.5, method="foo")
diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
@@ -592,21 +592,6 @@ def test_sort_values_nat_na_position_default(self):
         result = expected.sort_values(["A", "date"])
         tm.assert_frame_equal(result, expected)
 
-    def test_sort_values_item_cache(self):
-        # previous behavior incorrect retained an invalid _item_cache entry
-        df = DataFrame(
-            np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"]
-        )
-        df["D"] = df["A"] * 2
-        ser = df["A"]
-        assert len(df._mgr.blocks) == 2
-
-        df.sort_values(by="A")
-
-        ser.iloc[0] = 99
-        assert df.iloc[0, 0] == df["A"][0]
-        assert df.iloc[0, 0] != 99
-
     def test_sort_values_reshaping(self):
         # GH 39426
         values = list(range(21))
diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py
@@ -1,14 +1,8 @@
-import numpy as np
-import pytest
-
-from pandas._config import using_string_dtype
-
 from pandas import (
     DataFrame,
     MultiIndex,
 )
 import pandas._testing as tm
-from pandas.core.arrays import NumpyExtensionArray
 
 
 class TestToDictOfBlocks:
@@ -27,22 +21,6 @@ def test_no_copy_blocks(self, float_frame):
         assert _last_df is not None and not _last_df[column].equals(df[column])
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_to_dict_of_blocks_item_cache():
-    # Calling to_dict_of_blocks should not poison item_cache
-    df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
-    df["c"] = NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object))
-    mgr = df._mgr
-    assert len(mgr.blocks) == 3  # i.e. not consolidated
-
-    ser = df["b"]  # populations item_cache["b"]
-
-    df._to_dict_of_blocks()
-
-    with pytest.raises(ValueError, match="read-only"):
-        ser.values[0] = "foo"
-
-
 def test_set_change_dtype_slice():
     # GH#8850
     cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")])
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -381,30 +381,3 @@ def test_update_inplace_sets_valid_block_values():
 
     # check we haven't put a Series into any block.values
     assert isinstance(df._mgr.blocks[0].values, Categorical)
-
-
-def test_nonconsolidated_item_cache_take():
-    # https://github.com/pandas-dev/pandas/issues/35521
-
-    # create non-consolidated dataframe with object dtype columns
-    df = DataFrame(
-        {
-            "col1": Series(["a"], dtype=object),
-        }
-    )
-    df["col2"] = Series([0], dtype=object)
-    assert not df._mgr.is_consolidated()
-
-    # access column (item cache)
-    df["col1"] == "A"
-    # take operation
-    # (regression was that this consolidated but didn't reset item cache,
-    # resulting in an invalid cache and the .at operation not working properly)
-    df[df["col2"] == 0]
-
-    # now setting value should update actual dataframe
-    df.at[0, "col1"] = "A"
-
-    expected = DataFrame({"col1": ["A"], "col2": [0]}, dtype=object)
-    tm.assert_frame_equal(df, expected)
-    assert df.at[0, "col1"] == "A"
diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py
@@ -49,29 +49,6 @@ def test_selection_methods_of_assigned_col():
 
 
 class TestAtSetItem:
-    def test_at_setitem_item_cache_cleared(self):
-        # GH#22372 Note the multi-step construction is necessary to trigger
-        #  the original bug. pandas/issues/22372#issuecomment-413345309
-        df = DataFrame(index=[0])
-        df["x"] = 1
-        df["cost"] = 2
-
-        # accessing df["cost"] adds "cost" to the _item_cache
-        df["cost"]
-
-        # This loc[[0]] lookup used to call _consolidate_inplace at the
-        #  BlockManager level, which failed to clear the _item_cache
-        df.loc[[0]]
-
-        df.at[0, "x"] = 4
-        df.at[0, "cost"] = 789
-
-        expected = DataFrame({"x": [4], "cost": 789}, index=[0])
-        tm.assert_frame_equal(df, expected)
-
-        # And in particular, check that the _item_cache has updated correctly.
-        tm.assert_series_equal(df["cost"], expected["cost"])
-
     def test_at_setitem_mixed_index_assignment(self):
         # GH#19860
         ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -18,23 +18,6 @@
 
 
 class TestCaching:
-    def test_slice_consolidate_invalidate_item_cache(self):
-        # this is chained assignment, but will 'work'
-        with option_context("chained_assignment", None):
-            # #3970
-            df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})
-
-            # Creates a second float block
-            df["cc"] = 0.0
-
-            # caches a reference to the 'bb' series
-            df["bb"]
-
-            # Assignment to wrong series
-            with tm.raises_chained_assignment_error():
-                df["bb"].iloc[0] = 0.17
-            tm.assert_almost_equal(df["bb"][0], 2.2)
-
     @pytest.mark.parametrize("do_ref", [True, False])
     def test_setitem_cache_updating(self, do_ref):
         # GH 5424
@@ -89,18 +72,6 @@ def test_setitem_cache_updating_slices(self):
         tm.assert_frame_equal(out, expected)
         tm.assert_series_equal(out["A"], expected["A"])
 
-    def test_altering_series_clears_parent_cache(self):
-        # GH #33675
-        df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
-        ser = df["A"]
-
-        # Adding a new entry to ser swaps in a new array, so "A" needs to
-        #  be removed from df._item_cache
-        ser["c"] = 5
-        assert len(ser) == 3
-        assert df["A"] is not ser
-        assert len(df["A"]) == 2
-
 
 class TestChaining:
     def test_setitem_chained_setfault(self):
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -736,7 +736,7 @@ def test_reindex_items(self):
 
         reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0)
         # reindex_axis does not consolidate_inplace, as that risks failing to
-        #  invalidate _item_cache
+        #  invalidate internal caches
         assert not reindexed.is_consolidated()
 
         tm.assert_index_equal(reindexed.items, Index(["g", "c", "a", "d"]))