Force all deserialized objects to the oldest GC generation (#19681)

ilevkivskyi · web-flow · commit 6c5b13ccbad7 · 2025-08-19T11:48:51.000+01:00
This is a hack, but it gives ~30% perf win for `mypy -c 'import torch'`
on a warm run. This should not increase memory consumption too much,
since we shouldn't create any cyclic garbage during deserialization (we
do create some cyclic references, like `TypeInfo` -&gt; `SymbolTable` -&gt;
`Instance` -&gt; `TypeInfo`, but those are genuine long-living objects).
diff --git a/mypy/build.py b/mypy/build.py
@@ -116,6 +116,8 @@
     "abc",
 }
 
+# We are careful now, we can increase this in future if safe/useful.
+MAX_GC_FREEZE_CYCLES = 1
 
 Graph: _TypeAlias = dict[str, "State"]
 
@@ -707,6 +709,8 @@ def __init__(
         # new file can be processed O(n**2) times. This cache
         # avoids most of this redundant work.
         self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo]]] = {}
+        # Number of times we used GC optimization hack for fresh SCCs.
+        self.gc_freeze_cycles = 0
 
     def dump_stats(self) -> None:
         if self.options.dump_build_stats:
@@ -3326,8 +3330,29 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
                 #
                 # TODO: see if it's possible to determine if we need to process only a
                 # _subset_ of the past SCCs instead of having to process them all.
+                if (
+                    platform.python_implementation() == "CPython"
+                    and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
+                ):
+                    # When deserializing cache we create huge amount of new objects, so even
+                    # with our generous GC thresholds, GC is still doing a lot of pointless
+                    # work searching for garbage. So, we temporarily disable it when
+                    # processing fresh SCCs, and then move all the new objects to the oldest
+                    # generation with the freeze()/unfreeze() trick below. This is arguably
+                    # a hack, but it gives huge performance wins for large third-party
+                    # libraries, like torch.
+                    gc.collect()
+                    gc.disable()
                 for prev_scc in fresh_scc_queue:
                     process_fresh_modules(graph, prev_scc, manager)
+                if (
+                    platform.python_implementation() == "CPython"
+                    and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
+                ):
+                    manager.gc_freeze_cycles += 1
+                    gc.freeze()
+                    gc.unfreeze()
+                    gc.enable()
                 fresh_scc_queue = []
             size = len(scc)
             if size == 1: