Skip to content

Commit 6c5b13c

Browse files
authored
Force all deserialized objects to the oldest GC generation (#19681)
This is a hack, but it gives a ~30% perf win for `mypy -c 'import torch'` on a warm run. This should not increase memory consumption too much, since we shouldn't create any cyclic garbage during deserialization (we do create some cyclic references, like `TypeInfo` -> `SymbolTable` -> `Instance` -> `TypeInfo`, but those are genuinely long-lived objects).
1 parent 91487cb commit 6c5b13c

File tree

1 file changed

+25
-0
lines changed

1 file changed

+25
-0
lines changed

mypy/build.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@
116116
"abc",
117117
}
118118

119+
# We are being conservative for now; we can increase this in the future if safe/useful.
120+
MAX_GC_FREEZE_CYCLES = 1
119121

120122
Graph: _TypeAlias = dict[str, "State"]
121123

@@ -707,6 +709,8 @@ def __init__(
707709
# new file can be processed O(n**2) times. This cache
708710
# avoids most of this redundant work.
709711
self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo]]] = {}
712+
# Number of times we used GC optimization hack for fresh SCCs.
713+
self.gc_freeze_cycles = 0
710714

711715
def dump_stats(self) -> None:
712716
if self.options.dump_build_stats:
@@ -3326,8 +3330,29 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
33263330
#
33273331
# TODO: see if it's possible to determine if we need to process only a
33283332
# _subset_ of the past SCCs instead of having to process them all.
3333+
if (
3334+
platform.python_implementation() == "CPython"
3335+
and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
3336+
):
3337+
# When deserializing cache we create huge amount of new objects, so even
3338+
# with our generous GC thresholds, GC is still doing a lot of pointless
3339+
# work searching for garbage. So, we temporarily disable it when
3340+
# processing fresh SCCs, and then move all the new objects to the oldest
3341+
# generation with the freeze()/unfreeze() trick below. This is arguably
3342+
# a hack, but it gives huge performance wins for large third-party
3343+
# libraries, like torch.
3344+
gc.collect()
3345+
gc.disable()
33293346
for prev_scc in fresh_scc_queue:
33303347
process_fresh_modules(graph, prev_scc, manager)
3348+
if (
3349+
platform.python_implementation() == "CPython"
3350+
and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
3351+
):
3352+
manager.gc_freeze_cycles += 1
3353+
gc.freeze()
3354+
gc.unfreeze()
3355+
gc.enable()
33313356
fresh_scc_queue = []
33323357
size = len(scc)
33333358
if size == 1:

0 commit comments

Comments
 (0)