|
40 | 40 | from typing_extensions import TypeAlias as _TypeAlias
|
41 | 41 |
|
42 | 42 | import mypy.semanal_main
|
| 43 | +from mypy.cache import Buffer |
43 | 44 | from mypy.checker import TypeChecker
|
44 | 45 | from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter
|
45 | 46 | from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
|
|
116 | 117 | "abc",
|
117 | 118 | }
|
118 | 119 |
|
| 120 | +# Be conservative for now; we can increase this in the future if it proves safe and useful. |
| 121 | +MAX_GC_FREEZE_CYCLES = 1 |
119 | 122 |
|
120 | 123 | Graph: _TypeAlias = dict[str, "State"]
|
121 | 124 |
|
@@ -707,6 +710,8 @@ def __init__(
|
707 | 710 | # new file can be processed O(n**2) times. This cache
|
708 | 711 | # avoids most of this redundant work.
|
709 | 712 | self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo]]] = {}
|
| 713 | + # Number of times we have used the GC optimization hack for fresh SCCs. |
| 714 | + self.gc_freeze_cycles = 0 |
710 | 715 |
|
711 | 716 | def dump_stats(self) -> None:
|
712 | 717 | if self.options.dump_build_stats:
|
@@ -1139,6 +1144,17 @@ def read_deps_cache(manager: BuildManager, graph: Graph) -> dict[str, FgDepMeta]
|
1139 | 1144 | return module_deps_metas
|
1140 | 1145 |
|
1141 | 1146 |
|
| 1147 | +def _load_ff_file(file: str, manager: BuildManager, log_error: str) -> bytes | None: |
| 1148 | + t0 = time.time() |
| 1149 | + try: |
| 1150 | + data = manager.metastore.read(file) |
| 1151 | + except OSError: |
| 1152 | + manager.log(log_error + file) |
| 1153 | + return None |
| 1154 | + manager.add_stats(metastore_read_time=time.time() - t0) |
| 1155 | + return data |
| 1156 | + |
| 1157 | + |
1142 | 1158 | def _load_json_file(
|
1143 | 1159 | file: str, manager: BuildManager, log_success: str, log_error: str
|
1144 | 1160 | ) -> dict[str, Any] | None:
|
@@ -1259,7 +1275,11 @@ def get_cache_names(id: str, path: str, options: Options) -> tuple[str, str, str
|
1259 | 1275 | deps_json = None
|
1260 | 1276 | if options.cache_fine_grained:
|
1261 | 1277 | deps_json = prefix + ".deps.json"
|
1262 |
| - return (prefix + ".meta.json", prefix + ".data.json", deps_json) |
| 1278 | + if options.fixed_format_cache: |
| 1279 | + data_suffix = ".data.ff" |
| 1280 | + else: |
| 1281 | + data_suffix = ".data.json" |
| 1282 | + return (prefix + ".meta.json", prefix + data_suffix, deps_json) |
1263 | 1283 |
|
1264 | 1284 |
|
1265 | 1285 | def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | None:
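With fixed_format_cache enabled, only the data file gets a different suffix; the meta (and optional deps) file names are unchanged. A hypothetical illustration of the returned tuple, using a made-up prefix and assuming cache_fine_grained is off (the real prefix depends on the cache directory layout and options):

# Illustrative only; the "foo/bar" prefix is hypothetical.
# options.fixed_format_cache = False (default):
#   get_cache_names("foo.bar", "foo/bar.py", options) -> ("foo/bar.meta.json", "foo/bar.data.json", None)
# options.fixed_format_cache = True:
#   get_cache_names("foo.bar", "foo/bar.py", options) -> ("foo/bar.meta.json", "foo/bar.data.ff", None)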
|
@@ -1559,8 +1579,13 @@ def write_cache(
|
1559 | 1579 | tree.path = path
|
1560 | 1580 |
|
1561 | 1581 | # Serialize data and analyze interface
|
1562 |
| - data = tree.serialize() |
1563 |
| - data_bytes = json_dumps(data, manager.options.debug_cache) |
| 1582 | + if manager.options.fixed_format_cache: |
| 1583 | + data_io = Buffer() |
| 1584 | + tree.write(data_io) |
| 1585 | + data_bytes = data_io.getvalue() |
| 1586 | + else: |
| 1587 | + data = tree.serialize() |
| 1588 | + data_bytes = json_dumps(data, manager.options.debug_cache) |
1564 | 1589 | interface_hash = hash_digest(data_bytes)
|
1565 | 1590 |
|
1566 | 1591 | plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))
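For orientation, here is a minimal sketch of the fixed-format round trip used above and mirrored in load_tree() further down. It assumes an already-analyzed MypyFile bound to the name tree; Buffer, MypyFile.write/read, and hash_digest are the helpers used in the surrounding code, everything else is illustrative:

from mypy.cache import Buffer
from mypy.nodes import MypyFile
from mypy.util import hash_digest  # assumed location of the hash helper used above

buf = Buffer()
tree.write(buf)                      # fixed-format binary serialization (write path)
data_bytes = buf.getvalue()          # bytes that end up in <id>.data.ff
interface_hash = hash_digest(data_bytes)
restored = MypyFile.read(Buffer(data_bytes))  # what load_tree() does on a cache hit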
|
@@ -2085,15 +2110,23 @@ def load_tree(self, temporary: bool = False) -> None:
|
2085 | 2110 | self.meta is not None
|
2086 | 2111 | ), "Internal error: this method must be called only for cached modules"
|
2087 | 2112 |
|
2088 |
| - data = _load_json_file( |
2089 |
| - self.meta.data_json, self.manager, "Load tree ", "Could not load tree: " |
2090 |
| - ) |
| 2113 | + data: bytes | dict[str, Any] | None |
| 2114 | + if self.options.fixed_format_cache: |
| 2115 | + data = _load_ff_file(self.meta.data_json, self.manager, "Could not load tree: ") |
| 2116 | + else: |
| 2117 | + data = _load_json_file( |
| 2118 | + self.meta.data_json, self.manager, "Load tree ", "Could not load tree: " |
| 2119 | + ) |
2091 | 2120 | if data is None:
|
2092 | 2121 | return
|
2093 | 2122 |
|
2094 | 2123 | t0 = time.time()
|
2095 | 2124 | # TODO: Assert data file wasn't changed.
|
2096 |
| - self.tree = MypyFile.deserialize(data) |
| 2125 | + if isinstance(data, bytes): |
| 2126 | + data_io = Buffer(data) |
| 2127 | + self.tree = MypyFile.read(data_io) |
| 2128 | + else: |
| 2129 | + self.tree = MypyFile.deserialize(data) |
2097 | 2130 | t1 = time.time()
|
2098 | 2131 | self.manager.add_stats(deserialize_time=t1 - t0)
|
2099 | 2132 | if not temporary:
|
@@ -2481,7 +2514,11 @@ def write_cache(self) -> None:
|
2481 | 2514 | ):
|
2482 | 2515 | if self.options.debug_serialize:
|
2483 | 2516 | try:
|
2484 |
| - self.tree.serialize() |
| 2517 | + if self.manager.options.fixed_format_cache: |
| 2518 | + data = Buffer() |
| 2519 | + self.tree.write(data) |
| 2520 | + else: |
| 2521 | + self.tree.serialize() |
2485 | 2522 | except Exception:
|
2486 | 2523 | print(f"Error serializing {self.id}", file=self.manager.stdout)
|
2487 | 2524 | raise # Propagate to display traceback
|
@@ -3326,8 +3363,29 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
|
3326 | 3363 | #
|
3327 | 3364 | # TODO: see if it's possible to determine if we need to process only a
|
3328 | 3365 | # _subset_ of the past SCCs instead of having to process them all.
|
| 3366 | + if ( |
| 3367 | + platform.python_implementation() == "CPython" |
| 3368 | + and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES |
| 3369 | + ): |
| 3370 | + # Deserializing the cache creates a huge number of new objects, so even |
| 3371 | + # with our generous GC thresholds the GC still does a lot of pointless |
| 3372 | + # work searching for garbage. We therefore temporarily disable it while |
| 3373 | + # processing fresh SCCs, and then move all the new objects to the oldest |
| 3374 | + # generation with the freeze()/unfreeze() trick below. This is arguably |
| 3375 | + # a hack, but it gives huge performance wins for large third-party |
| 3376 | + # libraries like torch. |
| 3377 | + gc.collect() |
| 3378 | + gc.disable() |
3329 | 3379 | for prev_scc in fresh_scc_queue:
|
3330 | 3380 | process_fresh_modules(graph, prev_scc, manager)
|
| 3381 | + if ( |
| 3382 | + platform.python_implementation() == "CPython" |
| 3383 | + and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES |
| 3384 | + ): |
| 3385 | + manager.gc_freeze_cycles += 1 |
| 3386 | + gc.freeze() |
| 3387 | + gc.unfreeze() |
| 3388 | + gc.enable() |
3331 | 3389 | fresh_scc_queue = []
|
3332 | 3390 | size = len(scc)
|
3333 | 3391 | if size == 1:
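Stripped of the mypy-specific bookkeeping, the disable/freeze/unfreeze trick above boils down to the following CPython-only pattern (the wrapper function and its argument are illustrative, not mypy APIs):

import gc
import platform

def deserialize_with_gc_pause(load_all):
    # Run load_all() with the cycle collector paused, then age the new objects.
    if platform.python_implementation() != "CPython":
        return load_all()
    gc.collect()       # collect existing garbage first
    gc.disable()       # pause cycle collection while many long-lived objects are created
    try:
        return load_all()
    finally:
        gc.freeze()    # move everything tracked so far into the permanent generation
        gc.unfreeze()  # ...and back into the oldest generation, which young collections skip
        gc.enable()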
|
|