Make metas more compact; fix indirect suppression #20075
Changes from all commits
```diff
@@ -363,7 +363,7 @@ class CacheMeta(NamedTuple):
     # dep_prios and dep_lines are in parallel with dependencies + suppressed
     dep_prios: list[int]
     dep_lines: list[int]
-    dep_hashes: dict[str, str]
+    dep_hashes: list[str]
     interface_hash: str  # hash representing the public interface
     error_lines: list[str]
     version_id: str  # mypy version for cache invalidation
```
```diff
@@ -403,7 +403,7 @@ def cache_meta_from_dict(meta: dict[str, Any], data_json: str) -> CacheMeta:
         meta.get("options"),
         meta.get("dep_prios", []),
         meta.get("dep_lines", []),
-        meta.get("dep_hashes", {}),
+        meta.get("dep_hashes", []),
         meta.get("interface_hash", ""),
         meta.get("error_lines", []),
         meta.get("version_id", sentinel),
```
```diff
@@ -1310,8 +1310,7 @@ def get_cache_names(id: str, path: str, options: Options) -> tuple[str, str, str
     Args:
       id: module ID
       path: module path
-      cache_dir: cache directory
-      pyversion: Python version (major, minor)
+      options: build options

     Returns:
       A tuple with the file names to be used for the meta JSON, the
```
```diff
@@ -1328,7 +1327,7 @@ def get_cache_names(id: str, path: str, options: Options) -> tuple[str, str, str
         # Solve this by rewriting the paths as relative to the root dir.
         # This only makes sense when using the filesystem backed cache.
         root = _cache_dir_prefix(options)
-        return (os.path.relpath(pair[0], root), os.path.relpath(pair[1], root), None)
+        return os.path.relpath(pair[0], root), os.path.relpath(pair[1], root), None
     prefix = os.path.join(*id.split("."))
     is_package = os.path.basename(path).startswith("__init__.py")
     if is_package:
```
```diff
@@ -1341,7 +1340,20 @@ def get_cache_names(id: str, path: str, options: Options) -> tuple[str, str, str
         data_suffix = ".data.ff"
     else:
         data_suffix = ".data.json"
-    return (prefix + ".meta.json", prefix + data_suffix, deps_json)
+    return prefix + ".meta.json", prefix + data_suffix, deps_json


+def options_snapshot(id: str, manager: BuildManager) -> dict[str, object]:
+    """Make compact snapshot of options for a module.
+
+    Separately store only the options we may compare individually, and take a hash
+    of everything else. If --debug-cache is specified, fall back to full snapshot.
+    """
+    snapshot = manager.options.clone_for_module(id).select_options_affecting_cache()
+    if manager.options.debug_cache:
+        return snapshot
+    platform_opt = snapshot.pop("platform")
+    return {"platform": platform_opt, "other_options": hash_digest(json_dumps(snapshot))}
+
+
 def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | None:
```
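To see what the compaction buys, here is a self-contained sketch of the same idea; `compact_snapshot`, the md5 digest, and the option names are illustrative stand-ins for mypy's `options_snapshot`, `hash_digest`, and real `Options` fields:

```python
# Self-contained sketch of the compaction (hashlib/json stand in for mypy's
# hash_digest/json_dumps; the option names are made up).
import hashlib
import json


def compact_snapshot(options: dict[str, object]) -> dict[str, object]:
    snapshot = dict(options)
    # Keep "platform" separately comparable (find_cache_meta may overwrite it
    # under --skip-version-check) and collapse everything else into one hash.
    platform = snapshot.pop("platform")
    digest = hashlib.md5(json.dumps(snapshot, sort_keys=True).encode()).hexdigest()
    return {"platform": platform, "other_options": digest}


full = {"platform": "linux", "strict": True, "python_version": [3, 13]}
print(compact_snapshot(full))
# Equality still means "same platform and same remaining options", but the
# meta now stores two short values instead of the whole options dict.
```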
```diff
@@ -1403,7 +1415,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | None:
     # Ignore cache if (relevant) options aren't the same.
     # Note that it's fine to mutilate cached_options since it's only used here.
     cached_options = m.options
-    current_options = manager.options.clone_for_module(id).select_options_affecting_cache()
+    current_options = options_snapshot(id, manager)
     if manager.options.skip_version_check:
         # When we're lax about version we're also lax about platform.
         cached_options["platform"] = current_options["platform"]
```

A review thread is attached to the `skip_version_check` lines:

> Could you explain this? It isn't a new line, but this behavior is really confusing to me. How is "accept a cache generated by another mypy version" related to "use cache for a potentially wrong platform"?

> Unfortunately I don't have context on this decision; probably the idea is that …

> I think the motivation is that you can easily debug e.g. caches generated on Linux on a macOS development machine.

> That's an interesting idea - I didn't even think of that. Nice catch!
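The behavior being discussed reduces to a few lines; this hedged sketch (plain dicts instead of the real snapshots) shows why a platform mismatch alone never invalidates the cache under `--skip-version-check`:

```python
# Plain-dict sketch of the lax platform check discussed in the thread above.
cached_options = {"platform": "linux", "other_options": "abc123"}
current_options = {"platform": "darwin", "other_options": "abc123"}

skip_version_check = True
if skip_version_check:
    # When we're lax about version we're also lax about platform: the cached
    # value is overwritten before the comparison, so it can never mismatch.
    cached_options["platform"] = current_options["platform"]

print(cached_options == current_options)  # True -- platform difference ignored
```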
```diff
@@ -1556,7 +1568,7 @@ def validate_meta(
         "data_mtime": meta.data_mtime,
         "dependencies": meta.dependencies,
         "suppressed": meta.suppressed,
-        "options": (manager.options.clone_for_module(id).select_options_affecting_cache()),
+        "options": options_snapshot(id, manager),
         "dep_prios": meta.dep_prios,
         "dep_lines": meta.dep_lines,
         "dep_hashes": meta.dep_hashes,
```
```diff
@@ -1701,7 +1713,6 @@ def write_cache(
     # updates made by inline config directives in the file. This is
     # important, or otherwise the options would never match when
     # verifying the cache.
-    options = manager.options.clone_for_module(id)
     assert source_hash is not None
     meta = {
         "id": id,
```
```diff
@@ -1712,7 +1723,7 @@ def write_cache(
         "data_mtime": data_mtime,
         "dependencies": dependencies,
         "suppressed": suppressed,
-        "options": options.select_options_affecting_cache(),
+        "options": options_snapshot(id, manager),
         "dep_prios": dep_prios,
         "dep_lines": dep_lines,
         "interface_hash": interface_hash,
```
```diff
@@ -2029,7 +2040,10 @@ def __init__(
             self.priorities = {id: pri for id, pri in zip(all_deps, self.meta.dep_prios)}
             assert len(all_deps) == len(self.meta.dep_lines)
             self.dep_line_map = {id: line for id, line in zip(all_deps, self.meta.dep_lines)}
-            self.dep_hashes = self.meta.dep_hashes
+            assert len(self.meta.dep_hashes) == len(self.meta.dependencies)
+            self.dep_hashes = {
+                k: v for (k, v) in zip(self.meta.dependencies, self.meta.dep_hashes)
+            }
            self.error_lines = self.meta.error_lines
            if temporary:
                self.load_tree(temporary=True)
```
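The list encoding drops the repeated module names from the meta because `dependencies` already records them in order; a small illustrative round trip (made-up hashes, plain lists instead of `CacheMeta`):

```python
# Round trip of the compacted field, with made-up hashes. The dict form
# repeats every module name; the list form relies on staying parallel to
# the dependencies list, which the new assert in State.__init__ enforces.
dependencies = ["os", "typing", "collections.abc"]
dep_hashes = ["1f0c9a", "88ab21", "c3d4e5"]  # what the meta now stores

assert len(dep_hashes) == len(dependencies)
dep_hash_map = dict(zip(dependencies, dep_hashes))  # rebuilt on load
print(dep_hash_map["typing"])  # 88ab21
```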
```diff
@@ -2346,6 +2360,7 @@ def compute_dependencies(self) -> None:
         self.suppressed_set = set()
         self.priorities = {}  # id -> priority
         self.dep_line_map = {}  # id -> line
+        self.dep_hashes = {}
         dep_entries = manager.all_imported_modules_in_file(
             self.tree
         ) + self.manager.plugin.get_additional_deps(self.tree)
```
```diff
@@ -2433,7 +2448,7 @@ def finish_passes(self) -> None:

         # We should always patch indirect dependencies, even in full (non-incremental) builds,
         # because the cache still may be written, and it must be correct.
-        self._patch_indirect_dependencies(
+        self.patch_indirect_dependencies(
             # Two possible sources of indirect dependencies:
             # * Symbols not directly imported in this module but accessed via an attribute
             #   or via a re-export (vast majority of these recorded in semantic analysis).
```
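For context on the comment above, a minimal hypothetical example of an indirect dependency: `client` never imports `impl`, yet the inferred type of `w` is defined there, so `impl`'s public interface affects checking `client`. All three modules are made up.

```python
# impl.py -- defines the class
class Widget:
    def render(self) -> str:
        return "widget"


# api.py -- re-exports Widget; callers never see impl directly
from impl import Widget

def make_widget() -> Widget:
    return Widget()


# client.py -- only imports api, but the inferred type of `w` is impl.Widget,
# making impl an *indirect* dependency of client.
from api import make_widget

w = make_widget()
```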
```diff
@@ -2470,21 +2485,17 @@ def free_state(self) -> None:
             self._type_checker.reset()
             self._type_checker = None

-    def _patch_indirect_dependencies(self, module_refs: set[str], types: set[Type]) -> None:
-        assert None not in types
-        valid = self.valid_references()
+    def patch_indirect_dependencies(self, module_refs: set[str], types: set[Type]) -> None:
+        assert self.ancestors is not None
+        existing_deps = set(self.dependencies + self.suppressed + self.ancestors)
+        existing_deps.add(self.id)

         encountered = self.manager.indirection_detector.find_modules(types) | module_refs
-        extra = encountered - valid
-
-        for dep in sorted(extra):
+        for dep in sorted(encountered - existing_deps):
             if dep not in self.manager.modules:
                 continue
-            if dep not in self.suppressed_set and dep not in self.manager.missing_modules:
-                self.add_dependency(dep)
-                self.priorities[dep] = PRI_INDIRECT
-            elif dep not in self.suppressed_set and dep in self.manager.missing_modules:
-                self.suppress_dependency(dep)
+            self.add_dependency(dep)
+            self.priorities[dep] = PRI_INDIRECT

     def compute_fine_grained_deps(self) -> dict[str, set[str]]:
         assert self.tree is not None
```
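Reduced to plain data, the new method's core logic looks like the following toy model (not the `State` method itself; the `PRI_INDIRECT` value is assumed):

```python
# Toy model of the new method's core logic: anything reachable from the
# checked types that is not already accounted for becomes an indirect
# dependency. PRI_INDIRECT's value is assumed here; see mypy.build.
PRI_INDIRECT = 30

module_id = "pkg.client"
dependencies = ["pkg.api", "os"]
suppressed = ["missing_lib"]
ancestors = ["pkg"]
encountered = {"pkg.api", "pkg.impl", "os", "pkg.client"}  # types + module refs

existing_deps = set(dependencies + suppressed + ancestors) | {module_id}
priorities: dict[str, int] = {}
for dep in sorted(encountered - existing_deps):
    dependencies.append(dep)
    priorities[dep] = PRI_INDIRECT

print(dependencies)  # ['pkg.api', 'os', 'pkg.impl']
print(priorities)    # {'pkg.impl': 30}
```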
```diff
@@ -2514,16 +2525,6 @@ def update_fine_grained_deps(self, deps: dict[str, set[str]]) -> None:
         merge_dependencies(self.compute_fine_grained_deps(), deps)
         type_state.update_protocol_deps(deps)

-    def valid_references(self) -> set[str]:
-        assert self.ancestors is not None
-        valid_refs = set(self.dependencies + self.suppressed + self.ancestors)
-        valid_refs.add(self.id)
-
-        if "os" in valid_refs:
-            valid_refs.add("os.path")
-
-        return valid_refs
-
     def write_cache(self) -> tuple[dict[str, Any], str] | None:
         assert self.tree is not None, "Internal error: method must be called on parsed file only"
         # We don't support writing cache files in fine-grained incremental mode.
```
```diff
@@ -2577,14 +2578,16 @@ def verify_dependencies(self, suppressed_only: bool = False) -> None:
         """
         manager = self.manager
         assert self.ancestors is not None
+        # Strip out indirect dependencies. See comment in build.load_graph().
         if suppressed_only:
-            all_deps = self.suppressed
+            all_deps = [dep for dep in self.suppressed if self.priorities.get(dep) != PRI_INDIRECT]
         else:
-            # Strip out indirect dependencies. See comment in build.load_graph().
             dependencies = [
-                dep for dep in self.dependencies if self.priorities.get(dep) != PRI_INDIRECT
+                dep
+                for dep in self.dependencies + self.suppressed
+                if self.priorities.get(dep) != PRI_INDIRECT
             ]
-            all_deps = dependencies + self.suppressed + self.ancestors
+            all_deps = dependencies + self.ancestors
         for dep in all_deps:
             if dep in manager.modules:
                 continue
```
```diff
@@ -3250,6 +3253,13 @@ def load_graph(
             if dep in graph and dep in st.suppressed_set:
                 # Previously suppressed file is now visible
                 st.add_dependency(dep)
+    # In the loop above we skip indirect dependencies, so to make indirect dependencies behave
+    # more consistently with regular ones, we suppress them manually here (when needed).
+    for st in graph.values():
+        indirect = [dep for dep in st.dependencies if st.priorities.get(dep) == PRI_INDIRECT]
+        for dep in indirect:
+            if dep not in graph:
+                st.suppress_dependency(dep)
     manager.plugin.set_modules(manager.modules)
     return graph
```
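A toy model of this post-pass (plain containers instead of `State`; the `PRI_INDIRECT` value is assumed): indirect deps that never made it into the graph move to the suppressed list instead of dangling.

```python
# Toy model of the load_graph post-pass above; plain lists stand in for State
# and list surgery stands in for suppress_dependency.
PRI_INDIRECT = 30  # assumed value; the real constant lives in mypy.build

graph_ids = {"pkg.client", "pkg.api"}
dependencies = ["pkg.api", "pkg.impl"]  # pkg.impl was added indirectly
priorities = {"pkg.api": 10, "pkg.impl": PRI_INDIRECT}
suppressed: list[str] = []

for dep in [d for d in dependencies if priorities.get(d) == PRI_INDIRECT]:
    if dep not in graph_ids:
        dependencies.remove(dep)
        suppressed.append(dep)

print(dependencies, suppressed)  # ['pkg.api'] ['pkg.impl']
```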
```diff
@@ -3284,8 +3294,9 @@ def find_stale_sccs(

     Fresh SCCs are those where:
       * We have valid cache files for all modules in the SCC.
-      * There are no changes in dependencies (files removed from/added to the build).
       * The interface hashes of direct dependents matches those recorded in the cache.
+      * There are no new (un)suppressed dependencies (files removed/added to the build).
+    The first and second conditions are verified by is_fresh().
     """
     stale_sccs = []
     fresh_sccs = []
```
```diff
@@ -3294,34 +3305,15 @@
         fresh = not stale_scc

         # Verify that interfaces of dependencies still present in graph are up-to-date (fresh).
-        # Note: if a dependency is not in graph anymore, it should be considered interface-stale.
-        # This is important to trigger any relevant updates from indirect dependencies that were
-        # removed in load_graph().
         stale_deps = set()
         for id in ascc.mod_ids:
             for dep in graph[id].dep_hashes:
-                if dep not in graph:
-                    stale_deps.add(dep)
-                    continue
-                if graph[dep].interface_hash != graph[id].dep_hashes[dep]:
+                if dep in graph and graph[dep].interface_hash != graph[id].dep_hashes[dep]:
                     stale_deps.add(dep)
         fresh = fresh and not stale_deps

-        undeps = set()
-        if fresh:
-            # Check if any dependencies that were suppressed according
-            # to the cache have been added back in this run.
-            for id in ascc.mod_ids:
-                undeps.update(graph[id].suppressed)
-            undeps &= graph.keys()
-            if undeps:
-                fresh = False
-
         if fresh:
+            # NOTE: Newly suppressed dependencies are handled by is_fresh().
             fresh_msg = "fresh"
-        elif undeps:
-            fresh_msg = f"stale due to changed suppression ({' '.join(sorted(undeps))})"
         elif stale_scc:
             fresh_msg = "inherently stale"
             if stale_scc != ascc.mod_ids:
```
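Condensed to plain dicts, the surviving freshness check reads like this sketch (illustrative module ids and hashes):

```python
# Plain-dict model of the freshness check: an SCC stays fresh only while every
# recorded dependency hash matches that dep's current interface hash.
current_interface = {"a": "h1", "b": "h2-new"}         # graph[dep].interface_hash
recorded_dep_hashes = {"a": {"b": "h2-old"}, "b": {}}  # graph[id].dep_hashes

stale_deps = set()
for mod in ("a", "b"):  # the modules of one SCC
    for dep, dep_hash in recorded_dep_hashes[mod].items():
        # Deps that left the graph are skipped here; load_graph has already
        # converted them into suppressed deps caught by meta validation.
        if dep in current_interface and current_interface[dep] != dep_hash:
            stale_deps.add(dep)

fresh = not stale_deps
print(fresh, stale_deps)  # False {'b'}: b's interface changed since caching
```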
```diff
@@ -3563,9 +3555,7 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
         if meta_tuple is None:
             continue
         meta, meta_json = meta_tuple
-        meta["dep_hashes"] = {
-            dep: graph[dep].interface_hash for dep in graph[id].dependencies if dep in graph
-        }
+        meta["dep_hashes"] = [graph[dep].interface_hash for dep in graph[id].dependencies]
         meta["error_lines"] = errors_by_id.get(id, [])
         write_cache_meta(meta, manager, meta_json)
     manager.done_sccs.add(ascc.id)
```
Two closing review comments on `options_snapshot`:

> Random thing I noticed (no need to fix here, but could be useful to fix eventually if I'm right): `clone_for_module` does a bunch of caching to avoid repeated computation, but `select_options_affecting_cache` recomputes everything on each call. I think a similar caching strategy would work for it, which might improve options processing performance. If you agree with this, I can create an issue about it. The caching should probably also cover `hash_digest(json_dumps(...))`.

> Yes, I think we should try this at some point; it may give a visible speed-up for large builds (the method uses `getattr()` in a loop, which may be slow).
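The suggestion amounts to memoising the per-clone result; a hedged sketch of the idea with a stand-in `Options` class (not mypy's, and the property name is hypothetical):

```python
# Hedged sketch of the proposed caching. The point is that the snapshot hash
# is computed once per Options clone and then reused, mirroring the caching
# clone_for_module already does for the clones themselves.
import functools
import hashlib
import json


class Options:
    def __init__(self, **flags: object) -> None:
        self.flags = flags

    @functools.cached_property
    def cache_snapshot_hash(self) -> str:
        # Hypothetical cached equivalent of
        # hash_digest(json_dumps(select_options_affecting_cache())).
        return hashlib.md5(
            json.dumps(self.flags, sort_keys=True, default=str).encode()
        ).hexdigest()


opts = Options(platform="linux", strict=True)
print(opts.cache_snapshot_hash)
print(opts.cache_snapshot_hash)  # second access reuses the stored result
```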