62 changes: 17 additions & 45 deletions mypy/build.py
@@ -15,7 +15,6 @@

import collections
import contextlib
import errno
import gc
import json
import os
@@ -337,6 +336,7 @@ class CacheMeta(NamedTuple):
dep_lines: list[int]
dep_hashes: dict[str, str]
interface_hash: str # hash representing the public interface
error_lines: list[str]
version_id: str # mypy version for cache invalidation
ignore_all: bool # if errors were ignored
plugin_data: Any # config data from plugins
@@ -376,6 +376,7 @@ def cache_meta_from_dict(meta: dict[str, Any], data_json: str) -> CacheMeta:
meta.get("dep_lines", []),
meta.get("dep_hashes", {}),
meta.get("interface_hash", ""),
meta.get("error_lines", []),
meta.get("version_id", sentinel),
meta.get("ignore_all", True),
meta.get("plugin_data", None),
@@ -1502,6 +1503,7 @@ def validate_meta(
"dep_lines": meta.dep_lines,
"dep_hashes": meta.dep_hashes,
"interface_hash": meta.interface_hash,
"error_lines": meta.error_lines,
"version_id": manager.version_id,
"ignore_all": meta.ignore_all,
"plugin_data": meta.plugin_data,
@@ -1678,28 +1680,6 @@ def write_cache_meta(
return cache_meta_from_dict(meta, data_json)


def delete_cache(id: str, path: str, manager: BuildManager) -> None:
"""Delete cache files for a module.

The cache files for a module are deleted when mypy finds errors there.
This avoids inconsistent states with cache files from different mypy runs,
see #4043 for an example.
"""
# We don't delete .deps files on errors, since the dependencies
# are mostly generated from other files and the metadata is
# tracked separately.
meta_path, data_path, _ = get_cache_names(id, path, manager.options)
cache_paths = [meta_path, data_path]
manager.log(f"Deleting {id} {path} {' '.join(x for x in cache_paths if x)}")

for filename in cache_paths:
try:
manager.metastore.remove(filename)
except OSError as e:
if e.errno != errno.ENOENT:
manager.log(f"Error deleting cache file {filename}: {e.strerror}")


"""Dependency manager.

Design
@@ -1875,6 +1855,9 @@ class State:
# Map from dependency id to its last observed interface hash
dep_hashes: dict[str, str] = {}

# List of errors reported for this file last time.
error_lines: list[str] = []

# Parent package, its parent, etc.
ancestors: list[str] | None = None

@@ -1896,9 +1879,6 @@ class State:
# Whether to ignore all errors
ignore_all = False

# Whether the module has an error or any of its dependencies have one.
transitive_error = False

# Errors reported before semantic analysis, to allow fine-grained
# mode to keep reporting them.
early_errors: list[ErrorInfo]
@@ -2000,6 +1980,7 @@ def __init__(
assert len(all_deps) == len(self.meta.dep_lines)
self.dep_line_map = {id: line for id, line in zip(all_deps, self.meta.dep_lines)}
self.dep_hashes = self.meta.dep_hashes
self.error_lines = self.meta.error_lines
if temporary:
self.load_tree(temporary=True)
if not manager.use_fine_grained_cache():
@@ -2517,11 +2498,6 @@ def write_cache(self) -> tuple[dict[str, Any], str, str] | None:
print(f"Error serializing {self.id}", file=self.manager.stdout)
raise # Propagate to display traceback
return None
is_errors = self.transitive_error
if is_errors:
delete_cache(self.id, self.path, self.manager)
Collaborator:
What if there are blockers? Would we then need to delete the cache file? I wonder if we have a test for this, i.e. a blocker is introduced, and the following incremental run still reports the same blocker.

Member Author:
Blockers are always exceptions raised in _build() and caught by build(), so we never even get to the point where we write the cache. If there was an existing cache file, it is fine: it will either be discarded on the next run as well (because it was discarded on this run, since we were checking the file for some reason), or actually be used if the user does an "undo" in the file. I think there is never a reason to delete an existing cache file.

However, surprisingly I can't find an existing test for this.
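
Not part of the diff: a sketch of what such a test might look like in mypy's incremental test-data format (case name and exact output are assumptions). A blocker is introduced in step 1; since no cache is written for a file with a blocker, step 2 must recheck it and report the same error:

    [case testBlockerRepeatedOnSecondRun]
    import a
    [file a.py]
    1+
    [file a.py.2]
    1+
    [out1]
    tmp/a.py:1: error: invalid syntax
    [out2]
    tmp/a.py:1: error: invalid syntax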

self.meta = None
return None
dep_prios = self.dependency_priorities()
dep_lines = self.dependency_lines()
assert self.source_hash is not None
@@ -3342,15 +3318,14 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
else:
fresh_msg = f"stale due to deps ({' '.join(sorted(stale_deps))})"

# Initialize transitive_error for all SCC members from union
# of transitive_error of dependencies.
if any(graph[dep].transitive_error for dep in deps if dep in graph):
for id in scc:
graph[id].transitive_error = True

scc_str = " ".join(scc)
if fresh:
manager.trace(f"Queuing {fresh_msg} SCC ({scc_str})")
for id in scc:
if graph[id].error_lines:
manager.flush_errors(
manager.errors.simplify_path(graph[id].xpath), graph[id].error_lines, False
)
fresh_scc_queue.append(scc)
else:
if fresh_scc_queue:
# single fresh SCC. This is intentional -- we don't need those modules
# loaded if there are no more stale SCCs to be rechecked.
#
# Also note we shouldn't have to worry about transitive_error here,
# since modules with transitive errors aren't written to the cache,
# and if any dependencies were changed, this SCC would be stale.
# (Also, in quick_and_dirty mode we don't care about transitive errors.)
#
# TODO: see if it's possible to determine if we need to process only a
# _subset_ of the past SCCs instead of having to process them all.
if (
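
Stepping outside the diff for a moment: the net effect of the fresh-SCC hunk above is that a module with a cache hit no longer needs a recheck just to resurface old diagnostics; the messages recorded in the meta file are replayed verbatim. A simplified sketch of that idea, using the names from the diff (not the complete mypy code):

    def replay_cached_errors(scc: list[str], graph: Graph, manager: BuildManager) -> None:
        # For fresh modules, re-emit the diagnostics the previous run recorded
        # in the cache meta, instead of rechecking the module.
        for id in scc:
            state = graph[id]
            if state.error_lines:
                manager.flush_errors(
                    manager.errors.simplify_path(state.xpath),  # path as shown to the user
                    state.error_lines,                          # messages cached last run
                    False,                                      # is_serious: not a blocker
                )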
@@ -3518,16 +3488,17 @@ def process_stale_scc(graph: Graph, scc: list[str], manager: BuildManager) -> None:
for id in stale:
graph[id].generate_unused_ignore_notes()
graph[id].generate_ignore_without_code_notes()
if any(manager.errors.is_errors_for_file(graph[id].xpath) for id in stale):
for id in stale:
graph[id].transitive_error = True

# Flush errors, and write cache in two phases: first data files, then meta files.
meta_tuples = {}
errors_by_id = {}
for id in stale:
if graph[id].xpath not in manager.errors.ignored_files:
errors = manager.errors.file_messages(
graph[id].xpath, formatter=manager.error_formatter
)
manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False)
errors_by_id[id] = errors
meta_tuples[id] = graph[id].write_cache()
graph[id].mark_as_rechecked()
for id in stale:
@@ -3539,6 +3510,7 @@ def process_stale_scc(graph: Graph, scc: list[str], manager: BuildManager) -> None:
meta["dep_hashes"] = {
dep: graph[dep].interface_hash for dep in graph[id].dependencies if dep in graph
}
meta["error_lines"] = errors_by_id.get(id, [])
graph[id].meta = write_cache_meta(meta, manager, meta_json, data_json)
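
A note on the two-phase ordering above (my reading of the change, not stated in the diff): a module's interface_hash is only final once its data file has been serialized, and every meta file records the interface hashes of its dependencies, so within an SCC all data files must land before any meta file does. Compactly, with the names used in the diff:

    # Phase 1: serialize data files; this finalizes each module's interface_hash.
    for id in stale:
        meta_tuples[id] = graph[id].write_cache()
    # Phase 2: each meta file snapshots the now-final dependency hashes plus the
    # error lines that were just flushed for this module.
    for id in stale:
        if meta_tuples[id] is None:
            continue  # nothing was written for this module
        meta, meta_json, data_json = meta_tuples[id]
        meta["dep_hashes"] = {
            dep: graph[dep].interface_hash for dep in graph[id].dependencies if dep in graph
        }
        meta["error_lines"] = errors_by_id.get(id, [])
        graph[id].meta = write_cache_meta(meta, manager, meta_json, data_json)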


44 changes: 13 additions & 31 deletions mypy/test/testcheck.py
@@ -9,7 +9,6 @@
from pathlib import Path

from mypy import build
from mypy.build import Graph
from mypy.errors import CompileError
from mypy.modulefinder import BuildSource, FindModuleCache, SearchPaths
from mypy.test.config import test_data_prefix, test_temp_dir
@@ -164,11 +163,13 @@ def run_case_once(
sys.path.insert(0, plugin_dir)

res = None
blocker = False
try:
res = build.build(sources=sources, options=options, alt_lib_path=test_temp_dir)
a = res.errors
except CompileError as e:
a = e.messages
blocker = True
finally:
assert sys.path[0] == plugin_dir
del sys.path[0]
@@ -199,7 +200,7 @@ def run_case_once(

if res:
if options.cache_dir != os.devnull:
self.verify_cache(module_data, res.errors, res.manager, res.graph)
self.verify_cache(module_data, res.manager, blocker)

name = "targets"
if incremental_step:
@@ -229,42 +230,23 @@ def run_case_once(
check_test_output_files(testcase, incremental_step, strip_prefix="tmp/")

def verify_cache(
self,
module_data: list[tuple[str, str, str]],
a: list[str],
manager: build.BuildManager,
graph: Graph,
self, module_data: list[tuple[str, str, str]], manager: build.BuildManager, blocker: bool
) -> None:
# There should be valid cache metadata for each module except
# for those that had an error in themselves or one of their
# dependencies.
error_paths = self.find_error_message_paths(a)
busted_paths = {m.path for id, m in manager.modules.items() if graph[id].transitive_error}
modules = self.find_module_files(manager)
modules.update({module_name: path for module_name, path, text in module_data})
missing_paths = self.find_missing_cache_files(modules, manager)
# We would like to assert error_paths.issubset(busted_paths)
# but this runs into trouble because while some 'notes' are
# really errors that cause an error to be marked, many are
# just notes attached to other errors.
assert error_paths or not busted_paths, "Some modules reported error despite no errors"
if not missing_paths == busted_paths:
raise AssertionError(f"cache data discrepancy {missing_paths} != {busted_paths}")
if not blocker:
# There should be valid cache metadata for each module except
# in case of a blocking error in themselves or one of their
# dependencies.
Collaborator:

Is this comment up to date? Don't we now have cache metadata even if there are non-blocker errors?

Member Author:

Yeah, this should now read "... except for those that had a blocker error in themselves or ..."

modules = self.find_module_files(manager)
modules.update({module_name: path for module_name, path, text in module_data})
missing_paths = self.find_missing_cache_files(modules, manager)
if missing_paths:
raise AssertionError(f"cache data missing for {missing_paths}")
assert os.path.isfile(os.path.join(manager.options.cache_dir, ".gitignore"))
cachedir_tag = os.path.join(manager.options.cache_dir, "CACHEDIR.TAG")
assert os.path.isfile(cachedir_tag)
with open(cachedir_tag) as f:
assert f.read().startswith("Signature: 8a477f597d28d172789f06886806bc55")
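
For reference (not part of the diff): CACHEDIR.TAG is a cross-tool convention for marking cache directories so backup and archiving tools can skip them. A valid tag file starts with the fixed signature the test checks; later lines are free-form comments (the exact comment text mypy writes is not shown here):

    Signature: 8a477f597d28d172789f06886806bc55
    # This file is a cache directory tag.
    # For information about cache directory tags see https://bford.info/cachedir/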

def find_error_message_paths(self, a: list[str]) -> set[str]:
hits = set()
for line in a:
m = re.match(r"([^\s:]+):(\d+:)?(\d+:)? (error|warning|note):", line)
if m:
p = m.group(1)
hits.add(p)
return hits

def find_module_files(self, manager: build.BuildManager) -> dict[str, str]:
return {id: module.path for id, module in manager.modules.items()}
