
Commit 8bfecd4

Write cache for modules with errors (#19820)
This is required for #933. Here I use a very simple-minded approach: errors are serialized simply as a list of strings. In the near future I may switch to serializing `ErrorInfo`s instead (as this has some other benefits). Note that many tests have their `[stale ...]` checks updated, because previously modules with errors were not included in that list; I double-checked each test to confirm the new values are correct. Also note that we still don't write cache files if there were blockers in an SCC (such as a syntax error).
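As a rough illustration of the approach described above, the sketch below models the round trip with simplified, invented stand-ins (`FakeCacheMeta`, `replay_if_fresh` are not mypy's real internals): formatted error lines are stored next to a module's cache metadata, metadata written before this change defaults to an empty list, and the stored lines are replayed when the module is later found to be fresh.

# Minimal sketch of the idea with invented stand-ins (FakeCacheMeta, replay_if_fresh);
# it models the shape of the change, not mypy's actual CacheMeta/State machinery.
from __future__ import annotations

import json
from dataclasses import dataclass, field


@dataclass
class FakeCacheMeta:
    interface_hash: str = ""
    # New field: formatted error lines reported for the module on the last run.
    error_lines: list[str] = field(default_factory=list)

    def to_json(self) -> str:
        return json.dumps({"interface_hash": self.interface_hash, "error_lines": self.error_lines})

    @classmethod
    def from_json(cls, text: str) -> FakeCacheMeta:
        data = json.loads(text)
        # Metadata written by an older version has no "error_lines" key; default to [].
        return cls(data.get("interface_hash", ""), data.get("error_lines", []))


def replay_if_fresh(meta: FakeCacheMeta, fresh: bool) -> list[str]:
    # A fresh (skipped) module re-reports its cached errors instead of dropping them;
    # a stale module is re-checked, so its errors are produced from scratch.
    return list(meta.error_lines) if fresh else []


if __name__ == "__main__":
    meta = FakeCacheMeta("abc123", ["m.py:2: error: Incompatible return value type"])
    restored = FakeCacheMeta.from_json(meta.to_json())
    print(replay_if_fresh(restored, fresh=True))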
1 parent f8f618a

File tree

4 files changed, +101 -101 lines changed


mypy/build.py

Lines changed: 17 additions & 45 deletions
@@ -15,7 +15,6 @@
 
 import collections
 import contextlib
-import errno
 import gc
 import json
 import os
@@ -337,6 +336,7 @@ class CacheMeta(NamedTuple):
     dep_lines: list[int]
     dep_hashes: dict[str, str]
     interface_hash: str  # hash representing the public interface
+    error_lines: list[str]
     version_id: str  # mypy version for cache invalidation
     ignore_all: bool  # if errors were ignored
     plugin_data: Any  # config data from plugins
@@ -376,6 +376,7 @@ def cache_meta_from_dict(meta: dict[str, Any], data_json: str) -> CacheMeta:
         meta.get("dep_lines", []),
         meta.get("dep_hashes", {}),
         meta.get("interface_hash", ""),
+        meta.get("error_lines", []),
         meta.get("version_id", sentinel),
         meta.get("ignore_all", True),
         meta.get("plugin_data", None),
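One detail worth noting in the hunk above: cache metadata written by an older mypy simply has no "error_lines" key, so `cache_meta_from_dict` falls back to an empty list. A tiny illustration (the dicts below are invented and heavily truncated, not real cache files):

# Invented, heavily truncated cache-meta dicts, for illustration only.
old_style_meta = {"interface_hash": "abc123"}  # written before this change
new_style_meta = {"interface_hash": "abc123", "error_lines": ["m.py:1: error: Unsupported operand types"]}

# The same .get() fallback that cache_meta_from_dict uses above:
assert old_style_meta.get("error_lines", []) == []
assert new_style_meta.get("error_lines", []) == ["m.py:1: error: Unsupported operand types"]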
@@ -1502,6 +1503,7 @@ def validate_meta(
            "dep_lines": meta.dep_lines,
            "dep_hashes": meta.dep_hashes,
            "interface_hash": meta.interface_hash,
+           "error_lines": meta.error_lines,
            "version_id": manager.version_id,
            "ignore_all": meta.ignore_all,
            "plugin_data": meta.plugin_data,
@@ -1678,28 +1680,6 @@ def write_cache_meta(
     return cache_meta_from_dict(meta, data_json)
 
 
-def delete_cache(id: str, path: str, manager: BuildManager) -> None:
-    """Delete cache files for a module.
-
-    The cache files for a module are deleted when mypy finds errors there.
-    This avoids inconsistent states with cache files from different mypy runs,
-    see #4043 for an example.
-    """
-    # We don't delete .deps files on errors, since the dependencies
-    # are mostly generated from other files and the metadata is
-    # tracked separately.
-    meta_path, data_path, _ = get_cache_names(id, path, manager.options)
-    cache_paths = [meta_path, data_path]
-    manager.log(f"Deleting {id} {path} {' '.join(x for x in cache_paths if x)}")
-
-    for filename in cache_paths:
-        try:
-            manager.metastore.remove(filename)
-        except OSError as e:
-            if e.errno != errno.ENOENT:
-                manager.log(f"Error deleting cache file {filename}: {e.strerror}")
-
-
 """Dependency manager.
 
 Design
@@ -1875,6 +1855,9 @@ class State:
     # Map from dependency id to its last observed interface hash
     dep_hashes: dict[str, str] = {}
 
+    # List of errors reported for this file last time.
+    error_lines: list[str] = []
+
     # Parent package, its parent, etc.
     ancestors: list[str] | None = None
 
@@ -1896,9 +1879,6 @@ class State:
     # Whether to ignore all errors
     ignore_all = False
 
-    # Whether the module has an error or any of its dependencies have one.
-    transitive_error = False
-
     # Errors reported before semantic analysis, to allow fine-grained
     # mode to keep reporting them.
     early_errors: list[ErrorInfo]
@@ -2000,6 +1980,7 @@ def __init__(
             assert len(all_deps) == len(self.meta.dep_lines)
             self.dep_line_map = {id: line for id, line in zip(all_deps, self.meta.dep_lines)}
             self.dep_hashes = self.meta.dep_hashes
+            self.error_lines = self.meta.error_lines
             if temporary:
                 self.load_tree(temporary=True)
             if not manager.use_fine_grained_cache():
@@ -2517,11 +2498,6 @@ def write_cache(self) -> tuple[dict[str, Any], str, str] | None:
                    print(f"Error serializing {self.id}", file=self.manager.stdout)
                    raise  # Propagate to display traceback
            return None
-        is_errors = self.transitive_error
-        if is_errors:
-            delete_cache(self.id, self.path, self.manager)
-            self.meta = None
-            return None
         dep_prios = self.dependency_priorities()
         dep_lines = self.dependency_lines()
         assert self.source_hash is not None
@@ -3315,15 +3291,14 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
            else:
                fresh_msg = f"stale due to deps ({' '.join(sorted(stale_deps))})"
 
-        # Initialize transitive_error for all SCC members from union
-        # of transitive_error of dependencies.
-        if any(graph[dep].transitive_error for dep in deps if dep in graph):
-            for id in scc:
-                graph[id].transitive_error = True
-
         scc_str = " ".join(scc)
         if fresh:
             manager.trace(f"Queuing {fresh_msg} SCC ({scc_str})")
+            for id in scc:
+                if graph[id].error_lines:
+                    manager.flush_errors(
+                        manager.errors.simplify_path(graph[id].xpath), graph[id].error_lines, False
+                    )
             fresh_scc_queue.append(scc)
         else:
             if fresh_scc_queue:
@@ -3335,11 +3310,6 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
                # single fresh SCC. This is intentional -- we don't need those modules
                # loaded if there are no more stale SCCs to be rechecked.
                #
-               # Also note we shouldn't have to worry about transitive_error here,
-               # since modules with transitive errors aren't written to the cache,
-               # and if any dependencies were changed, this SCC would be stale.
-               # (Also, in quick_and_dirty mode we don't care about transitive errors.)
-               #
                # TODO: see if it's possible to determine if we need to process only a
                # _subset_ of the past SCCs instead of having to process them all.
                if (
@@ -3491,16 +3461,17 @@ def process_stale_scc(graph: Graph, scc: list[str], manager: BuildManager) -> None:
     for id in stale:
         graph[id].generate_unused_ignore_notes()
         graph[id].generate_ignore_without_code_notes()
-    if any(manager.errors.is_errors_for_file(graph[id].xpath) for id in stale):
-        for id in stale:
-            graph[id].transitive_error = True
+
+    # Flush errors, and write cache in two phases: first data files, then meta files.
     meta_tuples = {}
+    errors_by_id = {}
     for id in stale:
         if graph[id].xpath not in manager.errors.ignored_files:
             errors = manager.errors.file_messages(
                 graph[id].xpath, formatter=manager.error_formatter
             )
             manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False)
+            errors_by_id[id] = errors
         meta_tuples[id] = graph[id].write_cache()
         graph[id].mark_as_rechecked()
     for id in stale:
@@ -3512,6 +3483,7 @@ def process_stale_scc(graph: Graph, scc: list[str], manager: BuildManager) -> None:
         meta["dep_hashes"] = {
             dep: graph[dep].interface_hash for dep in graph[id].dependencies if dep in graph
         }
+        meta["error_lines"] = errors_by_id.get(id, [])
         graph[id].meta = write_cache_meta(meta, manager, meta_json, data_json)
 
 
mypy/test/testcheck.py

Lines changed: 13 additions & 31 deletions
@@ -9,7 +9,6 @@
 from pathlib import Path
 
 from mypy import build
-from mypy.build import Graph
 from mypy.errors import CompileError
 from mypy.modulefinder import BuildSource, FindModuleCache, SearchPaths
 from mypy.test.config import test_data_prefix, test_temp_dir
@@ -164,11 +163,13 @@ def run_case_once(
            sys.path.insert(0, plugin_dir)
 
        res = None
+       blocker = False
        try:
            res = build.build(sources=sources, options=options, alt_lib_path=test_temp_dir)
            a = res.errors
        except CompileError as e:
            a = e.messages
+           blocker = True
        finally:
            assert sys.path[0] == plugin_dir
            del sys.path[0]
@@ -199,7 +200,7 @@ def run_case_once(
 
        if res:
            if options.cache_dir != os.devnull:
-               self.verify_cache(module_data, res.errors, res.manager, res.graph)
+               self.verify_cache(module_data, res.manager, blocker)
 
            name = "targets"
            if incremental_step:
@@ -229,42 +230,23 @@ def run_case_once(
        check_test_output_files(testcase, incremental_step, strip_prefix="tmp/")
 
    def verify_cache(
-       self,
-       module_data: list[tuple[str, str, str]],
-       a: list[str],
-       manager: build.BuildManager,
-       graph: Graph,
+       self, module_data: list[tuple[str, str, str]], manager: build.BuildManager, blocker: bool
    ) -> None:
-       # There should be valid cache metadata for each module except
-       # for those that had an error in themselves or one of their
-       # dependencies.
-       error_paths = self.find_error_message_paths(a)
-       busted_paths = {m.path for id, m in manager.modules.items() if graph[id].transitive_error}
-       modules = self.find_module_files(manager)
-       modules.update({module_name: path for module_name, path, text in module_data})
-       missing_paths = self.find_missing_cache_files(modules, manager)
-       # We would like to assert error_paths.issubset(busted_paths)
-       # but this runs into trouble because while some 'notes' are
-       # really errors that cause an error to be marked, many are
-       # just notes attached to other errors.
-       assert error_paths or not busted_paths, "Some modules reported error despite no errors"
-       if not missing_paths == busted_paths:
-           raise AssertionError(f"cache data discrepancy {missing_paths} != {busted_paths}")
+       if not blocker:
+           # There should be valid cache metadata for each module except
+           # in case of a blocking error in themselves or one of their
+           # dependencies.
+           modules = self.find_module_files(manager)
+           modules.update({module_name: path for module_name, path, text in module_data})
+           missing_paths = self.find_missing_cache_files(modules, manager)
+           if missing_paths:
+               raise AssertionError(f"cache data missing for {missing_paths}")
        assert os.path.isfile(os.path.join(manager.options.cache_dir, ".gitignore"))
        cachedir_tag = os.path.join(manager.options.cache_dir, "CACHEDIR.TAG")
        assert os.path.isfile(cachedir_tag)
        with open(cachedir_tag) as f:
            assert f.read().startswith("Signature: 8a477f597d28d172789f06886806bc55")
 
-   def find_error_message_paths(self, a: list[str]) -> set[str]:
-       hits = set()
-       for line in a:
-           m = re.match(r"([^\s:]+):(\d+:)?(\d+:)? (error|warning|note):", line)
-           if m:
-               p = m.group(1)
-               hits.add(p)
-       return hits
-
    def find_module_files(self, manager: build.BuildManager) -> dict[str, str]:
        return {id: module.path for id, module in manager.modules.items()}
 
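Finally, a rough end-to-end illustration of the user-visible effect. This script is not part of the commit; it only drives mypy through its public `mypy.api.run` entry point, and it assumes the usual `<module>.meta.json` cache layout. After this change, a module with non-blocking errors still gets cache files, and the same errors should be reported again on a warm, fully cached re-run.

# Rough end-to-end sketch (not part of this commit). It assumes mypy is installed
# and uses only the public programmatic entry point mypy.api.run.
import os
import pathlib
import tempfile
import textwrap

from mypy import api


def main() -> None:
    with tempfile.TemporaryDirectory() as tmp:
        src = os.path.join(tmp, "prog.py")
        cache = os.path.join(tmp, "cache")
        with open(src, "w") as f:
            f.write(textwrap.dedent(
                """\
                def f() -> int:
                    return "oops"  # non-blocking type error
                """
            ))
        args = [src, "--cache-dir", cache, "--no-error-summary"]

        cold_out, _, _ = api.run(args)   # cold run: module is checked and cached
        # With this change, cache metadata is written even though prog.py has errors;
        # previously delete_cache() would have removed it.
        meta_files = list(pathlib.Path(cache).rglob("prog.meta.json"))
        print("cache written for prog:", bool(meta_files))

        warm_out, _, _ = api.run(args)   # warm run: module is fresh, cached errors replayed
        print("same errors on warm run:", cold_out == warm_out)


if __name__ == "__main__":
    main()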
0 commit comments
