Skip to content

Commit 9edd29a

Browse files
authored
Inverse interface freshness logic (#19809)
Fixes #9554 This is another case where I am surprised it didn't work like this in the first place. Right now the freshness info originates from the dependency itself (like "trust me, I am fresh", whatever that means). IMO this doesn't make much sense; instead, a dependent should verify whether all dependencies are the same as it last saw them. On the surface the idea is simple, but there are a couple of tricky parts: * This requires splitting `write_cache()` into two phases: first write all data files in an SCC (or at least serialize them), then write all meta files. I didn't find any elegant way to do the split, but it is probably fine, as we already have this untyped meta JSON in a few places. * I am adding plugin data (used by mypyc separate compilation currently) as part of the interface hash. It is not documented whether it should be this way or not, but I would say it should, and this is essentially how mypyc expects it (the group name will appear in `#include <__native_group_name.h>`, so it _is_ a part of the interface). It used to work ~accidentally because we check plugin data in `find_cache_meta()`, which is called before setting `interface_hash`, not in `validate_meta()`.
1 parent 60639c5 commit 9edd29a

File tree

3 files changed

+73
-35
lines changed

3 files changed

+73
-35
lines changed

mypy/build.py

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ class CacheMeta(NamedTuple):
335335
# dep_prios and dep_lines are in parallel with dependencies + suppressed
336336
dep_prios: list[int]
337337
dep_lines: list[int]
338+
dep_hashes: dict[str, str]
338339
interface_hash: str # hash representing the public interface
339340
version_id: str # mypy version for cache invalidation
340341
ignore_all: bool # if errors were ignored
@@ -373,6 +374,7 @@ def cache_meta_from_dict(meta: dict[str, Any], data_json: str) -> CacheMeta:
373374
meta.get("options"),
374375
meta.get("dep_prios", []),
375376
meta.get("dep_lines", []),
377+
meta.get("dep_hashes", {}),
376378
meta.get("interface_hash", ""),
377379
meta.get("version_id", sentinel),
378380
meta.get("ignore_all", True),
@@ -890,8 +892,6 @@ def log(self, *message: str) -> None:
890892
self.stderr.flush()
891893

892894
def log_fine_grained(self, *message: str) -> None:
893-
import mypy.build
894-
895895
if self.verbosity() >= 1:
896896
self.log("fine-grained:", *message)
897897
elif mypy.build.DEBUG_FINE_GRAINED:
@@ -1500,6 +1500,7 @@ def validate_meta(
15001500
"options": (manager.options.clone_for_module(id).select_options_affecting_cache()),
15011501
"dep_prios": meta.dep_prios,
15021502
"dep_lines": meta.dep_lines,
1503+
"dep_hashes": meta.dep_hashes,
15031504
"interface_hash": meta.interface_hash,
15041505
"version_id": manager.version_id,
15051506
"ignore_all": meta.ignore_all,
@@ -1543,7 +1544,7 @@ def write_cache(
15431544
source_hash: str,
15441545
ignore_all: bool,
15451546
manager: BuildManager,
1546-
) -> tuple[str, CacheMeta | None]:
1547+
) -> tuple[str, tuple[dict[str, Any], str, str] | None]:
15471548
"""Write cache files for a module.
15481549
15491550
Note that this mypy's behavior is still correct when any given
@@ -1564,9 +1565,9 @@ def write_cache(
15641565
manager: the build manager (for pyversion, log/trace)
15651566
15661567
Returns:
1567-
A tuple containing the interface hash and CacheMeta
1568-
corresponding to the metadata that was written (the latter may
1569-
be None if the cache could not be written).
1568+
A tuple containing the interface hash and inner tuple with cache meta JSON
1569+
that should be written and paths to cache files (inner tuple may be None,
1570+
if the cache data could not be written).
15701571
"""
15711572
metastore = manager.metastore
15721573
# For Bazel we use relative paths and zero mtimes.
@@ -1581,6 +1582,8 @@ def write_cache(
15811582
if bazel:
15821583
tree.path = path
15831584

1585+
plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))
1586+
15841587
# Serialize data and analyze interface
15851588
if manager.options.fixed_format_cache:
15861589
data_io = Buffer()
@@ -1589,9 +1592,7 @@ def write_cache(
15891592
else:
15901593
data = tree.serialize()
15911594
data_bytes = json_dumps(data, manager.options.debug_cache)
1592-
interface_hash = hash_digest(data_bytes)
1593-
1594-
plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))
1595+
interface_hash = hash_digest(data_bytes + json_dumps(plugin_data))
15951596

15961597
# Obtain and set up metadata
15971598
st = manager.get_stat(path)
@@ -1659,16 +1660,22 @@ def write_cache(
16591660
"ignore_all": ignore_all,
16601661
"plugin_data": plugin_data,
16611662
}
1663+
return interface_hash, (meta, meta_json, data_json)
16621664

1665+
1666+
def write_cache_meta(
1667+
meta: dict[str, Any], manager: BuildManager, meta_json: str, data_json: str
1668+
) -> CacheMeta:
16631669
# Write meta cache file
1670+
metastore = manager.metastore
16641671
meta_str = json_dumps(meta, manager.options.debug_cache)
16651672
if not metastore.write(meta_json, meta_str):
16661673
# Most likely the error is the replace() call
16671674
# (see https://github.com/python/mypy/issues/3215).
16681675
# The next run will simply find the cache entry out of date.
16691676
manager.log(f"Error writing meta JSON file {meta_json}")
16701677

1671-
return interface_hash, cache_meta_from_dict(meta, data_json)
1678+
return cache_meta_from_dict(meta, data_json)
16721679

16731680

16741681
def delete_cache(id: str, path: str, manager: BuildManager) -> None:
@@ -1867,6 +1874,9 @@ class State:
18671874
# Map each dependency to the line number where it is first imported
18681875
dep_line_map: dict[str, int]
18691876

1877+
# Map from dependency id to its last observed interface hash
1878+
dep_hashes: dict[str, str] = {}
1879+
18701880
# Parent package, its parent, etc.
18711881
ancestors: list[str] | None = None
18721882

@@ -1879,9 +1889,6 @@ class State:
18791889
# If caller_state is set, the line number in the caller where the import occurred
18801890
caller_line = 0
18811891

1882-
# If True, indicate that the public interface of this module is unchanged
1883-
externally_same = True
1884-
18851892
# Contains a hash of the public interface in incremental mode
18861893
interface_hash: str = ""
18871894

@@ -1994,6 +2001,7 @@ def __init__(
19942001
self.priorities = {id: pri for id, pri in zip(all_deps, self.meta.dep_prios)}
19952002
assert len(all_deps) == len(self.meta.dep_lines)
19962003
self.dep_line_map = {id: line for id, line in zip(all_deps, self.meta.dep_lines)}
2004+
self.dep_hashes = self.meta.dep_hashes
19972005
if temporary:
19982006
self.load_tree(temporary=True)
19992007
if not manager.use_fine_grained_cache():
@@ -2046,26 +2054,17 @@ def is_fresh(self) -> bool:
20462054
"""Return whether the cache data for this file is fresh."""
20472055
# NOTE: self.dependencies may differ from
20482056
# self.meta.dependencies when a dependency is dropped due to
2049-
# suppression by silent mode. However when a suppressed
2057+
# suppression by silent mode. However, when a suppressed
20502058
# dependency is added back we find out later in the process.
2051-
return (
2052-
self.meta is not None
2053-
and self.is_interface_fresh()
2054-
and self.dependencies == self.meta.dependencies
2055-
)
2056-
2057-
def is_interface_fresh(self) -> bool:
2058-
return self.externally_same
2059+
return self.meta is not None and self.dependencies == self.meta.dependencies
20592060

20602061
def mark_as_rechecked(self) -> None:
20612062
"""Marks this module as having been fully re-analyzed by the type-checker."""
20622063
self.manager.rechecked_modules.add(self.id)
20632064

2064-
def mark_interface_stale(self, *, on_errors: bool = False) -> None:
2065+
def mark_interface_stale(self) -> None:
20652066
"""Marks this module as having a stale public interface, and discards the cache data."""
2066-
self.externally_same = False
2067-
if not on_errors:
2068-
self.manager.stale_modules.add(self.id)
2067+
self.manager.stale_modules.add(self.id)
20692068

20702069
def check_blockers(self) -> None:
20712070
"""Raise CompileError if a blocking error is detected."""
@@ -2507,7 +2506,7 @@ def valid_references(self) -> set[str]:
25072506

25082507
return valid_refs
25092508

2510-
def write_cache(self) -> None:
2509+
def write_cache(self) -> tuple[dict[str, Any], str, str] | None:
25112510
assert self.tree is not None, "Internal error: method must be called on parsed file only"
25122511
# We don't support writing cache files in fine-grained incremental mode.
25132512
if (
@@ -2525,20 +2524,19 @@ def write_cache(self) -> None:
25252524
except Exception:
25262525
print(f"Error serializing {self.id}", file=self.manager.stdout)
25272526
raise # Propagate to display traceback
2528-
return
2527+
return None
25292528
is_errors = self.transitive_error
25302529
if is_errors:
25312530
delete_cache(self.id, self.path, self.manager)
25322531
self.meta = None
2533-
self.mark_interface_stale(on_errors=True)
2534-
return
2532+
return None
25352533
dep_prios = self.dependency_priorities()
25362534
dep_lines = self.dependency_lines()
25372535
assert self.source_hash is not None
25382536
assert len(set(self.dependencies)) == len(
25392537
self.dependencies
25402538
), f"Duplicates in dependencies list for {self.id} ({self.dependencies})"
2541-
new_interface_hash, self.meta = write_cache(
2539+
new_interface_hash, meta_tuple = write_cache(
25422540
self.id,
25432541
self.path,
25442542
self.tree,
@@ -2557,6 +2555,7 @@ def write_cache(self) -> None:
25572555
self.manager.log(f"Cached module {self.id} has changed interface")
25582556
self.mark_interface_stale()
25592557
self.interface_hash = new_interface_hash
2558+
return meta_tuple
25602559

25612560
def verify_dependencies(self, suppressed_only: bool = False) -> None:
25622561
"""Report errors for import targets in modules that don't exist.
@@ -3287,7 +3286,19 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
32873286
for id in scc:
32883287
deps.update(graph[id].dependencies)
32893288
deps -= ascc
3290-
stale_deps = {id for id in deps if id in graph and not graph[id].is_interface_fresh()}
3289+
3290+
# Verify that interfaces of dependencies still present in graph are up-to-date (fresh).
3291+
# Note: if a dependency is not in graph anymore, it should be considered interface-stale.
3292+
# This is important to trigger any relevant updates from indirect dependencies that were
3293+
# removed in load_graph().
3294+
stale_deps = set()
3295+
for id in ascc:
3296+
for dep in graph[id].dep_hashes:
3297+
if dep not in graph:
3298+
stale_deps.add(dep)
3299+
continue
3300+
if graph[dep].interface_hash != graph[id].dep_hashes[dep]:
3301+
stale_deps.add(dep)
32913302
fresh = fresh and not stale_deps
32923303
undeps = set()
32933304
if fresh:
@@ -3518,14 +3529,25 @@ def process_stale_scc(graph: Graph, scc: list[str], manager: BuildManager) -> No
35183529
if any(manager.errors.is_errors_for_file(graph[id].xpath) for id in stale):
35193530
for id in stale:
35203531
graph[id].transitive_error = True
3532+
meta_tuples = {}
35213533
for id in stale:
35223534
if graph[id].xpath not in manager.errors.ignored_files:
35233535
errors = manager.errors.file_messages(
35243536
graph[id].xpath, formatter=manager.error_formatter
35253537
)
35263538
manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False)
3527-
graph[id].write_cache()
3539+
meta_tuples[id] = graph[id].write_cache()
35283540
graph[id].mark_as_rechecked()
3541+
for id in stale:
3542+
meta_tuple = meta_tuples[id]
3543+
if meta_tuple is None:
3544+
graph[id].meta = None
3545+
continue
3546+
meta, meta_json, data_json = meta_tuple
3547+
meta["dep_hashes"] = {
3548+
dep: graph[dep].interface_hash for dep in graph[id].dependencies if dep in graph
3549+
}
3550+
graph[id].meta = write_cache_meta(meta, manager, meta_json, data_json)
35293551

35303552

35313553
def sorted_components(

mypyc/test-data/run-multimodule.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -816,7 +816,7 @@ def foo() -> int: return 10
816816
[file driver.py]
817817
import native
818818

819-
[rechecked native, other_a]
819+
[rechecked other_a]
820820

821821
[case testSeparateCompilationWithUndefinedAttribute]
822822
from other_a import A

test-data/unit/check-incremental.test

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def foo() -> int:
205205
return "foo"
206206
return inner2()
207207

208-
[rechecked mod1, mod2]
208+
[rechecked mod2]
209209
[stale]
210210
[out2]
211211
tmp/mod2.py:4: error: Incompatible return value type (got "str", expected "int")
@@ -6982,3 +6982,19 @@ class Sub(Base[Concatenate[int, P]]): ...
69826982
[out]
69836983
[out2]
69846984
tmp/impl.py:7: error: Argument 1 has incompatible type "str"; expected "int"
6985+
6986+
[case testIncrementalDifferentSourcesFreshnessCorrect]
6987+
# cmd: mypy -m foo bar
6988+
# cmd2: mypy -m foo
6989+
# cmd3: mypy -m foo bar
6990+
[file foo.py]
6991+
foo = 5
6992+
[file foo.py.2]
6993+
foo = None
6994+
[file bar.py]
6995+
from foo import foo
6996+
bar: int = foo
6997+
[out]
6998+
[out2]
6999+
[out3]
7000+
tmp/bar.py:2: error: Incompatible types in assignment (expression has type "None", variable has type "int")

0 commit comments

Comments
 (0)