Skip to content

Commit d118f61

Browse files
committed
Add some docstrings/comments
1 parent c9f4db6 commit d118f61

File tree

1 file changed

+59
-16
lines changed

1 file changed

+59
-16
lines changed

mypy/build.py

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,24 @@
123123

124124

125125
class SCC:
126+
"""A simple class that represents a strongly connected component (import cycle)."""
127+
126128
id_counter: ClassVar[int] = 0
127129

128130
def __init__(self, ids: set[str]) -> None:
129131
self.id = SCC.id_counter
130132
SCC.id_counter += 1
133+
# Ids of modules in this cycle.
131134
self.mod_ids = ids
135+
# Direct dependencies, should be populated by the caller.
132136
self.deps: set[int] = set()
137+
# Direct dependencies that have not been processed yet.
138+
# Should be populated by the caller. This set may change during graph
139+
# processing, while the above stays constant.
133140
self.not_ready_deps: set[int] = set()
141+
# SCCs that (directly) depend on this SCC. Note this is a list to
142+
# make processing order more predictable. Dependents will be notified
143+
# that they may be ready in the order in this list.
134144
self.direct_dependents: list[int] = []
135145

136146

@@ -737,9 +747,17 @@ def __init__(
737747
self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo]]] = {}
738748
# Number of times we used GC optimization hack for fresh SCCs.
739749
self.gc_freeze_cycles = 0
750+
# Mapping from SCC id to corresponding SCC instance. This is populated
751+
# in process_graph().
740752
self.scc_by_id: dict[int, SCC] = {}
753+
# Global topological order for SCCs. This exists to make order of processing
754+
# SCCs more predictable.
741755
self.top_order: list[int] = []
756+
# Stale SCCs that are queued for processing. Note that as of now we have just
757+
# one worker, that is the same process. In the future, we will support multiple
758+
# parallel worker processes.
742759
self.scc_queue: list[SCC] = []
760+
# SCCs that have been fully processed.
743761
self.done_sccs: set[int] = set()
744762

745763
def dump_stats(self) -> None:
@@ -942,9 +960,16 @@ def stats_summary(self) -> Mapping[str, object]:
942960
return self.stats
943961

944962
def submit(self, sccs: list[SCC]) -> None:
963+
"""Submit a stale SCC for processing in current process."""
945964
self.scc_queue.extend(sccs)
946965

947966
def get_done(self, graph: Graph) -> tuple[list[SCC], bool]:
967+
"""Wait for a stale SCC processing (in process) to finish.
968+
969+
Return the next processed SCC and whether we have more in the queue.
970+
This emulates the API we will have for parallel processing
971+
in multiple worker processes.
972+
"""
948973
if not self.scc_queue:
949974
return [], False
950975
next_scc = self.scc_queue.pop(0)
@@ -3230,8 +3255,12 @@ def load_graph(
32303255

32313256

32323257
def order_ascc_ex(graph: Graph, ascc: SCC) -> list[str]:
3233-
# Order the SCC's nodes using a heuristic.
3234-
# Note that ascc is a set, and scc is a list.
3258+
"""Apply extra heuristics on top of order_ascc().
3259+
3260+
This should be used only for actual SCCs, not for "inner" SCCs
3261+
we create recursively during ordering of the SCC. Currently, this
3262+
has only some special handling for the SCC that includes builtins.
3263+
"""
32353264
scc = order_ascc(graph, ascc.mod_ids)
32363265
# Make the order of the SCC that includes 'builtins' and 'typing',
32373266
# among other things, predictable. Various things may break if
@@ -3251,18 +3280,18 @@ def order_ascc_ex(graph: Graph, ascc: SCC) -> list[str]:
32513280
def find_stale_sccs(
32523281
sccs: list[SCC], graph: Graph, manager: BuildManager
32533282
) -> tuple[list[SCC], list[SCC]]:
3283+
"""Split a list of ready SCCs into stale and fresh.
3284+
3285+
Fresh SCCs are those where:
3286+
* We have valid cache files for all modules in the SCC.
3287+
* The interface hashes of direct dependencies match those recorded in the cache.
3288+
* There are no new (un)suppressed dependencies (files removed/added to the build).
3289+
"""
32543290
stale_sccs = []
32553291
fresh_sccs = []
32563292
for ascc in sccs:
3257-
# Because the SCCs are presented in topological sort order, we
3258-
# don't need to look at dependencies recursively for staleness
3259-
# -- the immediate dependencies are sufficient.
32603293
stale_scc = {id for id in ascc.mod_ids if not graph[id].is_fresh()}
32613294
fresh = not stale_scc
3262-
deps = set()
3263-
for id in ascc.mod_ids:
3264-
deps.update(graph[id].dependencies)
3265-
deps -= ascc.mod_ids
32663295

32673296
# Verify that interfaces of dependencies still present in graph are up-to-date (fresh).
32683297
# Note: if a dependency is not in graph anymore, it should be considered interface-stale.
@@ -3277,6 +3306,7 @@ def find_stale_sccs(
32773306
if graph[dep].interface_hash != graph[id].dep_hashes[dep]:
32783307
stale_deps.add(dep)
32793308
fresh = fresh and not stale_deps
3309+
32803310
undeps = set()
32813311
if fresh:
32823312
# Check if any dependencies that were suppressed according
@@ -3287,6 +3317,7 @@ def find_stale_sccs(
32873317
undeps &= graph.keys()
32883318
if undeps:
32893319
fresh = False
3320+
32903321
if fresh:
32913322
fresh_msg = "fresh"
32923323
elif undeps:
@@ -3326,13 +3357,12 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
33263357
manager.log(
33273358
"Found %d SCCs; largest has %d nodes" % (len(sccs), max(len(scc.mod_ids) for scc in sccs))
33283359
)
3329-
for scc in sccs:
3330-
pass # print("SCC", scc.id, scc.mod_ids, scc.deps, scc.direct_dependents)
33313360

33323361
scc_by_id = {scc.id: scc for scc in sccs}
33333362
manager.scc_by_id = scc_by_id
33343363
manager.top_order = [scc.id for scc in sccs]
33353364

3365+
# Prime the ready list with leaf SCCs (that have no dependencies).
33363366
ready = []
33373367
not_ready = []
33383368
for scc in sccs:
@@ -3347,6 +3377,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
33473377
if stale:
33483378
manager.submit(stale)
33493379
processing = True
3380+
# We eagerly walk over fresh SCCs to reach as many stale SCCs as soon
3381+
# as possible. Only when there are no fresh SCCs, we wait on scheduled stale ones.
3382+
# This strategy, similar to a naive strategy in the game Minesweeper, will allow us
3383+
# to leverage parallelism as much as possible.
33503384
if fresh:
33513385
done = fresh
33523386
else:
@@ -3410,8 +3444,8 @@ def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_INDIRECT
34103444
def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManager) -> None:
34113445
"""Process the modules in one group of modules from their cached data.
34123446
3413-
This can be used to process an SCC of modules
3414-
This involves loading the tree from JSON and then doing various cleanups.
3447+
This can be used to process an SCC of modules. This involves loading the tree (i.e.
3448+
module symbol tables) from the cache file and then fixing cross-references in the symbols.
34153449
"""
34163450
t0 = time.time()
34173451
for id in modules:
@@ -3425,6 +3459,7 @@ def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManage
34253459

34263460
def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
34273461
"""Process the modules in one SCC from source code."""
3462+
# First verify if all transitive dependencies are loaded in the current process.
34283463
missing_sccs = set()
34293464
sccs_to_find = ascc.deps.copy()
34303465
while sccs_to_find:
@@ -3435,6 +3470,7 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
34353470
sccs_to_find.update(manager.scc_by_id[dep_scc].deps)
34363471

34373472
if missing_sccs:
3473+
# Load missing SCCs from cache.
34383474
fresh_sccs_to_load = [
34393475
manager.scc_by_id[sid] for sid in manager.top_order if sid in missing_sccs
34403476
]
@@ -3465,6 +3501,8 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
34653501
gc.freeze()
34663502
gc.unfreeze()
34673503
gc.enable()
3504+
3505+
# Process the SCC in stable order.
34683506
scc = order_ascc_ex(graph, ascc)
34693507
stale = scc
34703508
for id in stale:
@@ -3529,6 +3567,7 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
35293567
def prepare_sccs_full(
35303568
raw_sccs: Iterator[set[str]], edges: dict[str, list[str]]
35313569
) -> dict[SCC, set[SCC]]:
3570+
"""Turn raw SCC sets into SCC objects and build dependency graph for SCCs."""
35323571
sccs = [SCC(raw_scc) for raw_scc in raw_sccs]
35333572
scc_map = {}
35343573
for scc in sccs:
@@ -3539,6 +3578,7 @@ def prepare_sccs_full(
35393578
for id in scc.mod_ids:
35403579
scc_deps_map.setdefault(scc, set()).update(scc_map[dep] for dep in edges[id])
35413580
for scc in sccs:
3581+
# Remove trivial dependency on itself.
35423582
scc_deps_map[scc].discard(scc)
35433583
for dep_scc in scc_deps_map[scc]:
35443584
scc.deps.add(dep_scc.id)
@@ -3551,9 +3591,6 @@ def sorted_components(graph: Graph) -> list[SCC]:
35513591
35523592
The sort order is from leaves (nodes without dependencies) to
35533593
roots (nodes on which no other nodes depend).
3554-
3555-
This works for a subset of the full dependency graph too;
3556-
dependencies that aren't present in graph.keys() are ignored.
35573594
"""
35583595
# Compute SCCs.
35593596
vertices = set(graph)
@@ -3581,6 +3618,12 @@ def sorted_components(graph: Graph) -> list[SCC]:
35813618
def sorted_components_inner(
35823619
graph: Graph, vertices: AbstractSet[str], pri_max: int
35833620
) -> list[AbstractSet[str]]:
3621+
"""Simplified version of sorted_components() to work with sub-graphs.
3622+
3623+
This doesn't create SCC objects and operates on raw sets. This function
3624+
also allows filtering dependencies to take into account when building SCCs.
3625+
This is used for heuristic ordering of modules within actual SCCs.
3626+
"""
35843627
edges = {id: deps_filtered(graph, vertices, id, pri_max) for id in vertices}
35853628
sccs = list(strongly_connected_components(vertices, edges))
35863629
res = []

0 commit comments

Comments
 (0)