123123
124124
125125class SCC :
126+ """A simple class that represents a strongly connected component (import cycle)."""
127+
126128 id_counter : ClassVar [int ] = 0
127129
128130 def __init__ (self , ids : set [str ]) -> None :
129131 self .id = SCC .id_counter
130132 SCC .id_counter += 1
133+ # Ids of modules in this cycle.
131134 self .mod_ids = ids
135+ # Direct dependencies, should be populated by the caller.
132136 self .deps : set [int ] = set ()
137+ # Direct dependencies that have not been processed yet.
138+ # Should be populated by the caller. This set may change during graph
139+ # processing, while the above stays constant.
133140 self .not_ready_deps : set [int ] = set ()
141+ # SCCs that (directly) depend on this SCC. Note this is a list to
142+ # make processing order more predictable. Dependents will be notified
143+ # that they may be ready in the order in this list.
134144 self .direct_dependents : list [int ] = []
135145
136146
@@ -737,9 +747,17 @@ def __init__(
737747 self .ast_cache : dict [str , tuple [MypyFile , list [ErrorInfo ]]] = {}
738748 # Number of times we used GC optimization hack for fresh SCCs.
739749 self .gc_freeze_cycles = 0
750+ # Mapping from SCC id to corresponding SCC instance. This is populated
751+ # in process_graph().
740752 self .scc_by_id : dict [int , SCC ] = {}
753+ # Global topological order for SCCs. This exists to make order of processing
754+ # SCCs more predictable.
741755 self .top_order : list [int ] = []
756+ # Stale SCCs that are queued for processing. Note that as of now we have just
757+ # one worker, that is the same process. In the future, we will support multiple
758+ # parallel worker processes.
742759 self .scc_queue : list [SCC ] = []
760+ # SCCs that have been fully processed.
743761 self .done_sccs : set [int ] = set ()
744762
745763 def dump_stats (self ) -> None :
@@ -942,9 +960,16 @@ def stats_summary(self) -> Mapping[str, object]:
942960 return self .stats
943961
944962 def submit (self , sccs : list [SCC ]) -> None :
963+ """Submit stale SCCs for processing in the current process."""
945964 self .scc_queue .extend (sccs )
946965
947966 def get_done (self , graph : Graph ) -> tuple [list [SCC ], bool ]:
967+ """Wait for a stale SCC processing (in process) to finish.
968+
969+ Return next processed SCC and whether we have more in the queue.
970+ This emulates the API we will have for parallel processing
971+ in multiple worker processes.
972+ """
948973 if not self .scc_queue :
949974 return [], False
950975 next_scc = self .scc_queue .pop (0 )
@@ -3230,8 +3255,12 @@ def load_graph(
32303255
32313256
32323257def order_ascc_ex (graph : Graph , ascc : SCC ) -> list [str ]:
3233- # Order the SCC's nodes using a heuristic.
3234- # Note that ascc is a set, and scc is a list.
3258+ """Apply extra heuristics on top of order_ascc().
3259+
3260+ This should be used only for actual SCCs, not for "inner" SCCs
3261+ we create recursively during ordering of the SCC. Currently, this
3262+ has only some special handling for the builtins SCC.
3263+ """
32353264 scc = order_ascc (graph , ascc .mod_ids )
32363265 # Make the order of the SCC that includes 'builtins' and 'typing',
32373266 # among other things, predictable. Various things may break if
@@ -3251,18 +3280,18 @@ def order_ascc_ex(graph: Graph, ascc: SCC) -> list[str]:
32513280def find_stale_sccs (
32523281 sccs : list [SCC ], graph : Graph , manager : BuildManager
32533282) -> tuple [list [SCC ], list [SCC ]]:
3283+ """Split a list of ready SCCs into stale and fresh.
3284+
3285+ Fresh SCCs are those where:
3286+ * We have valid cache files for all modules in the SCC.
3287+ * The interface hashes of direct dependencies match those recorded in the cache.
3288+ * There are no new (un)suppressed dependencies (files removed/added to the build).
3289+ """
32543290 stale_sccs = []
32553291 fresh_sccs = []
32563292 for ascc in sccs :
3257- # Because the SCCs are presented in topological sort order, we
3258- # don't need to look at dependencies recursively for staleness
3259- # -- the immediate dependencies are sufficient.
32603293 stale_scc = {id for id in ascc .mod_ids if not graph [id ].is_fresh ()}
32613294 fresh = not stale_scc
3262- deps = set ()
3263- for id in ascc .mod_ids :
3264- deps .update (graph [id ].dependencies )
3265- deps -= ascc .mod_ids
32663295
32673296 # Verify that interfaces of dependencies still present in graph are up-to-date (fresh).
32683297 # Note: if a dependency is not in graph anymore, it should be considered interface-stale.
@@ -3277,6 +3306,7 @@ def find_stale_sccs(
32773306 if graph [dep ].interface_hash != graph [id ].dep_hashes [dep ]:
32783307 stale_deps .add (dep )
32793308 fresh = fresh and not stale_deps
3309+
32803310 undeps = set ()
32813311 if fresh :
32823312 # Check if any dependencies that were suppressed according
@@ -3287,6 +3317,7 @@ def find_stale_sccs(
32873317 undeps &= graph .keys ()
32883318 if undeps :
32893319 fresh = False
3320+
32903321 if fresh :
32913322 fresh_msg = "fresh"
32923323 elif undeps :
@@ -3326,13 +3357,12 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
33263357 manager .log (
33273358 "Found %d SCCs; largest has %d nodes" % (len (sccs ), max (len (scc .mod_ids ) for scc in sccs ))
33283359 )
3329- for scc in sccs :
3330- pass # print("SCC", scc.id, scc.mod_ids, scc.deps, scc.direct_dependents)
33313360
33323361 scc_by_id = {scc .id : scc for scc in sccs }
33333362 manager .scc_by_id = scc_by_id
33343363 manager .top_order = [scc .id for scc in sccs ]
33353364
3365+ # Prime the ready list with leaf SCCs (that have no dependencies).
33363366 ready = []
33373367 not_ready = []
33383368 for scc in sccs :
@@ -3347,6 +3377,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
33473377 if stale :
33483378 manager .submit (stale )
33493379 processing = True
3380+ # We eagerly walk over fresh SCCs to reach as many stale SCCs as soon
3381+ # as possible. Only when there are no fresh SCCs, we wait on scheduled stale ones.
3382+ # This strategy, similar to a naive strategy in the Minesweeper game, will allow us
3383+ # to leverage parallelism as much as possible.
33503384 if fresh :
33513385 done = fresh
33523386 else :
@@ -3410,8 +3444,8 @@ def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_INDIRECT
34103444def process_fresh_modules (graph : Graph , modules : list [str ], manager : BuildManager ) -> None :
34113445 """Process the modules in one group of modules from their cached data.
34123446
3413- This can be used to process an SCC of modules
3414- This involves loading the tree from JSON and then doing various cleanups .
3447+ This can be used to process an SCC of modules. This involves loading the tree (i.e.
3448+ module symbol tables) from cache file and then fixing cross-references in the symbols .
34153449 """
34163450 t0 = time .time ()
34173451 for id in modules :
@@ -3425,6 +3459,7 @@ def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManage
34253459
34263460def process_stale_scc (graph : Graph , ascc : SCC , manager : BuildManager ) -> None :
34273461 """Process the modules in one SCC from source code."""
3462+ # First verify if all transitive dependencies are loaded in the current process.
34283463 missing_sccs = set ()
34293464 sccs_to_find = ascc .deps .copy ()
34303465 while sccs_to_find :
@@ -3435,6 +3470,7 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
34353470 sccs_to_find .update (manager .scc_by_id [dep_scc ].deps )
34363471
34373472 if missing_sccs :
3473+ # Load missing SCCs from cache.
34383474 fresh_sccs_to_load = [
34393475 manager .scc_by_id [sid ] for sid in manager .top_order if sid in missing_sccs
34403476 ]
@@ -3465,6 +3501,8 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
34653501 gc .freeze ()
34663502 gc .unfreeze ()
34673503 gc .enable ()
3504+
3505+ # Process the SCC in stable order.
34683506 scc = order_ascc_ex (graph , ascc )
34693507 stale = scc
34703508 for id in stale :
@@ -3529,6 +3567,7 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
35293567def prepare_sccs_full (
35303568 raw_sccs : Iterator [set [str ]], edges : dict [str , list [str ]]
35313569) -> dict [SCC , set [SCC ]]:
3570+ """Turn raw SCC sets into SCC objects and build dependency graph for SCCs."""
35323571 sccs = [SCC (raw_scc ) for raw_scc in raw_sccs ]
35333572 scc_map = {}
35343573 for scc in sccs :
@@ -3539,6 +3578,7 @@ def prepare_sccs_full(
35393578 for id in scc .mod_ids :
35403579 scc_deps_map .setdefault (scc , set ()).update (scc_map [dep ] for dep in edges [id ])
35413580 for scc in sccs :
3581+ # Remove trivial dependency on itself.
35423582 scc_deps_map [scc ].discard (scc )
35433583 for dep_scc in scc_deps_map [scc ]:
35443584 scc .deps .add (dep_scc .id )
@@ -3551,9 +3591,6 @@ def sorted_components(graph: Graph) -> list[SCC]:
35513591
35523592 The sort order is from leaves (nodes without dependencies) to
35533593 roots (nodes on which no other nodes depend).
3554-
3555- This works for a subset of the full dependency graph too;
3556- dependencies that aren't present in graph.keys() are ignored.
35573594 """
35583595 # Compute SCCs.
35593596 vertices = set (graph )
@@ -3581,6 +3618,12 @@ def sorted_components(graph: Graph) -> list[SCC]:
35813618def sorted_components_inner (
35823619 graph : Graph , vertices : AbstractSet [str ], pri_max : int
35833620) -> list [AbstractSet [str ]]:
3621+ """Simplified version of sorted_components() to work with sub-graphs.
3622+
3623+ This doesn't create SCC objects, and operates with raw sets. This function
3624+ also allows filtering dependencies to take into account when building SCCs.
3625+ This is used for heuristic ordering of modules within actual SCCs.
3626+ """
35843627 edges = {id : deps_filtered (graph , vertices , id , pri_max ) for id in vertices }
35853628 sccs = list (strongly_connected_components (vertices , edges ))
35863629 res = []
0 commit comments