def iter_build_requirements(self) -> typing.Iterable[DependencyNode]:
    """Get all unique, recursive build requirements.

    Yield all direct and indirect requirements to build the dependency.
    Includes direct build dependencies and their recursive **install**
    requirements.

    The result is equivalent to the set of ``[build-system].requires``
    plus all ``Requires-Dist`` of build system requirements -- all
    packages in the build environment.

    Each node is yielded at most once (deduplicated by node key), in
    depth-first order. The starting node is not pre-marked as visited, so
    it can show up in the result when the graph cycles back to it.
    """
    visited: set[str] = set()
    # The outer loop picks the direct build requirements among the children.
    # For each of them, the install requirements are collected recursively
    # (depth first), because they must be present in the build environment.
    for edge in self.children:
        node = edge.destination_node
        if node.key in visited or not edge.req_type.is_build_requirement:
            # already reported, or not a build requirement
            continue
        visited.add(node.key)
        # it's a new ``[build-system].requires`` entry
        yield node
        # ``visited`` is shared, so nodes reachable from several build
        # requirements are only reported once.
        for install_edge in self._traverse_install_requirements(
            node.children, visited
        ):
            yield install_edge.destination_node


def iter_install_requirements(self) -> typing.Iterable[DependencyNode]:
    """Get all unique, recursive install requirements.

    Yield the destination node of every direct and indirect install
    requirement edge, depth first, each node at most once.
    """
    visited: set[str] = set()
    for edge in self._traverse_install_requirements(self.children, visited):
        yield edge.destination_node


def _traverse_install_requirements(
    self,
    start_edges: list[DependencyEdge],
    visited: set[str],
) -> typing.Iterable[DependencyEdge]:
    """Walk install requirement edges depth first (pre-order).

    ``visited`` holds the keys of nodes that were already reported. It is
    updated in place so that callers can share it across several walks.
    Only the first install edge leading to a node is yielded.

    The walk uses an explicit stack of edge iterators instead of recursion,
    so the depth of the dependency graph is not bounded by the interpreter's
    recursion limit.
    """
    pending = [iter(start_edges)]
    while pending:
        for edge in pending[-1]:
            node = edge.destination_node
            if node.key in visited:
                continue
            if not edge.req_type.is_install_requirement:
                continue
            visited.add(node.key)
            yield edge
            # Descend into the children before looking at the siblings.
            pending.append(iter(node.children))
            break
        else:
            # current level is exhausted, resume with the parent level
            pending.pop()
def test_iter_requirements() -> None:
    """Check build/install requirement iteration on a small mixed graph."""
    a = mknode("a")
    # install requirement of a
    b = mknode("b")
    # build requirement of a
    c = mknode("c")
    # build requirement of c
    d = mknode("d")
    # install requirement of b and c
    e = mknode("e")
    # build requirement of a and c
    f = mknode("f")

    # Every requirement is named after the node the edge points to.
    a.add_child(b, Requirement(b.canonicalized_name), RequirementType.INSTALL)
    a.add_child(c, Requirement(c.canonicalized_name), RequirementType.BUILD_BACKEND)
    a.add_child(c, Requirement(c.canonicalized_name), RequirementType.BUILD_SYSTEM)
    a.add_child(f, Requirement(f.canonicalized_name), RequirementType.BUILD_SYSTEM)
    b.add_child(e, Requirement(e.canonicalized_name), RequirementType.INSTALL)
    c.add_child(d, Requirement(d.canonicalized_name), RequirementType.BUILD_SYSTEM)
    c.add_child(e, Requirement(e.canonicalized_name), RequirementType.INSTALL)
    c.add_child(f, Requirement(f.canonicalized_name), RequirementType.BUILD_BACKEND)

    assert sorted(a.iter_install_requirements()) == [b, e]
    assert sorted(a.iter_build_requirements()) == [c, e, f]
    assert sorted(b.iter_install_requirements()) == [e]
    assert sorted(b.iter_build_requirements()) == []
    assert sorted(c.iter_install_requirements()) == [e]
    assert sorted(c.iter_build_requirements()) == [d, f]

    build_graph = get_build_graph(a, b, c, d, e, f)
    assert build_graph == [
        # no build requirements, B and E can be built in parallel, as
        # B just has an install requirement on E.
        ["b", "d", "e", "f"],
        # C needs D, F to build.
        ["c"],
        # A needs C, E, F.
        ["a"],
    ]


def get_build_graph(*nodes: DependencyNode) -> list[list[str]]:
    """Group the given nodes into build steps.

    Each node depends on the names returned by its
    ``iter_build_requirements()``. The result is a list of steps; every
    step is the sorted list of names whose build requirements are all
    contained in earlier steps.
    """
    topo: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()
    for node in nodes:
        build_deps = [n.canonicalized_name for n in node.iter_build_requirements()]
        topo.add(node.canonicalized_name, *build_deps)
    topo.prepare()
    steps: list[list[str]] = []
    while topo.is_active():
        # everything that is ready at the same time forms one step
        ready = topo.get_ready()
        steps.append(sorted(ready))
        topo.done(*ready)
    return steps


def test_pr759_discussion() -> None:
    """Check the build order for the graphs discussed in PR 759."""
    a = mknode("a")
    b = mknode("b")
    c = mknode("c")
    d = mknode("d")
    # A needs B to build.
    a.add_child(b, Requirement(b.canonicalized_name), RequirementType.BUILD_BACKEND)
    # B needs C to build.
    b.add_child(c, Requirement(c.canonicalized_name), RequirementType.BUILD_BACKEND)
    # B needs D to install.
    b.add_child(d, Requirement(d.canonicalized_name), RequirementType.INSTALL)

    assert sorted(a.iter_build_requirements()) == [b, d]
    assert sorted(b.iter_build_requirements()) == [c]
    assert sorted(c.iter_build_requirements()) == []
    assert sorted(d.iter_build_requirements()) == []

    build_graph = get_build_graph(a, b, c, d)
    assert build_graph == [["c", "d"], ["b"], ["a"]]

    # add more nodes
    e = mknode("e")
    f = mknode("f")
    # D needs E to install.
    d.add_child(e, Requirement(e.canonicalized_name), RequirementType.INSTALL)
    # E needs F to build.
    e.add_child(f, Requirement(f.canonicalized_name), RequirementType.BUILD_BACKEND)

    # build requirements
    assert sorted(a.iter_build_requirements()) == [b, d, e]
    assert sorted(b.iter_build_requirements()) == [c]
    assert sorted(c.iter_build_requirements()) == []
    assert sorted(d.iter_build_requirements()) == []
    assert sorted(e.iter_build_requirements()) == [f]

    build_graph = get_build_graph(a, b, c, d, e, f)
    assert build_graph == [
        # D, C, F don't have build requirements
        ["c", "d", "f"],
        # B needs C, E needs F
        ["b", "e"],
        # A needs B, D, E
        ["a"],
    ]

    # install requirements
    assert sorted(a.iter_install_requirements()) == []
    # E is an indirect install dependency
    assert sorted(b.iter_install_requirements()) == [d, e]
    assert sorted(c.iter_install_requirements()) == []
    assert sorted(d.iter_install_requirements()) == [e]
    assert sorted(e.iter_install_requirements()) == []
    assert sorted(f.iter_install_requirements()) == []