From 8a076c6c83a3188f6716e2e4bf371772be1280e5 Mon Sep 17 00:00:00 2001
From: Christian Heimes
Date: Sun, 14 Sep 2025 18:12:37 +0200
Subject: [PATCH] feat: get install and build dependencies

Extend `DependencyNode` to get all install dependencies and build
requirements. The new methods return unique dependencies by recursively
walking the dependency graph. The build requirements include all
recursive installation requirements of build requirements.

Signed-off-by: Christian Heimes
---
 src/fromager/dependency_graph.py |  54 +++++++++++++++
 tests/test_dependency_graph.py   | 113 +++++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+)

diff --git a/src/fromager/dependency_graph.py b/src/fromager/dependency_graph.py
index efde31cf..555ed43c 100644
--- a/src/fromager/dependency_graph.py
+++ b/src/fromager/dependency_graph.py
@@ -111,6 +111,60 @@ def construct_root_node(cls) -> DependencyNode:
             Version("0"),
         )
 
+    def iter_build_requirements(self) -> typing.Iterable[DependencyNode]:
+        """Get all unique, recursive build requirements
+
+        Yield all direct and indirect requirements to build the dependency.
+        Includes direct build dependencies and their recursive **install**
+        requirements.
+
+        The result is equivalent to the set of ``[build-system].requires``
+        plus all ``Requires-Dist`` of build system requirements -- all
+        packages in the build environment.
+        """
+        visited: set[str] = set()
+        # The outer loop iterates over all children and picks the direct
+        # build requirements. For each build requirement, it walks that
+        # node's children and recursively gets their install requirements
+        # (depth first). ``visited`` is shared, so results stay unique.
+        for edge in self.children:
+            if edge.key in visited:
+                # optimization: don't traverse visited nodes
+                continue
+            if not edge.req_type.is_build_requirement:
+                # not a build requirement
+                continue
+            visited.add(edge.key)
+            # it's a new ``[build-system].requires``.
+            yield edge.destination_node
+            # recursively get install dependencies of this build dep (depth first).
+            for install_edge in self._traverse_install_requirements(
+                edge.destination_node.children, visited
+            ):
+                yield install_edge.destination_node
+
+    def iter_install_requirements(self) -> typing.Iterable[DependencyNode]:
+        """Yield all unique install requirements, direct and indirect (depth first)"""
+        visited: set[str] = set()
+        for edge in self._traverse_install_requirements(self.children, visited):
+            yield edge.destination_node
+
+    def _traverse_install_requirements(
+        self,
+        start_edges: list[DependencyEdge],
+        visited: set[str],
+    ) -> typing.Iterable[DependencyEdge]:
+        for edge in start_edges:
+            if edge.key in visited:
+                continue  # already yielded earlier in this walk
+            if not edge.req_type.is_install_requirement:
+                continue  # only install edges are followed
+            visited.add(edge.destination_node.key)
+            yield edge
+            yield from self._traverse_install_requirements(
+                edge.destination_node.children, visited
+            )
+
 
 @dataclasses.dataclass(frozen=True, order=True, slots=True)
 class DependencyEdge:
diff --git a/tests/test_dependency_graph.py b/tests/test_dependency_graph.py
index 5ce835ea..cf970230 100644
--- a/tests/test_dependency_graph.py
+++ b/tests/test_dependency_graph.py
@@ -1,10 +1,13 @@
 import dataclasses
+import graphlib
 
 import pytest
+from packaging.requirements import Requirement
 from packaging.utils import canonicalize_name
 from packaging.version import Version
 
 from fromager.dependency_graph import DependencyNode
+from fromager.requirements_file import RequirementType
 
 
 def mknode(name: str, version: str = "1.0", **kwargs) -> DependencyNode:
@@ -59,3 +62,113 @@ def test_dependencynode_dataclass():
     assert root.canonicalized_name == ""
     assert root.version == Version("0.0")
     assert root.key == ""
+
+
+def test_iter_requirements() -> None:
+    a = mknode("a")
+    # install requirement of a
+    b = mknode("b")
+    # build requirement of a
+    c = mknode("c")
+    # build requirement of c
+    d = mknode("d")
+    # install requirement of b and c
+    e = mknode("e")
+    # build requirement of a and c
+    f = mknode("f")
+
+    a.add_child(b, Requirement(b.canonicalized_name), RequirementType.INSTALL)
+    a.add_child(c, Requirement(c.canonicalized_name), RequirementType.BUILD_BACKEND)
+    a.add_child(c, Requirement(c.canonicalized_name), RequirementType.BUILD_SYSTEM)
+    a.add_child(f, Requirement(f.canonicalized_name), RequirementType.BUILD_SYSTEM)
+    b.add_child(e, Requirement(e.canonicalized_name), RequirementType.INSTALL)
+    c.add_child(d, Requirement(d.canonicalized_name), RequirementType.BUILD_SYSTEM)
+    c.add_child(e, Requirement(e.canonicalized_name), RequirementType.INSTALL)
+    c.add_child(f, Requirement(f.canonicalized_name), RequirementType.BUILD_BACKEND)
+
+    assert sorted(a.iter_install_requirements()) == [b, e]
+    assert sorted(a.iter_build_requirements()) == [c, e, f]
+    assert sorted(b.iter_install_requirements()) == [e]
+    assert sorted(b.iter_build_requirements()) == []
+    assert sorted(c.iter_install_requirements()) == [e]
+    assert sorted(c.iter_build_requirements()) == [d, f]
+
+    build_graph = get_build_graph(a, b, c, d, e, f)
+    assert build_graph == [
+        # no build requirements, B and E can be built in parallel, as
+        # B just has an install requirement on E.
+        ["b", "d", "e", "f"],
+        # C needs D, F to build.
+        ["c"],
+        # A needs C, E, F.
+        ["a"],
+    ]
+
+
+def get_build_graph(*nodes: DependencyNode) -> list[list[str]]:
+    topo: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()
+    for node in nodes:
+        build_deps = [n.canonicalized_name for n in node.iter_build_requirements()]
+        topo.add(node.canonicalized_name, *build_deps)
+    topo.prepare()
+    steps: list[list[str]] = []
+    while topo.is_active():
+        ready = topo.get_ready()
+        steps.append(sorted(ready))
+        topo.done(*ready)
+    return steps
+
+
+def test_pr759_discussion() -> None:
+    a = mknode("a")
+    b = mknode("b")
+    c = mknode("c")
+    d = mknode("d")
+    # A needs B to build.
+    a.add_child(b, Requirement(b.canonicalized_name), RequirementType.BUILD_BACKEND)
+    # B needs C to build.
+    b.add_child(c, Requirement(c.canonicalized_name), RequirementType.BUILD_BACKEND)
+    # B needs D to install.
+    b.add_child(d, Requirement(d.canonicalized_name), RequirementType.INSTALL)
+
+    assert sorted(a.iter_build_requirements()) == [b, d]
+    assert sorted(b.iter_build_requirements()) == [c]
+    assert sorted(c.iter_build_requirements()) == []
+    assert sorted(d.iter_build_requirements()) == []
+
+    build_graph = get_build_graph(a, b, c, d)
+    assert build_graph == [["c", "d"], ["b"], ["a"]]
+
+    # add more nodes
+    e = mknode("e")
+    f = mknode("f")
+    # D needs E to install.
+    d.add_child(e, Requirement(e.canonicalized_name), RequirementType.INSTALL)
+    # E needs F to build.
+    e.add_child(f, Requirement(f.canonicalized_name), RequirementType.BUILD_BACKEND)
+
+    # build requirements
+    assert sorted(a.iter_build_requirements()) == [b, d, e]
+    assert sorted(b.iter_build_requirements()) == [c]
+    assert sorted(c.iter_build_requirements()) == []
+    assert sorted(d.iter_build_requirements()) == []
+    assert sorted(e.iter_build_requirements()) == [f]
+
+    build_graph = get_build_graph(a, b, c, d, e, f)
+    assert build_graph == [
+        # D, C, F don't have build requirements
+        ["c", "d", "f"],
+        # B needs C, E needs F
+        ["b", "e"],
+        # A needs B, D, E
+        ["a"],
+    ]
+
+    # install requirements
+    assert sorted(a.iter_install_requirements()) == []
+    # E is an indirect install dependency
+    assert sorted(b.iter_install_requirements()) == [d, e]
+    assert sorted(c.iter_install_requirements()) == []
+    assert sorted(d.iter_install_requirements()) == [e]
+    assert sorted(e.iter_install_requirements()) == []
+    assert sorted(f.iter_install_requirements()) == []