From 6ad58655b08851db21c9c457ca05cbb8f62cab83 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sun, 14 Sep 2025 18:08:26 +0200 Subject: [PATCH 1/4] chore: use dataclass for DependencyNode Convert the `DependencyNode` and `DependencyEdge` into data classes. The classes are frozen (immutable) and support comparison, sorting, and hashing. Signed-off-by: Christian Heimes --- src/fromager/dependency_graph.py | 96 +++++++++++++++++--------------- 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/src/fromager/dependency_graph.py b/src/fromager/dependency_graph.py index af3043d9..a1513a53 100644 --- a/src/fromager/dependency_graph.py +++ b/src/fromager/dependency_graph.py @@ -1,3 +1,6 @@ +from __future__ import annotations + +import dataclasses import json import logging import pathlib @@ -29,28 +32,42 @@ class DependencyNodeDict(typing.TypedDict): edges: list[DependencyEdgeDict] +@dataclasses.dataclass(frozen=True, order=True, slots=True) class DependencyNode: - def __init__( - self, - req_name: NormalizedName, - version: Version, - download_url: str = "", - pre_built: bool = False, - ) -> None: - self.canonicalized_name = req_name - self.version = version - self.parents: list[DependencyEdge] = [] - self.children: list[DependencyEdge] = [] - self.key = f"{self.canonicalized_name}=={version}" - self.download_url = download_url - self.pre_built = pre_built + canonicalized_name: NormalizedName + version: Version + download_url: str = dataclasses.field(default="", compare=False) + pre_built: bool = dataclasses.field(default=False, compare=False) + # additional fields + key: str = dataclasses.field(init=False, compare=False, repr=False) + parents: list[DependencyEdge] = dataclasses.field( + default_factory=list, + init=False, + compare=False, + repr=False, + ) + children: list[DependencyEdge] = dataclasses.field( + default_factory=list, + init=False, + compare=False, + repr=False, + ) + + def __post_init__(self) -> None: + if self.canonicalized_name == ROOT: 
+ # root has a special key + object.__setattr__(self, "key", ROOT) + else: + object.__setattr__( + self, "key", f"{self.canonicalized_name}=={self.version}" + ) def add_child( self, - child: "DependencyNode", + child: DependencyNode, req: Requirement, req_type: RequirementType, - ): + ) -> None: current_to_child_edge = DependencyEdge( req=req, req_type=req_type, destination_node=child ) @@ -62,11 +79,6 @@ def add_child( # not an issue for fromager since it is used as a short-lived process child.parents.append(child_to_current_edge) - def get_incoming_install_edges(self) -> list["DependencyEdge"]: - return [ - edge for edge in self.parents if edge.req_type == RequirementType.INSTALL - ] - def to_dict(self) -> DependencyNodeDict: return { "download_url": self.download_url, @@ -76,9 +88,14 @@ def to_dict(self) -> DependencyNodeDict: "edges": [edge.to_dict() for edge in self.children], } + def get_incoming_install_edges(self) -> list[DependencyEdge]: + return [ + edge for edge in self.parents if edge.req_type == RequirementType.INSTALL + ] + def get_outgoing_edges( self, req_name: str, req_type: RequirementType - ) -> list["DependencyEdge"]: + ) -> list[DependencyEdge]: return [ edge for edge in self.children @@ -86,29 +103,20 @@ def get_outgoing_edges( and edge.req_type == req_type ] - @classmethod - def construct_root_node(cls) -> "DependencyNode": - root = cls( - canonicalize_name(ROOT), Version("0") - ) # version doesn't really matter for root - root.key = ROOT # root has a special key type - return root - +@dataclasses.dataclass(frozen=True, order=True, slots=True) class DependencyEdge: - def __init__( - self, - destination_node: DependencyNode, - req_type: RequirementType, - req: Requirement, - ) -> None: - self.req_type = req_type - self.req = req - self.destination_node = destination_node + key: str = dataclasses.field(init=False, repr=True, compare=True) + destination_node: DependencyNode = dataclasses.field(repr=False, compare=False) + req: Requirement = 
dataclasses.field(repr=True, compare=True) + req_type: RequirementType = dataclasses.field(repr=True, compare=True) + + def __post_init__(self) -> None: + object.__setattr__(self, "key", self.destination_node.key) def to_dict(self) -> DependencyEdgeDict: return { - "key": self.destination_node.key, + "key": self.key, "req_type": str(self.req_type), "req": str(self.req), } @@ -123,7 +131,7 @@ def __init__(self) -> None: def from_file( cls, graph_file: pathlib.Path | str, - ) -> "DependencyGraph": + ) -> DependencyGraph: with open_file_or_url(graph_file) as f: # TODO: add JSON validation to ensure it is a parsable graph json raw_graph = typing.cast(dict[str, dict], json.load(f)) @@ -133,7 +141,7 @@ def from_file( def from_dict( cls, graph_dict: dict[str, dict[str, typing.Any]], - ) -> "DependencyGraph": + ) -> DependencyGraph: graph = cls() stack = [ROOT] visited = set() @@ -166,7 +174,7 @@ def from_dict( def clear(self) -> None: self.nodes.clear() - self.nodes[ROOT] = DependencyNode.construct_root_node() + self.nodes[ROOT] = DependencyNode(canonicalize_name(ROOT), Version("0")) def _to_dict(self): raw_graph = {} @@ -193,7 +201,7 @@ def _add_node( pre_built: bool, ): new_node = DependencyNode( - req_name=req_name, + canonicalized_name=req_name, version=version, download_url=download_url, pre_built=pre_built, From f4f0ae465fb4984b1d513d34a9025b4fff3eef2e Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sun, 14 Sep 2025 18:12:37 +0200 Subject: [PATCH 2/4] feat: get install and build dependencies Extend `DependencyNode` to get all install dependencies and build requirements. The new method return unique dependencies by recursively walking the dependency graph. The build requirements include all recursive installation requirements of build requirements. 
Signed-off-by: Christian Heimes --- src/fromager/dependency_graph.py | 54 +++++++++++ tests/test_dependency_graph.py | 148 +++++++++++++++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 tests/test_dependency_graph.py diff --git a/src/fromager/dependency_graph.py b/src/fromager/dependency_graph.py index a1513a53..08ce407c 100644 --- a/src/fromager/dependency_graph.py +++ b/src/fromager/dependency_graph.py @@ -103,6 +103,60 @@ def get_outgoing_edges( and edge.req_type == req_type ] + def iter_build_requirements(self) -> typing.Iterable[DependencyNode]: + """Get all unique, recursive build requirements + + Yield all direct and indirect requirements to build the dependency. + Includes direct build dependencies and their recursive **install** + requirements. + + The result is equivalent to the set of ``[build-system].requires`` + plus all ``Requires-Dist`` of build system requirements -- all + packages in the build environment. + """ + visited: set[str] = set() + # The outer loop iterates over all children and picks + # direct build requirements. For each build requirement, it traverses + # all children and recursively get their install requirements + # (depth first). + for edge in self.children: + if edge.key in visited: + # optimization: don't traverse visited nodes + continue + if not edge.req_type.is_build_requirement: + # not a build requirement + continue + visited.add(edge.key) + # it's a new ``[build-system].requires``. + yield edge.destination_node + # recursively get install dependencies of this build dep (depth first). 
+ for install_edge in self._traverse_install_requirements( + edge.destination_node.children, visited + ): + yield install_edge.destination_node + + def iter_install_requirements(self) -> typing.Iterable[DependencyNode]: + """Get all unique, recursive install requirements""" + visited: set[str] = set() + for edge in self._traverse_install_requirements(self.children, visited): + yield edge.destination_node + + def _traverse_install_requirements( + self, + start_edges: list[DependencyEdge], + visited: set[str], + ) -> typing.Iterable[DependencyEdge]: + for edge in start_edges: + if edge.key in visited: + continue + if not edge.req_type.is_install_requirement: + continue + visited.add(edge.destination_node.key) + yield edge + yield from self._traverse_install_requirements( + edge.destination_node.children, visited + ) + @dataclasses.dataclass(frozen=True, order=True, slots=True) class DependencyEdge: diff --git a/tests/test_dependency_graph.py b/tests/test_dependency_graph.py new file mode 100644 index 00000000..b7b1d3fc --- /dev/null +++ b/tests/test_dependency_graph.py @@ -0,0 +1,148 @@ +import graphlib + +from packaging.requirements import Requirement +from packaging.utils import canonicalize_name +from packaging.version import Version + +from fromager.dependency_graph import DependencyNode +from fromager.requirements_file import RequirementType + + +def mknode(name: str, version: str = "1.0", **kwargs) -> DependencyNode: + return DependencyNode(canonicalize_name(name), Version(version), **kwargs) + + +def get_build_graph(*nodes: DependencyNode) -> list[list[str]]: + topo: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter() + for node in nodes: + build_deps = [n.canonicalized_name for n in node.iter_build_requirements()] + topo.add(node.canonicalized_name, *build_deps) + topo.prepare() + steps: list[list[str]] = [] + while topo.is_active(): + ready = topo.get_ready() + steps.append(sorted(ready)) + topo.done(*ready) + return steps + + +def test_compare() 
-> None: + a_10 = mknode("a", "1.0") + a_20 = mknode("a", "2.0") + b = mknode("b", "1.0") + assert a_10 == a_10 + assert not a_10 == a_20 + assert a_10 != a_20 + assert a_10 != b + assert a_10 == mknode("a", "1.0") + assert a_10 < a_20 + assert a_10 <= a_10 + assert a_10 >= a_10 + assert b > a_10 + assert b > a_20 + + +def test_hash() -> None: + a_10 = mknode("a", "1.0") + a_20 = mknode("a", "2.0") + b = mknode("b", "1.0") + s = {a_10, a_10, a_20} + assert s == {a_10, a_20} + assert a_10 in s + assert b not in s + + +def test_iter_requirements() -> None: + a = mknode("a") + # install requirements of a + b = mknode("b") + # build requirement of a + c = mknode("c") + # build requirement of c + d = mknode("d") + # install requirement of b and c + e = mknode("e") + # build requirement of a and c + f = mknode("f") + + a.add_child(b, Requirement(b.canonicalized_name), RequirementType.INSTALL) + a.add_child(c, Requirement(c.canonicalized_name), RequirementType.BUILD_BACKEND) + a.add_child(c, Requirement(c.canonicalized_name), RequirementType.BUILD_SYSTEM) + a.add_child(f, Requirement(c.canonicalized_name), RequirementType.BUILD_SYSTEM) + b.add_child(e, Requirement(b.canonicalized_name), RequirementType.INSTALL) + c.add_child(d, Requirement(d.canonicalized_name), RequirementType.BUILD_SYSTEM) + c.add_child(e, Requirement(e.canonicalized_name), RequirementType.INSTALL) + c.add_child(f, Requirement(f.canonicalized_name), RequirementType.BUILD_BACKEND) + + assert sorted(a.iter_install_requirements()) == [b, e] + assert sorted(a.iter_build_requirements()) == [c, e, f] + assert sorted(b.iter_install_requirements()) == [e] + assert sorted(b.iter_build_requirements()) == [] + assert sorted(c.iter_install_requirements()) == [e] + assert sorted(c.iter_build_requirements()) == [d, f] + + build_graph = get_build_graph(a, b, c, d, e, f) + assert build_graph == [ + # no build requirements, B and E can be built in parallel, as + # B just has an install requirement on E. 
+ ["b", "d", "e", "f"], + # C needs D, F to build. + ["c"], + # A needs C, E, F. + ["a"], + ] + + +def test_pr759_discussion() -> None: + a = mknode("a") + b = mknode("b") + c = mknode("c") + d = mknode("d") + # A needs B to build. + a.add_child(b, Requirement(c.canonicalized_name), RequirementType.BUILD_BACKEND) + # B needs C to build. + b.add_child(c, Requirement(c.canonicalized_name), RequirementType.BUILD_BACKEND) + # B needs D to install. + b.add_child(d, Requirement(c.canonicalized_name), RequirementType.INSTALL) + + assert sorted(a.iter_build_requirements()) == [b, d] + assert sorted(b.iter_build_requirements()) == [c] + assert sorted(c.iter_build_requirements()) == [] + assert sorted(d.iter_build_requirements()) == [] + + build_graph = get_build_graph(a, b, c, d) + assert build_graph == [["c", "d"], ["b"], ["a"]] + + # add more nodes + e = mknode("e") + f = mknode("f") + # D needs E to install. + d.add_child(e, Requirement(c.canonicalized_name), RequirementType.INSTALL) + # E needs F to build. 
+ e.add_child(f, Requirement(c.canonicalized_name), RequirementType.BUILD_BACKEND) + + # build requirements + assert sorted(a.iter_build_requirements()) == [b, d, e] + assert sorted(b.iter_build_requirements()) == [c] + assert sorted(c.iter_build_requirements()) == [] + assert sorted(d.iter_build_requirements()) == [] + assert sorted(e.iter_build_requirements()) == [f] + + build_graph = get_build_graph(a, b, c, d, e, f) + assert build_graph == [ + # D, C, F don't have build requirements + ["c", "d", "f"], + # B needs C, E needs F + ["b", "e"], + # A needs B, D, E + ["a"], + ] + + # install requirements + assert sorted(a.iter_install_requirements()) == [] + # E is an indirect install dependency + assert sorted(b.iter_install_requirements()) == [d, e] + assert sorted(c.iter_install_requirements()) == [] + assert sorted(d.iter_install_requirements()) == [e] + assert sorted(e.iter_install_requirements()) == [] + assert sorted(f.iter_install_requirements()) == [] From e9322609946d519c127a5e5854b36a747ea92aab Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Mon, 15 Sep 2025 15:23:36 +0200 Subject: [PATCH 3/4] feat: add TopologicalSorter to DependencyGraph The `DependencyGraph` can now construct a `graphlib.TopologicalSorter` of dependencies and their build dependencies. The graph sorter returns packages according to their build order. 
Signed-off-by: Christian Heimes --- src/fromager/dependency_graph.py | 14 ++ tests/test_dependency_graph.py | 93 ++++++++- tests/testdata/graph.json | 311 +++++++++++++++++++++++++++++++ 3 files changed, 417 insertions(+), 1 deletion(-) create mode 100644 tests/testdata/graph.json diff --git a/src/fromager/dependency_graph.py b/src/fromager/dependency_graph.py index 08ce407c..9c49d0ca 100644 --- a/src/fromager/dependency_graph.py +++ b/src/fromager/dependency_graph.py @@ -1,6 +1,7 @@ from __future__ import annotations import dataclasses +import graphlib import json import logging import pathlib @@ -358,3 +359,16 @@ def _depth_first_traversal( yield from self._depth_first_traversal( edge.destination_node.children, visited, match_dep_types ) + + def get_build_topology(self) -> graphlib.TopologicalSorter[DependencyNode]: + """Construct a topology graph of packages and their build dependencies + + See :class:`graphlib.TopologicalSorter` + """ + topo: graphlib.TopologicalSorter[DependencyNode] = graphlib.TopologicalSorter() + for node in self.get_all_nodes(): + if node.key == ROOT: + continue + topo.add(node, *node.iter_build_requirements()) + topo.prepare() + return topo diff --git a/tests/test_dependency_graph.py b/tests/test_dependency_graph.py index b7b1d3fc..73c19be9 100644 --- a/tests/test_dependency_graph.py +++ b/tests/test_dependency_graph.py @@ -1,13 +1,21 @@ import graphlib +import pathlib + +import pytest from packaging.requirements import Requirement from packaging.utils import canonicalize_name from packaging.version import Version -from fromager.dependency_graph import DependencyNode +from fromager.dependency_graph import DependencyGraph, DependencyNode from fromager.requirements_file import RequirementType +@pytest.fixture +def depgraph(testdata_path: pathlib.Path) -> DependencyGraph: + return DependencyGraph.from_file(testdata_path / "graph.json") + + def mknode(name: str, version: str = "1.0", **kwargs) -> DependencyNode: return 
DependencyNode(canonicalize_name(name), Version(version), **kwargs) @@ -146,3 +154,86 @@ def test_pr759_discussion() -> None: assert sorted(d.iter_install_requirements()) == [e] assert sorted(e.iter_install_requirements()) == [] assert sorted(f.iter_install_requirements()) == [] + + +def test_dependency_graph(depgraph: DependencyGraph) -> None: + assert set(depgraph.nodes) == { + "", + "cython==3.1.1", + "flit-core==3.12.0", + "imapautofiler==1.14.0", + "imapclient==3.0.1", + "jaraco-classes==3.4.0", + "jaraco-context==6.0.1", + "jaraco-functools==4.1.0", + "jinja2==3.1.6", + "keyring==25.6.0", + "markupsafe==3.0.2", + "more-itertools==10.7.0", + "packaging==25.0", + "pyyaml==6.0.2", + "setuptools-scm==8.3.1", + "setuptools==80.8.0", + "wheel==0.46.1", + } + + +def test_dependency_graph_iter_requirements(depgraph: DependencyGraph) -> None: + nodes = depgraph.get_nodes_by_name("cython") + assert len(nodes) == 1 + cython = nodes[0] + assert sorted(node.key for node in cython.iter_build_requirements()) == [ + "setuptools==80.8.0" + ] + assert sorted(node.key for node in cython.iter_install_requirements()) == [] + + nodes = depgraph.get_nodes_by_name("pyyaml") + assert len(nodes) == 1 + pyyaml = nodes[0] + assert sorted(node.key for node in pyyaml.iter_build_requirements()) == [ + "cython==3.1.1", + "setuptools==80.8.0", + "wheel==0.46.1", + ] + assert sorted(node.key for node in pyyaml.iter_install_requirements()) == [] + + +def test_build_graph(depgraph: DependencyGraph) -> None: + steps: list[list[str]] = [] + + topo = depgraph.get_build_topology() + while topo.is_active(): + nodes: tuple[DependencyNode, ...] 
= topo.get_ready() + steps.append(sorted(node.key for node in nodes)) + topo.done(*nodes) + + assert steps == [ + [ + # build systems can bootstrap without external dependencies + "flit-core==3.12.0", + "setuptools==80.8.0", + ], + [ + # packages that just depend on 'flit-core' or 'setuptools' + "cython==3.1.1", + "imapclient==3.0.1", + "jinja2==3.1.6", + "markupsafe==3.0.2", + "more-itertools==10.7.0", + "packaging==25.0", + ], + [ + # the two packages depend on 'packaging' + "setuptools-scm==8.3.1", + "wheel==0.46.1", + ], + [ + # final set depends on 'setuptools-scm' or 'wheel' + "imapautofiler==1.14.0", + "jaraco-classes==3.4.0", + "jaraco-context==6.0.1", + "jaraco-functools==4.1.0", + "keyring==25.6.0", + "pyyaml==6.0.2", + ], + ] diff --git a/tests/testdata/graph.json b/tests/testdata/graph.json new file mode 100644 index 00000000..b0b01c50 --- /dev/null +++ b/tests/testdata/graph.json @@ -0,0 +1,311 @@ +{ + "": { + "download_url": "", + "pre_built": false, + "version": "0", + "canonicalized_name": "", + "edges": [ + { + "key": "imapautofiler==1.14.0", + "req_type": "toplevel", + "req": "imapautofiler==1.14.0" + } + ] + }, + "imapautofiler==1.14.0": { + "download_url": "https://files.pythonhosted.org/packages/e5/ac/5715b2616657b06dab8e44fc51d51d8736ef0ba758e479cba9343254fba5/imapautofiler-1.14.0.tar.gz#sha256=dae2edb315994a906bcea3d8bf372adfd62805e7ada8e3a0a3d032595a600d89", + "pre_built": false, + "version": "1.14.0", + "canonicalized_name": "imapautofiler", + "edges": [ + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools" + }, + { + "key": "setuptools-scm==8.3.1", + "req_type": "build-system", + "req": "setuptools_scm[toml]>=6.2" + }, + { + "key": "pyyaml==6.0.2", + "req_type": "install", + "req": "PyYAML>=3.11" + }, + { + "key": "imapclient==3.0.1", + "req_type": "install", + "req": "imapclient>=2.2.0" + }, + { + "key": "jinja2==3.1.6", + "req_type": "install", + "req": "jinja2>=2.11.2" + }, + { + "key": 
"keyring==25.6.0", + "req_type": "install", + "req": "keyring>=10.0.0" + } + ] + }, + "keyring==25.6.0": { + "download_url": "https://files.pythonhosted.org/packages/70/09/d904a6e96f76ff214be59e7aa6ef7190008f52a0ab6689760a98de0bf37d/keyring-25.6.0.tar.gz#sha256=0b39998aa941431eb3d9b0d4b2460bc773b9df6fed7621c2dfb291a7e0187a66", + "pre_built": false, + "version": "25.6.0", + "canonicalized_name": "keyring", + "edges": [ + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools>=61.2" + }, + { + "key": "setuptools-scm==8.3.1", + "req_type": "build-system", + "req": "setuptools_scm[toml]>=3.4.1" + }, + { + "key": "jaraco-classes==3.4.0", + "req_type": "install", + "req": "jaraco.classes" + }, + { + "key": "jaraco-context==6.0.1", + "req_type": "install", + "req": "jaraco.context" + }, + { + "key": "jaraco-functools==4.1.0", + "req_type": "install", + "req": "jaraco.functools" + } + ] + }, + "jaraco-functools==4.1.0": { + "download_url": "https://files.pythonhosted.org/packages/ab/23/9894b3df5d0a6eb44611c36aec777823fc2e07740dabbd0b810e19594013/jaraco_functools-4.1.0.tar.gz#sha256=70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d", + "pre_built": false, + "version": "4.1.0", + "canonicalized_name": "jaraco-functools", + "edges": [ + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools>=61.2" + }, + { + "key": "setuptools-scm==8.3.1", + "req_type": "build-system", + "req": "setuptools_scm[toml]>=3.4.1" + }, + { + "key": "more-itertools==10.7.0", + "req_type": "install", + "req": "more_itertools" + } + ] + }, + "more-itertools==10.7.0": { + "download_url": "https://files.pythonhosted.org/packages/ce/a0/834b0cebabbfc7e311f30b46c8188790a37f89fc8d756660346fe5abfd09/more_itertools-10.7.0.tar.gz#sha256=9fddd5403be01a94b204faadcff459ec3568cf110265d3c54323e1e866ad29d3", + "pre_built": false, + "version": "10.7.0", + "canonicalized_name": "more-itertools", + "edges": [ + { + "key": "flit-core==3.12.0", + 
"req_type": "build-system", + "req": "flit_core<4,>=3.2" + } + ] + }, + "flit-core==3.12.0": { + "download_url": "https://files.pythonhosted.org/packages/69/59/b6fc2188dfc7ea4f936cd12b49d707f66a1cb7a1d2c16172963534db741b/flit_core-3.12.0.tar.gz#sha256=18f63100d6f94385c6ed57a72073443e1a71a4acb4339491615d0f16d6ff01b2", + "pre_built": false, + "version": "3.12.0", + "canonicalized_name": "flit-core", + "edges": [] + }, + "setuptools-scm==8.3.1": { + "download_url": "https://files.pythonhosted.org/packages/b9/19/7ae64b70b2429c48c3a7a4ed36f50f94687d3bfcd0ae2f152367b6410dff/setuptools_scm-8.3.1.tar.gz#sha256=3d555e92b75dacd037d32bafdf94f97af51ea29ae8c7b234cf94b7a5bd242a63", + "pre_built": false, + "version": "8.3.1", + "canonicalized_name": "setuptools-scm", + "edges": [ + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools>=61" + }, + { + "key": "packaging==25.0", + "req_type": "build-system", + "req": "packaging>=20" + }, + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools" + } + ] + }, + "setuptools==80.8.0": { + "download_url": "https://files.pythonhosted.org/packages/8d/d2/ec1acaaff45caed5c2dedb33b67055ba9d4e96b091094df90762e60135fe/setuptools-80.8.0.tar.gz#sha256=49f7af965996f26d43c8ae34539c8d99c5042fbff34302ea151eaa9c207cd257", + "pre_built": false, + "version": "80.8.0", + "canonicalized_name": "setuptools", + "edges": [] + }, + "packaging==25.0": { + "download_url": "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz#sha256=d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", + "pre_built": false, + "version": "25.0", + "canonicalized_name": "packaging", + "edges": [ + { + "key": "flit-core==3.12.0", + "req_type": "build-system", + "req": "flit_core>=3.3" + } + ] + }, + "jaraco-context==6.0.1": { + "download_url": 
"https://files.pythonhosted.org/packages/df/ad/f3777b81bf0b6e7bc7514a1656d3e637b2e8e15fab2ce3235730b3e7a4e6/jaraco_context-6.0.1.tar.gz#sha256=9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3", + "pre_built": false, + "version": "6.0.1", + "canonicalized_name": "jaraco-context", + "edges": [ + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools>=61.2" + }, + { + "key": "setuptools-scm==8.3.1", + "req_type": "build-system", + "req": "setuptools_scm[toml]>=3.4.1" + } + ] + }, + "jaraco-classes==3.4.0": { + "download_url": "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz#sha256=47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", + "pre_built": false, + "version": "3.4.0", + "canonicalized_name": "jaraco-classes", + "edges": [ + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools>=56" + }, + { + "key": "setuptools-scm==8.3.1", + "req_type": "build-system", + "req": "setuptools_scm[toml]>=3.4.1" + }, + { + "key": "more-itertools==10.7.0", + "req_type": "install", + "req": "more_itertools" + } + ] + }, + "jinja2==3.1.6": { + "download_url": "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz#sha256=0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", + "pre_built": false, + "version": "3.1.6", + "canonicalized_name": "jinja2", + "edges": [ + { + "key": "flit-core==3.12.0", + "req_type": "build-system", + "req": "flit_core<4" + }, + { + "key": "markupsafe==3.0.2", + "req_type": "install", + "req": "MarkupSafe>=2.0" + } + ] + }, + "markupsafe==3.0.2": { + "download_url": "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz#sha256=ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", + "pre_built": false, + 
"version": "3.0.2", + "canonicalized_name": "markupsafe", + "edges": [ + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools>=70.1" + } + ] + }, + "imapclient==3.0.1": { + "download_url": "https://files.pythonhosted.org/packages/b6/63/0eea51c9c263c18021cdc5866def55c98393f3bd74bbb8e3053e36f0f81a/IMAPClient-3.0.1.zip#sha256=78e6d62fbfbbe233e1f0e0e993160fd665eb1fd35973acddc61c15719b22bc02", + "pre_built": false, + "version": "3.0.1", + "canonicalized_name": "imapclient", + "edges": [ + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools>=40.8.0" + } + ] + }, + "pyyaml==6.0.2": { + "download_url": "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz#sha256=d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", + "pre_built": false, + "version": "6.0.2", + "canonicalized_name": "pyyaml", + "edges": [ + { + "key": "cython==3.1.1", + "req_type": "build-system", + "req": "Cython; python_version < \"3.13\"" + }, + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools" + }, + { + "key": "wheel==0.46.1", + "req_type": "build-system", + "req": "wheel" + } + ] + }, + "wheel==0.46.1": { + "download_url": "https://files.pythonhosted.org/packages/fb/62/e90918c4558b002726ab930863c0cbd3e7cf9a7befa1d4a1a240cecdb379/wheel-0.46.1.tar.gz#sha256=fd477efb5da0f7df1d3c76c73c14394002c844451bd63229d8570f376f5e6a38", + "pre_built": false, + "version": "0.46.1", + "canonicalized_name": "wheel", + "edges": [ + { + "key": "flit-core==3.12.0", + "req_type": "build-system", + "req": "flit_core<4,>=3.8" + }, + { + "key": "packaging==25.0", + "req_type": "build-system", + "req": "packaging>=24.0" + } + ] + }, + "cython==3.1.1": { + "download_url": 
"https://files.pythonhosted.org/packages/5b/d3/bb000603e46144db2e5055219bbddcf7ab3b10012fcb342695694fb88141/cython-3.1.1.tar.gz#sha256=505ccd413669d5132a53834d792c707974248088c4f60c497deb1b416e366397", + "pre_built": false, + "version": "3.1.1", + "canonicalized_name": "cython", + "edges": [ + { + "key": "setuptools==80.8.0", + "req_type": "build-system", + "req": "setuptools>=40.8.0" + } + ] + } +} \ No newline at end of file From 59c500fe694a18b311ce3657e6e09d2b093f0452 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Mon, 15 Sep 2025 16:26:37 +0200 Subject: [PATCH 4/4] fix: use graphlib in build-parallel core Our custom dependency tracker for build-parallel code has a bug. It does not track and handle installation requirements of build requirements correctly. This causes build bugs when the installation requirement tree of a build dependency is deep. This PR completely rewrites and replaces the core logic of build-parallel command with stdlib's `graphlib` module. The `TopologicalSorter` detects cycles and tracks when a new node becomes ready. Nodes with a dependency on an exclusive build node now become built-ready earlier. I also changed the code to use a single instance of `ThreadPoolExecutor`. It's a tiny bit more efficient and saves one level of indention. See: #755 Signed-off-by: Christian Heimes --- e2e/test_build_parallel.sh | 2 +- src/fromager/commands/build.py | 180 +++++++++++++++------------------ 2 files changed, 80 insertions(+), 102 deletions(-) diff --git a/e2e/test_build_parallel.sh b/e2e/test_build_parallel.sh index 9a89e0ab..e843b20b 100755 --- a/e2e/test_build_parallel.sh +++ b/e2e/test_build_parallel.sh @@ -40,7 +40,7 @@ fromager \ --settings-dir="$SCRIPTDIR/build-parallel" \ build-parallel "$OUTDIR/graph.json" -if ! grep -q "cython-3.1.1: ready to build" "$log"; then +if ! 
grep -q "ready to build: cython==3.1.1" "$log"; then echo "Did not find message indicating build of cython would start" 1>&2 pass=false fi diff --git a/src/fromager/commands/build.py b/src/fromager/commands/build.py index 2f365176..b6ab3179 100644 --- a/src/fromager/commands/build.py +++ b/src/fromager/commands/build.py @@ -39,6 +39,7 @@ logger = logging.getLogger(__name__) DependencyNodeList = list[dependency_graph.DependencyNode] +DependencyNodeSet = set[dependency_graph.DependencyNode] @dataclasses.dataclass(order=True, frozen=True) @@ -612,126 +613,103 @@ def build_parallel( # Load the dependency graph logger.info("reading dependency graph from %s", graph_file) - graph: dependency_graph.DependencyGraph graph = dependency_graph.DependencyGraph.from_file(graph_file) - # Track what has been built - built_node_keys: set[str] = set() + # topological sort graph of node -> direct build dependencies + topo = graph.get_build_topology() - # Get all nodes that need to be built (excluding prebuilt ones and the root node) - # Sort the nodes to build by their key one time to avoid - # redoing the sort every iteration and to make the output deterministic. 
- nodes_to_build: DependencyNodeList = sorted( - (n for n in graph.nodes.values() if n.key != dependency_graph.ROOT), - key=lambda n: n.key, - ) - logger.info("found %d packages to build", len(nodes_to_build)) + # size and exclusive nodes + topo_size: int = 0 + exclusive_nodes: DependencyNodeSet = set() + for node in graph.get_all_nodes(): + if node.key == dependency_graph.ROOT: + continue + topo_size += 1 + if wkctx.settings.package_build_info(node.canonicalized_name).exclusive_build: + exclusive_nodes.add(node) - # A node can be built when all of its build dependencies are built - entries: list[BuildSequenceEntry] = [] + logger.info("found %d packages to build", topo_size) + + # Track node keys ready to build, `get_ready` does not return the same node twice + ready_nodes: DependencyNodeSet = set() + ready_exclusive_nodes: DependencyNodeSet + working_nodes: DependencyNodeList - with progress.progress_context(total=len(nodes_to_build)) as progressbar: + built_entries: list[BuildSequenceEntry] = [] + rounds: int = 0 + + # Build up to max_workers nodes concurrently (or all if max_workers is None) + with ( + concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor, + progress.progress_context(total=topo_size) as progressbar, + ): def update_progressbar_cb(future: concurrent.futures.Future) -> None: """Immediately update the progress when when a task is done""" progressbar.update() - while nodes_to_build: - # Find nodes that can be built (all build dependencies are built) - buildable_nodes: DependencyNodeList = [] - for node in nodes_to_build: - with req_ctxvar_context( - Requirement(node.canonicalized_name), node.version - ): - # Get all build dependencies (build-system, build-backend, build-sdist) - build_deps: DependencyNodeList = [ - edge.destination_node - for edge in node.children - if edge.req_type.is_build_requirement - ] - # A node can be built when all of its build dependencies are built - unbuilt_deps: set[str] = set( - dep.key for dep in 
build_deps if dep.key not in built_node_keys - ) - if not unbuilt_deps: - logger.info( - "ready to build, have all build dependencies: %s", - sorted(set(dep.key for dep in build_deps)), - ) - buildable_nodes.append(node) - else: - logger.info( - "waiting for build dependencies: %s", - sorted(unbuilt_deps), - ) - - if not buildable_nodes: - # If we can't build anything but still have nodes, we have a cycle - remaining: list[str] = [n.key for n in nodes_to_build] - logger.info("have already built: %s", sorted(built_node_keys)) - raise ValueError(f"Circular dependency detected among: {remaining}") - - logger.info( - "ready to build: %s", - sorted(n.key for n in buildable_nodes), - ) + while topo.is_active(): + rounds += 1 + # Get new available nodes ready to build + ready_nodes.update(topo.get_ready()) # Check if any buildable node requires exclusive build (exclusive_build == True) - exclusive_nodes: DependencyNodeList = [ - node - for node in buildable_nodes - if wkctx.settings.package_build_info( - node.canonicalized_name - ).exclusive_build - ] - if exclusive_nodes: - # Only build the first exclusive node this round - buildable_nodes = [exclusive_nodes[0]] + ready_exclusive_nodes = sorted(ready_nodes.intersection(exclusive_nodes)) + if ready_exclusive_nodes: + # Only build the first exclusive node this round. The other + # nodes are kept in ready_nodes and not marked as 'done'. + working_nodes = [ready_exclusive_nodes[0]] logger.info( - f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round." + f"{working_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round." 
) + else: + working_nodes = sorted(ready_nodes) + + logger.info( + "round #%i, ready to build: %s", + rounds, + ", ".join(n.key for n in working_nodes), + ) # Build up to max_workers nodes concurrently (or all if max_workers is None) - with concurrent.futures.ThreadPoolExecutor( - max_workers=max_workers - ) as executor: - futures: list[concurrent.futures.Future[tuple[pathlib.Path, bool]]] = [] - reqs: list[Requirement] = [] - logger.info( - "starting to build: %s", sorted(n.key for n in buildable_nodes) + futures: list[concurrent.futures.Future[tuple[pathlib.Path, bool]]] = [] + reqs: list[Requirement] = [] + + for node in working_nodes: + req = Requirement(f"{node.canonicalized_name}=={node.version}") + reqs.append(req) + future = executor.submit( + _build_parallel, + wkctx=wkctx, + resolved_version=node.version, + req=req, + source_download_url=node.download_url, + force=force, + cache_wheel_server_url=cache_wheel_server_url, ) - for node in buildable_nodes: - req = Requirement(f"{node.canonicalized_name}=={node.version}") - reqs.append(req) - future = executor.submit( - _build_parallel, - wkctx=wkctx, - resolved_version=node.version, - req=req, - source_download_url=node.download_url, - force=force, - cache_wheel_server_url=cache_wheel_server_url, - ) - future.add_done_callback(update_progressbar_cb) - futures.append(future) - - # Wait for all builds to complete - for node, future in zip(buildable_nodes, futures, strict=True): - try: - entry = future.result() - entries.append(entry) - built_node_keys.add(node.key) - nodes_to_build.remove(node) - # progress bar is updated in callback - except Exception as e: - logger.error(f"Failed to build {node.key}: {e}") - raise - - # invalidate uv cache - wkctx.uv_clean_cache(*reqs) + future.add_done_callback(update_progressbar_cb) + futures.append(future) + + # Wait for all builds to complete + for node, future in zip(working_nodes, futures, strict=True): + try: + entry = future.result() + except Exception as e: + 
logger.error(f"Failed to build {node.key}: {e}") + raise + else: + built_entries.append(entry) + # mark node as done + logger.info("%s: done", node.key) + topo.done(node) + ready_nodes.remove(node) + # progress bar is updated in callback + + # invalidate uv cache + wkctx.uv_clean_cache(*reqs) metrics.summarize(wkctx, "Building in parallel") - _summary(wkctx, entries) + _summary(wkctx, built_entries) build_parallel._fromager_show_build_settings = True # type: ignore