From 877f2c54ef59746c9d945c6197b3eab7871d2b10 Mon Sep 17 00:00:00 2001 From: Lalatendu Mohanty Date: Fri, 22 Aug 2025 00:25:58 -0400 Subject: [PATCH] feat: Add --test-mode for resilient bootstrap with failure handling Adds --test-mode flag that marks failed packages as pre-built and continues bootstrap until all packages are processed. When a package fails to build, it is retried once as a pre-built wheel, and a summary of all failed packages is reported at the end of the run. Enables discovery of all build failures rather than stopping on first failure, supporting mixed source/binary dependency workflows. Fixes #713 Co-developed-with: Cursor IDE with Claude 4.0 Sonnet Signed-off-by: Lalatendu Mohanty --- src/fromager/bootstrapper.py | 413 +++++++++++++++++++---------- src/fromager/commands/bootstrap.py | 66 ++++- src/fromager/packagesettings.py | 6 + 3 files changed, 336 insertions(+), 149 deletions(-) diff --git a/src/fromager/bootstrapper.py b/src/fromager/bootstrapper.py index dc4cdb22..cc7130a4 100644 --- a/src/fromager/bootstrapper.py +++ b/src/fromager/bootstrapper.py @@ -1,5 +1,6 @@ from __future__ import annotations +import dataclasses import json import logging import operator @@ -36,6 +37,57 @@ logger = logging.getLogger(__name__) + +@dataclasses.dataclass +class BuildResult: + """Simple result object for package builds.""" + + wheel_filename: pathlib.Path | None = None + sdist_filename: pathlib.Path | None = None + unpack_dir: pathlib.Path | None = None + source_url_type: str = "unknown" + sdist_root_dir: pathlib.Path | None = None + build_env: build_environment.BuildEnvironment | None = None + failed: bool = False + + @classmethod + def failure(cls) -> BuildResult: + """Create a failed build result.""" + return cls(source_url_type="failed", failed=True) + + +class BuildFailureError(Exception): + """Specific exception for build failures in test mode.""" + + def __init__(self, package_name: str, version: str, original_error: Exception): + self.package_name = package_name + self.version = version + self.original_error = 
original_error + super().__init__( + f"Build failed for {package_name}=={version}: {original_error}" + ) + + +class PreBuiltFallbackError(Exception): + """Exception when fallback to pre-built also fails in test mode.""" + + def __init__( + self, + package_name: str, + version: str, + build_error: Exception, + fallback_error: Exception, + ): + self.package_name = package_name + self.version = version + self.build_error = build_error + self.fallback_error = fallback_error + super().__init__( + f"Both build and pre-built fallback failed for {package_name}=={version}: " + f"build error: {build_error}, fallback error: {fallback_error}" + ) + + # package name, extras, version, sdist/wheel SeenKey = tuple[NormalizedName, tuple[str, ...], str, typing.Literal["sdist", "wheel"]] @@ -48,12 +100,15 @@ def __init__( prev_graph: DependencyGraph | None = None, cache_wheel_server_url: str | None = None, sdist_only: bool = False, + test_mode: bool = False, ) -> None: self.ctx = ctx self.progressbar = progressbar or progress.Progressbar(None) self.prev_graph = prev_graph self.cache_wheel_server_url = cache_wheel_server_url or ctx.wheel_server_url self.sdist_only = sdist_only + self.test_mode = test_mode + self.failed_packages: set[str] = set() # Track failed packages in test mode self.why: list[tuple[RequirementType, Requirement, Version]] = [] # Push items onto the stack as we start to resolve their # dependencies so at the end we have a list of items that need to @@ -154,8 +209,8 @@ def bootstrap(self, req: Requirement, req_type: RequirementType) -> Version: # # When bootstrap encounters another package with a *build* requirement # on a pre-built wheel, its installation dependencies are materialized. - build_sdist_only = self.sdist_only and not self._processing_build_requirement( - req_type + build_sdist_only: bool = ( + self.sdist_only and not self._processing_build_requirement(req_type) ) # Avoid cyclic dependencies and redundant processing. 
@@ -174,163 +229,40 @@ def bootstrap(self, req: Requirement, req_type: RequirementType) -> Version: # we're given. self.why.append((req_type, req, resolved_version)) - # for cleanup - build_env: build_environment.BuildEnvironment | None = None - sdist_root_dir: pathlib.Path | None = None - cached_wheel_filename: pathlib.Path | None = None - wheel_filename: pathlib.Path | None = None - sdist_filename: pathlib.Path | None = None - unpack_dir: pathlib.Path | None = None - unpacked_cached_wheel: pathlib.Path | None = None - - source_url_type = sources.get_source_type(self.ctx, req) - - if pbi.pre_built: - wheel_filename, unpack_dir = self._download_prebuilt( - req=req, - req_type=req_type, - resolved_version=resolved_version, - wheel_url=source_url, - ) - # Remember that this is a prebuilt wheel, and where we got it. - source_url_type = str(SourceType.PREBUILT) - else: - # Look a few places for an existing wheel that matches what we need, - # using caches for locations where we might have built the wheel - # before. - - # Check if we have previously built a wheel and still have it on the - # local filesystem. - if not wheel_filename and not cached_wheel_filename: - cached_wheel_filename, unpacked_cached_wheel = ( - self._look_for_existing_wheel( - req, - resolved_version, - self.ctx.wheels_build, - ) - ) - - # Check if we have previously downloaded a wheel and still have it - # on the local filesystem. - if not wheel_filename and not cached_wheel_filename: - cached_wheel_filename, unpacked_cached_wheel = ( - self._look_for_existing_wheel( - req, - resolved_version, - self.ctx.wheels_downloads, - ) - ) - - # Look for a wheel on the cache server and download it if there is - # one. 
- if not wheel_filename and not cached_wheel_filename: - cached_wheel_filename, unpacked_cached_wheel = ( - self._download_wheel_from_cache(req, resolved_version) - ) - - if not unpacked_cached_wheel: - # We didn't find anything so we are going to have to build the - # wheel in order to process its installation dependencies. - logger.debug("no cached wheel, downloading sources") - source_filename = sources.download_source( - ctx=self.ctx, - req=req, - version=resolved_version, - download_url=source_url, - ) - sdist_root_dir = sources.prepare_source( - ctx=self.ctx, - req=req, - source_filename=source_filename, - version=resolved_version, - ) - else: - logger.debug(f"have cached wheel in {unpacked_cached_wheel}") - sdist_root_dir = unpacked_cached_wheel / unpacked_cached_wheel.stem - - assert sdist_root_dir is not None - - if sdist_root_dir.parent.parent != self.ctx.work_dir: - raise ValueError( - f"'{sdist_root_dir}/../..' should be {self.ctx.work_dir}" - ) - unpack_dir = sdist_root_dir.parent - - build_env = build_environment.BuildEnvironment( - ctx=self.ctx, - parent_dir=sdist_root_dir.parent, - ) - - # need to call this function irrespective of whether we had the wheel cached - # so that the build dependencies can be bootstrapped - self._prepare_build_dependencies(req, sdist_root_dir, build_env) - - if cached_wheel_filename: - logger.debug( - f"getting install requirements from cached " - f"wheel {cached_wheel_filename.name}" - ) - # prefer existing wheel even in sdist_only mode - # skip building even if it is a non-fromager built wheel - wheel_filename = cached_wheel_filename - build_sdist_only = False - elif build_sdist_only: - # get install dependencies from sdist and pyproject_hooks (only top-level and install) - logger.debug( - f"getting install requirements from sdist " - f"{req.name}=={resolved_version} ({req_type})" - ) - wheel_filename = None - sdist_filename = self._build_sdist( - req, resolved_version, sdist_root_dir, build_env - ) - else: - # build 
wheel (build requirements, full build mode) - logger.debug( - f"building wheel {req.name}=={resolved_version} " - f"to get install requirements ({req_type})" - ) - wheel_filename, sdist_filename = self._build_wheel( - req, resolved_version, sdist_root_dir, build_env - ) + # Much simpler: just build the package + result = self._build_package(req, resolved_version, pbi, build_sdist_only) hooks.run_post_bootstrap_hooks( ctx=self.ctx, req=req, dist_name=canonicalize_name(req.name), dist_version=str(resolved_version), - sdist_filename=sdist_filename, - wheel_filename=wheel_filename, + sdist_filename=result.sdist_filename, + wheel_filename=result.wheel_filename, ) - if wheel_filename is not None: - assert unpack_dir is not None - logger.debug( - "get install dependencies of wheel %s", - wheel_filename.name, + # Get install dependencies - much simpler logic + if result.failed: + logger.warning( + "test mode: skipping install dependencies for failed package %s", + req.name, ) + install_dependencies = set() + elif result.wheel_filename and result.unpack_dir: install_dependencies = dependencies.get_install_dependencies_of_wheel( req=req, - wheel_filename=wheel_filename, - requirements_file_dir=unpack_dir, - ) - elif sdist_filename is not None: - assert sdist_root_dir is not None - assert build_env is not None - logger.debug( - "get install dependencies of sdist from directory %s", - sdist_root_dir, + wheel_filename=result.wheel_filename, + requirements_file_dir=result.unpack_dir, ) + elif result.sdist_filename and result.sdist_root_dir and result.build_env: install_dependencies = dependencies.get_install_dependencies_of_sdist( ctx=self.ctx, req=req, - version=resolved_version, - sdist_root_dir=sdist_root_dir, - build_env=build_env, + sdist_root_dir=result.sdist_root_dir, + build_env=result.build_env, ) else: - # unreachable - raise RuntimeError("wheel_filename and sdist_filename are None") + raise RuntimeError("Both wheel_filename and sdist_filename are None") logger.debug( 
"install dependencies: %s", @@ -341,7 +273,7 @@ def bootstrap(self, req: Requirement, req_type: RequirementType) -> Version: req=req, version=resolved_version, source_url=source_url, - source_url_type=source_url_type, + source_url_type=result.source_url_type, prebuilt=pbi.pre_built, constraint=constraint, ) @@ -357,7 +289,7 @@ def bootstrap(self, req: Requirement, req_type: RequirementType) -> Version: # we are done processing this req, so lets remove it from the why chain self.why.pop() - self.ctx.clean_build_dirs(sdist_root_dir, build_env) + self.ctx.clean_build_dirs(result.sdist_root_dir, result.build_env) return resolved_version @property @@ -431,6 +363,178 @@ def _build_wheel( logger.info(f"built wheel for version {resolved_version}: {wheel_filename}") return wheel_filename, sdist_filename + def _build_package( + self, + req: Requirement, + resolved_version: Version, + pbi, + build_sdist_only: bool, + ) -> BuildResult: + """Build or download package - handles test mode failures gracefully.""" + try: + return self._build_wheel_and_sdist( + req, resolved_version, pbi, build_sdist_only + ) + except Exception: + if not self.test_mode: + raise # Re-raise in normal mode + + # Test mode: try pre-built fallback + logger.warning( + "test mode: build failed for %s==%s, attempting fallback to pre-built", + req.name, + resolved_version, + exc_info=True, + ) + + canonical_name = str(canonicalize_name(req.name)) + self.failed_packages.add(canonical_name) + + try: + self._mark_package_as_pre_built_runtime(req) + pbi = self.ctx.package_build_info(req) + result = self._build_wheel_and_sdist( + req, resolved_version, pbi, build_sdist_only + ) + logger.info( + "test mode: successfully handled %s as pre-built after build failure", + req.name, + ) + return result + except Exception as prebuilt_error: + logger.error( + "test mode: failed to handle %s as pre-built: %s", + req.name, + prebuilt_error, + exc_info=True, + ) + return BuildResult.failure() + + def _build_wheel_and_sdist( 
+ self, + req: Requirement, + resolved_version: Version, + pbi, + build_sdist_only: bool, + ) -> BuildResult: + """Build or download wheel and sdist for the current requirement.""" + build_env: build_environment.BuildEnvironment | None = None + sdist_root_dir: pathlib.Path | None = None + cached_wheel_filename: pathlib.Path | None = None + wheel_filename: pathlib.Path | None = None + sdist_filename: pathlib.Path | None = None + unpack_dir: pathlib.Path | None = None + unpacked_cached_wheel: pathlib.Path | None = None + source_url, _ = self.resolve_version( + req=req, req_type=RequirementType.TOP_LEVEL + ) + source_url_type = sources.get_source_type(self.ctx, req) + + if pbi.pre_built: + wheel_filename, unpack_dir = self._download_prebuilt( + req=req, + req_type=RequirementType.TOP_LEVEL, + resolved_version=resolved_version, + wheel_url=source_url, + ) + source_url_type = str(SourceType.PREBUILT) + else: + # Try to find existing wheels + cached_wheel_filename, unpacked_cached_wheel = self._find_cached_wheel( + req, resolved_version + ) + + if not unpacked_cached_wheel: + # Download and prepare sources + sdist_root_dir, unpack_dir = self._prepare_source_for_build( + req, resolved_version, source_url + ) + build_env = build_environment.BuildEnvironment( + ctx=self.ctx, parent_dir=sdist_root_dir.parent + ) + self._prepare_build_dependencies(req, sdist_root_dir, build_env) + else: + logger.debug(f"have cached wheel in {unpacked_cached_wheel}") + sdist_root_dir = unpacked_cached_wheel / unpacked_cached_wheel.stem + unpack_dir = sdist_root_dir.parent + build_env = build_environment.BuildEnvironment( + ctx=self.ctx, parent_dir=sdist_root_dir.parent + ) + self._prepare_build_dependencies(req, sdist_root_dir, build_env) + + # Build the package + if cached_wheel_filename: + logger.debug(f"using cached wheel {cached_wheel_filename.name}") + wheel_filename = cached_wheel_filename + elif build_sdist_only: + logger.debug(f"building sdist for {req.name}=={resolved_version}") + 
wheel_filename = None + sdist_filename = self._build_sdist( + req, resolved_version, sdist_root_dir, build_env + ) + else: + logger.debug(f"building wheel for {req.name}=={resolved_version}") + wheel_filename, sdist_filename = self._build_wheel( + req, resolved_version, sdist_root_dir, build_env + ) + + return BuildResult( + wheel_filename=wheel_filename, + sdist_filename=sdist_filename, + unpack_dir=unpack_dir, + source_url_type=source_url_type, + sdist_root_dir=sdist_root_dir, + build_env=build_env, + ) + + def _find_cached_wheel( + self, req: Requirement, resolved_version: Version + ) -> tuple[pathlib.Path | None, pathlib.Path | None]: + """Find cached wheel in various locations.""" + # Check for existing wheels in multiple locations + cached_wheel_filename, unpacked_cached_wheel = self._look_for_existing_wheel( + req, resolved_version, self.ctx.wheels_build + ) + + if not cached_wheel_filename: + cached_wheel_filename, unpacked_cached_wheel = ( + self._look_for_existing_wheel( + req, resolved_version, self.ctx.wheels_downloads + ) + ) + + if not cached_wheel_filename: + cached_wheel_filename, unpacked_cached_wheel = ( + self._download_wheel_from_cache(req, resolved_version) + ) + + return cached_wheel_filename, unpacked_cached_wheel + + def _prepare_source_for_build( + self, req: Requirement, resolved_version: Version, source_url: str + ) -> tuple[pathlib.Path, pathlib.Path]: + """Download and prepare source for building.""" + logger.debug("no cached wheel, downloading sources") + source_filename = sources.download_source( + ctx=self.ctx, + req=req, + version=resolved_version, + download_url=source_url, + ) + sdist_root_dir = sources.prepare_source( + ctx=self.ctx, + req=req, + source_filename=source_filename, + version=resolved_version, + ) + + assert sdist_root_dir is not None + if sdist_root_dir.parent.parent != self.ctx.work_dir: + raise ValueError(f"'{sdist_root_dir}/../..' 
should be {self.ctx.work_dir}") + + unpack_dir = sdist_root_dir.parent + return sdist_root_dir, unpack_dir + def _prepare_build_dependencies( self, req: Requirement, @@ -500,7 +604,21 @@ def _handle_build_requirements( try: self.bootstrap(req=dep, req_type=build_type) except Exception as err: - raise ValueError(f"could not handle {self._explain}") from err + if self.test_mode: + # Follow repository's logging pattern for test mode failures + logger.warning( + "test mode: build dependency %s failed for %s, marking as failed and continuing", + dep.name, + req.name, + exc_info=True, # Debug details like __main__.py pattern + ) + canonical_name = str(canonicalize_name(dep.name)) + self.failed_packages.add(canonical_name) + # Continue processing other dependencies instead of failing immediately + continue + else: + # Original behavior for non-test mode + raise ValueError(f"could not handle {self._explain}") from err self.progressbar.update() def _download_prebuilt( @@ -1035,3 +1153,8 @@ def _add_to_build_order( # Requirement and Version instances that can't be # converted to JSON without help. 
json.dump(self._build_stack, f, indent=2, default=str) + + def _mark_package_as_pre_built_runtime(self, req: Requirement) -> None: + """Mark a package as pre-built at runtime for test mode.""" + canonical_name = canonicalize_name(req.name) + self.ctx.settings.pre_built_override.add(canonical_name) diff --git a/src/fromager/commands/bootstrap.py b/src/fromager/commands/bootstrap.py index f643966f..c7cfbdc0 100644 --- a/src/fromager/commands/bootstrap.py +++ b/src/fromager/commands/bootstrap.py @@ -97,6 +97,13 @@ def _get_requirements_from_args( default=False, help="Skip generating constraints.txt file to allow building collections with conflicting versions", ) +@click.option( + "--test-mode", + "test_mode", + is_flag=True, + default=False, + help="Test mode: mark failed packages as pre-built and continue, report failures at end", +) @click.argument("toplevel", nargs=-1) @click.pass_obj def bootstrap( @@ -106,6 +113,7 @@ def bootstrap( cache_wheel_server_url: str | None, sdist_only: bool, skip_constraints: bool, + test_mode: bool, toplevel: list[str], ) -> None: """Compute and build the dependencies of a set of requirements recursively @@ -116,6 +124,11 @@ def bootstrap( """ logger.info(f"cache wheel server url: {cache_wheel_server_url}") + if test_mode: + logger.info( + "test mode enabled: will mark failed packages as pre-built and continue" + ) + to_build = _get_requirements_from_args(toplevel, requirements_files) if not to_build: raise RuntimeError( @@ -148,6 +161,7 @@ def bootstrap( prev_graph, cache_wheel_server_url, sdist_only=sdist_only, + test_mode=test_mode, ) # we need to resolve all the top level dependencies before we start bootstrapping. 
@@ -182,9 +196,26 @@ def bootstrap( for req in to_build: token = requirement_ctxvar.set(req) - bt.bootstrap(req, requirements_file.RequirementType.TOP_LEVEL) - progressbar.update() - requirement_ctxvar.reset(token) + try: + bt.bootstrap(req, requirements_file.RequirementType.TOP_LEVEL) + progressbar.update() + if test_mode: + logger.info("Successfully processed: %s", req) + except Exception as err: + if test_mode: + # Test mode: log error but continue processing + logger.error( + "test mode: failed to process %s: %s", + req, + err, + exc_info=True, # Full traceback to debug log + ) + progressbar.update() # Update progress even on failure + else: + # Normal mode: re-raise the exception (fail-fast) + raise + finally: + requirement_ctxvar.reset(token) constraints_filename = wkctx.work_dir / "constraints.txt" if skip_constraints: @@ -199,7 +230,25 @@ def bootstrap( logger.debug("match_py_req LRU cache: %r", resolver.match_py_req.cache_info()) - metrics.summarize(wkctx, "Bootstrapping") + # Test mode summary reporting + if test_mode: + if bt.failed_packages: + # Use repository's logging pattern for error reporting + logger.error("test mode: the following packages failed to build:") + for package in sorted(bt.failed_packages): + logger.error(" - %s", package) + logger.error( + "test mode: %d package(s) failed to build", len(bt.failed_packages) + ) + # Follow repository's error exit pattern like __main__.py and lint.py + raise SystemExit( + f"Test mode completed with {len(bt.failed_packages)} build failures" + ) + else: + logger.info("test mode: all packages built successfully") + metrics.summarize(wkctx, "Test Mode Bootstrapping") + else: + metrics.summarize(wkctx, "Bootstrapping") def write_constraints_file( @@ -458,6 +507,13 @@ def write_constraints_file( default=None, help="maximum number of parallel workers to run (default: unlimited)", ) +@click.option( + "--test-mode", + "test_mode", + is_flag=True, + default=False, + help="Test mode: mark failed packages as 
pre-built and continue, report failures at end", +) @click.argument("toplevel", nargs=-1) @click.pass_obj @click.pass_context @@ -471,6 +527,7 @@ def bootstrap_parallel( skip_constraints: bool, force: bool, max_workers: int | None, + test_mode: bool, toplevel: list[str], ) -> None: """Bootstrap and build-parallel @@ -494,6 +551,7 @@ def bootstrap_parallel( cache_wheel_server_url=cache_wheel_server_url, sdist_only=True, skip_constraints=skip_constraints, + test_mode=test_mode, toplevel=toplevel, ) diff --git a/src/fromager/packagesettings.py b/src/fromager/packagesettings.py index 709e317f..089d7a6f 100644 --- a/src/fromager/packagesettings.py +++ b/src/fromager/packagesettings.py @@ -727,6 +727,11 @@ def has_config(self) -> bool: @property def pre_built(self) -> bool: """Does the variant use pre-build wheels?""" + # Check if package is in pre_built_override set + if self.package in self._settings.pre_built_override: + return True + + # Check variant configuration vi = self._ps.variants.get(self.variant) if vi is not None: return vi.pre_built @@ -1034,6 +1039,7 @@ def __init__( self._patches_dir = patches_dir self._max_jobs = max_jobs self._pbi_cache: dict[Package, PackageBuildInfo] = {} + self.pre_built_override: set[NormalizedName] = set() @classmethod def from_files(