-
Notifications
You must be signed in to change notification settings - Fork 32
feat(ci): add container build matrix check. Fixes #119 #127
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
prachimaskar
wants to merge
4
commits into
kubeflow:main
Choose a base branch
from
prachimaskar:add-container-build-matrix-check
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+429
−0
Open
Changes from 3 commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
37a73af
ci: add check_container_build_matrix script and unit tests
prachimaskar 7ad52d0
ci: add container-build-matrix-check workflow
prachimaskar 46a662c
ci: add .container-build-ignore exclusion file
prachimaskar 809d245
ci: address Copilot review comments
prachimaskar File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| # List directories containing Containerfiles/Dockerfiles that should NOT | ||
| # be validated against the container-build matrix (e.g. local-dev only) | ||
| # | ||
| # One path per line, relative to the repo root. | ||
| # | ||
| # Example: | ||
| # docs/examples/local-dev |
Empty file.
177 changes: 177 additions & 0 deletions
177
.github/scripts/check_container_build_matrix/check_container_build_matrix.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,177 @@ | ||
| #!/usr/bin/env python3 | ||
| """Check that every Containerfile/Dockerfile has a matching entry in container-build.yml. | ||
|
|
||
| This ensures contributors do not add a Containerfile without registering it in | ||
| the container-build matrix, which would cause CI to silently skip building it. | ||
| """ | ||
|
|
||
| import argparse | ||
| import sys | ||
| from pathlib import Path | ||
|
|
||
| import yaml | ||
|
|
||
| SEARCH_ROOTS = ["components", "pipelines", "docs/examples"] | ||
| CONTAINER_FILENAMES = {"Containerfile", "Dockerfile"} | ||
| IGNORE_FILENAME = ".container-build-ignore" | ||
| WORKFLOW_PATH = ".github/workflows/container-build.yml" | ||
|
|
||
|
|
||
| def get_repo_root() -> Path: | ||
| """Locate and return the repository root directory.""" | ||
| path = Path(__file__).resolve() | ||
| for parent in path.parents: | ||
| if (parent / ".github").exists(): | ||
| return parent | ||
| raise RuntimeError("Could not locate repo root") | ||
|
|
||
|
|
||
| def discover_container_files(repo_root: Path, search_roots: list[str]) -> list[Path]: | ||
| """Recursively find all Containerfile/Dockerfile paths under search_roots.""" | ||
| found = [] | ||
| for root in search_roots: | ||
| base = repo_root / root | ||
| if not base.exists(): | ||
| continue | ||
| for name in CONTAINER_FILENAMES: | ||
| found.extend(base.rglob(name)) | ||
| return sorted(found) | ||
|
|
||
|
|
||
| def load_ignore_list(repo_root: Path) -> set[str]: | ||
| """Load directories to ignore from IGNORE_FILENAME at the repo root. | ||
|
|
||
| Each non-empty, non-comment line is treated as a path relative to the | ||
| repo root (e.g. components/foo/bar). | ||
| """ | ||
| ignore_file = repo_root / IGNORE_FILENAME | ||
| if not ignore_file.exists(): | ||
| return set() | ||
| ignored = set() | ||
| for line in ignore_file.read_text().splitlines(): | ||
| line = line.strip() | ||
| if line and not line.startswith("#"): | ||
| ignored.add(str(Path(line))) | ||
| return ignored | ||
|
|
||
|
|
||
| def parse_matrix_contexts(workflow_path: Path) -> set[str]: | ||
| """Extract context values from jobs.build.strategy.matrix.include.""" | ||
| with open(workflow_path) as f: | ||
| workflow = yaml.safe_load(f) | ||
|
|
||
| includes = workflow.get("jobs", {}).get("build", {}).get("strategy", {}).get("matrix", {}).get("include", []) | ||
| if not isinstance(includes, list): | ||
| return set() | ||
|
|
||
| contexts = set() | ||
| for entry in includes: | ||
prachimaskar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if "context" in entry: | ||
| contexts.add(str(Path(entry["context"]))) | ||
| return contexts | ||
|
|
||
|
|
||
| def check( | ||
| repo_root: Path, | ||
| search_roots: list[str], | ||
| workflow_path: Path, | ||
| ) -> tuple[bool, list[dict]]: | ||
| """Run the matrix check and return (all_matched, results). | ||
|
|
||
| Each result dict has: | ||
| - file: str path relative to repo root | ||
| - status: "ok" | "unmatched" | "ignored" | ||
| - suggestion: str (only present when status == "unmatched") | ||
| """ | ||
| container_files = discover_container_files(repo_root, search_roots) | ||
| matrix_contexts = parse_matrix_contexts(workflow_path) | ||
| ignore_list = load_ignore_list(repo_root) | ||
|
|
||
| results = [] | ||
| all_matched = True | ||
|
|
||
| for cf in container_files: | ||
| rel_dir = str(cf.parent.relative_to(repo_root)) | ||
|
|
||
| if rel_dir in ignore_list: | ||
| results.append({"file": str(cf.relative_to(repo_root)), "status": "ignored"}) | ||
| continue | ||
|
|
||
| if rel_dir in matrix_contexts: | ||
| results.append({"file": str(cf.relative_to(repo_root)), "status": "ok"}) | ||
| else: | ||
| all_matched = False | ||
| suggestion = f" - context: {rel_dir}\n dockerfile: {cf.relative_to(repo_root)}" | ||
| results.append( | ||
| { | ||
| "file": str(cf.relative_to(repo_root)), | ||
| "status": "unmatched", | ||
| "suggestion": suggestion, | ||
| } | ||
prachimaskar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ) | ||
|
|
||
| return all_matched, results | ||
|
|
||
|
|
||
| def _print_results(results: list[dict], all_matched: bool, workflow_path: str, use_emoji: bool = True) -> None: | ||
| """Print check results to stdout.""" | ||
| check_icon = "🔍" if use_emoji else "[CHECK]" | ||
| ok_icon = "✅" if use_emoji else "[OK]" | ||
| skip_icon = "⏭️" if use_emoji else "[SKIP]" | ||
| fail_icon = "❌" if use_emoji else "[FAIL]" | ||
|
|
||
| print(f"{check_icon} Checking container build matrix entries in {workflow_path}...") | ||
|
|
||
| unmatched = [r for r in results if r["status"] == "unmatched"] | ||
| ignored = [r for r in results if r["status"] == "ignored"] | ||
| ok = [r for r in results if r["status"] == "ok"] | ||
|
|
||
| if ignored: | ||
| print() | ||
| for r in ignored: | ||
| print(f" {skip_icon} {r['file']} (ignored via {IGNORE_FILENAME})") | ||
|
|
||
| if all_matched: | ||
| print() | ||
| print(f"{ok_icon} All Containerfiles/Dockerfiles have a matching matrix entry ({len(ok)} checked)") | ||
| return | ||
|
|
||
| print() | ||
| for r in unmatched: | ||
| print(f" {fail_icon} {r['file']} has no matching entry in the container-build matrix") | ||
|
|
||
| print() | ||
| print(f"{fail_icon} Some Containerfiles/Dockerfiles are not registered in the container-build matrix.") | ||
| print(f" Add the following entries to the strategy.matrix.include section of {workflow_path}:") | ||
| print() | ||
| for r in unmatched: | ||
| print(r["suggestion"]) | ||
| print() | ||
| print(" See docs on Adding a Custom Base Image for details.") | ||
|
|
||
|
|
||
| def main() -> int: | ||
| """CLI entry point.""" | ||
| parser = argparse.ArgumentParser( | ||
| description="Check that every Containerfile/Dockerfile has a matching container-build matrix entry" | ||
| ) | ||
| parser.add_argument("--repo-root", default=None, help="Path to the repository root") | ||
| parser.add_argument("--workflow", default=WORKFLOW_PATH, help="Path to container-build workflow") | ||
| parser.add_argument("--search-roots", nargs="+", default=SEARCH_ROOTS, help="Directories to scan") | ||
| parser.add_argument("--no-emoji", action="store_true", help="Disable emoji output") | ||
| args = parser.parse_args() | ||
|
|
||
| repo_root = Path(args.repo_root).resolve() if args.repo_root else get_repo_root() | ||
| workflow_path = repo_root / args.workflow | ||
|
|
||
| if not workflow_path.exists(): | ||
| print(f"❌ Workflow file not found: {workflow_path}") | ||
| return 2 # distinct exit code for missing workflow | ||
|
|
||
| all_matched, results = check(repo_root, args.search_roots, workflow_path) | ||
| _print_results(results, all_matched, args.workflow, use_emoji=not args.no_emoji) | ||
| return 0 if all_matched else 1 | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| sys.exit(main()) | ||
Empty file.
209 changes: 209 additions & 0 deletions
209
.github/scripts/check_container_build_matrix/tests/test_check_container_build_matrix.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,209 @@ | ||
| """Unit tests for check_container_build_matrix.""" | ||
|
|
||
| import textwrap | ||
| from pathlib import Path | ||
|
|
||
| from ..check_container_build_matrix import ( | ||
| IGNORE_FILENAME, | ||
| check, | ||
| discover_container_files, | ||
| load_ignore_list, | ||
| parse_matrix_contexts, | ||
| ) | ||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Helpers | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| def _write(path: Path, content: str) -> Path: | ||
| path.parent.mkdir(parents=True, exist_ok=True) | ||
| path.write_text(textwrap.dedent(content)) | ||
| return path | ||
|
|
||
|
|
||
| def _make_workflow(tmp_path: Path, contexts: list[str]) -> Path: | ||
| includes = "\n".join(f" - context: {ctx}\n dockerfile: {ctx}/Containerfile" for ctx in contexts) | ||
| content = f"""\ | ||
| jobs: | ||
| build: | ||
| strategy: | ||
| matrix: | ||
| include: | ||
| {includes} | ||
| """ | ||
| return _write(tmp_path / ".github/workflows/container-build.yml", content) | ||
prachimaskar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # discover_container_files | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| class TestDiscoverContainerFiles: | ||
| """Tests for discover_container_files.""" | ||
|
|
||
| def test_finds_containerfile(self, tmp_path): | ||
| """Finds a Containerfile under a search root.""" | ||
| _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11") | ||
| results = discover_container_files(tmp_path, ["components"]) | ||
| assert any("Containerfile" in str(r) for r in results) | ||
|
|
||
| def test_finds_dockerfile(self, tmp_path): | ||
| """Finds a Dockerfile under a search root.""" | ||
| _write(tmp_path / "components/cat/comp/Dockerfile", "FROM python:3.11") | ||
| results = discover_container_files(tmp_path, ["components"]) | ||
| assert any("Dockerfile" in str(r) for r in results) | ||
|
|
||
| def test_finds_nested_subcategory(self, tmp_path): | ||
| """Finds a Containerfile nested under a subcategory.""" | ||
| _write(tmp_path / "components/cat/subcat/comp/Containerfile", "FROM python:3.11") | ||
| results = discover_container_files(tmp_path, ["components"]) | ||
| assert len(results) == 1 | ||
|
|
||
| def test_missing_root_is_skipped(self, tmp_path): | ||
| """Returns empty list when search root does not exist.""" | ||
| results = discover_container_files(tmp_path, ["nonexistent"]) | ||
| assert results == [] | ||
|
|
||
| def test_multiple_roots(self, tmp_path): | ||
| """Finds container files across multiple search roots.""" | ||
| _write(tmp_path / "components/cat/a/Containerfile", "FROM a") | ||
| _write(tmp_path / "pipelines/cat/b/Containerfile", "FROM b") | ||
| results = discover_container_files(tmp_path, ["components", "pipelines"]) | ||
| assert len(results) == 2 | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # parse_matrix_contexts | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| class TestParseMatrixContexts: | ||
| """Tests for parse_matrix_contexts.""" | ||
|
|
||
| def test_extracts_contexts(self, tmp_path): | ||
| """Extracts context paths from the matrix workflow file.""" | ||
| wf = _make_workflow(tmp_path, ["components/cat/comp", "pipelines/cat/pipe"]) | ||
| contexts = parse_matrix_contexts(wf) | ||
| contexts_normalized = {c.replace("\\", "/") for c in contexts} | ||
| assert "components/cat/comp" in contexts_normalized | ||
| assert "pipelines/cat/pipe" in contexts_normalized | ||
|
|
||
| def test_empty_matrix(self, tmp_path): | ||
| """Returns empty set when matrix include list is empty.""" | ||
| wf = _write( | ||
| tmp_path / ".github/workflows/container-build.yml", | ||
| """\ | ||
| jobs: | ||
| build: | ||
| strategy: | ||
| matrix: | ||
| include: [] | ||
| """, | ||
| ) | ||
| assert parse_matrix_contexts(wf) == set() | ||
|
|
||
| def test_missing_matrix_key(self, tmp_path): | ||
| """Returns empty set when matrix key is missing.""" | ||
| wf = _write( | ||
| tmp_path / ".github/workflows/container-build.yml", | ||
| "jobs:\n build:\n steps: []\n", | ||
| ) | ||
| assert parse_matrix_contexts(wf) == set() | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # load_ignore_list | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| class TestLoadIgnoreList: | ||
| """Tests for load_ignore_list.""" | ||
|
|
||
| def test_loads_paths(self, tmp_path): | ||
| """Loads paths from the ignore file.""" | ||
| (tmp_path / IGNORE_FILENAME).write_text("components/foo/bar\npipelines/baz\n") | ||
| result = load_ignore_list(tmp_path) | ||
| result_normalized = {p.replace("\\", "/") for p in result} | ||
| assert "components/foo/bar" in result_normalized | ||
| assert "pipelines/baz" in result_normalized | ||
|
|
||
| def test_ignores_comments_and_blank_lines(self, tmp_path): | ||
| """Skips comments and blank lines in the ignore file.""" | ||
| (tmp_path / IGNORE_FILENAME).write_text("# comment\n\ncomponents/foo\n") | ||
| result = load_ignore_list(tmp_path) | ||
| result_normalized = {p.replace("\\", "/") for p in result} | ||
| assert result_normalized == {"components/foo"} | ||
|
|
||
| def test_missing_file_returns_empty(self, tmp_path): | ||
| """Returns empty set when ignore file does not exist.""" | ||
| assert load_ignore_list(tmp_path) == set() | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # check (integration) | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| class TestCheck: | ||
| """Integration tests for check.""" | ||
|
|
||
| def test_all_matched(self, tmp_path): | ||
| """Passes when all container files have a matrix entry.""" | ||
| _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11") | ||
| wf = _make_workflow(tmp_path, ["components/cat/comp"]) | ||
| all_matched, results = check(tmp_path, ["components"], wf) | ||
| assert all_matched | ||
| assert results[0]["status"] == "ok" | ||
|
|
||
| def test_unmatched_fails(self, tmp_path): | ||
| """Fails when a container file has no matrix entry.""" | ||
| _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11") | ||
| wf = _make_workflow(tmp_path, []) | ||
| all_matched, results = check(tmp_path, ["components"], wf) | ||
| assert not all_matched | ||
| assert results[0]["status"] == "unmatched" | ||
| assert "suggestion" in results[0] | ||
|
|
||
| def test_suggestion_contains_context(self, tmp_path): | ||
| """Suggestion includes the correct context path.""" | ||
| _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11") | ||
| wf = _make_workflow(tmp_path, []) | ||
| _, results = check(tmp_path, ["components"], wf) | ||
| suggestion_normalized = results[0]["suggestion"].replace("\\", "/") | ||
| assert "components/cat/comp" in suggestion_normalized | ||
|
|
||
| def test_ignored_path_is_skipped(self, tmp_path): | ||
| """Skips container files listed in the ignore file.""" | ||
| _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11") | ||
| (tmp_path / IGNORE_FILENAME).write_text("components/cat/comp\n") | ||
| wf = _make_workflow(tmp_path, []) | ||
| all_matched, results = check(tmp_path, ["components"], wf) | ||
| assert all_matched | ||
| assert results[0]["status"] == "ignored" | ||
|
|
||
| def test_subcategory_path_matched(self, tmp_path): | ||
| """Passes when a subcategory container file has a matrix entry.""" | ||
| _write(tmp_path / "components/cat/subcat/comp/Containerfile", "FROM python:3.11") | ||
| wf = _make_workflow(tmp_path, ["components/cat/subcat/comp"]) | ||
| all_matched, results = check(tmp_path, ["components"], wf) | ||
| assert all_matched | ||
|
|
||
| def test_missing_and_present_mixed(self, tmp_path): | ||
| """Reports ok and unmatched correctly when results are mixed.""" | ||
| _write(tmp_path / "components/cat/a/Containerfile", "FROM a") | ||
| _write(tmp_path / "components/cat/b/Containerfile", "FROM b") | ||
| wf = _make_workflow(tmp_path, ["components/cat/a"]) | ||
| all_matched, results = check(tmp_path, ["components"], wf) | ||
| assert not all_matched | ||
| statuses = {r["file"].replace("\\", "/").split("/")[-2]: r["status"] for r in results} | ||
| assert statuses["a"] == "ok" | ||
| assert statuses["b"] == "unmatched" | ||
|
|
||
| def test_no_container_files_passes(self, tmp_path): | ||
| """Passes when no container files are found.""" | ||
| wf = _make_workflow(tmp_path, []) | ||
| all_matched, results = check(tmp_path, ["components"], wf) | ||
| assert all_matched | ||
| assert results == [] | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.