Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .container-build-ignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# List directories containing Containerfiles/Dockerfiles that should NOT
# be validated against the container-build matrix (e.g. local-dev only)
#
# One path per line, relative to the repo root.
#
# Example:
# docs/examples/local-dev
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#!/usr/bin/env python3
"""Check that every Containerfile/Dockerfile has a matching entry in container-build.yml.

This ensures contributors do not add a Containerfile without registering it in
the container-build matrix, which would cause CI to silently skip building it.
"""

import argparse
import sys
from pathlib import Path

import yaml

# Directories (relative to the repo root) that are scanned for container files.
SEARCH_ROOTS = ["components", "pipelines", "docs/examples"]
# File names treated as container build definitions.
CONTAINER_FILENAMES = {"Containerfile", "Dockerfile"}
# Optional file at the repo root listing directories to exclude from the check.
IGNORE_FILENAME = ".container-build-ignore"
# Workflow whose build matrix must register every discovered container file.
WORKFLOW_PATH = ".github/workflows/container-build.yml"


def get_repo_root(start: "Path | None" = None) -> Path:
    """Locate and return the repository root directory.

    Walks up the ancestors of *start* until a directory containing a
    ``.github`` entry is found.  Note: only strict ancestors are checked,
    so passing the repo root itself will not match (original behavior kept).

    Args:
        start: Path to begin the search from.  Defaults to this script's
            own location, preserving the original zero-argument behavior.

    Returns:
        The resolved repository root path.

    Raises:
        RuntimeError: If no ancestor directory contains ``.github``.
    """
    path = (start if start is not None else Path(__file__)).resolve()
    for parent in path.parents:
        if (parent / ".github").exists():
            return parent
    raise RuntimeError("Could not locate repo root")


def discover_container_files(repo_root: Path, search_roots: list[str]) -> list[Path]:
    """Recursively collect every Containerfile/Dockerfile under the given roots.

    Args:
        repo_root: Absolute path of the repository root.
        search_roots: Directory names (relative to repo_root) to scan;
            roots that do not exist are silently skipped.

    Returns:
        A sorted list of matching file paths.
    """
    matches: list[Path] = []
    for root_name in search_roots:
        root_dir = repo_root / root_name
        if not root_dir.exists():
            continue
        for filename in CONTAINER_FILENAMES:
            matches.extend(root_dir.rglob(filename))
    return sorted(matches)


def load_ignore_list(repo_root: Path) -> set[str]:
    """Load directories to ignore from IGNORE_FILENAME at the repo root.

    Each non-empty, non-comment line is treated as a path relative to the
    repo root (e.g. components/foo/bar) and normalized via Path so that
    separators match the host platform.

    Returns:
        The set of normalized ignored directory strings (empty if the
        ignore file does not exist).
    """
    ignore_file = repo_root / IGNORE_FILENAME
    if not ignore_file.exists():
        return set()
    stripped = (raw.strip() for raw in ignore_file.read_text().splitlines())
    return {str(Path(entry)) for entry in stripped if entry and not entry.startswith("#")}


def parse_matrix_contexts(workflow_path: Path) -> set[str]:
    """Extract context values from jobs.build.strategy.matrix.include.

    Robust against malformed workflow files: an empty YAML document
    (safe_load returns None) and non-dict include entries (e.g. a bare
    string) are tolerated instead of raising.

    Args:
        workflow_path: Path to the container-build workflow YAML file.

    Returns:
        The set of normalized ``context`` path strings found in the matrix
        (empty when the matrix/include structure is missing or malformed).
    """
    with open(workflow_path) as f:
        # An empty file parses to None; fall back to {} so .get chains work.
        workflow = yaml.safe_load(f) or {}

    includes = workflow.get("jobs", {}).get("build", {}).get("strategy", {}).get("matrix", {}).get("include", [])
    if not isinstance(includes, list):
        return set()

    contexts = set()
    for entry in includes:
        # Skip malformed entries (e.g. bare strings) rather than raising.
        if isinstance(entry, dict) and "context" in entry:
            contexts.add(str(Path(entry["context"])))
    return contexts


def check(
    repo_root: Path,
    search_roots: list[str],
    workflow_path: Path,
) -> tuple[bool, list[dict]]:
    """Run the matrix check and return (all_matched, results).

    Each result dict has:
        - file: str path relative to repo root
        - status: "ok" | "unmatched" | "ignored"
        - suggestion: str (only present when status == "unmatched")
    """
    matrix_contexts = parse_matrix_contexts(workflow_path)
    ignore_list = load_ignore_list(repo_root)

    results: list[dict] = []
    all_matched = True

    for container_file in discover_container_files(repo_root, search_roots):
        rel_file = str(container_file.relative_to(repo_root))
        rel_dir = str(container_file.parent.relative_to(repo_root))

        if rel_dir in ignore_list:
            # Explicitly opted out via the ignore file: never fails the check.
            results.append({"file": rel_file, "status": "ignored"})
        elif rel_dir in matrix_contexts:
            results.append({"file": rel_file, "status": "ok"})
        else:
            all_matched = False
            results.append(
                {
                    "file": rel_file,
                    "status": "unmatched",
                    # Ready-to-paste YAML snippet for the workflow matrix.
                    "suggestion": f" - context: {rel_dir}\n dockerfile: {rel_file}",
                }
            )

    return all_matched, results


def _print_results(results: list[dict], all_matched: bool, workflow_path: str, use_emoji: bool = True) -> None:
    """Render check() results to stdout.

    Args:
        results: Result dicts produced by check().
        all_matched: Overall pass/fail flag from check().
        workflow_path: Workflow path string used in the messages.
        use_emoji: When False, substitute ASCII tags for the emoji icons.
    """
    if use_emoji:
        check_icon, ok_icon, skip_icon, fail_icon = "🔍", "✅", "⏭️", "❌"
    else:
        check_icon, ok_icon, skip_icon, fail_icon = "[CHECK]", "[OK]", "[SKIP]", "[FAIL]"

    print(f"{check_icon} Checking container build matrix entries in {workflow_path}...")

    unmatched_rows = [row for row in results if row["status"] == "unmatched"]
    ignored_rows = [row for row in results if row["status"] == "ignored"]
    ok_rows = [row for row in results if row["status"] == "ok"]

    if ignored_rows:
        print()
        for row in ignored_rows:
            print(f" {skip_icon} {row['file']} (ignored via {IGNORE_FILENAME})")

    if all_matched:
        print()
        print(f"{ok_icon} All Containerfiles/Dockerfiles have a matching matrix entry ({len(ok_rows)} checked)")
        return

    # Failure path: list every offender, then print paste-ready matrix entries.
    print()
    for row in unmatched_rows:
        print(f" {fail_icon} {row['file']} has no matching entry in the container-build matrix")

    print()
    print(f"{fail_icon} Some Containerfiles/Dockerfiles are not registered in the container-build matrix.")
    print(f" Add the following entries to the strategy.matrix.include section of {workflow_path}:")
    print()
    for row in unmatched_rows:
        print(row["suggestion"])
    print()
    print(" See docs on Adding a Custom Base Image for details.")


def main() -> int:
    """CLI entry point.

    Returns:
        0 when every container file is registered (or ignored), 1 when at
        least one is unmatched, 2 when the workflow file itself is missing.
    """
    parser = argparse.ArgumentParser(
        description="Check that every Containerfile/Dockerfile has a matching container-build matrix entry"
    )
    parser.add_argument("--repo-root", default=None, help="Path to the repository root")
    parser.add_argument("--workflow", default=WORKFLOW_PATH, help="Path to container-build workflow")
    parser.add_argument("--search-roots", nargs="+", default=SEARCH_ROOTS, help="Directories to scan")
    parser.add_argument("--no-emoji", action="store_true", help="Disable emoji output")
    args = parser.parse_args()

    repo_root = Path(args.repo_root).resolve() if args.repo_root else get_repo_root()
    workflow_path = repo_root / args.workflow

    if not workflow_path.exists():
        # Honor --no-emoji here too; previously this message always used ❌.
        fail_icon = "[FAIL]" if args.no_emoji else "❌"
        print(f"{fail_icon} Workflow file not found: {workflow_path}")
        return 2  # distinct exit code for missing workflow

    all_matched, results = check(repo_root, args.search_roots, workflow_path)
    _print_results(results, all_matched, args.workflow, use_emoji=not args.no_emoji)
    return 0 if all_matched else 1


if __name__ == "__main__":
    # Propagate main()'s exit code (0 ok, 1 unmatched, 2 missing workflow) to the shell.
    sys.exit(main())
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
"""Unit tests for check_container_build_matrix."""

import textwrap
from pathlib import Path

from ..check_container_build_matrix import (
IGNORE_FILENAME,
check,
discover_container_files,
load_ignore_list,
parse_matrix_contexts,
)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _write(path: Path, content: str) -> Path:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(textwrap.dedent(content))
return path


def _make_workflow(tmp_path: Path, contexts: list[str]) -> Path:
    """Create a minimal container-build workflow file under *tmp_path*.

    Each entry in *contexts* becomes one ``- context: .../dockerfile: ...``
    item in jobs.build.strategy.matrix.include, matching the structure
    parse_matrix_contexts() reads.

    Returns the path of the written workflow file.
    """
    # NOTE(review): the indentation inside these literals must yield valid
    # nested YAML after _write() dedents the content — confirm against the
    # committed file; the rendered diff may have collapsed whitespace here.
    includes = "\n".join(f" - context: {ctx}\n dockerfile: {ctx}/Containerfile" for ctx in contexts)
    content = f"""\
jobs:
build:
strategy:
matrix:
include:
{includes}
"""
    return _write(tmp_path / ".github/workflows/container-build.yml", content)


# ---------------------------------------------------------------------------
# discover_container_files
# ---------------------------------------------------------------------------


class TestDiscoverContainerFiles:
    """Tests for discover_container_files."""

    def test_finds_containerfile(self, tmp_path):
        """Finds a Containerfile under a search root."""
        _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11")
        found = discover_container_files(tmp_path, ["components"])
        assert any("Containerfile" in str(path) for path in found)

    def test_finds_dockerfile(self, tmp_path):
        """Finds a Dockerfile under a search root."""
        _write(tmp_path / "components/cat/comp/Dockerfile", "FROM python:3.11")
        found = discover_container_files(tmp_path, ["components"])
        assert any("Dockerfile" in str(path) for path in found)

    def test_finds_nested_subcategory(self, tmp_path):
        """Finds a Containerfile nested under a subcategory."""
        _write(tmp_path / "components/cat/subcat/comp/Containerfile", "FROM python:3.11")
        found = discover_container_files(tmp_path, ["components"])
        assert len(found) == 1

    def test_missing_root_is_skipped(self, tmp_path):
        """Returns empty list when search root does not exist."""
        assert discover_container_files(tmp_path, ["nonexistent"]) == []

    def test_multiple_roots(self, tmp_path):
        """Finds container files across multiple search roots."""
        _write(tmp_path / "components/cat/a/Containerfile", "FROM a")
        _write(tmp_path / "pipelines/cat/b/Containerfile", "FROM b")
        found = discover_container_files(tmp_path, ["components", "pipelines"])
        assert len(found) == 2


# ---------------------------------------------------------------------------
# parse_matrix_contexts
# ---------------------------------------------------------------------------


class TestParseMatrixContexts:
    """Tests for parse_matrix_contexts."""

    def test_extracts_contexts(self, tmp_path):
        """Extracts context paths from the matrix workflow file."""
        workflow = _make_workflow(tmp_path, ["components/cat/comp", "pipelines/cat/pipe"])
        parsed = parse_matrix_contexts(workflow)
        # Normalize Windows separators before comparing.
        normalized = {ctx.replace("\\", "/") for ctx in parsed}
        assert "components/cat/comp" in normalized
        assert "pipelines/cat/pipe" in normalized

    def test_empty_matrix(self, tmp_path):
        """Returns empty set when matrix include list is empty."""
        workflow = _write(
            tmp_path / ".github/workflows/container-build.yml",
            """\
jobs:
build:
strategy:
matrix:
include: []
""",
        )
        assert parse_matrix_contexts(workflow) == set()

    def test_missing_matrix_key(self, tmp_path):
        """Returns empty set when matrix key is missing."""
        workflow = _write(
            tmp_path / ".github/workflows/container-build.yml",
            "jobs:\n build:\n steps: []\n",
        )
        assert parse_matrix_contexts(workflow) == set()


# ---------------------------------------------------------------------------
# load_ignore_list
# ---------------------------------------------------------------------------


class TestLoadIgnoreList:
    """Tests for load_ignore_list."""

    def test_loads_paths(self, tmp_path):
        """Loads paths from the ignore file."""
        (tmp_path / IGNORE_FILENAME).write_text("components/foo/bar\npipelines/baz\n")
        loaded = load_ignore_list(tmp_path)
        normalized = {entry.replace("\\", "/") for entry in loaded}
        assert "components/foo/bar" in normalized
        assert "pipelines/baz" in normalized

    def test_ignores_comments_and_blank_lines(self, tmp_path):
        """Skips comments and blank lines in the ignore file."""
        (tmp_path / IGNORE_FILENAME).write_text("# comment\n\ncomponents/foo\n")
        loaded = load_ignore_list(tmp_path)
        normalized = {entry.replace("\\", "/") for entry in loaded}
        assert normalized == {"components/foo"}

    def test_missing_file_returns_empty(self, tmp_path):
        """Returns empty set when ignore file does not exist."""
        assert load_ignore_list(tmp_path) == set()


# ---------------------------------------------------------------------------
# check (integration)
# ---------------------------------------------------------------------------


class TestCheck:
    """Integration tests for check."""

    def test_all_matched(self, tmp_path):
        """Passes when all container files have a matrix entry."""
        _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11")
        workflow = _make_workflow(tmp_path, ["components/cat/comp"])
        matched, rows = check(tmp_path, ["components"], workflow)
        assert matched
        assert rows[0]["status"] == "ok"

    def test_unmatched_fails(self, tmp_path):
        """Fails when a container file has no matrix entry."""
        _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11")
        workflow = _make_workflow(tmp_path, [])
        matched, rows = check(tmp_path, ["components"], workflow)
        assert not matched
        first = rows[0]
        assert first["status"] == "unmatched"
        assert "suggestion" in first

    def test_suggestion_contains_context(self, tmp_path):
        """Suggestion includes the correct context path."""
        _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11")
        workflow = _make_workflow(tmp_path, [])
        _, rows = check(tmp_path, ["components"], workflow)
        assert "components/cat/comp" in rows[0]["suggestion"].replace("\\", "/")

    def test_ignored_path_is_skipped(self, tmp_path):
        """Skips container files listed in the ignore file."""
        _write(tmp_path / "components/cat/comp/Containerfile", "FROM python:3.11")
        (tmp_path / IGNORE_FILENAME).write_text("components/cat/comp\n")
        workflow = _make_workflow(tmp_path, [])
        matched, rows = check(tmp_path, ["components"], workflow)
        assert matched
        assert rows[0]["status"] == "ignored"

    def test_subcategory_path_matched(self, tmp_path):
        """Passes when a subcategory container file has a matrix entry."""
        _write(tmp_path / "components/cat/subcat/comp/Containerfile", "FROM python:3.11")
        workflow = _make_workflow(tmp_path, ["components/cat/subcat/comp"])
        matched, _ = check(tmp_path, ["components"], workflow)
        assert matched

    def test_missing_and_present_mixed(self, tmp_path):
        """Reports ok and unmatched correctly when results are mixed."""
        _write(tmp_path / "components/cat/a/Containerfile", "FROM a")
        _write(tmp_path / "components/cat/b/Containerfile", "FROM b")
        workflow = _make_workflow(tmp_path, ["components/cat/a"])
        matched, rows = check(tmp_path, ["components"], workflow)
        assert not matched
        status_by_dir = {}
        for row in rows:
            # Key each result by the component directory name (parent of the file).
            leaf = row["file"].replace("\\", "/").split("/")[-2]
            status_by_dir[leaf] = row["status"]
        assert status_by_dir["a"] == "ok"
        assert status_by_dir["b"] == "unmatched"

    def test_no_container_files_passes(self, tmp_path):
        """Passes when no container files are found."""
        workflow = _make_workflow(tmp_path, [])
        matched, rows = check(tmp_path, ["components"], workflow)
        assert matched
        assert rows == []
Loading
Loading