diff --git a/.github/workflows/build-notebooks-pr.yaml b/.github/workflows/build-notebooks-pr.yaml index d04d90a6c8..fd93b545db 100644 --- a/.github/workflows/build-notebooks-pr.yaml +++ b/.github/workflows/build-notebooks-pr.yaml @@ -1,28 +1,40 @@ --- "name": "Build Notebooks" -"permissions": - "packages": "read" "on": "pull_request": +permissions: + contents: read + packages: read + pull-requests: read + jobs: gen: name: Generate job matrix runs-on: ubuntu-latest outputs: matrix: ${{ steps.gen.outputs.matrix }} + has_jobs: ${{ steps.gen.outputs.has_jobs }} steps: - uses: actions/checkout@v4 - - run: python3 ci/cached-builds/gen_gha_matrix_jobs.py + + - run: | + python3 ci/cached-builds/gen_gha_matrix_jobs.py \ + --owner=${{ github.repository_owner }} \ + --repo=${{ github.event.pull_request.base.repo.name }} \ + --pr-number=${{ github.event.pull_request.number }} \ + --skip-unchanged id: gen + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # base images build: needs: ["gen"] strategy: fail-fast: false matrix: "${{ fromJson(needs.gen.outputs.matrix) }}" uses: ./.github/workflows/build-notebooks-TEMPLATE.yaml + if: ${{ fromJson(needs.gen.outputs.has_jobs) }} with: target: "${{ matrix.target }}" github: "${{ toJSON(github) }}" diff --git a/Makefile b/Makefile index 83e9fcb50d..ac28bfe8c6 100644 --- a/Makefile +++ b/Makefile @@ -59,6 +59,7 @@ endef # ARG 2: Path of image context we want to build. # ARG 3: Base image tag name (optional). define image + $(info #*# Image build directory: <$(2)> #(MACHINE-PARSED LINE)#*#...) 
$(call build_image,$(1),$(2),$(3)) $(call push_image,$(1)) endef diff --git a/ci/cached-builds/gen_gha_matrix_jobs.py b/ci/cached-builds/gen_gha_matrix_jobs.py index 867577932b..7a4746275b 100644 --- a/ci/cached-builds/gen_gha_matrix_jobs.py +++ b/ci/cached-builds/gen_gha_matrix_jobs.py @@ -1,11 +1,17 @@ +import argparse import itertools import json +import logging import os import pathlib import re import string +import sys +import unittest from typing import Iterable +import gha_pr_changed_files + """Trivial Makefile parser that extracts target dependencies so that we can build each Dockerfile image target in its own GitHub Actions job and handle dependencies between them. @@ -115,11 +121,13 @@ def write_github_workflow_file(tree: dict[str, list[str]], path: pathlib.Path) - def flatten(list_of_lists): return list(itertools.chain.from_iterable(list_of_lists)) + def compute_leafs_in_dependency_tree(tree: dict[str, list[str]]) -> list[str]: key_set = set(tree.keys()) value_set = set(flatten(tree.values())) return [key for key in key_set if key not in value_set] + def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str]) -> list[str]: """Outputs GitHub matrix definition Json """ @@ -136,10 +144,24 @@ def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str]) targets.append(leaf) matrix = {"target": targets} - return [f"matrix={json.dumps(matrix, separators=(',', ':'))}"] + return [f"matrix={json.dumps(matrix, separators=(',', ':'))}", + f"has_jobs={json.dumps(len(leafs) > 0, separators=(',', ':'))}"] def main() -> None: + logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) + + argparser = argparse.ArgumentParser() + argparser.add_argument("--owner", type=str, required=False, + help="GitHub repo owner/org (for the --skip-unchanged feature)") + argparser.add_argument("--repo", type=str, required=False, + help="GitHub repo name (for the --skip-unchanged feature)") + argparser.add_argument("--pr-number", type=int, 
required=False, + help="PR number under owner/repo (for the --skip-unchanged feature)") + argparser.add_argument("--skip-unchanged", required=False, default=False, + action=argparse.BooleanOptionalAction) + args = argparser.parse_args() + # https://www.gnu.org/software/make/manual/make.html#Reading-Makefiles with open("Makefile", "rt") as makefile: lines = read_makefile_lines(makefile) @@ -148,6 +170,10 @@ def main() -> None: write_github_workflow_file(tree, project_dir / ".github" / "workflows" / "build-notebooks.yaml") leafs = compute_leafs_in_dependency_tree(tree) + if args.skip_unchanged: + logging.info(f"Skipping targets not modified in PR #{args.pr_number}") + changed_files = gha_pr_changed_files.list_changed_files(args.owner, args.repo, args.pr_number) + leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files) output = print_github_actions_pr_matrix(tree, leafs) print("leafs", leafs) @@ -159,3 +185,18 @@ def main() -> None: if __name__ == '__main__': main() + + +class SelfTests(unittest.TestCase): + def test_select_changed_targets(self): + with open(project_dir / "Makefile", "rt") as makefile: + lines = read_makefile_lines(makefile) + tree = extract_target_dependencies(lines) + leafs = compute_leafs_in_dependency_tree(tree) + + changed_files = ["jupyter/datascience/ubi9-python-3.9/Dockerfile"] + + leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files) + assert set(leafs) == {'cuda-jupyter-tensorflow-ubi9-python-3.9', + 'jupyter-trustyai-ubi9-python-3.9', + 'jupyter-pytorch-ubi9-python-3.9'} diff --git a/ci/cached-builds/gha_pr_changed_files.py b/ci/cached-builds/gha_pr_changed_files.py new file mode 100644 index 0000000000..1f3ab56e15 --- /dev/null +++ b/ci/cached-builds/gha_pr_changed_files.py @@ -0,0 +1,126 @@ +import json +import logging +import os +import pathlib +import re +import subprocess +import unittest +import urllib.request + +PROJECT_ROOT = pathlib.Path(__file__).parent.parent.parent.resolve() + + +def 
get_github_token() -> str: + github_token = os.environ['GITHUB_TOKEN'] + return github_token + + +# https://docs.github.com/en/graphql/guides/forming-calls-with-graphql +def compose_gh_api_request(pull_number: int, owner="opendatahub-io", repo="notebooks", per_page=100, + cursor="") -> urllib.request.Request: + github_token = get_github_token() + + return urllib.request.Request( + url="https://api.github.com/graphql", + method="POST", + headers={ + "Authorization": f"bearer {github_token}", + }, + # https://docs.github.com/en/graphql/guides/using-the-explorer + data=json.dumps({"query": f""" +{{ + repository(owner:"{owner}", name:"{repo}") {{ + pullRequest(number:{pull_number}) {{ + files(first:{per_page}, after:"{cursor}") {{ + edges {{ + node {{ + path + }} + cursor + }} + }} + }} + }} +}} + """}).encode("utf-8"), + ) + + +def list_changed_files(owner: str, repo: str, pr_number: int, per_page=100) -> list[str]: + files = [] + + logging.debug("Getting list of changed files from GitHub API") + + CURSOR = "" + while CURSOR is not None: + request = compose_gh_api_request(pull_number=pr_number, owner=owner, repo=repo, per_page=per_page, + cursor=CURSOR) + response = urllib.request.urlopen(request) + data = json.loads(response.read().decode("utf-8")) + response.close() + edges = data["data"]["repository"]["pullRequest"]["files"]["edges"] + + CURSOR = None + for edge in edges: + files.append(edge["node"]["path"]) + CURSOR = edge["cursor"] + + logging.debug(f"Determined {len(files)} changed files: {files[:5]} (..., printing up to 5)") + return files + + +def analyze_build_directories(make_target) -> list[str]: + directories = [] + + pattern = re.compile(r"#\*# Image build directory: <(?P