Skip to content

Commit 91ca7b0

Browse files
committed
Limit PR checks to build only the modified images
1 parent 2c36c11 commit 91ca7b0

File tree

4 files changed

+147
-3
lines changed

4 files changed

+147
-3
lines changed

.github/workflows/build-notebooks-pr.yaml

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
---
22
"name": "Build Notebooks"
3-
"permissions":
4-
"packages": "read"
53
"on":
64
"pull_request":
75

6+
permissions:
7+
contents: read
8+
packages: read
9+
pull-requests: read
10+
811
jobs:
912
gen:
1013
name: Generate job matrix
@@ -13,8 +16,16 @@ jobs:
1316
matrix: ${{ steps.gen.outputs.matrix }}
1417
steps:
1518
- uses: actions/checkout@v4
16-
- run: python3 ci/cached-builds/gen_gha_matrix_jobs.py
19+
20+
- run: |
21+
python3 ci/cached-builds/gen_gha_matrix_jobs.py \
22+
--owner=${{ github.repository_owner }} \
23+
--repo=${{ github.event.pull_request.base.repo.name }} \
24+
--pr-number=${{ github.event.pull_request.number }} \
25+
--skip-unchanged
1726
id: gen
27+
env:
28+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1829
1930
# base images
2031
build:

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ endef
5959
# ARG 2: Path of image context we want to build.
6060
# ARG 3: Base image tag name (optional).
6161
define image
62+
$(info #*# Image build directory: <$(2)> #(MACHINE-PARSED LINE)#*#...)
6263
$(call build_image,$(1),$(2),$(3))
6364
$(call push_image,$(1))
6465
endef

ci/cached-builds/gen_gha_matrix_jobs.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
1+
import argparse
12
import itertools
23
import json
4+
import logging
35
import os
46
import pathlib
57
import re
68
import string
9+
import sys
710
from typing import Iterable
811

12+
import gha_pr_changed_files
13+
914
"""Trivial Makefile parser that extracts target dependencies so that we can build each Dockerfile image target in its
1015
own GitHub Actions job and handle dependencies between them.
1116
@@ -140,6 +145,15 @@ def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str])
140145

141146

142147
def main() -> None:
148+
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
149+
150+
argparser = argparse.ArgumentParser()
151+
argparser.add_argument("--owner", type=str, required=False, help="GitHub repo owner/org (for the --skip-unchanged feature)")
152+
argparser.add_argument("--repo", type=str, required=False, help="GitHub repo name (for the --skip-unchanged feature)")
153+
argparser.add_argument("--pr-number", type=int, required=False, help="PR number under owner/repo (for the --skip-unchanged feature)")
154+
argparser.add_argument("--skip-unchanged", type=bool, required=False, default=False, action=argparse.BooleanOptionalAction)
155+
args = argparser.parse_args()
156+
143157
# https://www.gnu.org/software/make/manual/make.html#Reading-Makefiles
144158
with open("Makefile", "rt") as makefile:
145159
lines = read_makefile_lines(makefile)
@@ -148,6 +162,9 @@ def main() -> None:
148162
write_github_workflow_file(tree, project_dir / ".github" / "workflows" / "build-notebooks.yaml")
149163

150164
leafs = compute_leafs_in_dependency_tree(tree)
165+
if args.skip_unchanged:
166+
logging.info(f"Skipping targets not modified in PR #{args.pr_number}")
167+
leafs = gha_pr_changed_files.filter_out_unchanged(args.owner, args.repo, args.pr_number, leafs)
151168
output = print_github_actions_pr_matrix(tree, leafs)
152169

153170
print("leafs", leafs)
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import json
2+
import logging
3+
import os
4+
import pathlib
5+
import re
6+
import subprocess
7+
import unittest
8+
import urllib.request
9+
10+
PROJECT_ROOT = pathlib.Path(__file__).parent.parent.parent.resolve()
11+
12+
13+
def get_github_token() -> str:
    """Return the GitHub API token read from the ``GITHUB_TOKEN`` environment variable.

    Raises:
        KeyError: if ``GITHUB_TOKEN`` is not present in the environment.
    """
    return os.environ["GITHUB_TOKEN"]
16+
17+
18+
# https://docs.github.com/en/graphql/guides/forming-calls-with-graphql
19+
def compose_gh_api_request(owner="opendatahub-io", repo="notebooks", pull_number=556, per_page=100, cursor=""):
    """Build the GitHub GraphQL request that lists one page of files changed in a pull request.

    The values are sent as GraphQL variables rather than being spliced into the
    query text, so quotes or backslashes in any of them cannot break the query.

    Args:
        owner: GitHub user or organization owning the repository.
        repo: Repository name.
        pull_number: Number of the pull request within ``owner/repo``.
        per_page: How many file entries to request in this page.
        cursor: Cursor of the last entry already received; an empty string
            requests the first page.

    Returns:
        A ``urllib.request.Request`` ready to be passed to ``urllib.request.urlopen``.
        The response keeps the ``data.repository.pullRequest.files.edges[].{node.path, cursor}`` shape.

    Raises:
        KeyError: if the ``GITHUB_TOKEN`` environment variable is not set.
    """
    github_token = get_github_token()

    # https://docs.github.com/en/graphql/guides/using-the-explorer
    query = """
query($owner: String!, $repo: String!, $number: Int!, $first: Int!, $after: String) {
  repository(owner: $owner, name: $repo) {
    pullRequest(number: $number) {
      files(first: $first, after: $after) {
        edges {
          node {
            path
          }
          cursor
        }
      }
    }
  }
}
"""
    variables = {
        "owner": owner,
        "repo": repo,
        "number": pull_number,
        "first": per_page,
        # a null cursor asks for the first page
        "after": cursor or None,
    }

    return urllib.request.Request(
        url="https://api.github.com/graphql",
        method="POST",
        headers={
            "Authorization": f"bearer {github_token}",
            "Content-Type": "application/json",
        },
        data=json.dumps({"query": query, "variables": variables}).encode("utf-8"),
    )
46+
47+
48+
def list_changed_files(owner: str, repo: str, pr_number: int, per_page=100) -> list[str]:
    """Return the paths of all files changed in a pull request, following pagination.

    Pages are requested until the API returns a page with no entries.

    Args:
        owner: GitHub user or organization owning the repository.
        repo: Repository name.
        pr_number: Number of the pull request within ``owner/repo``.
        per_page: Number of entries requested per API call.

    Returns:
        Changed file paths in the order returned by the API.

    Raises:
        RuntimeError: if the GraphQL response carries an ``errors`` payload.
        urllib.error.URLError: if the HTTP request itself fails.
    """
    files = []

    logging.debug("Getting list of changed files from GitHub API")

    cursor = ""
    while cursor is not None:
        request = compose_gh_api_request(owner, repo, pull_number=pr_number, per_page=per_page, cursor=cursor)
        # the context manager closes the connection even when reading or decoding fails
        with urllib.request.urlopen(request) as response:
            data = json.loads(response.read().decode("utf-8"))

        # GraphQL reports failures in the body, often alongside HTTP 200
        if data.get("errors"):
            raise RuntimeError(f"GitHub GraphQL API returned errors: {data['errors']}")

        edges = data["data"]["repository"]["pullRequest"]["files"]["edges"]
        files.extend(edge["node"]["path"] for edge in edges)
        # continue after the last entry of this page; an empty page ends the loop
        cursor = edges[-1]["cursor"] if edges else None

    logging.debug(f"Determined {len(files)} changed files: {files[:5]} (..., printing up to 5)")
    return files
68+
69+
70+
def analyze_build_directories(make_target) -> list[str]:
    """Return the image build directories that ``make_target`` depends on.

    Runs ``make <target> --just-print`` in the project root and collects the
    directory from every machine-parsed ``Image build directory`` marker line
    found in its standard output.

    Args:
        make_target: Name of the Makefile target to inspect.

    Returns:
        Directories in the order the marker lines were printed.

    Raises:
        subprocess.CalledProcessError: if ``make`` exits with a non-zero status.
    """
    pattern = re.compile(r"#\*# Image build directory: <(?P<dir>[^>]+)> #\(MACHINE-PARSED LINE\)#\*#\.\.\.")

    logging.debug(f"Running make in --just-print mode for target {make_target}")
    try:
        output = subprocess.check_output(["make", make_target, "--just-print"], encoding="utf-8",
                                         cwd=PROJECT_ROOT)
    except subprocess.CalledProcessError as e:
        # stderr is not captured (it goes straight to the console), so only stdout is available here
        logging.error(f"make --just-print failed for target {make_target} "
                      f"with exit code {e.returncode}; output:\n{e.output}")
        raise

    directories = [m["dir"] for line in output.splitlines() if (m := pattern.match(line))]

    logging.debug(f"Target {make_target} builds from directories: {directories}")
    return directories
86+
87+
88+
def _is_path_within(path: str, directory: str) -> bool:
    """Tell whether ``path`` is ``directory`` itself or lies somewhere below it."""
    if not directory or directory.endswith("/"):
        # an empty or slash-terminated directory is already an unambiguous prefix
        return path.startswith(directory)
    # require a path-separator boundary so "a/b" does not match "a/b-extra/file"
    return path == directory or path.startswith(directory + "/")


def should_build_target(changed_files: list[str], target_directories: list[str]) -> bool:
    """Decide whether a target needs building because one of its directories was touched.

    Args:
        changed_files: Repository-relative paths of files changed in the pull request.
        target_directories: Repository-relative directories the target is built from.

    Returns:
        True if at least one changed file is located in (or under) at least one
        of the target directories, False otherwise.
    """
    return any(
        _is_path_within(changed_file, directory)
        for directory in target_directories
        for changed_file in changed_files
    )
93+
94+
95+
def filter_out_unchanged(owner: str, repo: str, pr_number: int, targets: list[str]) -> list[str]:
    """Keep only the targets whose build directories were touched by the pull request.

    Args:
        owner: GitHub user or organization owning the repository.
        repo: Repository name.
        pr_number: Number of the pull request within ``owner/repo``.
        targets: Makefile target names to consider.

    Returns:
        The subset of ``targets``, in the original order, for which at least one
        changed file lies in one of the target's build directories.
    """
    changed_files = list_changed_files(owner, repo, pr_number)

    kept = []
    for target in targets:
        build_dirs = analyze_build_directories(target)
        if should_build_target(changed_files, target_directories=build_dirs):
            kept.append(target)
    return kept
99+
100+
101+
class SelTestsTest(unittest.TestCase):
    """Self-tests for this module.

    These are not hermetic: they need ``GITHUB_TOKEN`` in the environment, and
    the tests below call the GitHub API or run ``make`` in the project checkout.
    """

    def test_compose_gh_api_request__call_without_asserting(self):
        # smoke test only: building the request must not raise
        request = compose_gh_api_request(pull_number=556, per_page=100, cursor="")
        print(request.data)

    def test_list_changed_files__pagination_works(self):
        # per_page=1 forces one API call per changed file, exercising the cursor handling
        changed_files = list_changed_files("opendatahub-io", "notebooks", 556, per_page=1)
        self.assertEqual(changed_files, ['codeserver/ubi9-python-3.9/Dockerfile',
                                         'codeserver/ubi9-python-3.9/run-code-server.sh'])

    def test_analyze_build_directories(self):
        directories = analyze_build_directories("jupyter-intel-pytorch-ubi9-python-3.9")
        self.assertEqual(directories, ["base/ubi9-python-3.9",
                                       "intel/base/gpu/ubi9-python-3.9",
                                       "jupyter/intel/pytorch/ubi9-python-3.9"])

0 commit comments

Comments
 (0)