
Commit 7bfbf32

Limit PR checks to build only the modified images (#558)
1 parent ec6c84e

4 files changed (+185, −5 lines)

.github/workflows/build-notebooks-pr.yaml
16 additions, 4 deletions

@@ -1,28 +1,40 @@
 ---
 "name": "Build Notebooks"
-"permissions":
-  "packages": "read"
 "on":
   "pull_request":
 
+permissions:
+  contents: read
+  packages: read
+  pull-requests: read
+
 jobs:
   gen:
     name: Generate job matrix
     runs-on: ubuntu-latest
     outputs:
       matrix: ${{ steps.gen.outputs.matrix }}
+      has_jobs: ${{ steps.gen.outputs.has_jobs }}
     steps:
       - uses: actions/checkout@v4
-      - run: python3 ci/cached-builds/gen_gha_matrix_jobs.py
+
+      - run: |
+          python3 ci/cached-builds/gen_gha_matrix_jobs.py \
+            --owner=${{ github.repository_owner }} \
+            --repo=${{ github.event.pull_request.base.repo.name }} \
+            --pr-number=${{ github.event.pull_request.number }} \
+            --skip-unchanged
         id: gen
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
-  # base images
   build:
     needs: ["gen"]
     strategy:
       fail-fast: false
       matrix: "${{ fromJson(needs.gen.outputs.matrix) }}"
     uses: ./.github/workflows/build-notebooks-TEMPLATE.yaml
+    if: ${{ fromJson(needs.gen.outputs.has_jobs) }}
    with:
       target: "${{ matrix.target }}"
       github: "${{ toJSON(github) }}"

Makefile
1 addition, 0 deletions

@@ -59,6 +59,7 @@ endef
 # ARG 2: Path of image context we want to build.
 # ARG 3: Base image tag name (optional).
 define image
+	$(info #*# Image build directory: <$(2)> #(MACHINE-PARSED LINE)#*#...)
 	$(call build_image,$(1),$(2),$(3))
 	$(call push_image,$(1))
 endef
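This `$(info ...)` marker lets tooling discover which directories a target's build reads from without actually building anything: `make <target> --just-print` echoes the marker for every image in the dependency chain, and a regex recovers the directory. That is what `analyze_build_directories()` in the new `gha_pr_changed_files` module (shown below) does; a condensed sketch with an illustrative target name:

import re
import subprocess

# Dry-run make so recipes are printed, not executed, then collect the
# machine-parsed marker lines emitted by the `image` macro.
pattern = re.compile(r"#\*# Image build directory: <(?P<dir>[^>]+)> #\(MACHINE-PARSED LINE\)#\*#\.\.\.")
output = subprocess.check_output(["make", "base-ubi9-python-3.9", "--just-print"], encoding="utf-8")
directories = [m["dir"] for line in output.splitlines() if (m := pattern.match(line))]
print(directories)  # e.g. ['base/ubi9-python-3.9']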

ci/cached-builds/gen_gha_matrix_jobs.py
42 additions, 1 deletion

@@ -1,11 +1,17 @@
+import argparse
 import itertools
 import json
+import logging
 import os
 import pathlib
 import re
 import string
+import sys
+import unittest
 from typing import Iterable
 
+import gha_pr_changed_files
+
 """Trivial Makefile parser that extracts target dependencies so that we can build each Dockerfile image target in its
 own GitHub Actions job and handle dependencies between them.

@@ -115,11 +121,13 @@ def write_github_workflow_file(tree: dict[str, list[str]], path: pathlib.Path) -
 def flatten(list_of_lists):
     return list(itertools.chain.from_iterable(list_of_lists))
 
+
 def compute_leafs_in_dependency_tree(tree: dict[str, list[str]]) -> list[str]:
     key_set = set(tree.keys())
     value_set = set(flatten(tree.values()))
     return [key for key in key_set if key not in value_set]
 
+
 def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str]) -> list[str]:
     """Outputs GitHub matrix definition Json
     """

@@ -136,10 +144,24 @@ def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str])
         targets.append(leaf)
 
     matrix = {"target": targets}
-    return [f"matrix={json.dumps(matrix, separators=(',', ':'))}"]
+    return [f"matrix={json.dumps(matrix, separators=(',', ':'))}",
+            f"has_jobs={json.dumps(len(leafs) > 0, separators=(',', ':'))}"]
 
 
 def main() -> None:
+    logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
+
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument("--owner", type=str, required=False,
+                           help="GitHub repo owner/org (for the --skip-unchanged feature)")
+    argparser.add_argument("--repo", type=str, required=False,
+                           help="GitHub repo name (for the --skip-unchanged feature)")
+    argparser.add_argument("--pr-number", type=int, required=False,
+                           help="PR number under owner/repo (for the --skip-unchanged feature)")
+    argparser.add_argument("--skip-unchanged", type=bool, required=False, default=False,
+                           action=argparse.BooleanOptionalAction)
+    args = argparser.parse_args()
+
     # https://www.gnu.org/software/make/manual/make.html#Reading-Makefiles
     with open("Makefile", "rt") as makefile:
         lines = read_makefile_lines(makefile)

@@ -148,6 +170,10 @@ def main() -> None:
     write_github_workflow_file(tree, project_dir / ".github" / "workflows" / "build-notebooks.yaml")
 
     leafs = compute_leafs_in_dependency_tree(tree)
+    if args.skip_unchanged:
+        logging.info(f"Skipping targets not modified in PR #{args.pr_number}")
+        changed_files = gha_pr_changed_files.list_changed_files(args.owner, args.repo, args.pr_number)
+        leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files)
     output = print_github_actions_pr_matrix(tree, leafs)
 
     print("leafs", leafs)

@@ -159,3 +185,18 @@ def main() -> None:
 
 if __name__ == '__main__':
     main()
+
+
+class SelfTests(unittest.TestCase):
+    def test_select_changed_targets(self):
+        with open(project_dir / "Makefile", "rt") as makefile:
+            lines = read_makefile_lines(makefile)
+        tree = extract_target_dependencies(lines)
+        leafs = compute_leafs_in_dependency_tree(tree)
+
+        changed_files = ["jupyter/datascience/ubi9-python-3.9/Dockerfile"]
+
+        leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files)
+        assert set(leafs) == {'cuda-jupyter-tensorflow-ubi9-python-3.9',
+                              'jupyter-trustyai-ubi9-python-3.9',
+                              'jupyter-pytorch-ubi9-python-3.9'}
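Per the new return statement, the generator now emits two output lines instead of one; for a PR touching a single image they would look something like this (target name illustrative):

# Shape of the two outputs produced by print_github_actions_pr_matrix():
matrix_line = 'matrix={"target":["jupyter-datascience-ubi9-python-3.9"]}'
has_jobs_line = "has_jobs=true"  # json.dumps(len(leafs) > 0) renders true/false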
ci/cached-builds/gha_pr_changed_files.py (new file)
126 additions, 0 deletions

import json
import logging
import os
import pathlib
import re
import subprocess
import unittest
import urllib.request

PROJECT_ROOT = pathlib.Path(__file__).parent.parent.parent.resolve()


def get_github_token() -> str:
    github_token = os.environ['GITHUB_TOKEN']
    return github_token


# https://docs.github.com/en/graphql/guides/forming-calls-with-graphql
def compose_gh_api_request(pull_number: int, owner="opendatahub-io", repo="notebooks", per_page=100,
                           cursor="") -> urllib.request.Request:
    github_token = get_github_token()

    return urllib.request.Request(
        url="https://api.github.com/graphql",
        method="POST",
        headers={
            "Authorization": f"bearer {github_token}",
        },
        # https://docs.github.com/en/graphql/guides/using-the-explorer
        data=json.dumps({"query": f"""
{{
  repository(owner:"{owner}", name:"{repo}") {{
    pullRequest(number:{pull_number}) {{
      files(first:{per_page}, after:"{cursor}") {{
        edges {{
          node {{
            path
          }}
          cursor
        }}
      }}
    }}
  }}
}}
"""}).encode("utf-8"),
    )


def list_changed_files(owner: str, repo: str, pr_number: int, per_page=100) -> list[str]:
    files = []

    logging.debug("Getting list of changed files from GitHub API")

    CURSOR = ""
    while CURSOR is not None:
        request = compose_gh_api_request(pull_number=pr_number, owner=owner, repo=repo, per_page=per_page,
                                         cursor=CURSOR)
        response = urllib.request.urlopen(request)
        data = json.loads(response.read().decode("utf-8"))
        response.close()
        edges = data["data"]["repository"]["pullRequest"]["files"]["edges"]

        CURSOR = None
        for edge in edges:
            files.append(edge["node"]["path"])
            CURSOR = edge["cursor"]

    logging.debug(f"Determined {len(files)} changed files: {files[:5]} (..., printing up to 5)")
    return files


def analyze_build_directories(make_target) -> list[str]:
    directories = []

    pattern = re.compile(r"#\*# Image build directory: <(?P<dir>[^>]+)> #\(MACHINE-PARSED LINE\)#\*#\.\.\.")
    try:
        logging.debug(f"Running make in --just-print mode for target {make_target}")
        for line in subprocess.check_output(["make", make_target, "--just-print"], encoding="utf-8",
                                            cwd=PROJECT_ROOT).splitlines():
            if m := pattern.match(line):
                directories.append(m["dir"])
    except subprocess.CalledProcessError as e:
        print(e.stderr, e.stdout)
        raise

    logging.debug(f"Target {make_target} depends on files in directories {directories}")
    return directories


def should_build_target(changed_files: list[str], target_directories: list[str]) -> str:
    """Returns truthy if there is at least one changed file necessitating a build.
    Falsy (empty) string is returned otherwise."""
    for directory in target_directories:
        for changed_file in changed_files:
            if changed_file.startswith(directory):
                return changed_file
    return ""


def filter_out_unchanged(targets: list[str], changed_files: list[str]) -> list[str]:
    changed = []
    for target in targets:
        target_directories = analyze_build_directories(target)
        if reason := should_build_target(changed_files, target_directories):
            logging.info(f"✅ Will build {target} because file {reason} has been changed")
            changed.append(target)
        else:
            logging.info(f"❌ Won't build {target}")
    return changed


class SelfTests(unittest.TestCase):
    def test_compose_gh_api_request__call_without_asserting(self):
        request = compose_gh_api_request(pull_number=556, per_page=100, cursor="")
        print(request.data)

    def test_list_changed_files__pagination_works(self):
        changed_files = list_changed_files(owner="opendatahub-io", repo="notebooks", pr_number=556, per_page=1)
        assert set(changed_files) == {'codeserver/ubi9-python-3.9/Dockerfile',
                                      'codeserver/ubi9-python-3.9/run-code-server.sh'}

    def test_analyze_build_directories(self):
        directories = analyze_build_directories("jupyter-intel-pytorch-ubi9-python-3.9")
        assert set(directories) == {"base/ubi9-python-3.9",
                                    "intel/base/gpu/ubi9-python-3.9",
                                    "jupyter/intel/pytorch/ubi9-python-3.9"}
