Skip to content

Commit ab84dfa

Browse files
Merge pull request #602 from jiridanek/jd_use_git_not_gh_api
ci: get a list of changed files using git diff instead of GitHub APIv4
2 parents 64fb240 + 9a9a4a0 commit ab84dfa

File tree

4 files changed

+31
-79
lines changed

4 files changed

+31
-79
lines changed

.github/workflows/build-notebooks-pr.yaml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
"name": "Build Notebooks"
2+
"name": "Build Notebooks (pr)"
33
"on":
44
"pull_request":
55

@@ -18,15 +18,18 @@ jobs:
1818
steps:
1919
- uses: actions/checkout@v4
2020

21-
- run: |
21+
- name: Determine targets to build based on changed files
22+
run: |
23+
set -x
24+
git fetch --no-tags origin 'pull/${{ github.event.pull_request.number }}/head:${{ github.event.pull_request.head.ref }}'
25+
git fetch --no-tags origin '+refs/heads/${{ github.event.pull_request.base.ref }}:refs/remotes/origin/${{ github.event.pull_request.base.ref }}'
2226
python3 ci/cached-builds/gen_gha_matrix_jobs.py \
23-
--owner=${{ github.repository_owner }} \
24-
--repo=${{ github.event.pull_request.base.repo.name }} \
25-
--pr-number=${{ github.event.pull_request.number }} \
26-
--skip-unchanged
27+
--from-ref 'origin/${{ github.event.pull_request.base.ref }}' \
28+
--to-ref '${{ github.event.pull_request.head.ref }}'
2729
id: gen
2830
env:
2931
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
32+
shell: bash
3033

3134
build:
3235
needs: ["gen"]

.github/workflows/build-notebooks.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
---
22
# This file is autogenerated by ci/cached-builds/gen_gha_matrix_jobs.py
33
{
4-
"name": "Build Notebooks",
4+
"name": "Build Notebooks (push)",
55
"permissions": {
66
"packages": "write"
77
},

ci/cached-builds/gen_gha_matrix_jobs.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def write_github_workflow_file(tree: dict[str, list[str]], path: pathlib.Path) -
9898
}
9999

100100
workflow = {
101-
"name": "Build Notebooks",
101+
"name": "Build Notebooks (push)",
102102
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
103103
"permissions": {
104104
"packages": "write",
@@ -153,14 +153,10 @@ def main() -> None:
153153
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
154154

155155
argparser = argparse.ArgumentParser()
156-
argparser.add_argument("--owner", type=str, required=False,
157-
help="GitHub repo owner/org (for the --skip-unchanged feature)")
158-
argparser.add_argument("--repo", type=str, required=False,
159-
help="GitHub repo name (for the --skip-unchanged feature)")
160-
argparser.add_argument("--pr-number", type=int, required=False,
161-
help="PR number under owner/repo (for the --skip-unchanged feature)")
162-
argparser.add_argument("--skip-unchanged", type=bool, required=False, default=False,
163-
action=argparse.BooleanOptionalAction)
156+
argparser.add_argument("--from-ref", type=str, required=False,
157+
help="Git ref of the base branch (to determine changed files)")
158+
argparser.add_argument("--to-ref", type=str, required=False,
159+
help="Git ref of the PR branch (to determine changed files)")
164160
args = argparser.parse_args()
165161

166162
# https://www.gnu.org/software/make/manual/make.html#Reading-Makefiles
@@ -171,9 +167,9 @@ def main() -> None:
171167
write_github_workflow_file(tree, project_dir / ".github" / "workflows" / "build-notebooks.yaml")
172168

173169
leafs = compute_leafs_in_dependency_tree(tree)
174-
if args.skip_unchanged:
175-
logging.info(f"Skipping targets not modified in PR #{args.pr_number}")
176-
changed_files = gha_pr_changed_files.list_changed_files(args.owner, args.repo, args.pr_number)
170+
if args.from_ref:
171+
logging.info(f"Skipping targets not modified in the PR")
172+
changed_files = gha_pr_changed_files.list_changed_files(args.from_ref, args.to_ref)
177173
leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files)
178174
output = print_github_actions_pr_matrix(tree, leafs)
179175

@@ -198,6 +194,8 @@ def test_select_changed_targets(self):
198194
changed_files = ["jupyter/datascience/ubi9-python-3.9/Dockerfile"]
199195

200196
leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files)
201-
assert set(leafs) == {'cuda-jupyter-tensorflow-ubi9-python-3.9',
197+
assert set(leafs) == {'amd-jupyter-pytorch-c9s-python-3.9',
198+
'amd-jupyter-tensorflow-c9s-python-3.9',
199+
'cuda-jupyter-tensorflow-ubi9-python-3.9',
202200
'jupyter-trustyai-ubi9-python-3.9',
203201
'jupyter-pytorch-ubi9-python-3.9'}

ci/cached-builds/gha_pr_changed_files.py

Lines changed: 10 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
import json
21
import logging
32
import os
43
import pathlib
54
import re
65
import subprocess
76
import unittest
8-
import urllib.request
97

108
PROJECT_ROOT = pathlib.Path(__file__).parent.parent.parent.resolve()
119

@@ -15,57 +13,13 @@ def get_github_token() -> str:
1513
return github_token
1614

1715

18-
# https://docs.github.com/en/graphql/guides/forming-calls-with-graphql
19-
def compose_gh_api_request(pull_number: int, owner="opendatahub-io", repo="notebooks", per_page=100,
20-
cursor="") -> urllib.request.Request:
21-
github_token = get_github_token()
22-
23-
return urllib.request.Request(
24-
url="https://api.github.com/graphql",
25-
method="POST",
26-
headers={
27-
"Authorization": f"bearer {github_token}",
28-
},
29-
# https://docs.github.com/en/graphql/guides/using-the-explorer
30-
data=json.dumps({"query": f"""
31-
{{
32-
repository(owner:"{owner}", name:"{repo}") {{
33-
pullRequest(number:{pull_number}) {{
34-
files(first:{per_page}, after:"{cursor}") {{
35-
edges {{
36-
node {{
37-
path
38-
}}
39-
cursor
40-
}}
41-
}}
42-
}}
43-
}}
44-
}}
45-
"""}).encode("utf-8"),
46-
)
47-
48-
49-
def list_changed_files(owner: str, repo: str, pr_number: int, per_page=100) -> list[str]:
50-
files = []
51-
52-
logging.debug("Getting list of changed files from GitHub API")
53-
54-
CURSOR = ""
55-
while CURSOR is not None:
56-
request = compose_gh_api_request(pull_number=pr_number, owner=owner, repo=repo, per_page=per_page,
57-
cursor=CURSOR)
58-
response = urllib.request.urlopen(request)
59-
data = json.loads(response.read().decode("utf-8"))
60-
response.close()
61-
edges = data["data"]["repository"]["pullRequest"]["files"]["edges"]
62-
63-
CURSOR = None
64-
for edge in edges:
65-
files.append(edge["node"]["path"])
66-
CURSOR = edge["cursor"]
67-
68-
logging.debug(f"Determined {len(files)} changed files: {files[:5]} (..., printing up to 5)")
16+
def list_changed_files(from_ref: str, to_ref: str) -> list[str]:
17+
logging.debug("Getting list of changed files from git diff")
18+
19+
files = subprocess.check_output(["git", "diff", "--name-only", from_ref, to_ref],
20+
encoding='utf-8').splitlines()
21+
22+
logging.debug(f"Determined {len(files)} changed files: {files[:100]} (..., printing up to 100 files)")
6923
return files
7024

7125

@@ -110,12 +64,9 @@ def filter_out_unchanged(targets: list[str], changed_files: list[str]) -> list[s
11064

11165

11266
class SelfTests(unittest.TestCase):
113-
def test_compose_gh_api_request__call_without_asserting(self):
114-
request = compose_gh_api_request(pull_number=556, per_page=100, cursor="")
115-
print(request.data)
116-
117-
def test_list_changed_files__pagination_works(self):
118-
changed_files = list_changed_files(owner="opendatahub-io", repo="notebooks", pr_number=556, per_page=1)
67+
def test_list_changed_files(self):
68+
"""This is PR #556 in opendatahub-io/notebooks"""
69+
changed_files = list_changed_files(from_ref="4d4841f", to_ref="2c36c11")
11970
assert set(changed_files) == {'codeserver/ubi9-python-3.9/Dockerfile',
12071
'codeserver/ubi9-python-3.9/run-code-server.sh'}
12172

0 commit comments

Comments
 (0)