Skip to content

Commit 41c0e43

Browse files
committed
WIP - verify GH action tag/SHA combinations
This change introduces a new function `verify_actions` to validate the contents of `actions.yml` against GitHub. TL;DR The function verifies that the SHAs specified in `actions.yml` exist in the GH repo. Also ensures that the SHA exists on the Git tag, if the `tag` attribute is specified. The rest of the (currently spaghetti code) function is a lot of output and error (failure) and warning collection. Although it issues quite a few GH API requests, the rate limiter should not kick in (with an authenticated GH token). I opted to rely on the HTTP/1.1 `urllib.request` stuff, which has no connection-reuse. The alternative would have been to add a dependency. The algorithm roughly works like this, for each action specified in `actions.yml`: * Issue a warning and stop, if the name is like `OWNER/*` ("wildcard" repository). Can't verify Git SHAs in this case. * Issue a warning and stop, if the name is like `docker:*` (not implemented) * Issue an error and stop, if the name doesn't start with an `OWNER/REPO` pattern. * Each expired entry is just skipped * If there is a wildcard reference and a SHA reference, issue an error. Then, for each reference for an action: * If no `tag` is specified, let GH resolve the commit SHA. Emit a warning to add the value of the `tag` attribute, if the SHA can be resolved. Otherwise, emit an error. * If `tag` is specified: * Add the SHA to the set of requested-shas-by-tag * Call GH's "matching-refs" endpoint for the 'tag' value * Emit an error, if the object type is not a tag or commit. * Also resolve 'tag' object types to 'commit' object types. * Add each returned SHA to the set of valid-shas-by-tag. * For each "requested tag" verify that the sets of valid and requested shas intersect. If not, emit an error.
1 parent b5021ac commit 41c0e43

File tree

6 files changed

+531
-1
lines changed

6 files changed

+531
-1
lines changed

.github/workflows/update_actions.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ jobs:
3232
- run: pip install ruyaml
3333

3434
- name: Update actions.yml
35-
shell: python
35+
shell: python
36+
env:
37+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3638
run: |
3739
import sys
3840
sys.path.append("./gateway/")
@@ -41,6 +43,11 @@ jobs:
4143
g.update_actions(".github/workflows/dummy.yml", "actions.yml")
4244
g.update_patterns("approved_patterns.yml", "actions.yml")
4345
46+
import action_tags as at
47+
result = at.verify_actions("actions.yml")
48+
if result.has_failures():
49+
raise Exception(f"Verify actions result summary:\n{result}")
50+
4451
- name: Commit and push changes
4552
if: ${{ github.event_name != 'pull_request' }}
4653
run: |

CONTRIBUTING.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
2+
# Updating code in `gateway/`
3+
4+
## Prerequisites
5+
6+
1. Python 3.13+
7+
2. `pipx install uv`
8+
9+
## Running tests
10+
11+
`uvx --with ruyaml pytest`
12+
13+
To print stdout/stderr to the console when running pytest:
14+
15+
`uvx --with ruyaml pytest -s`

actions.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,11 +641,17 @@ scacap/action-surefire-report:
641641
1a128e49c0585bc0b8e38e541ac3b6e35a5bc727:
642642
expires_at: 2025-12-22
643643
tag: v1.9.0
644+
# GH API requests from GH hosted runners fail with 403 and the following error message:
645+
# 'Although you appear to have the correct authorization credentials, the `ScaCap` organization has an IP allow list enabled, and your IP address is not permitted to access this resource.'
646+
ignore_gh_api_errors: true
644647
'*':
645648
expires_at: 2025-08-01
646649
keep: true
647650
5609ce4db72c09db044803b344a8968fd1f315da:
648651
tag: v1.9.1
652+
# GH API requests from GH hosted runners fail with 403 and the following error message:
653+
# 'Although you appear to have the correct authorization credentials, the `ScaCap` organization has an IP allow list enabled, and your IP address is not permitted to access this resource.'
654+
ignore_gh_api_errors: true
649655
scala-steward-org/scala-steward-action:
650656
53d486a68877f4a6d1e110e8058fe21e593db356:
651657
tag: v2.77.0

gateway/action_tags.py

Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
# /// script
2+
# requires-python = ">=3.13"
3+
# dependencies = [
4+
# "ruyaml",
5+
# ]
6+
# ///
7+
8+
import os
9+
import re
10+
from urllib.error import HTTPError
11+
12+
import ruyaml
13+
14+
from datetime import date
15+
from urllib.request import Request, urlopen
16+
from pathlib import Path
17+
from ruyaml import CommentedMap, CommentedSeq
18+
from gateway import ActionsYAML, load_yaml, on_gha
19+
20+
# Matches an 'OWNER/*' entry, i.e. a wildcard over all repositories of one owner.
re_github_actions_repo_wildcard = r"^[A-Za-z0-9-_.]+/[*]$"
# Matches 'OWNER/REPO' with an optional trailing sub-path; group 1 is the 'OWNER/REPO' part.
# Something like 'pytooling/actions/with-post-step' or 'readthedocs/actions/preview'.
re_github_actions_repo = r"^([A-Za-z0-9-_.]+/[A-Za-z0-9-_.]+)(/.+)?$"
# Matches Docker image references of the form 'docker://...'.
re_docker_image = r"^docker://.+"
# Matches a lowercase hexadecimal string of at least 7 characters (abbreviated or full Git SHA).
re_git_sha = r"^[a-f0-9]{7,}$"
25+
26+
class ActionTagsCheckResult:
    """Accumulates the outcome of an actions verification run.

    Three independent lists are kept: plain log lines, failures and warnings.
    Only log lines are (optionally) echoed to stdout when they are recorded;
    failures and warnings are rendered on demand via ``str()``.
    """

    def __init__(self, log_to_console: bool = True):
        self.log_to_console = log_to_console
        self.logs = []
        self.failures = []
        self.warnings = []

    def log(self, message: str) -> None:
        """Record a log line, printing it first when console logging is enabled."""
        if self.log_to_console:
            print(message)
        self.logs.append(message)

    def failure(self, message: str) -> None:
        """Record a failure message."""
        self.failures.append(message)

    def warning(self, message: str) -> None:
        """Record a warning message."""
        self.warnings.append(message)

    def has_failures(self) -> bool:
        """Return True when at least one failure has been recorded."""
        return bool(self.failures)

    def has_warnings(self) -> bool:
        """Return True when at least one warning has been recorded."""
        return bool(self.warnings)

    def __str__(self):
        # Failures are listed before warnings, one entry per line.
        rendered = [f"FAILURE: {entry}\n" for entry in self.failures]
        rendered.extend(f"WARNING: {entry}\n" for entry in self.warnings)
        return ''.join(rendered)
54+
55+
56+
class ApiResponse:
    """Plain value holder describing the outcome of one HTTP request.

    Attributes are stored exactly as passed in: the request URL, the HTTP
    status code and reason phrase, the response headers and the response body.
    """

    def __init__(self, req_url: str, status: int, reason: str, headers: dict[str, str], body: str):
        self.req_url, self.status, self.reason = req_url, status, reason
        self.headers, self.body = headers, body
63+
64+
65+
def _gh_api_get(url_abspath: str) -> ApiResponse:
    """Issue a GET request against the GitHub REST API.

    HTTP error statuses are not raised: an ``HTTPError`` is converted into an
    ``ApiResponse`` so that callers can branch on ``status``. Any other
    failure is reported on stdout and re-raised unchanged.

    Args:
        url_abspath: Absolute URL path (starting with ``/``) that is appended
            to ``https://api.github.com``.

    Returns:
        An ``ApiResponse`` carrying the request URL, status, reason, headers
        and the body decoded as UTF-8.
    """
    headers: dict[str, str] = {
        'Accept': 'application/vnd.github.v3+json',
    }
    # Use GH_TOKEN, if available.
    # Unauthorized GH API requests are quite rate-limited.
    # Tip: add an extra space before 'export' to prevent adding the line to the shell history.
    #   export GH_TOKEN=$(gh auth token)
    # `.get()` instead of indexing: an unset variable must fall back to an
    # unauthenticated request rather than fail with a KeyError.
    gh_token = os.environ.get('GH_TOKEN')
    if gh_token:
        headers['Authorization'] = f"Bearer {gh_token}"
    req_url = f"https://api.github.com{url_abspath}"
    request = Request(url=req_url, headers=headers)
    try:
        with urlopen(request) as response:
            return ApiResponse(req_url, response.status, response.reason, dict(response.headers), response.read().decode('utf-8'))
    except HTTPError as e:
        # 4xx/5xx responses still carry a status, headers and a body worth reporting.
        return ApiResponse(req_url, e.code, e.reason, dict(e.headers), e.read().decode('utf-8'))
    except Exception:
        print(f"Failed to fetch '{req_url}' from GitHub API")
        # Bare raise keeps the original traceback intact.
        raise
86+
87+
def _gh_get_commit_object(owner_repo: str, sha: str) -> ApiResponse:
    """Fetch the Git commit object `sha` of the repository `owner_repo` via the GH API."""
    commit_path = f"/repos/{owner_repo}/git/commits/{sha}"
    return _gh_api_get(commit_path)
89+
90+
def _gh_get_tag(owner_repo: str, tag_sha: str) -> ApiResponse:
    """Fetch the Git tag object `tag_sha` of the repository `owner_repo` via the GH API."""
    tag_path = f"/repos/{owner_repo}/git/tags/{tag_sha}"
    return _gh_api_get(tag_path)
92+
93+
def _gh_matching_tags(owner_repo: str, tag: str) -> ApiResponse:
    """Query GH's "matching-refs" endpoint for tag refs matching `tag` in `owner_repo`.

    Args:
        owner_repo: Repository in ``OWNER/REPO`` form.
        tag: Tag name as given in the actions file.

    Returns:
        The ``ApiResponse`` of the request.
    """
    # Function-scope import so that no file-level import has to change.
    from urllib.parse import quote

    # Percent-encode the tag so characters such as '#', '?' or '%' cannot
    # alter the request URL. '/' stays unescaped (hierarchical tag names) and
    # ordinary tags like 'v1.9.0' are passed through unchanged.
    return _gh_api_get(f"/repos/{owner_repo}/git/matching-refs/tags/{quote(str(tag))}")
95+
96+
def verify_actions(actions: Path | ActionsYAML | str, log_to_console: bool = True, today: date = date.today()) -> ActionTagsCheckResult:
97+
"""
98+
Validates the contents of the actions file against GitHub.
99+
100+
The function verifies that the SHAs specified in `actions.yml` exist in the GH repo.
101+
Also ensures that the SHA exists on the Git tag, if the `tag` attribute is specified.
102+
103+
The algorithm roughly works like this, for each action specified in `actions.yml`:
104+
* Issue a warning and stop, if the name is like `OWNER/*` ("wildcard" repository).
105+
Can't verify Git SHAs in this case.
106+
* Issue a warning and stop, if the name is like `docker:*` (not implemented)
107+
* Issue an error and stop, if the name doesn't start with an `OWNER/REPO` pattern.
108+
* Each expired entry is just skipped
109+
* If there is a wildcard reference and a SHA reference, issue an error.
110+
111+
Then, for each reference for an action:
112+
* If no `tag` is specified, let GH resolve the commit SHA.
113+
Emit a warning to add the value of the `tag` attribute, if the SHA can be resolved.
114+
Otherwise, emit an error.
115+
* If `tag` is specified:
116+
* Add the SHA to the set of requested-shas-by-tag
117+
* Call GH's "matching-refs" endpoint for the 'tag' value
118+
* Emit en error, if the object type is not a tag or commit.
119+
* Also resolve 'tag' object types to 'commit' object types.
120+
* Add each returned SHA to the set of valid-shas-by-tag.
121+
* For each "requested tag" verify that the sets of valid and requested shas intersect. If not, emit an error.
122+
123+
Args:
124+
actions: Path to the actions list file (mandatory)
125+
log_to_console: Whether to log messages immediately to the console (default: True)
126+
today: The current date (default: today)
127+
"""
128+
if on_gha():
129+
print(f"::group::Verfiy GitHub Actions")
130+
gh_token = os.environ['GH_TOKEN']
131+
if not gh_token or len(gh_token) == 0:
132+
raise Exception("GH_TOKEN environment variable is not set or empty")
133+
134+
if isinstance(actions, Path) or isinstance(actions, str):
135+
actions = load_yaml(actions)
136+
actions_yaml: ActionsYAML = actions
137+
138+
result = ActionTagsCheckResult(log_to_console=log_to_console or on_gha())
139+
140+
for name, action in actions_yaml.items():
141+
gh_repo_matcher = re.match(re_github_actions_repo, name)
142+
if gh_repo_matcher is not None:
143+
owner_repo = gh_repo_matcher.group(1)
144+
result.log(f"Checking GitHub action {name} in GH repo 'https://github.com/{owner_repo}'...")
145+
valid_shas_by_tag: dict[str, set[str]] = {}
146+
requested_shas_by_tag: dict[str, set[str]] = {}
147+
has_wildcard = False
148+
has_wildcard_msg_emitted = False
149+
# Flag whether to not error out on tag/SHA mismatches due to explicitly ignored GH API errors.
150+
has_ignored_api_errors = False
151+
for ref, details in action.items():
152+
if details and 'expires_at' in details:
153+
expires_at: date = details.get('expires_at')
154+
if expires_at < today:
155+
# skip expired entries
156+
result.log(f" .. ref '{ref}' is expired, skipping")
157+
continue
158+
159+
# noinspection PyTypedDict
160+
ignore_gh_api_errors = details and 'ignore_gh_api_errors' in details and details['ignore_gh_api_errors'] == True
161+
if ignore_gh_api_errors:
162+
m = f"ignore_gh_api_errors is set to true: will ignore GH API errors for action {name} ref '{ref}'"
163+
result.log(f" .. ⚡ {m}")
164+
result.warning(m)
165+
166+
if ref == '*':
167+
# "wildcard" SHA - what would we...
168+
result.log(f" .. detected wildcard ref")
169+
if len(requested_shas_by_tag) > 0 and not has_wildcard_msg_emitted:
170+
m = f"GitHub action {name} references a wildcard SHA but also has specific SHAs"
171+
result.log(f" .. ⚡ {m}")
172+
result.warning(m)
173+
has_wildcard_msg_emitted = True
174+
has_wildcard = True
175+
continue
176+
elif re.match(re_git_sha, ref):
177+
result.log(f" .. detected entry with Git SHA '{ref}'")
178+
if has_wildcard and not has_wildcard_msg_emitted:
179+
m = f"GitHub action {name} references a wildcard SHA but also has specific SHAs"
180+
result.log(f" .. ⚡ {m}")
181+
result.warning(m)
182+
has_wildcard_msg_emitted = True
183+
184+
if not details or not 'tag' in details:
185+
result.log(f" .. no Git tag")
186+
# https://docs.github.com/en/rest/git/commits?apiVersion=2022-11-28#get-a-commit-object
187+
response = _gh_get_commit_object(owner_repo, ref)
188+
match response.status:
189+
case 200:
190+
m = f"GitHub action {name} references existing commit SHA '{ref}' but does not specify the tag name for it."
191+
result.log(f" .. ⚡ {m}")
192+
result.warning(m)
193+
case 404:
194+
m = f"GitHub action {name} references non existing commit SHA '{ref}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}"
195+
result.log(f" .. ❌ {m}")
196+
result.failure(m)
197+
case _:
198+
m = f"Failed to fetch Git SHA '{ref}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}"
199+
if ignore_gh_api_errors:
200+
has_ignored_api_errors = True
201+
result.log(f" .. ⚡ {m}")
202+
result.warning(m)
203+
else:
204+
result.log(f" .. ❌ {m}")
205+
result.failure(m)
206+
else:
207+
tag: str = details.get('tag')
208+
result.log(f" .. collecting Git SHAs for tag {tag}")
209+
210+
if not tag in requested_shas_by_tag:
211+
requested_shas_by_tag[tag] = set()
212+
requested_shas_by_tag[tag].add(ref)
213+
214+
if not tag in valid_shas_by_tag:
215+
valid_shas_by_tag[tag] = set()
216+
valid_shas_for_tag = valid_shas_by_tag[tag]
217+
218+
# https://docs.github.com/en/rest/git/refs?apiVersion=2022-11-28#list-matching-references
219+
response = _gh_matching_tags(owner_repo, tag)
220+
match response.status:
221+
case 200:
222+
response_json: CommentedSeq = ruyaml.YAML().load(response.body)
223+
for msg in response_json:
224+
tag_ref_map: CommentedMap = msg
225+
tag_object: CommentedMap = tag_ref_map["object"]
226+
tab_object_type: str = tag_object["type"]
227+
tag_object_sha: str = tag_object["sha"]
228+
result.log(f" .. GH yields {tab_object_type} SHA '{tag_object_sha}' for '{tag_ref_map['ref']}'")
229+
match tab_object_type:
230+
case "tag":
231+
valid_shas_for_tag.add(tag_object_sha)
232+
# https://docs.github.com/en/rest/git/tags?apiVersion=2022-11-28#get-a-tag
233+
response2 = _gh_get_tag(owner_repo, tag_object_sha)
234+
match response2.status:
235+
case 200:
236+
tag_object_sha = ruyaml.YAML().load(response2.body)["object"]["sha"]
237+
valid_shas_for_tag.add(tag_object_sha)
238+
result.log(f" .. GH returns commit SHA '{tag_object_sha}' for previous tag SHA")
239+
case 404:
240+
result.log(f" .. commit SHA '{tag_object_sha}' does not exist")
241+
case _:
242+
m = f"Failed to fetch details for Git tag '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response2.status}: {response2.reason}, API URL: {response2.req_url}\n{response2.body}"
243+
if ignore_gh_api_errors:
244+
has_ignored_api_errors = True
245+
result.log(f" .. ⚡ {m}")
246+
result.warning(m)
247+
else:
248+
result.log(f" .. ❌ {m}")
249+
result.failure(m)
250+
case "commit":
251+
valid_shas_for_tag.add(tag_object_sha)
252+
case "branch":
253+
m = f"Branch references mentioned for Git tag '{tag}' for GitHub action {name}"
254+
result.log(f" .. ❌ {m}")
255+
result.failure(m)
256+
case _:
257+
m = f"Invalid Git object type '{tag_object['type']}' for Git tag '{tag}' in GitHub repo 'https://github.com/{owner_repo}'"
258+
result.log(f" .. ❌ {m}")
259+
result.failure(m)
260+
case _:
261+
m = f"Failed to fetch matching Git tags for '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}"
262+
if ignore_gh_api_errors:
263+
result.log(f" .. ⚡ {m}")
264+
result.warning(m)
265+
has_ignored_api_errors = True
266+
else:
267+
result.log(f" .. ❌ {m}")
268+
result.failure(m)
269+
else:
270+
m = f"GitHub action {name} references an invalid Git SHA '{ref}'"
271+
result.log(f" .. ❌ {m}")
272+
result.failure(m)
273+
274+
for req_tag, req_shas in requested_shas_by_tag.items():
275+
result.log(f" .. checking tag '{req_tag}'")
276+
result.log(f" .. referenced SHAs: {req_shas}")
277+
valid_shas = valid_shas_by_tag.get(req_tag)
278+
result.log(f" .. verified SHAs: {valid_shas if len(valid_shas)>0 else '(none)'}")
279+
if not valid_shas:
280+
m = f"GitHub action {name} references Git tag '{req_tag}' via SHAs '{req_shas}' but no SHAs for tag could be found - does the Git tag exist?"
281+
if has_ignored_api_errors:
282+
result.warning(m)
283+
result.log(f" ⚡ {m}")
284+
else:
285+
result.failure(m)
286+
result.log(f" ❌ {m}")
287+
elif req_shas.isdisjoint(valid_shas):
288+
m = f"GitHub action {name} references Git tag '{req_tag}' via SHAs '{req_shas}' but none of those matches the valid SHAs '{valid_shas}'"
289+
result.failure(m)
290+
result.log(f" ❌ {m}")
291+
else:
292+
result.log(f" ✅ GitHub action {name} definition for tag '{req_tag}' is good!")
293+
294+
elif re.match(re_github_actions_repo_wildcard, name):
295+
m =f"Ignoring '{name}' because it uses a GitHub repository wildcard ..."
296+
result.warning(m)
297+
result.log(f"⚡ {m}")
298+
299+
elif re.match(re_docker_image, name):
300+
m =f"Ignoring '{name}' because it references a Docker image ..."
301+
result.warning(m)
302+
result.log(f"⚡ {m}")
303+
304+
else:
305+
m = f"Cannot determine action kind for '{name}'"
306+
result.failure(m)
307+
result.log(f"❌ {m}")
308+
309+
if on_gha():
310+
if result.has_failures() or result.has_warnings():
311+
with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
312+
f.write(f"# GitHub Actions verification result\n")
313+
if len(result.failures) > 0:
314+
f.write(f"## Failures ({len(result.failures)})\n")
315+
f.write('```\n')
316+
for msg in result.failures:
317+
f.write(f"{msg}\n\n")
318+
f.write('```\n')
319+
if len(result.warnings) > 0:
320+
f.write(f"## Warnings ({len(result.warnings)})\n")
321+
f.write('```\n')
322+
for msg in result.warnings:
323+
f.write(f"{msg}\n\n")
324+
f.write('```\n')
325+
f.write(f"## Log\n")
326+
f.write('```\n')
327+
for msg in result.logs:
328+
f.write(f"{msg}\n")
329+
f.write('```\n')
330+
print("::endgroup::")
331+
332+
return result

gateway/gateway.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class RefDetails(TypedDict):
2424

2525
expires_at: date
2626
keep: NotRequired[bool]
27+
tag: NotRequired[str]
2728

2829

2930
ActionRefs = Dict[str, RefDetails]

0 commit comments

Comments
 (0)