Skip to content

Commit a1796d0

Browse files
committed
WIP - verify GH action tag/SHA combinations
This change introduces a new function `verify_actions` to validate the contents of `actions.yml` against GitHub. TL;DR: the function verifies that the SHAs specified in `actions.yml` exist in the GH repo. It also ensures that the SHA exists on the Git tag, if the `tag` attribute is specified. The rest of the (currently spaghetti-code) function is a lot of output plus error (failure) and warning collection. Although it issues quite a few GH API requests, the rate limiter should not kick in (with an authenticated GH token). I opted to rely on the HTTP/1.1 `urllib.request` stuff, which has no connection reuse. The alternative would have been to add a dependency. The algorithm roughly works like this, for each action specified in `actions.yml`: * Issue a warning and stop, if the name is like `OWNER/*` ("wildcard" repository). Can't verify Git SHAs in this case. * Issue a warning and stop, if the name is like `docker:*` (not implemented) * Issue an error and stop, if the name doesn't start with an `OWNER/REPO` pattern. * Each expired entry is just skipped * If there is a wildcard reference and a SHA reference, issue an error. Then, for each reference for an action: * If no `tag` is specified, let GH resolve the commit SHA. Emit a warning to add the value of the `tag` attribute, if the SHA can be resolved. Otherwise, emit an error. * If `tag` is specified: * Add the SHA to the set of requested-shas-by-tag * Call GH's "matching-refs" endpoint for the 'tag' value * Emit an error, if the object type is not a tag or commit. * Also resolve 'tag' object types to 'commit' object types. * Add each returned SHA to the set of valid-shas-by-tag. * For each "requested tag" verify that the sets of valid and requested SHAs intersect. If not, emit an error.
1 parent b5021ac commit a1796d0

File tree

6 files changed

+526
-1
lines changed

6 files changed

+526
-1
lines changed

.github/workflows/update_actions.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ jobs:
3232
- run: pip install ruyaml
3333

3434
- name: Update actions.yml
35-
shell: python
35+
shell: python
36+
env:
37+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3638
run: |
3739
import sys
3840
sys.path.append("./gateway/")
@@ -41,6 +43,11 @@ jobs:
4143
g.update_actions(".github/workflows/dummy.yml", "actions.yml")
4244
g.update_patterns("approved_patterns.yml", "actions.yml")
4345
46+
# Verify the SHA/tag combinations in actions.yml against the GitHub API.
import action_tags as at
result = at.verify_actions("actions.yml")
# has_failures is a method: it must be called. The bare attribute is a bound
# method object, which is always truthy and would fail this step on every run.
if result.has_failures():
    raise Exception(f"Failed to verify actions:\n{result}")
50+
4451
- name: Commit and push changes
4552
if: ${{ github.event_name != 'pull_request' }}
4653
run: |

CONTRIBUTING.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
2+
# Updating code in `gateway/`
3+
4+
## Prerequisites
5+
6+
1. Python 3.13+
7+
2. `pipx install uv`
8+
9+
## Running tests
10+
11+
`uvx --with ruyaml pytest`
12+
13+
To print stdout/stderr to the console when running pytest:
14+
15+
`uvx --with ruyaml pytest -s`

actions.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,11 +641,17 @@ scacap/action-surefire-report:
641641
1a128e49c0585bc0b8e38e541ac3b6e35a5bc727:
642642
expires_at: 2025-12-22
643643
tag: v1.9.0
644+
# GH API requests from GH hosted runners fail with 403 and the following error message:
645+
# 'Although you appear to have the correct authorization credentials, the `ScaCap` organization has an IP allow list enabled, and your IP address is not permitted to access this resource.'
646+
ignore_gh_api_errors: true
644647
'*':
645648
expires_at: 2025-08-01
646649
keep: true
647650
5609ce4db72c09db044803b344a8968fd1f315da:
648651
tag: v1.9.1
652+
# GH API requests from GH hosted runners fail with 403 and the following error message:
653+
# 'Although you appear to have the correct authorization credentials, the `ScaCap` organization has an IP allow list enabled, and your IP address is not permitted to access this resource.'
654+
ignore_gh_api_errors: true
649655
scala-steward-org/scala-steward-action:
650656
53d486a68877f4a6d1e110e8058fe21e593db356:
651657
tag: v2.77.0

gateway/action_tags.py

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
# /// script
2+
# requires-python = ">=3.13"
3+
# dependencies = [
4+
# "ruyaml",
5+
# ]
6+
# ///
7+
8+
import os
9+
import re
10+
from urllib.error import HTTPError
11+
12+
import ruyaml
13+
14+
from datetime import date
15+
from urllib.request import Request, urlopen
16+
from pathlib import Path
17+
from ruyaml import CommentedMap, CommentedSeq
18+
from gateway import ActionsYAML, load_yaml, on_gha
19+
20+
# NOTE: the hyphen is placed last inside each character class so it cannot be
# misread as the start of a range (e.g. '9-_'); the accepted characters are
# letters, digits, '_', '.' and '-'.

# Matches an 'OWNER/*' entry, i.e. a "wildcard" repository.
re_github_actions_repo_wildcard = r"^[A-Za-z0-9_.-]+/[*]$"
# Matches 'OWNER/REPO', optionally followed by a sub-path.
# Something like 'pytooling/actions/with-post-step' or 'readthedocs/actions/preview'.
# Group 1 captures the 'OWNER/REPO' part.
re_github_actions_repo = r"^([A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+)(/.+)?$"
# Matches a Docker image reference such as 'docker://IMAGE'.
re_docker_image = r"^docker://.+"
# Matches a lower-case hexadecimal Git SHA of at least 7 characters.
re_git_sha = r"^[a-f0-9]{7,}$"
25+
26+
class ActionTagsCheckResult:
    """Collects log lines, failures and warnings produced by a verification run."""

    def __init__(self, log_to_console: bool = True):
        # When set, every logged message is also printed immediately.
        self.log_to_console = log_to_console
        self.logs = []
        self.failures = []
        self.warnings = []

    def log(self, message: str) -> None:
        """Record a log line, echoing it to stdout when console logging is enabled."""
        if self.log_to_console:
            print(message)
        self.logs.append(message)

    def failure(self, message: str) -> None:
        """Record a failure message."""
        self.failures.append(message)

    def warning(self, message: str) -> None:
        """Record a warning message."""
        self.warnings.append(message)

    def has_failures(self) -> bool:
        """Return True if at least one failure has been recorded."""
        return bool(self.failures)

    def has_warnings(self) -> bool:
        """Return True if at least one warning has been recorded."""
        return bool(self.warnings)

    def __str__(self):
        # All failures first, then all warnings, one per line.
        failure_text = ''.join(f"FAILURE: {entry}\n" for entry in self.failures)
        warning_text = ''.join(f"WARNING: {entry}\n" for entry in self.warnings)
        return failure_text + warning_text
54+
55+
56+
class ApiResponse:
    """Plain value holder describing the outcome of one GitHub API request."""

    def __init__(self, req_url: str, status: int, reason: str, headers: dict[str, str], body: str):
        # URL that was requested.
        self.req_url = req_url
        # HTTP status code and reason phrase of the response.
        self.status, self.reason = status, reason
        # Response headers and the decoded response body.
        self.headers, self.body = headers, body
63+
64+
65+
def _gh_api_get(url_abspath: str) -> ApiResponse:
    """
    Issue a GET request against the GitHub REST API.

    HTTP error responses are not raised; they are returned as an `ApiResponse`
    carrying the error status, reason, headers and body, so callers can
    branch on `status`.

    Args:
        url_abspath: Absolute URL path (starting with `/`) that is appended to `https://api.github.com`.

    Returns:
        The response (successful or HTTP error) as an `ApiResponse`.

    Raises:
        Exception: Any non-HTTP error raised while performing the request is re-raised.
    """
    headers: dict[str, str] = {
        'Accept': 'application/vnd.github.v3+json',
    }
    # Use GH_TOKEN, if available.
    # Unauthorized GH API requests are quite rate-limited.
    # Tip: add an extra space before 'export' to prevent adding the line to the shell history.
    #  export GH_TOKEN=$(gh auth token)
    # `.get()` keeps the token optional: indexing os.environ would raise KeyError when it is unset.
    gh_token = os.environ.get('GH_TOKEN')
    if gh_token:
        headers['Authorization'] = f"Bearer {gh_token}"
    req_url = f"https://api.github.com{url_abspath}"
    request = Request(url=req_url, headers=headers)
    try:
        with urlopen(request) as response:
            return ApiResponse(req_url, response.status, response.reason, dict(response.headers), response.read().decode('utf-8'))
    except HTTPError as e:
        # Non-2xx responses surface as HTTPError; convert them into a regular response object.
        return ApiResponse(req_url, e.code, e.reason, dict(e.headers), e.read().decode('utf-8'))
    except Exception:
        print(f"Failed to fetch '{req_url}' from GitHub API")
        # Bare raise preserves the original traceback.
        raise
86+
87+
def _gh_get_commit_object(owner_repo: str, sha: str) -> ApiResponse:
    """Request the Git commit object `sha` of the repository `owner_repo` from the GitHub API."""
    api_path = f"/repos/{owner_repo}/git/commits/{sha}"
    return _gh_api_get(api_path)
89+
90+
def _gh_get_tag(owner_repo: str, tag_sha: str) -> ApiResponse:
    """Request the Git tag object `tag_sha` of the repository `owner_repo` from the GitHub API."""
    api_path = f"/repos/{owner_repo}/git/tags/{tag_sha}"
    return _gh_api_get(api_path)
92+
93+
def _gh_matching_tags(owner_repo: str, tag: str) -> ApiResponse:
    """Request the tag references matching `tag` in the repository `owner_repo` from the GitHub API."""
    api_path = f"/repos/{owner_repo}/git/matching-refs/tags/{tag}"
    return _gh_api_get(api_path)
95+
96+
def verify_actions(actions: Path | ActionsYAML | str, log_to_console: bool = True, today: date = date.today()) -> ActionTagsCheckResult:
97+
"""
98+
Validates the contents of the actions file against GitHub.
99+
100+
The function verifies that the SHAs specified in `actions.yml` exist in the GH repo.
101+
Also ensures that the SHA exists on the Git tag, if the `tag` attribute is specified.
102+
103+
The algorithm roughly works like this, for each action specified in `actions.yml`:
104+
* Issue a warning and stop, if the name is like `OWNER/*` ("wildcard" repository).
105+
Can't verify Git SHAs in this case.
106+
* Issue a warning and stop, if the name is like `docker:*` (not implemented)
107+
* Issue an error and stop, if the name doesn't start with an `OWNER/REPO` pattern.
108+
* Each expired entry is just skipped
109+
* If there is a wildcard reference and a SHA reference, issue an error.
110+
111+
Then, for each reference for an action:
112+
* If no `tag` is specified, let GH resolve the commit SHA.
113+
Emit a warning to add the value of the `tag` attribute, if the SHA can be resolved.
114+
Otherwise, emit an error.
115+
* If `tag` is specified:
116+
* Add the SHA to the set of requested-shas-by-tag
117+
* Call GH's "matching-refs" endpoint for the 'tag' value
118+
* Emit en error, if the object type is not a tag or commit.
119+
* Also resolve 'tag' object types to 'commit' object types.
120+
* Add each returned SHA to the set of valid-shas-by-tag.
121+
* For each "requested tag" verify that the sets of valid and requested shas intersect. If not, emit an error.
122+
123+
Args:
124+
actions: Path to the actions list file (mandatory)
125+
log_to_console: Whether to log messages immediately to the console (default: True)
126+
today: The current date (default: today)
127+
"""
128+
if on_gha():
129+
print(f"::group::Verfiy GitHub Actions")
130+
gh_token = os.environ['GH_TOKEN']
131+
if not gh_token or len(gh_token) == 0:
132+
raise Exception("GH_TOKEN environment variable is not set or empty")
133+
134+
if isinstance(actions, Path) or isinstance(actions, str):
135+
actions = load_yaml(actions)
136+
actions_yaml: ActionsYAML = actions
137+
138+
result = ActionTagsCheckResult(log_to_console=log_to_console or on_gha())
139+
140+
for name, action in actions_yaml.items():
141+
gh_repo_matcher = re.match(re_github_actions_repo, name)
142+
if gh_repo_matcher is not None:
143+
owner_repo = gh_repo_matcher.group(1)
144+
result.log(f"Checking GitHub action {name} in GH repo 'https://github.com/{owner_repo}'...")
145+
valid_shas_by_tag: dict[str, set[str]] = {}
146+
requested_shas_by_tag: dict[str, set[str]] = {}
147+
has_wildcard = False
148+
has_wildcard_msg_emitted = False
149+
has_ignored_api_errors = False
150+
for ref, details in action.items():
151+
if details and 'expires_at' in details:
152+
expires_at: date = details.get('expires_at')
153+
if expires_at < today:
154+
# skip expired entries
155+
result.log(f" .. ref '{ref}' is expired, skipping")
156+
continue
157+
158+
# noinspection PyTypedDict
159+
ignore_gh_api_errors = details and 'ignore_gh_api_errors' in details and details['ignore_gh_api_errors'] == True
160+
161+
if ref == '*':
162+
# "wildcard" SHA - what would we...
163+
result.log(f" .. detected wildcard ref")
164+
if len(requested_shas_by_tag) > 0 and not has_wildcard_msg_emitted:
165+
m = f"GitHub action {name} references a wildcard SHA but also has specific SHAs"
166+
result.log(f" .. ⚡ {m}")
167+
result.warning(m)
168+
has_wildcard_msg_emitted = True
169+
has_wildcard = True
170+
continue
171+
elif re.match(re_git_sha, ref):
172+
result.log(f" .. detected entry with Git SHA '{ref}'")
173+
if has_wildcard and not has_wildcard_msg_emitted:
174+
m = f"GitHub action {name} references a wildcard SHA but also has specific SHAs"
175+
result.log(f" .. ⚡ {m}")
176+
result.warning(m)
177+
has_wildcard_msg_emitted = True
178+
179+
if not details or not 'tag' in details:
180+
result.log(f" .. no Git tag")
181+
# https://docs.github.com/en/rest/git/commits?apiVersion=2022-11-28#get-a-commit-object
182+
response = _gh_get_commit_object(owner_repo, ref)
183+
match response.status:
184+
case 200:
185+
m = f"GitHub action {name} references existing commit SHA '{ref}' but does specify the tag name for it."
186+
result.log(f" .. ⚡ {m}")
187+
result.warning(m)
188+
case 404:
189+
m = f"GitHub action {name} references non existing commit SHA '{ref}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}"
190+
result.log(f" .. ❌ {m}")
191+
result.failure(m)
192+
case _:
193+
m = f"Failed to fetch Git SHA '{ref}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}"
194+
if ignore_gh_api_errors:
195+
has_ignored_api_errors = True
196+
result.log(f" .. ⚡ {m}")
197+
result.warning(m)
198+
else:
199+
result.log(f" .. ❌ {m}")
200+
result.failure(m)
201+
else:
202+
tag: str = details.get('tag')
203+
result.log(f" .. collecting Git SHAs for tag {tag}")
204+
205+
if not tag in requested_shas_by_tag:
206+
requested_shas_by_tag[tag] = set()
207+
requested_shas_by_tag[tag].add(ref)
208+
209+
if not tag in valid_shas_by_tag:
210+
valid_shas_by_tag[tag] = set()
211+
valid_shas_for_tag = valid_shas_by_tag[tag]
212+
213+
# https://docs.github.com/en/rest/git/refs?apiVersion=2022-11-28#list-matching-references
214+
response = _gh_matching_tags(owner_repo, tag)
215+
match response.status:
216+
case 200:
217+
response_json: CommentedSeq = ruyaml.YAML().load(response.body)
218+
for msg in response_json:
219+
tag_ref_map: CommentedMap = msg
220+
tag_object: CommentedMap = tag_ref_map["object"]
221+
tab_object_type: str = tag_object["type"]
222+
tag_object_sha: str = tag_object["sha"]
223+
result.log(f" .. GH yields {tab_object_type} SHA '{tag_object_sha}' for '{tag_ref_map['ref']}'")
224+
match tab_object_type:
225+
case "tag":
226+
valid_shas_for_tag.add(tag_object_sha)
227+
# https://docs.github.com/en/rest/git/tags?apiVersion=2022-11-28#get-a-tag
228+
response2 = _gh_get_tag(owner_repo, tag_object_sha)
229+
match response2.status:
230+
case 200:
231+
tag_object_sha = ruyaml.YAML().load(response2.body)["object"]["sha"]
232+
valid_shas_for_tag.add(tag_object_sha)
233+
result.log(f" .. GH returns commit SHA '{tag_object_sha}' for previous tag SHA")
234+
case 404:
235+
result.log(f" .. commit SHA '{tag_object_sha}' does not exist")
236+
case _:
237+
m = f"Failed to fetch details for Git tag '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response2.status}: {response2.reason}, API URL: {response2.req_url}\n{response2.body}"
238+
if ignore_gh_api_errors:
239+
has_ignored_api_errors = True
240+
result.log(f" .. ⚡ {m}")
241+
result.warning(m)
242+
else:
243+
result.log(f" .. ❌ {m}")
244+
result.failure(m)
245+
case "commit":
246+
valid_shas_for_tag.add(tag_object_sha)
247+
case "branch":
248+
m = f"Branch references mentioned for Git tag '{tag}' for GitHub action {name}"
249+
result.log(f" .. ❌ {m}")
250+
result.failure(m)
251+
case _:
252+
m = f"Invalid Git object type '{tag_object['type']}' for Git tag '{tag}' in GitHub repo 'https://github.com/{owner_repo}'"
253+
result.log(f" .. ❌ {m}")
254+
result.failure(m)
255+
case _:
256+
m = f"Failed to fetch matching Git tags for '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}"
257+
if ignore_gh_api_errors:
258+
result.log(f" .. ⚡ {m}")
259+
result.warning(m)
260+
has_ignored_api_errors = True
261+
else:
262+
result.log(f" .. ❌ {m}")
263+
result.failure(m)
264+
else:
265+
m = f"GitHub action {name} references an invalid Git SHA '{ref}'"
266+
result.log(f" .. ❌ {m}")
267+
result.failure(m)
268+
269+
for req_tag, req_shas in requested_shas_by_tag.items():
270+
result.log(f" .. checking tag '{req_tag}'")
271+
result.log(f" .. referenced SHAs: {req_shas}")
272+
valid_shas = valid_shas_by_tag.get(req_tag)
273+
result.log(f" .. verified SHAs: {valid_shas if len(valid_shas)>0 else '(none)'}")
274+
if not valid_shas:
275+
m = f"GitHub action {name} references Git tag '{req_tag}' via SHAs '{req_shas}' but no SHAs for tag could be found - does the Git tag exist?"
276+
if has_ignored_api_errors:
277+
result.warning(m)
278+
result.log(f" ⚡ {m}")
279+
else:
280+
result.failure(m)
281+
result.log(f" ❌ {m}")
282+
elif req_shas.isdisjoint(valid_shas):
283+
m = f"GitHub action {name} references Git tag '{req_tag}' via SHAs '{req_shas}' but none of those matches the valid SHAs '{valid_shas}'"
284+
result.failure(m)
285+
result.log(f" ❌ {m}")
286+
else:
287+
result.log(f" ✅ GitHub action {name} definition for tag '{req_tag}' is good!")
288+
289+
elif re.match(re_github_actions_repo_wildcard, name):
290+
m =f"Ignoring '{name}' because it uses a GitHub repository wildcard ..."
291+
result.warning(m)
292+
result.log(f"⚡ {m}")
293+
294+
elif re.match(re_docker_image, name):
295+
m =f"Ignoring '{name}' because it references a Docker image ..."
296+
result.warning(m)
297+
result.log(f"⚡ {m}")
298+
299+
else:
300+
m = f"Cannot determine action kind for '{name}'"
301+
result.failure(m)
302+
result.log(f"❌ {m}")
303+
304+
if on_gha():
305+
if result.has_failures() or result.has_warnings():
306+
with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
307+
f.write(f"# GitHub Actions verification result\n")
308+
if len(result.failures) > 0:
309+
f.write(f"## Failures ({len(result.failures)})\n")
310+
f.write('```\n')
311+
for msg in result.failures:
312+
f.write(f"{msg}\n\n")
313+
f.write('```\n')
314+
if len(result.warnings) > 0:
315+
f.write(f"## Warnings ({len(result.warnings)})\n")
316+
f.write('```\n')
317+
for msg in result.warnings:
318+
f.write(f"{msg}\n\n")
319+
f.write('```\n')
320+
f.write(f"## Log\n")
321+
f.write('```\n')
322+
for msg in result.logs:
323+
f.write(f"{msg}\n")
324+
f.write('```\n')
325+
print("::endgroup::")
326+
327+
return result

gateway/gateway.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class RefDetails(TypedDict):
2424

2525
expires_at: date
2626
keep: NotRequired[bool]
27+
tag: NotRequired[str]
2728

2829

2930
ActionRefs = Dict[str, RefDetails]

0 commit comments

Comments
 (0)