Skip to content

Commit 185416b

Browse files
committed
WIP - verify GH action tag/SHA combinations
This change introduces a new function `verify_actions` to validate the contents against GitHub. TL;DR The function verifies that the SHAs specified in `actions.yml` exist in the GH repo. Also ensures that the SHA exists on the Git tag, if the `tag` attribute is specified. The rest of the (currently spaghetti code) function is a lot of output and error (failure) and warning collection. Although it issues quite a few GH API requests, the rate limiter should not kick in (with an authenticated GH token). I opted to rely on the HTTP/1.1 `urllib.request` stuff, which has no connection-reuse. The alternative would have been to add a dependency. The algorithm roughly works like this, for each action specified in `actions.yml`: * Issue a warning and stop, if the name is like `OWNER/*` ("wildcard" repository). Can't verify Git SHAs in this case. * Issue a warning and stop, if the name is like `docker://*` (not implemented) * Issue an error and stop, if the name doesn't start with an `OWNER/REPO` pattern. * Each expired entry is just skipped * If there is a wildcard reference and a SHA reference, issue an error. Then, for each reference for an action: * If no `tag` is specified, let GH resolve the commit SHA. Emit a warning to add the value of the `tag` attribute, if the SHA can be resolved. Otherwise, emit an error. * If `tag` is specified: * Add the SHA to the set of requested-shas-by-tag * Call GH's "matching-refs" endpoint for the 'tag' value * Emit an error, if the object type is not a tag or commit. * Also resolve 'tag' object types to 'commit' object types. * Add each returned SHA to the set of valid-shas-by-tag. * For each "requested tag" verify that the sets of valid and requested shas intersect. If not, emit an error.
1 parent bcc36f3 commit 185416b

File tree

5 files changed

+344
-1
lines changed

5 files changed

+344
-1
lines changed

.github/workflows/update_actions.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ jobs:
3232
- run: pip install ruyaml
3333

3434
- name: Update actions.yml
35-
shell: python
35+
shell: python
36+
env:
37+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3638
run: |
3739
import sys
3840
sys.path.append("./gateway/")
@@ -41,6 +43,11 @@ jobs:
4143
g.update_actions(".github/workflows/dummy.yml", "actions.yml")
4244
g.update_patterns("approved_patterns.yml", "actions.yml")
4345
46+
import action_tags as at
47+
result = at.verify_actions("actions.yml")
48+
# `has_failures` is a method; without the call parentheses the bound method
# object is always truthy and this step would fail on every run.
if result.has_failures():
    raise Exception(f"Failed to verify actions:\n{result}")
50+
4451
- name: Commit and push changes
4552
if: ${{ github.event_name != 'pull_request' }}
4653
run: |

CONTRIBUTING.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
2+
# Updating code in `gateway/`
3+
4+
## Prerequisites
5+
6+
1. Python 3.13+
7+
2. `pipx install uv`
8+
9+
## Running tests
10+
11+
`uvx --with ruyaml pytest`
12+
13+
To print stdout/stderr to the console when running pytest:
14+
15+
`uvx --with ruyaml pytest -s`

gateway/action_tags.py

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
# /// script
2+
# requires-python = ">=3.13"
3+
# dependencies = [
4+
# "ruyaml",
5+
# ]
6+
# ///
7+
8+
import os
9+
import re
10+
from urllib.error import HTTPError
11+
12+
import ruyaml
13+
14+
from datetime import date
15+
from urllib.request import Request, urlopen
16+
from pathlib import Path
17+
from ruyaml import CommentedMap, CommentedSeq
18+
from gateway import ActionsYAML, load_yaml, on_gha
19+
20+
# Matches an `OWNER/*` name, i.e. a wildcard standing for every repository of an owner.
re_github_actions_repo_wildcard = r"^[A-Za-z0-9-_.]+/[*]$"
# Matches an `OWNER/REPO` name, optionally followed by a sub-path inside the repository.
# Group 1 captures the `OWNER/REPO` part, group 2 the optional sub-path.
# Something like 'pytooling/actions/with-post-step' or 'readthedocs/actions/preview'.
re_github_actions_repo = r"^([A-Za-z0-9-_.]+/[A-Za-z0-9-_.]+)(/.+)?$"
# Matches a Docker image reference of the form `docker://...`.
re_docker_image = r"^docker://.+"
# Matches a lower-case hexadecimal string of at least 7 characters (a Git SHA).
re_git_sha = r"^[a-f0-9]{7,}$"
25+
26+
class ActionTagsCheckResult(object):
    """Collects the log lines, failures and warnings produced by a verification run.

    Args:
        log_to_console: When true, every message passed to `log` is also printed
            immediately (default: True).
    """

    def __init__(self, log_to_console: bool = True):
        self.log_to_console = log_to_console
        # All messages passed to `log`, in call order.
        self.logs: list[str] = []
        # Messages recorded via `failure`.
        self.failures: list[str] = []
        # Messages recorded via `warning`.
        self.warnings: list[str] = []

    def log(self, message: str) -> None:
        """Record a log message and print it if console logging is enabled."""
        if self.log_to_console:
            print(message)
        self.logs.append(message)

    def failure(self, message: str) -> None:
        """Record a failure message."""
        self.failures.append(message)

    def warning(self, message: str) -> None:
        """Record a warning message."""
        self.warnings.append(message)

    def has_failures(self) -> bool:
        """Return True if at least one failure was recorded."""
        return bool(self.failures)

    def has_warnings(self) -> bool:
        """Return True if at least one warning was recorded."""
        return bool(self.warnings)

    def __str__(self) -> str:
        """Return one line per failure followed by one line per warning."""
        # Build a single list before joining so that the last failure and the
        # first warning end up on separate lines.
        lines = [f"FAILURE: {failure}" for failure in self.failures]
        lines += [f"WARNING: {warning}" for warning in self.warnings]
        return '\n'.join(lines)
54+
55+
56+
class ApiResponse:
    """Plain holder for the outcome of a single API request."""

    def __init__(self, req_url: str, status: int, reason: str, headers: dict[str, str], body: str):
        # The URL that was requested.
        self.req_url = req_url
        # HTTP status code and the accompanying reason phrase.
        self.status, self.reason = status, reason
        # Response headers and the response body as text.
        self.headers, self.body = headers, body
63+
64+
65+
def _gh_api_get(url_abspath: str) -> ApiResponse:
    """Issue a GET request against the GitHub REST API.

    Args:
        url_abspath: Absolute URL path (starting with `/`) that is appended to
            `https://api.github.com`.

    Returns:
        An `ApiResponse` with status, reason, headers and the UTF-8 decoded body.
        HTTP error statuses (`HTTPError`) are returned as an `ApiResponse` too
        instead of being raised.

    Raises:
        Exception: Any other error raised by `urlopen` is re-raised after a short
            message has been printed.
    """
    headers: dict[str, str] = {
        'Accept': 'application/vnd.github.v3+json',
    }
    # Use GH_TOKEN, if available.
    # Unauthorized GH API requests are quite rate-limited.
    # Tip: add an extra space before 'export' to prevent adding the line to the shell history.
    #   export GH_TOKEN=$(gh auth token)
    # `.get` keeps the token optional: a plain subscript would raise KeyError
    # when the variable is not set at all.
    gh_token = os.environ.get('GH_TOKEN')
    if gh_token:
        headers['Authorization'] = f"Bearer {gh_token}"
    req_url = f"https://api.github.com{url_abspath}"
    request = Request(url=req_url, headers=headers)
    try:
        with urlopen(request) as response:
            return ApiResponse(req_url, response.status, response.reason, dict(response.headers), response.read().decode('utf-8'))
    except HTTPError as e:
        # Non-2xx statuses are reported to the caller as a regular response.
        return ApiResponse(req_url, e.code, e.reason, dict(e.headers), e.read().decode('utf-8'))
    except Exception:
        print(f"Failed to fetch '{req_url}' from GitHub API")
        # Bare `raise` re-raises the active exception with its original traceback.
        raise
86+
87+
def _gh_get_commit_object(owner_repo: str, sha: str) -> ApiResponse:
    """Request the Git commit object `sha` of the repository `owner_repo`."""
    commit_path = f"/repos/{owner_repo}/git/commits/{sha}"
    return _gh_api_get(commit_path)
89+
90+
def _gh_get_tag(owner_repo: str, tag_sha: str) -> ApiResponse:
    """Request the Git tag object `tag_sha` of the repository `owner_repo`."""
    tag_path = f"/repos/{owner_repo}/git/tags/{tag_sha}"
    return _gh_api_get(tag_path)
92+
93+
def _gh_matching_tags(owner_repo: str, tag: str) -> ApiResponse:
    """Request the tag references of `owner_repo` matching `tag` via the "matching-refs" endpoint."""
    refs_path = f"/repos/{owner_repo}/git/matching-refs/tags/{tag}"
    return _gh_api_get(refs_path)
95+
96+
def verify_actions(actions_path: Path, log_to_console: bool = True) -> ActionTagsCheckResult:
97+
"""
98+
Validates the contents of the actions file against GitHub.
99+
100+
The function verifies that the SHAs specified in `actions.yml` exist in the GH repo.
101+
Also ensures that the SHA exists on the Git tag, if the `tag` attribute is specified.
102+
103+
The algorithm roughly works like this, for each action specified in `actions.yml`:
104+
* Issue a warning and stop, if the name is like `OWNER/*` ("wildcard" repository).
105+
Can't verify Git SHAs in this case.
106+
* Issue a warning and stop, if the name is like `docker:*` (not implemented)
107+
* Issue an error and stop, if the name doesn't start with an `OWNER/REPO` pattern.
108+
* Each expired entry is just skipped
109+
* If there is a wildcard reference and a SHA reference, issue an error.
110+
111+
Then, for each reference for an action:
112+
* If no `tag` is specified, let GH resolve the commit SHA.
113+
Emit a warning to add the value of the `tag` attribute, if the SHA can be resolved.
114+
Otherwise, emit an error.
115+
* If `tag` is specified:
116+
* Add the SHA to the set of requested-shas-by-tag
117+
* Call GH's "matching-refs" endpoint for the 'tag' value
118+
* Emit en error, if the object type is not a tag or commit.
119+
* Also resolve 'tag' object types to 'commit' object types.
120+
* Add each returned SHA to the set of valid-shas-by-tag.
121+
* For each "requested tag" verify that the sets of valid and requested shas intersect. If not, emit an error.
122+
123+
Args:
124+
actions_path: Path to the actions list file (mandatory)
125+
log_to_console: Whether to log messages immediately to the console (default: True)
126+
"""
127+
if on_gha():
128+
print(f"::group::Verfiy GitHub Actions")
129+
gh_token = os.environ['GH_TOKEN']
130+
if not gh_token or len(gh_token) == 0:
131+
raise Exception("GH_TOKEN environment variable is not set or empty")
132+
133+
actions: ActionsYAML = load_yaml(actions_path)
134+
135+
result = ActionTagsCheckResult(log_to_console=log_to_console or on_gha())
136+
137+
for name, action in actions.items():
138+
gh_repo_matcher = re.match(re_github_actions_repo, name)
139+
if gh_repo_matcher is not None:
140+
owner_repo = gh_repo_matcher.group(1)
141+
result.log(f"Checking GitHub action 'https://github.com/{owner_repo}' ...")
142+
valid_shas_by_tag: dict[str, set[str]] = {}
143+
requested_shas_by_tag: dict[str, set[str]] = {}
144+
has_wildcard = False
145+
for ref, details in action.items():
146+
if details and 'expires_at' in details:
147+
expires_at: date = details.get('expires_at')
148+
# TODO consider the 'keep=true' flag?
149+
if expires_at < date.today():
150+
# skip expired entries
151+
result.log(f" .. ref '{ref}' is expired, skipping")
152+
continue
153+
154+
if ref == '*':
155+
# "wildcard" SHA - what would we...
156+
result.log(f" .. detected wildcard ref")
157+
if len(requested_shas_by_tag) > 0:
158+
m = f"GitHub action 'https://github.com/{owner_repo}' references a wildcard SHA but also has specific SHAs"
159+
result.log(f" .. ❌ {m}")
160+
result.failure(m)
161+
has_wildcard = True
162+
continue
163+
elif re.match(re_git_sha, ref):
164+
result.log(f" .. detected entry with Git SHA '{ref}'")
165+
if has_wildcard:
166+
m = f"GitHub action 'https://github.com/{owner_repo}' references a wildcard SHA but also has specific SHAs"
167+
result.log(f" .. ⚡ {m}")
168+
result.warning(m)
169+
170+
if not details or not 'tag' in details:
171+
result.log(f" .. no Git tag")
172+
# https://docs.github.com/en/rest/git/commits?apiVersion=2022-11-28#get-a-commit-object
173+
response = _gh_get_commit_object(owner_repo, ref)
174+
match response.status:
175+
case 200:
176+
m = f"GitHub action 'https://github.com/{owner_repo}' references existing commit SHA '{ref}' but but does specify the tag name for it."
177+
result.log(f" .. ⚡ {m}")
178+
result.warning(m)
179+
case 404:
180+
m = f"GitHub action 'https://github.com/{owner_repo}' references non existing commit SHA '{ref}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}"
181+
result.log(f" .. ❌ {m}")
182+
result.failure(m)
183+
case _:
184+
m = f"Failed to fetch Git SHA '{ref}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}"
185+
result.log(f" .. ❌ {m}")
186+
result.failure(m)
187+
else:
188+
tag: str = details.get('tag')
189+
result.log(f" .. collecting Git SHAs for tag {tag}")
190+
191+
if not tag in requested_shas_by_tag:
192+
requested_shas_by_tag[tag] = set()
193+
requested_shas_by_tag[tag].add(ref)
194+
195+
if not tag in valid_shas_by_tag:
196+
valid_shas_by_tag[tag] = set()
197+
valid_shas_for_tag = valid_shas_by_tag[tag]
198+
199+
# https://docs.github.com/en/rest/git/refs?apiVersion=2022-11-28#list-matching-references
200+
response = _gh_matching_tags(owner_repo, tag)
201+
match response.status:
202+
case 200:
203+
response_json: CommentedSeq = ruyaml.YAML().load(response.body)
204+
for msg in response_json:
205+
tag_ref_map: CommentedMap = msg
206+
tag_object: CommentedMap = tag_ref_map["object"]
207+
tab_object_type: str = tag_object["type"]
208+
tag_object_sha: str = tag_object["sha"]
209+
result.log(f" .. GH yields {tab_object_type} SHA '{tag_object_sha}' for '{tag_ref_map['ref']}'")
210+
match tab_object_type:
211+
case "tag":
212+
valid_shas_for_tag.add(tag_object_sha)
213+
# https://docs.github.com/en/rest/git/tags?apiVersion=2022-11-28#get-a-tag
214+
response2 = _gh_get_tag(owner_repo, tag_object_sha)
215+
match response2.status:
216+
case 200:
217+
tag_object_sha = ruyaml.YAML().load(response2.body)["object"]["sha"]
218+
valid_shas_for_tag.add(tag_object_sha)
219+
result.log(f" .. GH returns commit SHA '{tag_object_sha}' for previous tag SHA")
220+
case 404:
221+
result.log(f" .. commit SHA '{tag_object_sha}' does not exist")
222+
case _:
223+
m = f"Failed to fetch details for Git tag '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response2.status}: {response2.reason}, API URL: {response2.req_url}\n{response2.body}"
224+
result.log(f" .. ❌ {m}")
225+
result.failure(m)
226+
case "commit":
227+
valid_shas_for_tag.add(tag_object_sha)
228+
case "branch":
229+
m = f"Branch references mentioned for Git tag '{tag}' for GitHub action 'https://github.com/{owner_repo}'"
230+
result.log(f" .. ❌ {m}")
231+
result.failure(m)
232+
case _:
233+
m = f"Invalid Git object type '{tag_object['type']}' for Git tag '{tag}' in GitHub repo 'https://github.com/{owner_repo}'"
234+
result.log(f" .. ❌ {m}")
235+
result.failure(m)
236+
case _:
237+
m = f"Failed to fetch matching Git tags for '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}"
238+
result.log(f" .. ❌ {m}")
239+
result.failure(m)
240+
else:
241+
m = f"GitHub action 'https://github.com/{owner_repo}' references an invalid Git SHA '{ref}'"
242+
result.log(f" .. ❌ {m}")
243+
result.failure(m)
244+
245+
for req_tag, req_shas in requested_shas_by_tag.items():
246+
result.log(f" .. checking tag '{req_tag}'")
247+
result.log(f" .. referenced SHAs: {req_shas}")
248+
valid_shas = valid_shas_by_tag.get(req_tag)
249+
result.log(f" .. verified SHAs: {valid_shas if len(valid_shas)>0 else '(none)'}")
250+
if not valid_shas:
251+
m = f"GitHub action 'https://github.com/{owner_repo}' references Git tag '{req_tag}' via SHAs '{req_shas}' but no SHAs for tag could be found - does the Git tag exist?"
252+
result.failure(m)
253+
result.log(f" ❌ {m}")
254+
elif req_shas.isdisjoint(valid_shas):
255+
m = f"GitHub action 'https://github.com/{owner_repo}' references Git tag '{req_tag}' via SHAs '{req_shas}' but none of those matches the valid SHAs '{valid_shas}'"
256+
result.failure(m)
257+
result.log(f" ❌ {m}")
258+
else:
259+
result.log(f" ✅ GitHub action 'https://github.com/{owner_repo}' definition for tag '{req_tag}' is good!")
260+
261+
elif re.match(re_github_actions_repo_wildcard, name):
262+
m =f"Ignoring '{name}' because it uses a GitHub repository wildcard ..."
263+
result.warning(m)
264+
result.log(f"⚡ {m}")
265+
266+
elif re.match(re_docker_image, name):
267+
m =f"Ignoring '{name}' because it references a Docker image ..."
268+
result.warning(m)
269+
result.log(f"⚡ {m}")
270+
271+
else:
272+
m = f"Cannot determine action kind for '{name}'"
273+
result.failure(m)
274+
result.log(f"❌ {m}")
275+
276+
if on_gha():
277+
if result.has_failures() or result.has_warnings():
278+
with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
279+
f.write(f"# GitHub Actions verification result\n")
280+
if len(result.failures) > 0:
281+
f.write(f"## Failures ({len(result.failures)})\n")
282+
f.write('```\n')
283+
for msg in result.failures:
284+
f.write(f"{msg}\n\n")
285+
f.write('```\n')
286+
if len(result.warnings) > 0:
287+
f.write(f"## Warnings ({len(result.warnings)})\n")
288+
f.write('```\n')
289+
for msg in result.warnings:
290+
f.write(f"{msg}\n\n")
291+
f.write('```\n')
292+
f.write(f"## Log\n")
293+
f.write('```\n')
294+
for msg in result.logs:
295+
f.write(f"{msg}\n")
296+
f.write('```\n')
297+
print("::endgroup::")
298+
299+
return result

gateway/gateway.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class RefDetails(TypedDict):
2424

2525
expires_at: date
2626
keep: NotRequired[bool]
27+
tag: NotRequired[str]
2728

2829

2930
ActionRefs = Dict[str, RefDetails]

gateway/test_action_tags.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from action_tags import *
2+
3+
4+
# TODO add some more test cases
5+
def test_patterns():
    """Check which action names the repository and Docker patterns accept."""
    # (pattern, candidate name, whether the pattern is expected to match)
    expectations = [
        (re_github_actions_repo, "foo/bar", True),
        (re_github_actions_repo, "foo/*", False),
        (re_github_actions_repo, "foo/bar/.github/actions/*", True),
        (re_github_actions_repo, "foo/bar/.github/actions/some.yml", True),
        (re_docker_image, "docker://foo/bar", True),
    ]
    for pattern, candidate, should_match in expectations:
        matched = re.match(pattern, candidate) is not None
        assert matched == should_match
11+
12+
13+
# TODO this is not a test, but a utility to run verify_actions manually.
14+
def test_verify_actions():
    """Run `verify_actions` against the `actions.yml` one directory above this file.

    Issues real GitHub API requests and therefore needs `GH_TOKEN` to be set.
    """
    # `.get` so that a missing variable reaches the explanatory exception below
    # instead of failing with a bare KeyError.
    gh_token = os.environ.get('GH_TOKEN')
    if not gh_token:
        raise Exception("GH_TOKEN environment variable should be set for this test as it issues GitHub API requests.")

    this_dir = os.path.dirname(os.path.realpath(__file__))
    actions_path = this_dir + "/../actions.yml"
    verify_actions(actions_path)

0 commit comments

Comments
 (0)