|
| 1 | +# /// script |
| 2 | +# requires-python = ">=3.13" |
| 3 | +# dependencies = [ |
| 4 | +# "ruyaml", |
| 5 | +# ] |
| 6 | +# /// |
| 7 | + |
| 8 | +import os |
| 9 | +import re |
| 10 | +from urllib.error import HTTPError |
| 11 | + |
| 12 | +import ruyaml |
| 13 | + |
| 14 | +from datetime import date |
| 15 | +from urllib.request import Request, urlopen |
| 16 | +from pathlib import Path |
| 17 | +from ruyaml import CommentedMap, CommentedSeq |
| 18 | +from gateway import ActionsYAML, load_yaml, on_gha |
| 19 | + |
# Action name of the form 'OWNER/*': every repository of an owner ("wildcard" repository).
# Inside the character class the '-' that follows the '0-9' range is a literal dash.
re_github_actions_repo_wildcard = r"^[A-Za-z0-9-_.]+/[*]$"
# Action name that starts with 'OWNER/REPO' (capture group 1), optionally followed by a
# sub-path inside the repository (capture group 2).
re_github_actions_repo = r"^([A-Za-z0-9-_.]+/[A-Za-z0-9-_.]+)(/.+)?$"
# Something like 'pytooling/actions/with-post-step' or 'readthedocs/actions/preview'.
# Action name that references a Docker image ('docker://...').
re_docker_image = r"^docker://.+"
# A lowercase hexadecimal Git SHA, abbreviated or full (at least 7 characters).
re_git_sha = r"^[a-f0-9]{7,}$"
| 25 | + |
class ActionTagsCheckResult(object):
    """Collects log lines, failures and warnings produced by a verification run."""

    def __init__(self, log_to_console: bool = True):
        # When set, every logged message is also printed immediately.
        self.log_to_console = log_to_console
        self.logs = []
        self.failures = []
        self.warnings = []

    def log(self, message: str) -> None:
        """Remember ``message`` in the log and optionally echo it to the console."""
        if self.log_to_console:
            print(message)
        self.logs.append(message)

    def failure(self, message: str) -> None:
        """Record a failure message."""
        self.failures.append(message)

    def warning(self, message: str) -> None:
        """Record a warning message."""
        self.warnings.append(message)

    def has_failures(self) -> bool:
        """Return True if at least one failure was recorded."""
        return bool(self.failures)

    def has_warnings(self) -> bool:
        """Return True if at least one warning was recorded."""
        return bool(self.warnings)

    def __str__(self):
        # One line per entry: all failures first, then all warnings.
        lines = []
        for text in self.failures:
            lines.append(f"FAILURE: {text}\n")
        for text in self.warnings:
            lines.append(f"WARNING: {text}\n")
        return ''.join(lines)
| 54 | + |
| 55 | + |
class ApiResponse(object):
    """Plain holder for the outcome of one GitHub API request.

    Attributes:
        req_url: The URL that was requested.
        status: HTTP status code of the response.
        reason: HTTP reason phrase of the response.
        headers: Response headers.
        body: Response body as text.
    """

    def __init__(self, req_url: str, status: int, reason: str, headers: dict[str, str], body: str):
        self.req_url, self.status, self.reason = req_url, status, reason
        self.headers, self.body = headers, body
| 63 | + |
| 64 | + |
def _gh_api_get(url_abspath: str) -> ApiResponse:
    """Issue a GET request against the GitHub REST API.

    Args:
        url_abspath: Absolute path (starting with '/') appended to 'https://api.github.com'.

    Returns:
        An ApiResponse with status, reason, headers and decoded body. HTTP error
        responses (4xx/5xx) are returned as an ApiResponse as well, not raised.

    Raises:
        Exception: Any non-HTTP failure (for example a connection error) is re-raised
            after a short message has been printed.
    """
    headers: dict[str, str] = {
        'Accept': 'application/vnd.github.v3+json',
    }
    # Use GH_TOKEN, if available.
    # Unauthorized GH API requests are quite rate-limited.
    # Tip: add an extra space before 'export' to prevent adding the line to the shell history.
    #   export GH_TOKEN=$(gh auth token)
    # `.get` keeps the token optional: an unset variable must not raise KeyError.
    gh_token = os.environ.get('GH_TOKEN')
    if gh_token:
        headers['Authorization'] = f"Bearer {gh_token}"
    req_url = f"https://api.github.com{url_abspath}"
    request = Request(url=req_url, headers=headers)
    try:
        with urlopen(request) as response:
            return ApiResponse(req_url, response.status, response.reason, dict(response.headers), response.read().decode('utf-8'))
    except HTTPError as e:
        # HTTPError doubles as the response object; close it once the body has been read.
        with e:
            return ApiResponse(req_url, e.code, e.reason, dict(e.headers), e.read().decode('utf-8'))
    except Exception:
        print(f"Failed to fetch '{req_url}' from GitHub API")
        raise
| 86 | + |
def _gh_get_commit_object(owner_repo: str, sha: str) -> ApiResponse:
    """Fetch the Git commit object ``sha`` of repository ``owner_repo`` from the GH API."""
    api_path = f"/repos/{owner_repo}/git/commits/{sha}"
    return _gh_api_get(api_path)
| 89 | + |
def _gh_get_tag(owner_repo: str, tag_sha: str) -> ApiResponse:
    """Fetch the Git tag object ``tag_sha`` of repository ``owner_repo`` from the GH API."""
    api_path = f"/repos/{owner_repo}/git/tags/{tag_sha}"
    return _gh_api_get(api_path)
| 92 | + |
def _gh_matching_tags(owner_repo: str, tag: str) -> ApiResponse:
    """Query the GH API "matching-refs" endpoint for tag refs of ``owner_repo`` matching ``tag``."""
    api_path = f"/repos/{owner_repo}/git/matching-refs/tags/{tag}"
    return _gh_api_get(api_path)
| 95 | + |
| 96 | +def verify_actions(actions: Path | ActionsYAML | str, log_to_console: bool = True, today: date = date.today()) -> ActionTagsCheckResult: |
| 97 | + """ |
| 98 | + Validates the contents of the actions file against GitHub. |
| 99 | +
|
| 100 | + The function verifies that the SHAs specified in `actions.yml` exist in the GH repo. |
| 101 | + Also ensures that the SHA exists on the Git tag, if the `tag` attribute is specified. |
| 102 | +
|
| 103 | + The algorithm roughly works like this, for each action specified in `actions.yml`: |
| 104 | + * Issue a warning and stop, if the name is like `OWNER/*` ("wildcard" repository). |
| 105 | + Can't verify Git SHAs in this case. |
| 106 | + * Issue a warning and stop, if the name is like `docker:*` (not implemented) |
| 107 | + * Issue an error and stop, if the name doesn't start with an `OWNER/REPO` pattern. |
| 108 | + * Each expired entry is just skipped |
| 109 | + * If there is a wildcard reference and a SHA reference, issue an error. |
| 110 | +
|
| 111 | + Then, for each reference for an action: |
| 112 | + * If no `tag` is specified, let GH resolve the commit SHA. |
| 113 | + Emit a warning to add the value of the `tag` attribute, if the SHA can be resolved. |
| 114 | + Otherwise, emit an error. |
| 115 | + * If `tag` is specified: |
| 116 | + * Add the SHA to the set of requested-shas-by-tag |
| 117 | + * Call GH's "matching-refs" endpoint for the 'tag' value |
| 118 | + * Emit en error, if the object type is not a tag or commit. |
| 119 | + * Also resolve 'tag' object types to 'commit' object types. |
| 120 | + * Add each returned SHA to the set of valid-shas-by-tag. |
| 121 | + * For each "requested tag" verify that the sets of valid and requested shas intersect. If not, emit an error. |
| 122 | +
|
| 123 | + Args: |
| 124 | + actions: Path to the actions list file (mandatory) |
| 125 | + log_to_console: Whether to log messages immediately to the console (default: True) |
| 126 | + today: The current date (default: today) |
| 127 | + """ |
| 128 | + if on_gha(): |
| 129 | + print(f"::group::Verfiy GitHub Actions") |
| 130 | + gh_token = os.environ['GH_TOKEN'] |
| 131 | + if not gh_token or len(gh_token) == 0: |
| 132 | + raise Exception("GH_TOKEN environment variable is not set or empty") |
| 133 | + |
| 134 | + if isinstance(actions, Path) or isinstance(actions, str): |
| 135 | + actions = load_yaml(actions) |
| 136 | + actions_yaml: ActionsYAML = actions |
| 137 | + |
| 138 | + result = ActionTagsCheckResult(log_to_console=log_to_console or on_gha()) |
| 139 | + |
| 140 | + for name, action in actions_yaml.items(): |
| 141 | + gh_repo_matcher = re.match(re_github_actions_repo, name) |
| 142 | + if gh_repo_matcher is not None: |
| 143 | + owner_repo = gh_repo_matcher.group(1) |
| 144 | + result.log(f"Checking GitHub action {name} in GH repo 'https://github.com/{owner_repo}'...") |
| 145 | + valid_shas_by_tag: dict[str, set[str]] = {} |
| 146 | + requested_shas_by_tag: dict[str, set[str]] = {} |
| 147 | + has_wildcard = False |
| 148 | + has_wildcard_msg_emitted = False |
| 149 | + has_ignored_api_errors = False |
| 150 | + for ref, details in action.items(): |
| 151 | + if details and 'expires_at' in details: |
| 152 | + expires_at: date = details.get('expires_at') |
| 153 | + if expires_at < today: |
| 154 | + # skip expired entries |
| 155 | + result.log(f" .. ref '{ref}' is expired, skipping") |
| 156 | + continue |
| 157 | + |
| 158 | + # noinspection PyTypedDict |
| 159 | + ignore_gh_api_errors = details and 'ignore_gh_api_errors' in details and details['ignore_gh_api_errors'] == True |
| 160 | + |
| 161 | + if ref == '*': |
| 162 | + # "wildcard" SHA - what would we... |
| 163 | + result.log(f" .. detected wildcard ref") |
| 164 | + if len(requested_shas_by_tag) > 0 and not has_wildcard_msg_emitted: |
| 165 | + m = f"GitHub action {name} references a wildcard SHA but also has specific SHAs" |
| 166 | + result.log(f" .. ⚡ {m}") |
| 167 | + result.warning(m) |
| 168 | + has_wildcard_msg_emitted = True |
| 169 | + has_wildcard = True |
| 170 | + continue |
| 171 | + elif re.match(re_git_sha, ref): |
| 172 | + result.log(f" .. detected entry with Git SHA '{ref}'") |
| 173 | + if has_wildcard and not has_wildcard_msg_emitted: |
| 174 | + m = f"GitHub action {name} references a wildcard SHA but also has specific SHAs" |
| 175 | + result.log(f" .. ⚡ {m}") |
| 176 | + result.warning(m) |
| 177 | + has_wildcard_msg_emitted = True |
| 178 | + |
| 179 | + if not details or not 'tag' in details: |
| 180 | + result.log(f" .. no Git tag") |
| 181 | + # https://docs.github.com/en/rest/git/commits?apiVersion=2022-11-28#get-a-commit-object |
| 182 | + response = _gh_get_commit_object(owner_repo, ref) |
| 183 | + match response.status: |
| 184 | + case 200: |
| 185 | + m = f"GitHub action {name} references existing commit SHA '{ref}' but does specify the tag name for it." |
| 186 | + result.log(f" .. ⚡ {m}") |
| 187 | + result.warning(m) |
| 188 | + case 404: |
| 189 | + m = f"GitHub action {name} references non existing commit SHA '{ref}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}" |
| 190 | + result.log(f" .. ❌ {m}") |
| 191 | + result.failure(m) |
| 192 | + case _: |
| 193 | + m = f"Failed to fetch Git SHA '{ref}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}" |
| 194 | + if ignore_gh_api_errors: |
| 195 | + has_ignored_api_errors = True |
| 196 | + result.log(f" .. ⚡ {m}") |
| 197 | + result.warning(m) |
| 198 | + else: |
| 199 | + result.log(f" .. ❌ {m}") |
| 200 | + result.failure(m) |
| 201 | + else: |
| 202 | + tag: str = details.get('tag') |
| 203 | + result.log(f" .. collecting Git SHAs for tag {tag}") |
| 204 | + |
| 205 | + if not tag in requested_shas_by_tag: |
| 206 | + requested_shas_by_tag[tag] = set() |
| 207 | + requested_shas_by_tag[tag].add(ref) |
| 208 | + |
| 209 | + if not tag in valid_shas_by_tag: |
| 210 | + valid_shas_by_tag[tag] = set() |
| 211 | + valid_shas_for_tag = valid_shas_by_tag[tag] |
| 212 | + |
| 213 | + # https://docs.github.com/en/rest/git/refs?apiVersion=2022-11-28#list-matching-references |
| 214 | + response = _gh_matching_tags(owner_repo, tag) |
| 215 | + match response.status: |
| 216 | + case 200: |
| 217 | + response_json: CommentedSeq = ruyaml.YAML().load(response.body) |
| 218 | + for msg in response_json: |
| 219 | + tag_ref_map: CommentedMap = msg |
| 220 | + tag_object: CommentedMap = tag_ref_map["object"] |
| 221 | + tab_object_type: str = tag_object["type"] |
| 222 | + tag_object_sha: str = tag_object["sha"] |
| 223 | + result.log(f" .. GH yields {tab_object_type} SHA '{tag_object_sha}' for '{tag_ref_map['ref']}'") |
| 224 | + match tab_object_type: |
| 225 | + case "tag": |
| 226 | + valid_shas_for_tag.add(tag_object_sha) |
| 227 | + # https://docs.github.com/en/rest/git/tags?apiVersion=2022-11-28#get-a-tag |
| 228 | + response2 = _gh_get_tag(owner_repo, tag_object_sha) |
| 229 | + match response2.status: |
| 230 | + case 200: |
| 231 | + tag_object_sha = ruyaml.YAML().load(response2.body)["object"]["sha"] |
| 232 | + valid_shas_for_tag.add(tag_object_sha) |
| 233 | + result.log(f" .. GH returns commit SHA '{tag_object_sha}' for previous tag SHA") |
| 234 | + case 404: |
| 235 | + result.log(f" .. commit SHA '{tag_object_sha}' does not exist") |
| 236 | + case _: |
| 237 | + m = f"Failed to fetch details for Git tag '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response2.status}: {response2.reason}, API URL: {response2.req_url}\n{response2.body}" |
| 238 | + if ignore_gh_api_errors: |
| 239 | + has_ignored_api_errors = True |
| 240 | + result.log(f" .. ⚡ {m}") |
| 241 | + result.warning(m) |
| 242 | + else: |
| 243 | + result.log(f" .. ❌ {m}") |
| 244 | + result.failure(m) |
| 245 | + case "commit": |
| 246 | + valid_shas_for_tag.add(tag_object_sha) |
| 247 | + case "branch": |
| 248 | + m = f"Branch references mentioned for Git tag '{tag}' for GitHub action {name}" |
| 249 | + result.log(f" .. ❌ {m}") |
| 250 | + result.failure(m) |
| 251 | + case _: |
| 252 | + m = f"Invalid Git object type '{tag_object['type']}' for Git tag '{tag}' in GitHub repo 'https://github.com/{owner_repo}'" |
| 253 | + result.log(f" .. ❌ {m}") |
| 254 | + result.failure(m) |
| 255 | + case _: |
| 256 | + m = f"Failed to fetch matching Git tags for '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}" |
| 257 | + if ignore_gh_api_errors: |
| 258 | + result.log(f" .. ⚡ {m}") |
| 259 | + result.warning(m) |
| 260 | + has_ignored_api_errors = True |
| 261 | + else: |
| 262 | + result.log(f" .. ❌ {m}") |
| 263 | + result.failure(m) |
| 264 | + else: |
| 265 | + m = f"GitHub action {name} references an invalid Git SHA '{ref}'" |
| 266 | + result.log(f" .. ❌ {m}") |
| 267 | + result.failure(m) |
| 268 | + |
| 269 | + for req_tag, req_shas in requested_shas_by_tag.items(): |
| 270 | + result.log(f" .. checking tag '{req_tag}'") |
| 271 | + result.log(f" .. referenced SHAs: {req_shas}") |
| 272 | + valid_shas = valid_shas_by_tag.get(req_tag) |
| 273 | + result.log(f" .. verified SHAs: {valid_shas if len(valid_shas)>0 else '(none)'}") |
| 274 | + if not valid_shas: |
| 275 | + m = f"GitHub action {name} references Git tag '{req_tag}' via SHAs '{req_shas}' but no SHAs for tag could be found - does the Git tag exist?" |
| 276 | + if has_ignored_api_errors: |
| 277 | + result.warning(m) |
| 278 | + result.log(f" ⚡ {m}") |
| 279 | + else: |
| 280 | + result.failure(m) |
| 281 | + result.log(f" ❌ {m}") |
| 282 | + elif req_shas.isdisjoint(valid_shas): |
| 283 | + m = f"GitHub action {name} references Git tag '{req_tag}' via SHAs '{req_shas}' but none of those matches the valid SHAs '{valid_shas}'" |
| 284 | + result.failure(m) |
| 285 | + result.log(f" ❌ {m}") |
| 286 | + else: |
| 287 | + result.log(f" ✅ GitHub action {name} definition for tag '{req_tag}' is good!") |
| 288 | + |
| 289 | + elif re.match(re_github_actions_repo_wildcard, name): |
| 290 | + m =f"Ignoring '{name}' because it uses a GitHub repository wildcard ..." |
| 291 | + result.warning(m) |
| 292 | + result.log(f"⚡ {m}") |
| 293 | + |
| 294 | + elif re.match(re_docker_image, name): |
| 295 | + m =f"Ignoring '{name}' because it references a Docker image ..." |
| 296 | + result.warning(m) |
| 297 | + result.log(f"⚡ {m}") |
| 298 | + |
| 299 | + else: |
| 300 | + m = f"Cannot determine action kind for '{name}'" |
| 301 | + result.failure(m) |
| 302 | + result.log(f"❌ {m}") |
| 303 | + |
| 304 | + if on_gha(): |
| 305 | + if result.has_failures() or result.has_warnings(): |
| 306 | + with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f: |
| 307 | + f.write(f"# GitHub Actions verification result\n") |
| 308 | + if len(result.failures) > 0: |
| 309 | + f.write(f"## Failures ({len(result.failures)})\n") |
| 310 | + f.write('```\n') |
| 311 | + for msg in result.failures: |
| 312 | + f.write(f"{msg}\n\n") |
| 313 | + f.write('```\n') |
| 314 | + if len(result.warnings) > 0: |
| 315 | + f.write(f"## Warnings ({len(result.warnings)})\n") |
| 316 | + f.write('```\n') |
| 317 | + for msg in result.warnings: |
| 318 | + f.write(f"{msg}\n\n") |
| 319 | + f.write('```\n') |
| 320 | + f.write(f"## Log\n") |
| 321 | + f.write('```\n') |
| 322 | + for msg in result.logs: |
| 323 | + f.write(f"{msg}\n") |
| 324 | + f.write('```\n') |
| 325 | + print("::endgroup::") |
| 326 | + |
| 327 | + return result |
0 commit comments