Create GHA extract shell scripts action (#1)

omus · web-flow · commit a9e3b70f830c · 2025-01-06T11:46:01.000-06:00
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,12 @@
+# https://editorconfig.org/
+
+# https://manpages.debian.org/testing/shfmt/shfmt.1.en.html#EXAMPLES
+[*.sh]
+indent_style = space
+indent_size = 4
+shell_variant      = bash  # --language-variant
+binary_next_line   = false
+switch_case_indent = true  # --case-indent
+space_redirects    = false
+keep_padding       = false
+function_next_line = false # --func-next-line
diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml
@@ -0,0 +1,50 @@
+---
+# End-to-end check: runs the action from this checkout against the repository's
+# own workflows and compares one extracted script with a known-good reference.
+name: Integration Tests
+on:
+  pull_request:
+    paths:
+      - "action.yaml"
+      - "gha_extract_shell_scripts.py"
+      # The action installs its dependencies from this file, so a dependency
+      # change must re-run the test as well.
+      - "requirements.txt"
+      - ".github/workflows/integration-tests.yaml"
+
+jobs:
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run action
+        id: self
+        uses: ./
+      # This step is the extraction fixture: its `run` script and `env` are
+      # what the "Test extracted" step expects to find in the output file.
+      - name: Target step
+        run: |
+          echo "${{ env.greeting }}, $name"
+        env:
+          greeting: Hello
+          name: Integration Tests
+      # Prints both scripts for easier debugging, then fails on any difference.
+      - name: Test extracted
+        run: |
+          if [[ -f "$output_file" ]]; then
+              echo "Output:"
+              cat -n "$output_file"
+              echo "Expected:"
+              cat -n <<<"$expected"
+          else
+              find "${output_dir:?}"
+              exit 1
+          fi
+          diff --color=always "${output_file:?}" <(echo "${expected:?}")
+        env:
+          output_dir: ${{ steps.self.outputs.output-dir }}
+          output_file: ${{ steps.self.outputs.output-dir }}/integration-tests.yaml/job=Test/step=Target_step.sh
+          expected: |-
+            #!/usr/bin/env bash
+            set -e
+            # shellcheck disable=SC2016,SC2034
+            greeting='Hello'
+            # shellcheck disable=SC2016,SC2034
+            name='Integration Tests'
+            # ---
+            echo ":env.greeting:, $name"
diff --git a/.github/workflows/shell.yaml b/.github/workflows/shell.yaml
@@ -0,0 +1,37 @@
+---
+# Lints and formats all shell code in the repository, including the scripts
+# embedded in workflow `run` steps (extracted to files by this action).
+name: Shell
+on:
+  pull_request:
+    paths:
+      - "**.sh"
+      - ".github/workflows/*"
+      - "action.yaml"
+      - "gha_extract_shell_scripts.py"
+
+jobs:
+  lint-format:
+    name: Lint & Format
+    # These permissions are needed to:
+    # - Checkout the Git repo (`contents: read`)
+    # - Post comments on PRs: https://github.com/luizm/action-sh-checker#secrets
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Extract workflow shell scripts
+        id: extract
+        uses: ./
+      - uses: luizm/action-sh-checker@c6edb3de93e904488b413636d96c6a56e3ad671a  # v0.8.0
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        with:
+          sh_checker_comment: true
+      # Support investigating linting/formatting errors
+      - uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: workflow-scripts
+          path: ${{ steps.extract.outputs.output-dir }}
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
@@ -0,0 +1,27 @@
+---
+# Runs the Python reference tests whenever Python sources, their dependencies
+# or this workflow change.
+name: Unit Tests
+on:
+  pull_request:
+    paths:
+      - "**/*.py"
+      # Dependencies are installed from this file below, so changes to it
+      # must be tested too.
+      - "requirements.txt"
+      - ".github/workflows/unit-tests.yaml"
+
+jobs:
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - name: Test with unittest
+        run: |
+          python test/test_reference.py
diff --git a/.github/workflows/yaml.yaml b/.github/workflows/yaml.yaml
@@ -0,0 +1,18 @@
+---
+# https://yamllint.readthedocs.io/en/stable/integration.html#integration-with-github-actions
+name: YAML
+on:
+  pull_request:
+    paths:
+      - "**/*.yaml"
+      - "**/*.yml"
+jobs:
+  lint:
+    name: Lint
+    runs-on: ubuntu-latest
+    # Only a checkout is performed, so read access to the repository contents
+    # is all this job needs (matches the other workflows in this repository).
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install yamllint
+        run: pip install yamllint
+      # `--format=github` emits the findings as workflow annotations.
+      - name: Lint YAML files
+        run: yamllint . --format=github
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/.yamllint.yaml b/.yamllint.yaml
@@ -0,0 +1,8 @@
+---
+rules:
+  indentation:
+    spaces: 2
+    indent-sequences: true
+  document-start:
+    present: true
+  new-line-at-end-of-file: enable
diff --git a/README.md b/README.md
@@ -1,2 +1,65 @@
-# inline-workflow-shell-scripts
-Extracts inline shell scripts within GitHub Action workflows
+# GHA Extract Shell Scripts
+
+Processes the GitHub Actions workflows contained within `.github/workflows` and extracts every step that contains an embedded shell script so that the scripts can be linted and formatted. Each such step is written out to its own file, which makes it easy to use existing tooling such as `shellcheck` and `shfmt`.
+
+## Example
+
+```yaml
+---
+name: Shell
+on:
+  pull_request:
+    paths:
+      - "**.sh"
+      - ".github/workflows/*"
+
+jobs:
+  lint-format:
+    name: Lint & Format
+    # These permissions are needed to:
+    # - Checkout the Git repo (`contents: read`)
+    # - Post comments on PRs: https://github.com/luizm/action-sh-checker#secrets
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Extract workflow shell scripts
+        id: extract
+        uses: beacon-biosignals/gha-extract-shell-scripts@v1
+      - uses: luizm/action-sh-checker@c6edb3de93e904488b413636d96c6a56e3ad671a  # v0.8.0
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        with:
+          sh_checker_comment: true
+      # Support investigating linting/formatting errors
+      - uses: actions/upload-artifact@v4
+        if: ${{ failure() }}
+        with:
+          name: workflow-scripts
+          path: ${{ steps.extract.outputs.output-dir }}
+```
+
+## Inputs
+
+The `gha-extract-shell-scripts` action supports the following inputs:
+
+| Name                 | Description | Required | Example |
+|:---------------------|:------------|:---------|:--------|
+| `output-dir`         | Allows the user to specify the name of the directory containing the extracted workflow shell script steps. Defaults to `workflow_scripts`. | No | `workflow_scripts` |
+| `shellcheck-disable` | Ignore all the specified errors within the extracted shell scripts. | No | `SC2016,SC2050` |
+
+## Outputs
+
+| Name         | Description | Example |
+|:-------------|:------------|:--------|
+| `output-dir` | The name of the directory containing the various extracted workflow shell script steps. | `workflow_scripts` |
+
+## Permissions
+
+The following [job permissions](https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs) are required to run this action:
+
+```yaml
+permissions: {}
+```
diff --git a/action.yaml b/action.yaml
@@ -0,0 +1,32 @@
+---
+name: GHA Extract Shell Scripts
+description: >-
+  Extracts the shell scripts embedded in GitHub Actions workflow steps into
+  files so that tools such as ShellCheck and shfmt can process them.
+inputs:
+  output-dir:
+    description: >-
+      Name of the directory containing the extracted workflow shell script
+      steps.
+    default: "workflow_scripts"
+  shellcheck-disable:
+    description: >-
+      Comma-separated ShellCheck error codes to ignore within the extracted
+      shell scripts (e.g. `SC2016,SC2050`).
+    default: ""
+outputs:
+  output-dir:
+    description: >-
+      Name of the directory containing the extracted workflow shell script
+      steps.
+    value: ${{ inputs.output-dir }}
+runs:
+  using: composite
+  steps:
+    # Every `run` step starts a fresh shell, so activating the virtual
+    # environment in this step would not carry over to the next one. The venv
+    # path is published as a step output instead and its interpreter is
+    # invoked directly.
+    - name: Install dependencies
+      id: venv
+      shell: bash
+      run: |
+        # Create the venv outside of the workspace so that tools scanning the
+        # checked out repository never pick it up.
+        venv="$(mktemp -d "${RUNNER_TEMP:?}/venv.XXXXXX")"
+        python -m venv "$venv"
+        "$venv/bin/python" -m pip install -r "${GITHUB_ACTION_PATH}/requirements.txt"
+        echo "path=$venv" >>"$GITHUB_OUTPUT"
+    - name: Extract shell scripts
+      shell: bash
+      run: |
+        args=()
+        if [[ -n "$disable" ]]; then
+            args+=(--disable "$disable")
+        fi
+        args+=("$input_dir" "$output_dir")
+        "${venv:?}/bin/python" "${GITHUB_ACTION_PATH}/gha_extract_shell_scripts.py" "${args[@]}"
+      env:
+        venv: ${{ steps.venv.outputs.path }}
+        disable: ${{ inputs.shellcheck-disable }}
+        input_dir: .github/workflows
+        output_dir: ${{ inputs.output-dir }}
diff --git a/gha_extract_shell_scripts.py b/gha_extract_shell_scripts.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+
+# Reads shell scripts from `run` steps in GitHub Actions workflows and outputs
+# them as files so that tools like `shfmt` or ShellCheck can operate on them.
+#
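+# Arguments:
+# - Path to the input directory containing the workflow files (`*.yaml`/`*.yml`).
+# - Path to the output directory where shell scripts will be written.
+# - `--disable` (optional): comma-separated ShellCheck codes to disable in the
+#   extracted scripts.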
+
+import os
+import re
+import sys
+
+import argparse
+from pathlib import Path
+
+import yaml
+
+
+def list_str(values):
+    """Split a comma-separated string into the list of its items.
+
+    Used as the argparse `type` of `--disable`, e.g. "SC2016,SC2050" becomes
+    ["SC2016", "SC2050"].
+    """
+    separator = ","
+    items = values.split(separator)
+    return items
+
+
+def sanitize(path):
+    """Return `path` with characters unsuitable for file names replaced.
+
+    Needed to satisfy both GHA artifacts and shellcheck: a space becomes "_",
+    a slash becomes "-", and the characters `"`, `(`, `)`, `&` and `$` are
+    dropped.
+    """
+    # Three-argument form: map " " -> "_" and "/" -> "-", delete the rest.
+    table = str.maketrans(" /", "_-", '"()&$')
+    return path.translate(table)
+
+
+def sanitize_gha_expression(string):
+    """Rewrite GHA placeholders so that they are inert in a shell script.
+
+    Every `${{ <expression> }}` is replaced by `:<expression>:` with the
+    whitespace around the expression removed, e.g. `${{ matrix.version }}`
+    becomes `:matrix.version:`.
+    """
+    placeholder = re.compile(r"\${{\s*(.*?)\s*}}")
+    return placeholder.sub(r":\1:", string)
+
+
+def process_workflow_file(workflow_path: Path, output_dir: Path, ignored_errors=None):
+    """Extract every shell `run` step of one workflow file into its own script.
+
+    Each script is written to
+    `<output_dir>/<workflow file name>/job=<job name>/step=<step name>.sh`
+    and consists of a shebang, an optional file-wide ShellCheck `disable`
+    directive, the `set` options GitHub applies for the shell, the merged
+    workflow/job/step `env` entries as variable assignments and finally the
+    step's script with `${{ ... }}` placeholders rewritten.
+
+    Args:
+        workflow_path: Path of the workflow YAML file to read.
+        output_dir: Directory below which the scripts are created.
+        ignored_errors: Optional list of ShellCheck codes to disable in every
+            script; `None` or an empty list disables nothing.
+
+    Steps without a `run` script, or with a shell other than `bash`/`sh`, are
+    skipped. Empty workflow files and null `defaults`/`env`/`jobs`/`steps`
+    values are treated as if the key were absent.
+    """
+    with workflow_path.open() as f:
+        # An empty document loads as `None`; treat it as a workflow without jobs.
+        workflow = yaml.safe_load(f) or {}
+    workflow_file = workflow_path.name
+    # GHA allows workflow names to be defined as empty (e.g. `name:`)
+    workflow_name = sanitize(workflow.get("name") or workflow_path.stem)
+    # A key written without a value (e.g. `env:`) loads as `None`, hence the
+    # `or {}` fallbacks here and below.
+    workflow_defaults = (workflow.get("defaults") or {}).get("run") or {}
+    workflow_default_shell = workflow_defaults.get("shell")
+    workflow_env = workflow.get("env") or {}
+    count = 0
+    print(f"Processing {workflow_path} ({workflow_name})")
+    for job_key, job in (workflow.get("jobs") or {}).items():
+        # GHA allows job names to be defined as empty (e.g. `name:`)
+        job_name = sanitize(job.get("name") or job_key)
+        job_defaults = (job.get("defaults") or {}).get("run") or {}
+        job_default_shell = job_defaults.get("shell", workflow_default_shell)
+        # Job-level entries override workflow-level entries of the same name.
+        job_env = workflow_env | (job.get("env") or {})
+        for i, step in enumerate(job.get("steps") or []):
+            run = step.get("run")
+            if not run:
+                continue
+            run = sanitize_gha_expression(run)
+            shell = step.get("shell", job_default_shell)
+            if shell and shell not in ["bash", "sh"]:
+                print(f"Skipping command with unknown shell '{shell}'")
+                continue
+            # Step-level entries override job-level entries of the same name.
+            env = job_env | (step.get("env") or {})
+            # GHA allows step names to be defined as empty (e.g. `name:`)
+            step_name = sanitize(step.get("name") or str(i + 1))
+            script_path = (
+                output_dir / workflow_file / f"job={job_name}" / f"step={step_name}.sh"
+            )
+            script_path.parent.mkdir(parents=True, exist_ok=True)
+            with script_path.open("w") as f:
+                # Default shell is bash.
+                f.write(f"#!/usr/bin/env {shell or 'bash'}\n")
+                # Ignore failure with GitHub expression variables such as:
+                # - SC2050: `[[ "${{ github.ref }}" == "refs/heads/main" ]]`
+                if ignored_errors:
+                    f.write(f"# shellcheck disable={','.join(ignored_errors)}\n")
+                    # Add a no-op command to ensure that additional shellcheck
+                    # disable directives aren't applied globally
+                    # https://github.com/koalaman/shellcheck/issues/657#issuecomment-213038218
+                    f.write("true\n")
+                # Whether or not it was explicitly set determines the arguments.
+                # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell
+                if not shell or shell == "sh":
+                    f.write("set -e\n")
+                elif shell == "bash":
+                    f.write("set -eo pipefail\n")
+                for k, v in env.items():
+                    f.write("# shellcheck disable=SC2016,SC2034\n")
+                    # Single-quote the value, escaping embedded single quotes.
+                    v = sanitize_gha_expression(str(v)).replace("'", "'\\''")
+                    f.write(f"{k}='{v}'\n")
+                f.write("# ---\n")
+                f.write(run)
+                if not run.endswith("\n"):
+                    f.write("\n")
+            count += 1
+    print(f"Produced {count} files")
+
+
+if __name__ == "__main__":
+    # Command line: <input_dir> <output_dir> [--disable CODE[,CODE...]]
+    cli = argparse.ArgumentParser()
+    for positional in ("input_dir", "output_dir"):
+        cli.add_argument(positional, type=Path)
+    cli.add_argument("--disable", type=list_str)
+    args = cli.parse_args()
+
+    print(f"Outputting scripts to {args.output_dir}")
+    args.output_dir.mkdir(parents=True, exist_ok=True)
+    # Only the direct children of the input directory are considered.
+    workflow_suffixes = (".yaml", ".yml")
+    for file in os.listdir(args.input_dir):
+        if not file.endswith(workflow_suffixes):
+            continue
+        process_workflow_file(args.input_dir / file, args.output_dir, args.disable)
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+PyYAML==6.0.2
diff --git a/test/github-actions-demo.yaml b/test/github-actions-demo.yaml
diff --git a/test/nested-env.yaml b/test/nested-env.yaml
diff --git a/test/test_reference.py b/test/test_reference.py