Add hallucination detection controls for PR review pipeline #58
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pipeline Tests
#
# Runs on pushes to main and on pull requests targeting main, but only when
# one of the listed paths changes: pipeline scripts, Kosli policies, the two
# pipeline unit-test files, or any workflow definition.
name: Pipeline Tests

on:
  push:
    branches: [main]
    paths:
      - 'scripts/**'
      - 'kosli/policies/**'
      - 'tests/unit/test_ci_gate.py'
      - 'tests/unit/test_verify_integrity.py'
      - '.github/workflows/**'
  pull_request:
    branches: [main]
    paths:
      - 'scripts/**'
      - 'kosli/policies/**'
      - 'tests/unit/test_ci_gate.py'
      - 'tests/unit/test_verify_integrity.py'
      - '.github/workflows/**'

# Least privilege: the jobs in this workflow only check out the repository
# and run local tooling, so the token needs read access to contents and
# nothing else.
permissions:
  contents: read

env:
  # Opt JavaScript-based actions into the Node 24 runtime (the lint job in
  # this workflow enforces matching minimum action versions).
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true'
jobs:
  # ---------------------------------------------------------------------------
  # Python unit tests for the pipeline scripts.
  # ---------------------------------------------------------------------------
  python-tests:
    name: Python Pipeline Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: '3.12'
      - name: Install dependencies
        run: |
          pip install pytest
          # Install project dependencies needed by pipeline_steps
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Run pipeline unit tests
        env:
          # Make repository-root packages importable from the tests.
          PYTHONPATH: ${{ github.workspace }}
        run: |
          pytest tests/unit/test_ci_gate.py -v --tb=short
          # Skip ONLY when the suite does not exist yet. Once the file is
          # present, a failing test must fail the job and its output must be
          # visible (no `|| echo`, no stderr redirection).
          if [ -f tests/unit/test_verify_integrity.py ]; then
            pytest tests/unit/test_verify_integrity.py -v --tb=short
          else
            echo "Shell script tests not yet implemented"
          fi

  # ---------------------------------------------------------------------------
  # OPA unit tests: every kosli/policies/<name>_test.rego is run against its
  # sibling <name>.rego.
  # ---------------------------------------------------------------------------
  rego-tests:
    name: OPA Rego Policy Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - name: Install OPA
        run: |
          # -f makes curl fail on an HTTP error instead of saving the error
          # page as the "opa" binary; -sS keeps logs quiet but shows errors.
          curl -fsSL -o opa https://openpolicyagent.org/downloads/latest/opa_linux_amd64_static
          chmod +x opa
          sudo mv opa /usr/local/bin/
      - name: Run Rego policy tests
        run: |
          cd kosli/policies
          FAILED=0
          for test_file in *_test.rego; do
            policy="${test_file%_test.rego}.rego"
            # Test files without a matching policy file are skipped.
            if [ -f "$policy" ]; then
              echo "=== Testing: $(basename "$policy") ==="
              # Keep going after a failure so every policy is reported.
              if ! opa test "$policy" "$test_file" -v; then
                FAILED=1
              fi
              echo
            fi
          done
          if [ "$FAILED" -eq 1 ]; then
            echo "❌ Some Rego tests failed"
            exit 1
          fi
          echo "✅ All Rego tests passed"

  # ---------------------------------------------------------------------------
  # Black-box tests of scripts/ci/verify_artifact_integrity.sh against a mock
  # `kosli` CLI placed first on PATH.
  # ---------------------------------------------------------------------------
  shell-tests:
    name: Shell Script Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - name: Test verify_artifact_integrity.sh with mock inputs
        run: |
          # Create mock kosli binary for testing source fingerprint comparison.
          # The script now compares source directory fingerprints (not commit SHAs).
          MOCK_BIN=$(mktemp -d)

          # Mock kosli — handles "get trail" and "get attestation" commands
          # Uses valid flow name patterns that pass the script's regex validation.
          # Mock dispatches on trail name to simulate different scenarios:
          # GH99-* → normal flow (returns reviewed-code artifact)
          # GH88-* → no-artifact flow (trail exists, no reviewed-code artifact)
          # GH77-* → missing flow (kosli get trail fails)
          cat > "$MOCK_BIN/kosli" << 'MOCK_EOF'
          #!/usr/bin/env bash
          # Mock kosli CLI for testing verify_artifact_integrity.sh
          #
          # Handles:
          # kosli get trail <name> --flow <flow> --output json
          # kosli get attestation <name> --flow <flow> --trail <trail> --output json
          if [[ "$*" == *"get trail"* ]]; then
            # Dispatch on trail name to simulate different scenarios
            if [[ "$*" == *"GH77-"* ]]; then
              echo '{}' ; exit 1
            fi
            if [[ "$*" == *"GH88-"* ]]; then
              # Trail exists but no reviewed-code artifact
              echo '{"artifacts": []}' ; exit 0
            fi
            cat << 'JSON'
          {
            "artifacts": [
              {
                "name": "reviewed-code",
                "fingerprint": "abc123def456abc123def456abc123def456abc123def456abc123def456abcdef"
              }
            ]
          }
          JSON
            exit 0
          fi
          if [[ "$*" == *"get attestation"* ]]; then
            # Fallback: return source_fingerprint from user_data for GH88 (no-artifact) scenario
            if [[ "$*" == *"GH88-"* ]]; then
              cat << 'JSON'
          {
            "user_data": {
              "source_fingerprint": "fallback_fp_abc123def456abc123def456abc123def456abc123def456abcdef"
            }
          }
          JSON
              exit 0
            fi
            echo '{}' ; exit 1
          fi
          echo "mock kosli: unhandled command: $*" >&2
          exit 1
          MOCK_EOF
          chmod +x "$MOCK_BIN/kosli"
          export PATH="$MOCK_BIN:$PATH"

          # All test flow names must match the script's validation regex:
          # ^agentic-sdlc-demo-GH[0-9]+-CodeReview$
          FLOW_99="agentic-sdlc-demo-GH99-CodeReview"
          FLOW_88="agentic-sdlc-demo-GH88-CodeReview"
          FLOW_77="agentic-sdlc-demo-GH77-CodeReview"
          # Must equal the fingerprints hard-coded in the mock above.
          MATCHING_FP="abc123def456abc123def456abc123def456abc123def456abc123def456abcdef"
          DIFFERENT_FP="ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
          FALLBACK_FP="fallback_fp_abc123def456abc123def456abc123def456abc123def456abcdef"

          echo "=== Test 1: Matching source fingerprints ==="
          RESULT=$(scripts/ci/verify_artifact_integrity.sh \
            --review-flow "$FLOW_99" \
            --trail-names "GH99-Loop1 GH99-Final" \
            --build-fingerprint "$MATCHING_FP")
          echo "$RESULT" | jq .
          MATCH=$(echo "$RESULT" | jq -r '.match')
          [ "$MATCH" = "true" ] || { echo "FAIL: expected match=true"; exit 1; }
          echo "✅ PASS"
          echo ""

          echo "=== Test 2: Mismatched source fingerprints ==="
          RESULT=$(scripts/ci/verify_artifact_integrity.sh \
            --review-flow "$FLOW_99" \
            --trail-names "GH99-Loop1 GH99-Final" \
            --build-fingerprint "$DIFFERENT_FP")
          echo "$RESULT" | jq .
          MATCH=$(echo "$RESULT" | jq -r '.match')
          [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
          echo "✅ PASS"
          echo ""

          echo "=== Test 3: No Final trail ==="
          RESULT=$(scripts/ci/verify_artifact_integrity.sh \
            --review-flow "$FLOW_99" \
            --trail-names "GH99-Loop1 GH99-Loop2" \
            --build-fingerprint "$MATCHING_FP")
          echo "$RESULT" | jq .
          MATCH=$(echo "$RESULT" | jq -r '.match')
          REASON=$(echo "$RESULT" | jq -r '.mismatch_reason')
          [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
          [[ "$REASON" == *"No Final trail"* ]] || { echo "FAIL: wrong reason: $REASON"; exit 1; }
          echo "✅ PASS"
          echo ""

          echo "=== Test 4: No build fingerprint provided ==="
          RESULT=$(scripts/ci/verify_artifact_integrity.sh \
            --review-flow "$FLOW_99" \
            --trail-names "GH99-Final" \
            --build-fingerprint "")
          echo "$RESULT" | jq .
          MATCH=$(echo "$RESULT" | jq -r '.match')
          REASON=$(echo "$RESULT" | jq -r '.mismatch_reason')
          [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
          [[ "$REASON" == *"No build fingerprint"* ]] || { echo "FAIL: wrong reason: $REASON"; exit 1; }
          echo "✅ PASS"
          echo ""

          echo "=== Test 5: Missing reviewed-code artifact, fallback to attestation user_data ==="
          RESULT=$(scripts/ci/verify_artifact_integrity.sh \
            --review-flow "$FLOW_88" \
            --trail-names "GH88-Final" \
            --build-fingerprint "$FALLBACK_FP")
          echo "$RESULT" | jq .
          MATCH=$(echo "$RESULT" | jq -r '.match')
          [ "$MATCH" = "true" ] || { echo "FAIL: expected match=true (fallback fingerprint)"; exit 1; }
          echo "✅ PASS"
          echo ""

          echo "=== Test 6: Missing flow (kosli get trail fails) ==="
          RESULT=$(scripts/ci/verify_artifact_integrity.sh \
            --review-flow "$FLOW_77" \
            --trail-names "GH77-Final" \
            --build-fingerprint "$MATCHING_FP")
          echo "$RESULT" | jq .
          MATCH=$(echo "$RESULT" | jq -r '.match')
          [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
          echo "✅ PASS"
          echo ""

          echo "=== Test 7: Invalid flow name pattern ==="
          RESULT=$(scripts/ci/verify_artifact_integrity.sh \
            --review-flow "bad-flow-name" \
            --trail-names "GH99-Final" \
            --build-fingerprint "$MATCHING_FP")
          echo "$RESULT" | jq .
          MATCH=$(echo "$RESULT" | jq -r '.match')
          REASON=$(echo "$RESULT" | jq -r '.mismatch_reason')
          [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
          [[ "$REASON" == *"Invalid review_flow_name pattern"* ]] || { echo "FAIL: wrong reason: $REASON"; exit 1; }
          echo "✅ PASS"
          echo ""

          echo "=== All shell script tests passed ==="

  # ---------------------------------------------------------------------------
  # Static checks over every file in .github/workflows/.
  # ---------------------------------------------------------------------------
  workflow-lint:
    name: Workflow YAML Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - name: Check kosli jobs use fetch-depth 0
        run: |
          # `kosli begin trail` resolves arbitrary git refs (e.g. feature
          # branch commits). A shallow clone (default fetch-depth: 1) only
          # has HEAD, so the command fails with "reference not found" if the
          # commit lives on another branch.
          #
          # This check ensures any job calling `kosli begin trail` or
          # `kosli create flow` + `kosli begin trail` has fetch-depth: 0.
          FAILED=0
          for wf in .github/workflows/*.yml; do
            # Skip this test workflow (it mentions the command in the lint script itself)
            [ "$(basename "$wf")" = "pipeline-tests.yml" ] && continue
            python3 - "$wf" <<'PYEOF' || FAILED=1
          import sys, yaml

          wf_path = sys.argv[1]
          with open(wf_path) as f:
              doc = yaml.safe_load(f)
          if not doc or "jobs" not in doc:
              sys.exit(0)

          for job_name, job in doc["jobs"].items():
              steps = job.get("steps", [])
              # Only flag jobs that call `kosli begin trail`
              needs_full_history = False
              for step in steps:
                  run_block = step.get("run", "")
                  if "kosli begin trail" in run_block:
                      needs_full_history = True
                      break
              if not needs_full_history:
                  continue
              # Verify the checkout step has fetch-depth: 0
              has_full_fetch = False
              for step in steps:
                  uses = step.get("uses", "")
                  if "actions/checkout" in uses:
                      with_block = step.get("with", {}) or {}
                      fd = with_block.get("fetch-depth")
                      # Accept both the integer 0 and the quoted string "0".
                      if fd == 0 or str(fd) == "0":
                          has_full_fetch = True
                          break
              if not has_full_fetch:
                  print(f"FAIL: {wf_path} job '{job_name}' calls 'kosli begin trail' but checkout is missing fetch-depth: 0")
                  sys.exit(1)
          PYEOF
          done
          if [ "$FAILED" -eq 1 ]; then
            echo "❌ Workflow lint failed"
            exit 1
          fi
          echo "✅ All workflow files pass lint checks"
      - name: Check pull_request_target workflows never checkout PR head
        run: |
          # SECURITY: pull_request_target workflows run with access to repo
          # secrets. If they checkout the PR head branch and execute code from
          # it, an attacker can exfiltrate secrets via a fork PR — the classic
          # "pwn request" vulnerability.
          #
          # This check ensures any workflow triggered by pull_request_target
          # does NOT have a checkout step with ref: pointing to the PR head.
          #
          # Safe patterns:
          # - No ref: (defaults to base branch in pull_request_target context)
          # - ref: github.event.pull_request.base.sha
          #
          # Dangerous patterns (BLOCKED):
          # - ref: github.event.pull_request.head.sha
          # - ref: github.head_ref
          # - ref: refs/pull/.../merge
          FAILED=0
          for wf in .github/workflows/*.yml; do
            [ "$(basename "$wf")" = "pipeline-tests.yml" ] && continue
            python3 - "$wf" <<'PYEOF' || FAILED=1
          import sys, yaml, re

          wf_path = sys.argv[1]
          with open(wf_path) as f:
              doc = yaml.safe_load(f)
          if not doc:
              sys.exit(0)

          # YAML parses bare 'on' as boolean True, so check both keys.
          # `or {}` covers an empty `on:` key, which loads as None.
          triggers = doc.get("on", doc.get(True, {})) or {}
          if isinstance(triggers, str):
              triggers = {triggers: None}
          elif isinstance(triggers, list):
              triggers = {t: None for t in triggers}
          if "pull_request_target" not in triggers:
              sys.exit(0)

          # Workflow uses pull_request_target — check all checkout steps
          dangerous_patterns = [
              r"github\.event\.pull_request\.head",
              r"github\.head_ref",
              r"refs/pull/.*?/merge",
          ]
          for job_name, job in doc.get("jobs", {}).items():
              for step in job.get("steps", []):
                  uses = step.get("uses", "")
                  if "actions/checkout" not in uses:
                      continue
                  with_block = step.get("with", {}) or {}
                  ref_value = str(with_block.get("ref", ""))
                  for pattern in dangerous_patterns:
                      if re.search(pattern, ref_value):
                          step_name = step.get("name", "(unnamed)")
                          print(
                              f"SECURITY FAIL: {wf_path} job '{job_name}' step '{step_name}'\n"
                              f" pull_request_target workflow checks out PR head: ref: {ref_value}\n"
                              f" This is a pwn request vulnerability — attacker code runs with secret access.\n"
                              f" Fix: remove the ref: field (defaults to base branch in pull_request_target)."
                          )
                          sys.exit(1)
          PYEOF
          done
          if [ "$FAILED" -eq 1 ]; then
            echo "❌ Security: pull_request_target pwn request vulnerability detected"
            exit 1
          fi
          echo "✅ No pull_request_target pwn request patterns found"
      - name: Check GitHub Actions are SHA-pinned and use Node 24 versions
        run: |
          # All third-party actions must be pinned to commit SHAs with a
          # version comment: action@SHA # vX
          #
          # This check verifies:
          # 1. Actions are SHA-pinned (no tags, no branches, no missing ref)
          # 2. The version comment meets minimum Node 24 requirements
          #
          # Minimum versions for Node 24:
          # actions/checkout >= v5
          # actions/setup-python >= v6
          # actions/upload-artifact >= v5
          # actions/download-artifact >= v5
          #
          # Exceptions:
          # kosli-dev/setup-cli-action — no Node 24 release yet (v2 is latest)
          # anthropics/claude-code-action — not affected
          #
          # Local actions (./path) and docker:// images carry no git ref and
          # are not checked.
          FAILED=0
          for wf in .github/workflows/*.yml; do
            python3 - "$wf" <<'PYEOF' || FAILED=1
          import sys, re

          wf_path = sys.argv[1]
          with open(wf_path) as f:
              lines = f.readlines()

          min_versions = {
              "actions/checkout": 5,
              "actions/setup-python": 6,
              "actions/upload-artifact": 5,
              "actions/download-artifact": 5,
          }
          # Actions exempt from SHA pinning and version checks
          exempt = {"kosli-dev/setup-cli-action", "anthropics/claude-code-action"}

          # Anchor to a real YAML `uses:` key (optionally the first key of a
          # step) so prose or embedded code containing "uses:" is ignored.
          uses_re = re.compile(r'^\s*(?:-\s+)?uses:\s*(\S+)')
          # A pinned ref is a full-length (40 hex digit) commit SHA.
          sha_re = re.compile(r'[0-9a-fA-F]{40}')
          version_comment_re = re.compile(r'#\s*v(\d+)')

          errors = []
          for i, line in enumerate(lines, 1):
              m = uses_re.match(line)
              if not m:
                  continue
              # The value may be quoted in YAML; the quotes are not part of it.
              action_ref = m.group(1).strip("\"'")
              # Local actions and container images have no git ref to pin.
              if action_ref.startswith(("./", "docker://")):
                  continue
              # Split org/repo[/path] from the ref after '@' (empty if absent).
              action_name, _, ref = action_ref.partition("@")
              if action_name in exempt:
                  continue
              # Tags (v4, v4.1.0), branches (main) and a missing ref are all
              # mutable; only a full commit SHA counts as pinned.
              if not sha_re.fullmatch(ref):
                  errors.append(
                      f" {wf_path}:{i}: {action_ref} — must be SHA-pinned (use @SHA # vX)"
                  )
                  continue
              # Minimum-version rules apply only to the actions listed above.
              if action_name not in min_versions:
                  continue
              # Extract version from trailing comment: # vX
              vc = version_comment_re.search(line)
              if not vc:
                  errors.append(
                      f" {wf_path}:{i}: {action_ref} — SHA-pinned but missing version comment (# vX)"
                  )
                  continue
              major = int(vc.group(1))
              min_ver = min_versions[action_name]
              if major < min_ver:
                  errors.append(
                      f" {wf_path}:{i}: {action_name} v{major} — needs v{min_ver}+ for Node 24"
                  )

          if errors:
              for e in errors:
                  print(e)
              sys.exit(1)
          PYEOF
          done
          if [ "$FAILED" -eq 1 ]; then
            echo "::error::Some workflows have unpinned or outdated actions"
            exit 1
          fi
          echo "All workflow actions are SHA-pinned and target Node 24+"
      - name: Run actionlint on all workflow files
        run: |
          # Install actionlint. -f stops curl from handing an HTTP error page
          # to bash; -sS stays quiet but still prints errors; -L follows redirects.
          bash <(curl -fsSL https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash)
          echo "actionlint version: $(./actionlint --version)"
          # Run actionlint with shellcheck disabled (not installed in CI by default)
          # and pyflakes disabled (our inline Python scripts use heredoc patterns)
          ./actionlint -shellcheck= -pyflakes= .github/workflows/*.yml