Add hallucination detection controls for PR review pipeline #58

Workflow file for this run

.github/workflows/pipeline-tests.yml at feb44d3

	name: Pipeline Tests

	# Triggers: pushes to main and pull requests targeting main, limited to
	# changes under the pipeline scripts, the Kosli policies, the two pipeline
	# unit-test files, or any workflow file.
	on:
	  push:
	    branches:
	      - main
	    paths:
	      - "scripts/**"
	      - "kosli/policies/**"
	      - "tests/unit/test_ci_gate.py"
	      - "tests/unit/test_verify_integrity.py"
	      - ".github/workflows/**"
	  pull_request:
	    branches:
	      - main
	    paths:
	      - "scripts/**"
	      - "kosli/policies/**"
	      - "tests/unit/test_ci_gate.py"
	      - "tests/unit/test_verify_integrity.py"
	      - ".github/workflows/**"

	# Workflow-wide environment. The value is quoted so it stays a string.
	# NOTE(review): presumably opts JavaScript actions into the Node 24
	# runtime; confirm against the runner documentation.
	env:
	  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"

	jobs:
	python-tests:
	  # Runs the Python unit tests that cover the pipeline scripts.
	  name: Python Pipeline Tests
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

	    - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
	      with:
	        # Quoted so the version is never read as a float.
	        python-version: '3.12'

	    - name: Install dependencies
	      run: |
	        pip install pytest
	        # Install project dependencies needed by pipeline_steps
	        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

	    - name: Run pipeline unit tests
	      env:
	        # Lets the tests import project modules from the repository root.
	        PYTHONPATH: ${{ github.workspace }}
	      run: |
	        pytest tests/unit/test_ci_gate.py -v --tb=short
	        # Only a missing test file is tolerated. When the file exists its
	        # failures must fail the step instead of being reported as
	        # "not yet implemented".
	        if [ -f tests/unit/test_verify_integrity.py ]; then
	          pytest tests/unit/test_verify_integrity.py -v --tb=short
	        else
	          echo "Shell script tests not yet implemented"
	        fi

	rego-tests:
	  # Runs `opa test` for every <policy>_test.rego next to its <policy>.rego.
	  name: OPA Rego Policy Tests
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

	    - name: Install OPA
	      run: |
	        # --fail makes curl exit non-zero on an HTTP error instead of
	        # saving the error page as the "opa" binary.
	        curl --fail -L -o opa https://openpolicyagent.org/downloads/latest/opa_linux_amd64_static
	        chmod +x opa
	        sudo mv opa /usr/local/bin/
	        # Confirm the downloaded file is a runnable OPA binary.
	        opa version

	    - name: Run Rego policy tests
	      run: |
	        cd kosli/policies
	        # With nullglob an unmatched pattern expands to nothing, so the
	        # loop body never sees the literal string "*_test.rego".
	        shopt -s nullglob
	        FAILED=0
	        TESTED=0
	        for test_file in *_test.rego; do
	          policy="${test_file%_test.rego}.rego"
	          if [ -f "$policy" ]; then
	            echo "=== Testing: $(basename "$policy") ==="
	            TESTED=$((TESTED + 1))
	            if ! opa test "$policy" "$test_file" -v; then
	              FAILED=1
	            fi
	            echo
	          else
	            # Surface orphaned test files instead of skipping them silently.
	            echo "::warning::No policy file '$policy' found for '$test_file'; skipped"
	          fi
	        done
	        if [ "$FAILED" -eq 1 ]; then
	          echo "❌ Some Rego tests failed"
	          exit 1
	        fi
	        if [ "$TESTED" -eq 0 ]; then
	          # Do not claim success when nothing was actually tested.
	          echo "::warning::No Rego policy tests were found in kosli/policies"
	          exit 0
	        fi
	        echo "✅ All Rego tests passed"

	shell-tests:
	  # Exercises scripts/ci/verify_artifact_integrity.sh against a mock
	  # `kosli` CLI placed first on PATH, and checks the JSON it prints.
	  name: Shell Script Tests
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

	    - name: Test verify_artifact_integrity.sh with mock inputs
	      run: |
	        # Create mock kosli binary for testing source fingerprint comparison.
	        # The script now compares source directory fingerprints (not commit SHAs).
	        MOCK_BIN=$(mktemp -d)

	        # Mock kosli — handles "get trail" and "get attestation" commands
	        # Uses valid flow name patterns that pass the script's regex validation.
	        # Mock dispatches on trail name to simulate different scenarios:
	        #   GH99-* → normal flow (returns reviewed-code artifact)
	        #   GH88-* → no-artifact flow (trail exists, no reviewed-code artifact)
	        #   GH77-* → missing flow (kosli get trail fails)
	        #
	        # The heredoc delimiter is quoted, so nothing inside the mock is
	        # expanded by this step's shell. The shebang and the inner JSON
	        # terminators must stay at the script's left margin.
	        cat > "$MOCK_BIN/kosli" << 'MOCK_EOF'
	        #!/usr/bin/env bash
	        # Mock kosli CLI for testing verify_artifact_integrity.sh
	        #
	        # Handles:
	        #   kosli get trail <name> --flow <flow> --output json
	        #   kosli get attestation <name> --flow <flow> --trail <trail> --output json
	        #
	        # Dispatch matches on "$*" (all arguments joined by spaces). A trail
	        # name is recognised as an argument that STARTS with the GHnn- prefix
	        # (pattern *" GHnn-"*), so the flow name, which contains "-GHnn-",
	        # does not trigger a match and argument order does not matter.

	        if [[ "$*" == "get trail"* ]]; then
	          # Dispatch on trail name to simulate different scenarios
	          if [[ "$*" == *" GH77-"* ]]; then
	            echo '{}' ; exit 1
	          fi
	          if [[ "$*" == *" GH88-"* ]]; then
	            # Trail exists but no reviewed-code artifact
	            echo '{"artifacts": []}' ; exit 0
	          fi
	          cat << 'JSON'
	        {
	          "artifacts": [
	            {
	              "name": "reviewed-code",
	              "fingerprint": "abc123def456abc123def456abc123def456abc123def456abc123def456abcdef"
	            }
	          ]
	        }
	        JSON
	          exit 0
	        fi

	        if [[ "$*" == "get attestation"* ]]; then
	          # Fallback: return source_fingerprint from user_data for GH88 (no-artifact) scenario
	          if [[ "$*" == *" GH88-"* ]]; then
	            cat << 'JSON'
	        {
	          "user_data": {
	            "source_fingerprint": "fallback_fp_abc123def456abc123def456abc123def456abc123def456abcdef"
	          }
	        }
	        JSON
	            exit 0
	          fi
	          echo '{}' ; exit 1
	        fi

	        echo "mock kosli: unhandled command: $*" >&2
	        exit 1
	        MOCK_EOF
	        chmod +x "$MOCK_BIN/kosli"

	        # Put the mock ahead of any real kosli binary.
	        export PATH="$MOCK_BIN:$PATH"

	        # All test flow names must match the script's validation regex:
	        #   ^agentic-sdlc-demo-GH[0-9]+-CodeReview$
	        FLOW_99="agentic-sdlc-demo-GH99-CodeReview"
	        FLOW_88="agentic-sdlc-demo-GH88-CodeReview"
	        FLOW_77="agentic-sdlc-demo-GH77-CodeReview"

	        # MATCHING_FP and FALLBACK_FP equal the values the mock returns.
	        MATCHING_FP="abc123def456abc123def456abc123def456abc123def456abc123def456abcdef"
	        DIFFERENT_FP="ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
	        FALLBACK_FP="fallback_fp_abc123def456abc123def456abc123def456abc123def456abcdef"

	        echo "=== Test 1: Matching source fingerprints ==="
	        RESULT=$(scripts/ci/verify_artifact_integrity.sh \
	          --review-flow "$FLOW_99" \
	          --trail-names "GH99-Loop1 GH99-Final" \
	          --build-fingerprint "$MATCHING_FP")
	        echo "$RESULT" | jq .
	        MATCH=$(echo "$RESULT" | jq -r '.match')
	        [ "$MATCH" = "true" ] || { echo "FAIL: expected match=true"; exit 1; }
	        echo "✅ PASS"

	        echo ""
	        echo "=== Test 2: Mismatched source fingerprints ==="
	        RESULT=$(scripts/ci/verify_artifact_integrity.sh \
	          --review-flow "$FLOW_99" \
	          --trail-names "GH99-Loop1 GH99-Final" \
	          --build-fingerprint "$DIFFERENT_FP")
	        echo "$RESULT" | jq .
	        MATCH=$(echo "$RESULT" | jq -r '.match')
	        [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
	        echo "✅ PASS"

	        echo ""
	        echo "=== Test 3: No Final trail ==="
	        RESULT=$(scripts/ci/verify_artifact_integrity.sh \
	          --review-flow "$FLOW_99" \
	          --trail-names "GH99-Loop1 GH99-Loop2" \
	          --build-fingerprint "$MATCHING_FP")
	        echo "$RESULT" | jq .
	        MATCH=$(echo "$RESULT" | jq -r '.match')
	        REASON=$(echo "$RESULT" | jq -r '.mismatch_reason')
	        [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
	        [[ "$REASON" == "No Final trail" ]] || { echo "FAIL: wrong reason: $REASON"; exit 1; }
	        echo "✅ PASS"

	        echo ""
	        echo "=== Test 4: No build fingerprint provided ==="
	        RESULT=$(scripts/ci/verify_artifact_integrity.sh \
	          --review-flow "$FLOW_99" \
	          --trail-names "GH99-Final" \
	          --build-fingerprint "")
	        echo "$RESULT" | jq .
	        MATCH=$(echo "$RESULT" | jq -r '.match')
	        REASON=$(echo "$RESULT" | jq -r '.mismatch_reason')
	        [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
	        [[ "$REASON" == "No build fingerprint" ]] || { echo "FAIL: wrong reason: $REASON"; exit 1; }
	        echo "✅ PASS"

	        echo ""
	        echo "=== Test 5: Missing reviewed-code artifact, fallback to attestation user_data ==="
	        RESULT=$(scripts/ci/verify_artifact_integrity.sh \
	          --review-flow "$FLOW_88" \
	          --trail-names "GH88-Final" \
	          --build-fingerprint "$FALLBACK_FP")
	        echo "$RESULT" | jq .
	        MATCH=$(echo "$RESULT" | jq -r '.match')
	        [ "$MATCH" = "true" ] || { echo "FAIL: expected match=true (fallback fingerprint)"; exit 1; }
	        echo "✅ PASS"

	        echo ""
	        echo "=== Test 6: Missing flow (kosli get trail fails) ==="
	        RESULT=$(scripts/ci/verify_artifact_integrity.sh \
	          --review-flow "$FLOW_77" \
	          --trail-names "GH77-Final" \
	          --build-fingerprint "$MATCHING_FP")
	        echo "$RESULT" | jq .
	        MATCH=$(echo "$RESULT" | jq -r '.match')
	        [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
	        echo "✅ PASS"

	        echo ""
	        echo "=== Test 7: Invalid flow name pattern ==="
	        RESULT=$(scripts/ci/verify_artifact_integrity.sh \
	          --review-flow "bad-flow-name" \
	          --trail-names "GH99-Final" \
	          --build-fingerprint "$MATCHING_FP")
	        echo "$RESULT" | jq .
	        MATCH=$(echo "$RESULT" | jq -r '.match')
	        REASON=$(echo "$RESULT" | jq -r '.mismatch_reason')
	        [ "$MATCH" = "false" ] || { echo "FAIL: expected match=false"; exit 1; }
	        [[ "$REASON" == "Invalid review_flow_name pattern" ]] || { echo "FAIL: wrong reason: $REASON"; exit 1; }
	        echo "✅ PASS"

	        echo ""
	        echo "=== All shell script tests passed ==="

	workflow-lint:
	  # Static checks over the repository's workflow files.
	  name: Workflow YAML Lint
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

	    - name: Check kosli jobs use fetch-depth 0
	      run: |
	        # `kosli begin trail` resolves arbitrary git refs (e.g. feature
	        # branch commits). A shallow clone (default fetch-depth: 1) only
	        # has HEAD, so the command fails with "reference not found" if the
	        # commit lives on another branch.
	        #
	        # This check ensures any job calling `kosli begin trail` or
	        # `kosli create flow` + `kosli begin trail` has fetch-depth: 0.

	        FAILED=0
	        for wf in .github/workflows/*.yml; do
	          # Skip this test workflow (it mentions the command in the lint script itself)
	          if [ "$(basename "$wf")" = "pipeline-tests.yml" ]; then
	            continue
	          fi
	          # The Python below is fed on stdin; a non-zero exit marks the
	          # whole step as failed once every workflow has been inspected.
	          python3 - "$wf" <<'PYEOF' || FAILED=1
	        import sys, yaml

	        wf_path = sys.argv[1]
	        with open(wf_path) as f:
	            doc = yaml.safe_load(f)

	        if not doc or "jobs" not in doc:
	            sys.exit(0)

	        # Collect every offending job so one run reports all of them.
	        failures = []
	        for job_name, job in doc["jobs"].items():
	            steps = job.get("steps", [])

	            # Only flag jobs that call `kosli begin trail`
	            needs_full_history = any(
	                "kosli begin trail" in step.get("run", "") for step in steps
	            )
	            if not needs_full_history:
	                continue

	            # Verify a checkout step has fetch-depth: 0 (int or string form)
	            has_full_fetch = False
	            for step in steps:
	                if "actions/checkout" not in step.get("uses", ""):
	                    continue
	                with_block = step.get("with", {}) or {}
	                fd = with_block.get("fetch-depth")
	                if fd == 0 or str(fd) == "0":
	                    has_full_fetch = True
	                    break

	            if not has_full_fetch:
	                failures.append(job_name)

	        for job_name in failures:
	            print(f"FAIL: {wf_path} job '{job_name}' calls 'kosli begin trail' but checkout is missing fetch-depth: 0")

	        if failures:
	            sys.exit(1)

	        PYEOF
	        done

	        if [ "$FAILED" -eq 1 ]; then
	          echo "❌ Workflow lint failed"
	          exit 1
	        fi
	        echo "✅ All workflow files pass lint checks"

	- name: Check pull_request_target workflows never checkout PR head
	  run: |
	    # SECURITY: pull_request_target workflows run with access to repo
	    # secrets. If they checkout the PR head branch and execute code from
	    # it, an attacker can exfiltrate secrets via a fork PR — the classic
	    # "pwn request" vulnerability.
	    #
	    # This check ensures any workflow triggered by pull_request_target
	    # does NOT have a checkout step with ref: pointing to the PR head.
	    #
	    # Safe patterns:
	    #   - No ref: (defaults to base branch in pull_request_target context)
	    #   - ref: github.event.pull_request.base.sha
	    #
	    # Dangerous patterns (BLOCKED):
	    #   - ref: github.event.pull_request.head.sha
	    #   - ref: github.head_ref
	    #   - ref: refs/pull/.../merge
	    #   - ref: refs/pull/.../head

	    FAILED=0
	    for wf in .github/workflows/*.yml; do
	      if [ "$(basename "$wf")" = "pipeline-tests.yml" ]; then
	        continue
	      fi
	      python3 - "$wf" <<'PYEOF' || FAILED=1
	    import sys, yaml, re

	    wf_path = sys.argv[1]
	    with open(wf_path) as f:
	        doc = yaml.safe_load(f)

	    if not doc:
	        sys.exit(0)

	    # YAML parses bare 'on' as boolean True, so check both keys.
	    # `or {}` covers an `on:` key with an empty (null) value, which would
	    # otherwise raise TypeError below and be reported as a security failure.
	    triggers = doc.get("on", doc.get(True, {})) or {}
	    if isinstance(triggers, str):
	        triggers = {triggers: None}
	    elif isinstance(triggers, list):
	        triggers = {t: None for t in triggers}

	    if "pull_request_target" not in triggers:
	        sys.exit(0)

	    # Workflow uses pull_request_target — check all checkout steps
	    dangerous_patterns = [
	        r"github\.event\.pull_request\.head",
	        r"github\.head_ref",
	        # Both the synthetic merge ref and the head ref contain PR code.
	        r"refs/pull/.*?/(?:merge|head)",
	    ]

	    for job_name, job in doc.get("jobs", {}).items():
	        for step in job.get("steps", []):
	            uses = step.get("uses", "")
	            if "actions/checkout" not in uses:
	                continue
	            with_block = step.get("with", {}) or {}
	            ref_value = str(with_block.get("ref", ""))
	            for pattern in dangerous_patterns:
	                if re.search(pattern, ref_value):
	                    step_name = step.get("name", "(unnamed)")
	                    print(
	                        f"SECURITY FAIL: {wf_path} job '{job_name}' step '{step_name}'\n"
	                        f" pull_request_target workflow checks out PR head: ref: {ref_value}\n"
	                        f" This is a pwn request vulnerability — attacker code runs with secret access.\n"
	                        f" Fix: remove the ref: field (defaults to base branch in pull_request_target)."
	                    )
	                    sys.exit(1)

	    PYEOF
	    done

	    if [ "$FAILED" -eq 1 ]; then
	      echo "❌ Security: pull_request_target pwn request vulnerability detected"
	      exit 1
	    fi
	    echo "✅ No pull_request_target pwn request patterns found"

	- name: Check GitHub Actions are SHA-pinned and use Node 24 versions
	  run: |
	    # All third-party actions must be pinned to commit SHAs with a
	    # version comment: action@SHA # vX
	    #
	    # This check verifies:
	    #   1. Actions are pinned to a full 40-character commit SHA (version
	    #      tags and branch names such as @v4 or @main are rejected)
	    #   2. The version comment meets minimum Node 24 requirements
	    #
	    # Minimum versions for Node 24:
	    #   actions/checkout >= v5
	    #   actions/setup-python >= v6
	    #   actions/upload-artifact >= v5
	    #   actions/download-artifact >= v5
	    #
	    # Exceptions:
	    #   kosli-dev/setup-cli-action — no Node 24 release yet (v2 is latest)
	    #   anthropics/claude-code-action — not affected
	    #   local actions (./path) and docker:// images — nothing to pin

	    FAILED=0
	    for wf in .github/workflows/*.yml; do
	      python3 - "$wf" <<'PYEOF' || FAILED=1
	    import sys, re

	    wf_path = sys.argv[1]
	    with open(wf_path) as f:
	        lines = f.readlines()

	    min_versions = {
	        "actions/checkout": 5,
	        "actions/setup-python": 6,
	        "actions/upload-artifact": 5,
	        "actions/download-artifact": 5,
	    }

	    # Actions exempt from SHA pinning and version checks
	    exempt = {"kosli-dev/setup-cli-action", "anthropics/claude-code-action"}

	    # Anchored so that only a real step key is inspected, not arbitrary
	    # text that happens to contain the key name (e.g. a regex literal
	    # inside an inline script).
	    uses_key = re.compile(r'^\s*(?:-\s+)?uses:\s*(\S+)')
	    full_sha = re.compile(r'[0-9a-fA-F]{40}')

	    errors = []
	    for i, line in enumerate(lines, 1):
	        m = uses_key.match(line)
	        if not m:
	            continue
	        action_ref = m.group(1).strip("\"'")

	        # Local actions and container images carry no git ref to pin.
	        if action_ref.startswith(("./", "docker://")):
	            continue

	        # Split owner/repo[/path] from the ref after the first "@"
	        action_name, _, ref = action_ref.partition("@")

	        if action_name in exempt:
	            continue

	        # Check SHA pinning: anything other than a full commit SHA
	        # (vX tag, branch name, short SHA, missing ref) is mutable.
	        if not full_sha.fullmatch(ref):
	            errors.append(
	                f" {wf_path}:{i}: {action_ref} — must be SHA-pinned (use @SHA # vX)"
	            )
	            continue

	        # Extract version from trailing comment: # vX
	        vc = re.search(r'#\s*v(\d+)', line)
	        if not vc and action_name in min_versions:
	            errors.append(
	                f" {wf_path}:{i}: {action_ref} — SHA-pinned but missing version comment (# vX)"
	            )
	            continue

	        if vc and action_name in min_versions:
	            major = int(vc.group(1))
	            min_ver = min_versions[action_name]
	            if major < min_ver:
	                errors.append(
	                    f" {wf_path}:{i}: {action_name} v{major} — needs v{min_ver}+ for Node 24"
	                )

	    if errors:
	        for e in errors:
	            print(e)
	        sys.exit(1)

	    PYEOF
	    done

	    if [ "$FAILED" -eq 1 ]; then
	      echo "::error::Some workflows have unpinned or outdated actions"
	      exit 1
	    fi
	    echo "All workflow actions are SHA-pinned and target Node 24+"

	- name: Run actionlint on all workflow files
	  run: |
	    # Install actionlint. The installer is downloaded to a file first and
	    # curl runs with --fail, so a failed download stops the step here
	    # instead of executing an empty script and failing later with a
	    # confusing "./actionlint: No such file" error.
	    installer="$(mktemp)"
	    curl --fail --silent --show-error --location \
	      -o "$installer" \
	      https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash
	    bash "$installer"
	    rm -f "$installer"
	    echo "actionlint version: $(./actionlint --version)"

	    # Run actionlint with shellcheck disabled (not installed in CI by default)
	    # and pyflakes disabled (our inline Python scripts use heredoc patterns)
	    ./actionlint -shellcheck= -pyflakes= .github/workflows/*.yml

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add hallucination detection controls for PR review pipeline #58

Workflow file

Add hallucination detection controls for PR review pipeline #58

Uh oh!

Workflow file for this run