diff --git a/.github/workflows/pr-codebuild-trigger.yml b/.github/workflows/pr-codebuild-trigger.yml new file mode 100644 index 0000000..6249ca6 --- /dev/null +++ b/.github/workflows/pr-codebuild-trigger.yml @@ -0,0 +1,339 @@ +name: PR CodeBuild Trigger (Privileged) + +# DEPLOYMENT NOTE: This workflow references "PR Team Check (Untrusted)" workflow. +# If deploying both workflows simultaneously, the referenced workflow must exist +# in the main branch first. See docs/webhook-deployment-guide.md for deployment strategies. + +on: + workflow_run: + workflows: ["PR Team Check (Untrusted)"] + types: + - completed + +permissions: + contents: read + pull-requests: write + issues: write + +jobs: + trigger-codebuild: + runs-on: ubuntu-latest + if: > + github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' + steps: + - name: Download PR information artifact + uses: actions/github-script@v7 + with: + script: | + var artifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: ${{ github.event.workflow_run.id }}, + }); + var matchArtifact = artifacts.data.artifacts.filter((artifact) => { + return artifact.name == "pr-info"; + })[0]; + var download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: matchArtifact.id, + archive_format: 'zip', + }); + var fs = require('fs'); + fs.writeFileSync('${{ github.workspace }}/pr-info.zip', Buffer.from(download.data)); + + - name: Extract and validate PR information + id: extract-pr-info + run: | + mkdir -p tmp + unzip -d tmp/ pr-info.zip + + # Read and validate PR information + PR_NUMBER=$(cat ./tmp/pr_number) + PR_AUTHOR=$(cat ./tmp/pr_author) + COMMIT_AUTHORS=$(cat ./tmp/commit_authors) + COMMIT_USERNAMES=$(cat ./tmp/commit_usernames) + WORKFLOW_MODIFIED=$(cat ./tmp/workflow_modified) + WORKFLOW_CHANGES=$(cat ./tmp/workflow_changes) + HEAD_SHA=$(cat ./tmp/head_sha) + BASE_SHA=$(cat ./tmp/base_sha) + REPOSITORY=$(cat ./tmp/repository) + + # Validate that PR_NUMBER is numeric + if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then + echo "::error::Invalid PR number: $PR_NUMBER" + exit 1 + fi + + # Validate SHA format (40 character hex) + if ! [[ "$HEAD_SHA" =~ ^[a-f0-9]{40}$ ]]; then + echo "::error::Invalid HEAD SHA format: $HEAD_SHA" + exit 1 + fi + + if ! [[ "$BASE_SHA" =~ ^[a-f0-9]{40}$ ]]; then + echo "::error::Invalid BASE SHA format: $BASE_SHA" + exit 1 + fi + + # Validate repository format + if ! [[ "$REPOSITORY" =~ ^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$ ]]; then + echo "::error::Invalid repository format: $REPOSITORY" + exit 1 + fi + + # Set outputs + echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT + echo "pr_author=$PR_AUTHOR" >> $GITHUB_OUTPUT + echo "commit_authors=$COMMIT_AUTHORS" >> $GITHUB_OUTPUT + echo "commit_usernames=$COMMIT_USERNAMES" >> $GITHUB_OUTPUT + echo "workflow_modified=$WORKFLOW_MODIFIED" >> $GITHUB_OUTPUT + echo "workflow_changes=$WORKFLOW_CHANGES" >> $GITHUB_OUTPUT + echo "head_sha=$HEAD_SHA" >> $GITHUB_OUTPUT + echo "base_sha=$BASE_SHA" >> $GITHUB_OUTPUT + echo "repository=$REPOSITORY" >> $GITHUB_OUTPUT + + echo "✅ PR information validated successfully" + echo "PR Number: $PR_NUMBER" + echo "PR Author: $PR_AUTHOR" + echo "Workflow Modified: $WORKFLOW_MODIFIED" + + - name: Security validation + id: security-check + env: + WORKFLOW_MODIFIED: ${{ steps.extract-pr-info.outputs.workflow_modified }} + WORKFLOW_CHANGES: ${{ steps.extract-pr-info.outputs.workflow_changes }} + REPOSITORY: ${{ steps.extract-pr-info.outputs.repository }} + PR_NUMBER: ${{ steps.extract-pr-info.outputs.pr_number }} + run: | + if [[ "$WORKFLOW_MODIFIED" == "true" ]]; then + echo "🚨 SECURITY BLOCK: This PR modifies workflow files" + echo "Modified files: $WORKFLOW_CHANGES" + + # Create JSON payload safely + jq -n \ + --arg body "🚨 **SECURITY BLOCK** + +**CodeBuild execution has been BLOCKED** because this PR modifies GitHub Actions workflow files: +\`\`\` +$WORKFLOW_CHANGES +\`\`\` + +**Security Policy:** PRs that modify workflows cannot trigger automated builds to prevent security bypass attacks. + +**Required Actions:** +1. Get approval from code owners (@awslabs/sagemaker-1p-algorithms) +2. Manual review of all workflow changes +3. Separate the workflow changes into a different PR if needed + +**This is an automated security measure to protect AWS resources.**" \ + '{body: $body}' > /tmp/comment_payload.json + + # Post security block comment on PR + curl -X POST \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Content-Type: application/json" \ + "https://api.github.com/repos/$REPOSITORY/issues/$PR_NUMBER/comments" \ + -d @/tmp/comment_payload.json + + echo "security_blocked=true" >> $GITHUB_OUTPUT + else + echo "✅ Security validation passed" + echo "security_blocked=false" >> $GITHUB_OUTPUT + fi + + - name: Check team membership for PR author + id: check-pr-author + if: steps.security-check.outputs.security_blocked == 'false' + env: + PR_AUTHOR: ${{ steps.extract-pr-info.outputs.pr_author }} + run: | + TEAM_MEMBER_FOUND=false + + # Validate PR author format (alphanumeric, hyphens, underscores only) + if ! [[ "$PR_AUTHOR" =~ ^[a-zA-Z0-9_-]+$ ]]; then + echo "::error::Invalid PR author format: $PR_AUTHOR" + exit 1 + fi + + # Check PR author team membership + echo "Checking team membership for PR author: $PR_AUTHOR" + + # Check if PR author is in the team + response=$(curl -s -w "%{http_code}" -o /tmp/team_check \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + "https://api.github.com/orgs/awslabs/teams/sagemaker-1p-algorithms/members/$PR_AUTHOR") + + if [[ "$response" == "204" ]]; then + echo "✅ PR author $PR_AUTHOR is a member of sagemaker-1p-algorithms team" + TEAM_MEMBER_FOUND=true + elif [[ "$response" == "404" ]]; then + echo "❌ PR author $PR_AUTHOR is not a member of sagemaker-1p-algorithms team" + else + echo "⚠️ Unable to verify team membership for PR author (HTTP $response)" + cat /tmp/team_check + fi + + echo "pr_author_is_member=$TEAM_MEMBER_FOUND" >> $GITHUB_OUTPUT + + - name: Check team membership for commit authors + id: check-commit-authors + if: steps.security-check.outputs.security_blocked == 'false' + env: + COMMIT_USERNAMES: ${{ steps.extract-pr-info.outputs.commit_usernames }} + run: | + TEAM_MEMBER_FOUND=false + + # Check commit authors if we have usernames + if [[ -n "$COMMIT_USERNAMES" ]]; then + IFS=',' read -ra USERNAMES <<< "$COMMIT_USERNAMES" + for username in "${USERNAMES[@]}"; do + if [[ -n "$username" ]]; then + # Validate username format (alphanumeric, hyphens, underscores only) + if ! [[ "$username" =~ ^[a-zA-Z0-9_-]+$ ]]; then + echo "⚠️ Skipping invalid username format: $username" + continue + fi + + echo "Checking team membership for commit author: $username" + + response=$(curl -s -w "%{http_code}" -o /tmp/team_check_commit \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + "https://api.github.com/orgs/awslabs/teams/sagemaker-1p-algorithms/members/$username") + + if [[ "$response" == "204" ]]; then + echo "✅ Commit author $username is a member of sagemaker-1p-algorithms team" + TEAM_MEMBER_FOUND=true + break + elif [[ "$response" == "404" ]]; then + echo "❌ Commit author $username is not a member of sagemaker-1p-algorithms team" + else + echo "⚠️ Unable to verify team membership for commit author $username (HTTP $response)" + fi + fi + done + else + echo "No GitHub usernames found in commit authors" + fi + + echo "commit_author_is_member=$TEAM_MEMBER_FOUND" >> $GITHUB_OUTPUT + + - name: Configure AWS Credentials + if: | + steps.security-check.outputs.security_blocked == 'false' && + (steps.check-pr-author.outputs.pr_author_is_member == 'true' || steps.check-commit-authors.outputs.commit_author_is_member == 'true') + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Trigger CodeBuild Projects + if: | + steps.security-check.outputs.security_blocked == 'false' && + (steps.check-pr-author.outputs.pr_author_is_member == 'true' || steps.check-commit-authors.outputs.commit_author_is_member == 'true') + env: + HEAD_SHA: ${{ steps.extract-pr-info.outputs.head_sha }} + BASE_SHA: ${{ steps.extract-pr-info.outputs.base_sha }} + PR_NUMBER: ${{ steps.extract-pr-info.outputs.pr_number }} + REPOSITORY: ${{ steps.extract-pr-info.outputs.repository }} + run: | + echo "🚀 Team member found! Triggering CodeBuild projects..." + + # Trigger tgi-pr-GPU CodeBuild project + echo "Starting tgi-pr-GPU build..." + TGI_BUILD_ID=$(aws codebuild start-build \ + --project-name tgi-pr-GPU \ + --source-version "$HEAD_SHA" \ + --environment-variables-override \ + name=GITHUB_PR_NUMBER,value="$PR_NUMBER" \ + name=GITHUB_PR_HEAD_SHA,value="$HEAD_SHA" \ + name=GITHUB_PR_BASE_SHA,value="$BASE_SHA" \ + name=GITHUB_REPOSITORY,value="$REPOSITORY" \ + --query 'build.id' --output text) + + echo "TGI CodeBuild started with ID: $TGI_BUILD_ID" + + # Trigger tei-pr-CPU CodeBuild project + echo "Starting tei-pr-CPU build..." + TEI_BUILD_ID=$(aws codebuild start-build \ + --project-name tei-pr-CPU \ + --source-version "$HEAD_SHA" \ + --environment-variables-override \ + name=GITHUB_PR_NUMBER,value="$PR_NUMBER" \ + name=GITHUB_PR_HEAD_SHA,value="$HEAD_SHA" \ + name=GITHUB_PR_BASE_SHA,value="$BASE_SHA" \ + name=GITHUB_REPOSITORY,value="$REPOSITORY" \ + --query 'build.id' --output text) + + echo "TEI CodeBuild started with ID: $TEI_BUILD_ID" + + # Create JSON payload safely for success comment + jq -n \ + --arg body "🚀 **CodeBuild Triggered** + +✅ Team member verification passed + +**Build IDs:** +- TGI GPU Build: \`$TGI_BUILD_ID\` +- TEI CPU Build: \`$TEI_BUILD_ID\` + +You can monitor the builds in the [AWS CodeBuild Console](https://us-west-2.console.aws.amazon.com/codesuite/codebuild/projects)." \ + '{body: $body}' > /tmp/success_comment.json + + # Create a comment on the PR with build information + curl -X POST \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Content-Type: application/json" \ + "https://api.github.com/repos/$REPOSITORY/issues/$PR_NUMBER/comments" \ + -d @/tmp/success_comment.json + + - name: Team membership check failed + if: | + steps.security-check.outputs.security_blocked == 'false' && + steps.check-pr-author.outputs.pr_author_is_member == 'false' && + steps.check-commit-authors.outputs.commit_author_is_member == 'false' + env: + REPOSITORY: ${{ steps.extract-pr-info.outputs.repository }} + PR_NUMBER: ${{ steps.extract-pr-info.outputs.pr_number }} + PR_AUTHOR: ${{ steps.extract-pr-info.outputs.pr_author }} + COMMIT_AUTHORS: ${{ steps.extract-pr-info.outputs.commit_authors }} + run: | + echo "❌ Access denied: Neither PR author nor commit authors are members of the sagemaker-1p-algorithms team" + + # Create JSON payload safely for failure comment + jq -n \ + --arg body "❌ **CodeBuild Access Denied** + +The PR author and commit authors are not members of the \`sagemaker-1p-algorithms\` team. + +**Checked:** +- PR Author: @$PR_AUTHOR +- Commit Authors: $COMMIT_AUTHORS + +Please ensure you are a member of the required team to trigger builds." \ + '{body: $body}' > /tmp/failure_comment.json + + # Create a comment on the PR about the failed check + curl -X POST \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Content-Type: application/json" \ + "https://api.github.com/repos/$REPOSITORY/issues/$PR_NUMBER/comments" \ + -d @/tmp/failure_comment.json + + exit 1 + + - name: Security block failure + if: steps.security-check.outputs.security_blocked == 'true' + run: | + echo "🚨 SECURITY BLOCK: Workflow execution terminated due to workflow file modifications" + echo "::error::SECURITY POLICY VIOLATION: This PR modifies GitHub Actions workflows and has been blocked from executing CodeBuild projects." + echo "::error::This is a security measure to prevent malicious workflow modifications from bypassing team membership checks." + echo "::error::Please get approval from code owners (@awslabs/sagemaker-1p-algorithms) and consider separating workflow changes into a different PR." + exit 1 diff --git a/.github/workflows/pr-team-check.yml b/.github/workflows/pr-team-check.yml new file mode 100644 index 0000000..1abcb7a --- /dev/null +++ b/.github/workflows/pr-team-check.yml @@ -0,0 +1,85 @@ +name: PR Team Check (Untrusted) + +on: + pull_request: + types: [opened, synchronize, reopened] + +permissions: + contents: read + +jobs: + check-team-membership: + runs-on: ubuntu-latest + steps: + - name: Checkout PR code (untrusted) + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Get PR and commit authors + id: get-authors + run: | + # Get PR author + PR_AUTHOR="${{ github.event.pull_request.user.login }}" + echo "pr_author=$PR_AUTHOR" >> $GITHUB_OUTPUT + echo "PR Author: $PR_AUTHOR" + + # Get all commit authors in this PR + git fetch origin ${{ github.event.pull_request.base.ref }} + COMMIT_AUTHORS=$(git log origin/${{ github.event.pull_request.base.ref }}..${{ github.event.pull_request.head.sha }} --format="%an" | sort -u | tr '\n' ',' | sed 's/,$//') + echo "commit_authors=$COMMIT_AUTHORS" >> $GITHUB_OUTPUT + echo "Commit Authors: $COMMIT_AUTHORS" + + # Get all commit author usernames (GitHub usernames) + COMMIT_USERNAMES="" + for sha in $(git log origin/${{ github.event.pull_request.base.ref }}..${{ github.event.pull_request.head.sha }} --format="%H"); do + author_email=$(git show --format="%ae" --no-patch $sha) + # Try to get GitHub username from commit + if [[ "$author_email" == *"@users.noreply.github.com" ]]; then + username=$(echo "$author_email" | cut -d'@' -f1 | sed 's/^[0-9]*+//') + COMMIT_USERNAMES="$COMMIT_USERNAMES,$username" + fi + done + COMMIT_USERNAMES=$(echo "$COMMIT_USERNAMES" | sed 's/^,//' | sed 's/,$//') + echo "commit_usernames=$COMMIT_USERNAMES" >> $GITHUB_OUTPUT + echo "Commit Usernames: $COMMIT_USERNAMES" + + - name: Check for workflow modifications + id: check-workflow-changes + run: | + echo "🔒 Security Check: Validating PR does not modify workflow files" + + # Check if PR modifies any workflow files + WORKFLOW_CHANGES=$(git diff --name-only origin/${{ github.event.pull_request.base.ref }}...${{ github.event.pull_request.head.sha }} | grep -E '^\.github/workflows/' || true) + + if [[ -n "$WORKFLOW_CHANGES" ]]; then + echo "🚨 SECURITY ALERT: This PR modifies workflow files:" + echo "$WORKFLOW_CHANGES" + echo "workflow_modified=true" >> $GITHUB_OUTPUT + echo "workflow_changes=$WORKFLOW_CHANGES" >> $GITHUB_OUTPUT + else + echo "✅ No workflow files modified in this PR" + echo "workflow_modified=false" >> $GITHUB_OUTPUT + echo "workflow_changes=" >> $GITHUB_OUTPUT + fi + + - name: Save PR information for privileged workflow + run: | + mkdir -p ./pr-info + echo "${{ github.event.pull_request.number }}" > ./pr-info/pr_number + echo "${{ steps.get-authors.outputs.pr_author }}" > ./pr-info/pr_author + echo "${{ steps.get-authors.outputs.commit_authors }}" > ./pr-info/commit_authors + echo "${{ steps.get-authors.outputs.commit_usernames }}" > ./pr-info/commit_usernames + echo "${{ steps.check-workflow-changes.outputs.workflow_modified }}" > ./pr-info/workflow_modified + echo "${{ steps.check-workflow-changes.outputs.workflow_changes }}" > ./pr-info/workflow_changes + echo "${{ github.event.pull_request.head.sha }}" > ./pr-info/head_sha + echo "${{ github.event.pull_request.base.sha }}" > ./pr-info/base_sha + echo "${{ github.repository }}" > ./pr-info/repository + + - name: Upload PR information artifact + uses: actions/upload-artifact@v4 + with: + name: pr-info + path: pr-info/ + retention-days: 1 diff --git a/CODEOWNERS b/CODEOWNERS index 224a12c..c5b2259 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1 +1,21 @@ -* @awslabs/sagemaker-1p-algorithms \ No newline at end of file +# Code Owners for llm-hosting-container repository + +# Global fallback - require review from sagemaker-1p-algorithms team +* @awslabs/sagemaker-1p-algorithms + +# GitHub Actions workflows - CRITICAL SECURITY +# These files control CI/CD and AWS resource access +# Require mandatory review from the sagemaker-1p-algorithms team +.github/workflows/ @awslabs/sagemaker-1p-algorithms +.github/actions/ @awslabs/sagemaker-1p-algorithms + +# Security-sensitive configuration files +CODEOWNERS @awslabs/sagemaker-1p-algorithms +.gitignore @awslabs/sagemaker-1p-algorithms + +# Documentation that affects security setup +docs/github-webhook-setup.md @awslabs/sagemaker-1p-algorithms +docs/webhook-security-analysis.md @awslabs/sagemaker-1p-algorithms + +# AWS CodeBuild specifications +**/buildspec.yml @awslabs/sagemaker-1p-algorithms diff --git a/docs/github-webhook-setup.md b/docs/github-webhook-setup.md new file mode 100644 index 0000000..1edc1b2 --- /dev/null +++ b/docs/github-webhook-setup.md @@ -0,0 +1,171 @@ +# GitHub Webhook Setup for CodeBuild Integration + +This document describes how to set up the GitHub webhook that triggers AWS CodeBuild projects when pull requests are created by members of the `sagemaker-1p-algorithms` team. + +## Overview + +The webhook (`.github/workflows/pr-codebuild-webhook.yml`) performs the following actions: + +1. **Triggers on PR events**: opened, synchronize, reopened +2. **Checks team membership**: Verifies if PR author or commit authors are members of `awslabs/sagemaker-1p-algorithms` team +3. **Triggers CodeBuild**: If team membership check passes, triggers both: + - `tgi-pr-GPU` project + - `tei-pr-CPU` project + +## Required GitHub Repository Secrets + +You need to configure the following secrets in your GitHub repository: + +### 1. AWS Credentials +- `AWS_ACCESS_KEY_ID`: AWS access key with CodeBuild permissions +- `AWS_SECRET_ACCESS_KEY`: AWS secret access key + +### 2. GitHub Token (Already Available) +- `GITHUB_TOKEN`: Automatically provided by GitHub Actions (no setup required) + +## Setting up AWS Credentials + +### Option 1: IAM User with Access Keys (Recommended for initial setup) + +1. **Create an IAM User:** + ```bash + aws iam create-user --user-name github-codebuild-trigger + ``` + +2. **Create access keys:** + ```bash + aws iam create-access-key --user-name github-codebuild-trigger + ``` + +3. **Create IAM Policy:** + ```bash + cat > codebuild-trigger-policy.json << EOF + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "codebuild:StartBuild", + "codebuild:BatchGetBuilds" + ], + "Resource": [ + "arn:aws:codebuild:us-west-2:515193369038:project/tgi-pr-GPU", + "arn:aws:codebuild:us-west-2:515193369038:project/tei-pr-CPU" + ] + } + ] + } + EOF + + aws iam create-policy --policy-name CodeBuildTriggerPolicy --policy-document file://codebuild-trigger-policy.json + ``` + +4. **Attach policy to user:** + ```bash + aws iam attach-user-policy --user-name github-codebuild-trigger --policy-arn arn:aws:iam::515193369038:policy/CodeBuildTriggerPolicy + ``` + +### Option 2: OIDC (Recommended for production) + +For better security, you can use OIDC instead of long-lived access keys. This requires setting up an OIDC provider in AWS and using role assumption. + +## Adding Secrets to GitHub Repository + +1. Go to your repository on GitHub +2. Navigate to **Settings** → **Secrets and variables** → **Actions** +3. Click **New repository secret** +4. Add the following secrets: + - Name: `AWS_ACCESS_KEY_ID`, Value: `` + - Name: `AWS_SECRET_ACCESS_KEY`, Value: `` + +## Team Setup Requirements + +### GitHub Team Configuration + +The workflow checks membership of the `sagemaker-1p-algorithms` team in the `awslabs` organization. Ensure: + +1. **Team exists**: The team `sagemaker-1p-algorithms` must exist in the `awslabs` GitHub organization +2. **Team visibility**: The team should be visible (not secret) or the `GITHUB_TOKEN` needs appropriate permissions +3. **Members added**: Users who should trigger builds must be added to this team + +### Team Management Commands + +To manage the team membership: + +```bash +# List team members (requires appropriate permissions) +gh api orgs/awslabs/teams/sagemaker-1p-algorithms/members + +# Add a user to the team (requires admin permissions) +gh api -X PUT orgs/awslabs/teams/sagemaker-1p-algorithms/memberships/USERNAME +``` + +## CodeBuild Project Requirements + +Your CodeBuild projects should be configured to: + +1. **Source**: GitHub repository +2. **Webhook**: Not required (GitHub Actions will trigger directly) +3. **Environment**: Appropriate compute type for your builds +4. **Service Role**: Has permissions to access source and any required AWS services + +### Environment Variables + +The workflow passes the following environment variables to CodeBuild: + +- `GITHUB_PR_NUMBER`: Pull request number +- `GITHUB_PR_HEAD_SHA`: Head commit SHA of the PR +- `GITHUB_PR_BASE_SHA`: Base commit SHA of the PR +- `GITHUB_REPOSITORY`: Repository name (awslabs/llm-hosting-container) + +## Workflow Behavior + +### Success Flow +1. PR is opened/updated by a team member +2. Team membership check passes +3. Both CodeBuild projects are triggered +4. GitHub comment is posted with build IDs and links + +### Failure Flow +1. PR is opened/updated by a non-team member +2. Team membership check fails +3. GitHub comment is posted explaining the access denial +4. Workflow exits with error status + +## Monitoring and Troubleshooting + +### Check Workflow Runs +- Go to **Actions** tab in your GitHub repository +- Look for "PR Team Check and CodeBuild Trigger" workflow runs + +### Common Issues + +1. **Team membership check fails**: Verify team exists and user is a member +2. **AWS credentials error**: Check that secrets are set correctly +3. **CodeBuild start fails**: Verify project names and AWS permissions +4. **No commit usernames found**: This is normal for commits made with email addresses not linked to GitHub + +### Debug Information + +The workflow logs provide detailed information about: +- PR author identification +- Commit authors and their GitHub usernames +- Team membership check results +- CodeBuild trigger responses + +## Security Considerations + +1. **Principle of least privilege**: AWS IAM user only has permissions to start specific CodeBuild projects +2. **Team-based access**: Only members of the designated team can trigger builds +3. **Audit trail**: All actions are logged in GitHub Actions and AWS CloudTrail +4. **Limited scope**: Webhook only triggers on PR events, not on main branch pushes + +## Testing the Setup + +1. **Create a test PR** from a team member account +2. **Verify workflow runs** in GitHub Actions +3. **Check CodeBuild console** for triggered builds +4. **Review PR comments** for success/failure notifications + +The workflow will automatically comment on PRs with build status and relevant information. diff --git a/docs/webhook-deployment-guide.md b/docs/webhook-deployment-guide.md new file mode 100644 index 0000000..28796cc --- /dev/null +++ b/docs/webhook-deployment-guide.md @@ -0,0 +1,156 @@ +# GitHub Webhook Deployment Guide + +## Overview + +This guide explains how to properly deploy the two-workflow GitHub webhook architecture for the llm-hosting-container repository. The architecture consists of two workflows that work together to securely check team membership and trigger AWS CodeBuild projects. + +## Deployment Challenge + +The two-workflow architecture presents a deployment challenge: +- The privileged workflow (`pr-codebuild-trigger.yml`) references the untrusted workflow by name ("PR Team Check (Untrusted)") +- When deploying via PR, the untrusted workflow doesn't exist in the main branch yet +- This creates a chicken-and-egg problem where the privileged workflow fails to find the referenced workflow + +## Deployment Strategies + +### Strategy 1: Sequential Deployment (Recommended) + +Deploy the workflows in two separate PRs to avoid the chicken-and-egg problem: + +#### Phase 1: Deploy Untrusted Workflow Only +1. Create a PR with only the untrusted workflow: + - `.github/workflows/pr-team-check.yml` + - `docs/webhook-security-analysis.md` + - `docs/github-webhook-setup.md` + - `docs/webhook-deployment-guide.md` (this file) + +2. Merge this PR to main branch + +#### Phase 2: Deploy Privileged Workflow +1. Create a second PR with the privileged workflow: + - `.github/workflows/pr-codebuild-trigger.yml` + - Updated `CODEOWNERS` file + +2. Merge this PR to main branch + +#### Phase 3: Test the Complete System +1. Create a test PR to verify both workflows work together +2. Confirm team membership checks and CodeBuild triggering + +### Strategy 2: Temporary Workflow Name (Alternative) + +If you prefer single-PR deployment: + +1. Temporarily modify the privileged workflow to use a generic workflow name +2. Deploy both workflows in the same PR +3. Update the workflow reference in a follow-up commit + +### Strategy 3: Manual Workflow Creation (Advanced) + +For immediate deployment: + +1. Manually create the untrusted workflow file directly in the main branch via GitHub UI +2. Then deploy the privileged workflow via PR + +## Pre-Deployment Checklist + +Before deploying either workflow, ensure: + +- [ ] AWS credentials are configured in GitHub repository secrets: + - `AWS_ACCESS_KEY_ID` + - `AWS_SECRET_ACCESS_KEY` +- [ ] The `sagemaker-1p-algorithms` team exists in the awslabs GitHub organization +- [ ] Team members are properly added to the team +- [ ] AWS CodeBuild projects exist: + - `tgi-pr-GPU` in us-west-2 region + - `tei-pr-CPU` in us-west-2 region + +## Post-Deployment Configuration + +After both workflows are deployed: + +1. **Enable Branch Protection Rules**: + - Require PR reviews for main branch + - Require status checks to pass + - Restrict pushes to main branch + +2. **Verify CODEOWNERS Protection**: + - Ensure `.github/workflows/` requires team review + - Test that workflow modifications trigger proper reviews + +3. **Test the Complete Flow**: + - Create a test PR from a team member account + - Verify team membership check passes + - Confirm CodeBuild projects are triggered + - Test with non-team member to verify rejection + +## Workflow Dependencies + +The workflows have the following dependency chain: + +``` +PR Created/Updated + ↓ +pr-team-check.yml (Untrusted) + - Extracts PR info + - Creates artifacts + - Completes + ↓ +pr-codebuild-trigger.yml (Privileged) + - Triggered by workflow_run + - Downloads artifacts + - Checks team membership + - Triggers CodeBuild (if authorized) +``` + +## Troubleshooting + +### Common Issues + +1. **"Workflow not found" error**: + - Ensure the untrusted workflow exists in the main branch + - Check workflow name matches exactly in the privileged workflow + +2. **Team membership check fails**: + - Verify team exists and user is a member + - Check GitHub token permissions + - Ensure team visibility settings allow API access + +3. **CodeBuild not triggered**: + - Verify AWS credentials are configured + - Check CodeBuild project names and regions + - Review AWS IAM permissions + +4. **Artifacts not found**: + - Ensure untrusted workflow completed successfully + - Check artifact names match between workflows + - Verify workflow_run trigger conditions + +### Debug Steps + +1. Check GitHub Actions logs for both workflows +2. Verify artifact creation and download +3. Test team membership API calls manually +4. Validate AWS CLI commands in isolation + +## Security Considerations + +- Never merge both workflows simultaneously without proper testing +- Always test with non-team members to verify access control +- Monitor workflow logs for security events +- Regularly review team membership and access patterns + +## Rollback Plan + +If issues occur after deployment: + +1. **Immediate**: Disable the workflows via GitHub UI +2. **Short-term**: Revert the PR that introduced the problematic workflow +3. **Long-term**: Fix issues and redeploy using proper sequence + +## Support + +For issues with this deployment: +1. Check the troubleshooting section above +2. Review workflow logs in GitHub Actions +3. Consult the security analysis document for architecture details diff --git a/docs/webhook-security-analysis.md b/docs/webhook-security-analysis.md new file mode 100644 index 0000000..449ac3d --- /dev/null +++ b/docs/webhook-security-analysis.md @@ -0,0 +1,242 @@ +# GitHub Webhook Security Analysis + +## Overview + +This document analyzes the security implementation of the GitHub webhook for the `llm-hosting-container` repository, which triggers AWS CodeBuild projects based on team membership verification. + +## Security Architecture: Two-Workflow Pattern + +### The Secure Solution + +To address the "Checkout of untrusted code in trusted context" vulnerability, we've implemented a **two-workflow security pattern** that separates untrusted PR processing from privileged operations. + +## Workflow Architecture + +### Workflow 1: PR Team Check (Untrusted) - `pr-team-check.yml` + +**Purpose**: Process untrusted PR code in an isolated environment +**Trigger**: `pull_request` (runs in unprivileged context) +**Permissions**: `contents: read` only + +**Key Features**: +- ✅ Safely checks out PR code (untrusted) +- ✅ Extracts PR and commit author information +- ✅ Detects workflow file modifications +- ✅ Creates artifacts with validated data +- ✅ No access to secrets or AWS credentials + +### Workflow 2: PR CodeBuild Trigger (Privileged) - `pr-codebuild-trigger.yml` + +**Purpose**: Perform privileged operations with validated data +**Trigger**: `workflow_run` (triggered by completion of first workflow) +**Permissions**: `contents: read`, `pull-requests: write`, `issues: write` + +**Key Features**: +- ✅ Downloads and validates artifacts from untrusted workflow +- ✅ Performs team membership checks with GitHub API +- ✅ Has access to AWS credentials and secrets +- ✅ Triggers CodeBuild projects +- ✅ Posts comments on PRs + +## Security Vulnerability: Workflow Modification Attack + +### The Original Problem + +The initial single-workflow implementation had a critical security vulnerability: + +1. **Attacker creates a malicious PR** that modifies workflow files +2. **Workflow runs with modified code** from the PR branch +3. **Security checks are bypassed** because the attacker controls the workflow logic +4. **AWS CodeBuild projects are triggered** without proper authorization +5. **AWS resources are compromised** through unauthorized access + +### Attack Scenario (Previous Implementation) + +```yaml +# Malicious workflow modification in attacker's PR +- name: Check team membership for PR author + run: | + # Attacker bypasses the real team check + echo "pr_author_is_member=true" >> $GITHUB_OUTPUT +``` + +This allowed any external contributor to bypass team membership checks and trigger expensive AWS CodeBuild projects. + +## Security Solution: Multi-Layered Protection + +### 1. Two-Workflow Isolation Pattern + +**Untrusted Workflow (`pull_request`)**: +- Processes PR code in isolated environment +- No access to secrets or AWS credentials +- Cannot trigger privileged operations +- Creates validated artifacts for privileged workflow + +**Privileged Workflow (`workflow_run`)**: +- Runs trusted code from main branch +- Has access to secrets and AWS credentials +- Downloads and validates artifacts from untrusted workflow +- Performs all privileged operations + +### 2. Artifact Validation and Sanitization + +```yaml +# Privileged workflow validates all data from artifacts +- name: Extract and validate PR information + run: | + # Validate PR_NUMBER is numeric + if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then + echo "::error::Invalid PR number: $PR_NUMBER" + exit 1 + fi + + # Validate SHA format (40 character hex) + if ! [[ "$HEAD_SHA" =~ ^[a-f0-9]{40}$ ]]; then + echo "::error::Invalid HEAD SHA format: $HEAD_SHA" + exit 1 + fi +``` + +### 3. Security Validation Step + +```yaml +- name: Security validation + run: | + if [[ "$workflow_modified" == "true" ]]; then + echo "🚨 SECURITY BLOCK: This PR modifies workflow files" + # Block execution and post security warning + fi +``` + +### 4. Conditional Execution Protection + +```yaml +- name: Configure AWS Credentials + if: | + steps.security-check.outputs.security_blocked == 'false' && + (team membership conditions...) +``` + +### 5. CODEOWNERS Protection + +``` +.github/workflows/ @awslabs/sagemaker-1p-algorithms +``` + +## Security Benefits of Two-Workflow Pattern + +### ✅ **Isolation of Untrusted Code** +- PR code runs in unprivileged environment +- No access to secrets or AWS credentials +- Cannot directly trigger privileged operations + +### ✅ **Trusted Execution Context** +- Privileged operations run trusted code from main branch +- Attacker cannot modify privileged workflow logic +- All data from untrusted workflow is validated + +### ✅ **Defense in Depth** +- Multiple validation layers +- Artifact sanitization +- Security checks in both workflows + +### ✅ **Fail-Safe Defaults** +- Block execution when validation fails +- Explicit security error messages +- No silent failures + +## Implementation Phases + +### Phase 1: Two-Workflow Architecture ✅ +- Separate untrusted and privileged workflows +- Implement artifact-based communication +- Add comprehensive validation + +### Phase 2: Enhanced Security Validation ✅ +- Workflow modification detection +- Data sanitization and validation +- Security block mechanisms + +### Phase 3: Access Control & Monitoring ✅ +- CODEOWNERS file protection +- Explicit permissions configuration +- Comprehensive audit logging + +## Security Best Practices Applied + +1. **Principle of Least Privilege**: Minimal permissions for each workflow +2. **Defense in Depth**: Multiple security layers and validations +3. **Fail-Safe Defaults**: Block execution when security checks fail +4. **Input Validation**: Sanitize all data from untrusted sources +5. **Audit Trail**: Comprehensive logging of all security decisions +6. **Human Oversight**: Required reviews for workflow changes + +## Comparison: Before vs After + +### Before (Vulnerable) +```yaml +on: pull_request_target # Privileged context +jobs: + build: + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} # ❌ Untrusted code + - name: Team check + run: | + # ❌ Attacker can modify this logic +``` + +### After (Secure) +```yaml +# Workflow 1: Untrusted +on: pull_request # ❌ Unprivileged context +jobs: + check: + steps: + - uses: actions/checkout@v4 # ✅ Safe in unprivileged context + - name: Create artifacts # ✅ No secrets access + +# Workflow 2: Privileged +on: workflow_run # ✅ Triggered by completion +jobs: + trigger: + steps: + - name: Download artifacts # ✅ Validate untrusted data + - name: Team check # ✅ Trusted code from main branch +``` + +## Monitoring and Detection + +### Security Indicators to Monitor +- PRs modifying workflow files +- Unexpected CodeBuild executions +- Failed artifact validations +- Security block activations + +### Audit Trail +- All team membership checks logged +- Security decisions recorded +- CodeBuild triggers tracked +- PR comments provide transparency + +## Remaining Considerations + +1. **Team Management**: Ensure `sagemaker-1p-algorithms` team is properly maintained +2. **Access Control**: Regular audit of team membership +3. **Monitoring**: Watch for unusual CodeBuild activity +4. **Incident Response**: Plan for handling security breaches +5. **Artifact Retention**: Artifacts are retained for 1 day only + +## Conclusion + +The implemented two-workflow security architecture provides robust protection against workflow modification attacks and untrusted code execution while maintaining the required functionality for team-based CodeBuild triggering. + +**Key Security Achievements**: +- ✅ Eliminated untrusted code execution in privileged context +- ✅ Implemented comprehensive input validation +- ✅ Added multiple layers of security controls +- ✅ Maintained full functionality for authorized users +- ✅ Provided clear audit trail and transparency + +The multi-layered approach ensures that even if one security control fails, others will prevent unauthorized access to AWS resources. This architecture follows GitHub's recommended security best practices for handling untrusted PR content.