Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 217 additions & 0 deletions .github/workflows/samcli-vm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
# Runs the AWS SAM CLI integration suites against a locally built Finch VM.
name: samcli-vm

on:
  # Pull requests against main that touch the SAM CLI harness or this workflow.
  pull_request:
    branches:
      - main
    paths:
      - 'scripts/samcli-vm/**'
      - 'scripts/cleanup-aws-resources.sh'
      - 'scripts/validate-test-results.sh'
      - '.github/workflows/samcli-vm.yaml'
  # Daily scheduled run (cron is evaluated in UTC by GitHub Actions).
  schedule:
    - cron: '0 8 * * *'
  # Manual trigger.
  workflow_dispatch:

# Shared by every job and step; the test scripts read these from the environment.
env:
  GO_VERSION: '1.25.7'
  # Python 3.13 is fine, but need to pin chardet for requests compatibility
  PYTHON_VERSION: '3.13'
  PYTHON_BINARY: 'python3.13'
  AWS_DEFAULT_REGION: ${{ secrets.SAMCLI_REGION }}
  BY_CANARY: 'true'  # allows full testing
  SAM_CLI_DEV: '1'
  SAM_CLI_TELEMETRY: '0'
  # Point Docker clients at the Finch daemon socket.
  DOCKER_HOST: unix:///Applications/Finch/lima/data/finch/sock/finch.sock

permissions:
  # id-token is needed for the role assumption done by configure-aws-credentials.
  id-token: write
  contents: read

jobs:
  # Builds Finch from this checkout on a macOS runner, boots its VM, and runs
  # one SAM CLI integration suite per matrix entry with Finch standing in for
  # Docker (DOCKER_HOST points at the Finch socket).
  # NOTE(review): "Detect runner user" can fall back to the current user, but
  # later steps and the test scripts hard-code ec2-user and /Users/ec2-user —
  # confirm whether the fallback is expected to work end to end.
  samcli-vm-test:
    runs-on: codebuild-finch-macos-arm64-2-instance-${{ github.run_id }}-${{ github.run_attempt }}
    timeout-minutes: 90
    strategy:
      fail-fast: false
      matrix:
        test_type: [invoke, start-api, sync, package, start-lambda]
    steps:
      - name: Check runner OS
        run: |
          echo "OS: $(uname -s)"
          echo "Kernel: $(uname -r)"
          echo "Architecture: $(uname -m)"
          echo "Full uname: $(uname -a)"

      # Exposes `user` and `home` outputs consumed by the steps below.
      - name: Detect runner user
        id: user
        run: |
          if id ec2-user &>/dev/null; then
            echo "user=ec2-user" >> "$GITHUB_OUTPUT"
            echo "home=/Users/ec2-user" >> "$GITHUB_OUTPUT"
            echo "✓ Using ec2-user"
          else
            CURRENT_USER=$(whoami)
            echo "user=$CURRENT_USER" >> "$GITHUB_OUTPUT"
            echo "home=$HOME" >> "$GITHUB_OUTPUT"
            echo "✓ Using current user: $CURRENT_USER (home: $HOME)"
          fi

      - name: Set Docker config
        run: echo "DOCKER_CONFIG=${{ steps.user.outputs.home }}/.finch" >> "$GITHUB_ENV"

      # Remove any previous Finch install and stop leftover VM helper processes.
      - name: Clean macOS runner workspace
        run: |
          sudo rm -rf /Applications/Finch
          sudo rm -rf /opt/finch
          rm -rf ${{ steps.user.outputs.home }}/.finch || true
          rm -rf ${{ steps.user.outputs.home }}/Library/Python || true
          sudo rm -rf ./_output
          if pgrep '^qemu-system'; then
            sudo pkill '^qemu-system'
          fi
          if pgrep '^socket_vmnet'; then
            sudo pkill '^socket_vmnet'
          fi

      - name: Configure Git for ec2-user
        run: |
          git config --global --add safe.directory "*"
        shell: bash

      - name: Set up Go
        uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: false

      # Best effort: hand the Go directories to the runner user.
      - name: Configure Go for runner
        run: |
          chown -R ${{ steps.user.outputs.user }}:staff $GOPATH || true
          chown -R ${{ steps.user.outputs.user }}:staff $RUNNER_TOOL_CACHE/go || true

      - name: Install Rosetta 2
        run: echo "A" | /usr/sbin/softwareupdate --install-rosetta --agree-to-license || true

      # NOTE(review): the other actions in this workflow are pinned to a commit
      # SHA; pin this one as well once the SHA for the desired release is confirmed.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      # Best effort: hand the user site directory to the runner user and expose
      # the interpreter under /usr/local/bin.
      - name: Configure Python for runner
        run: |
          chown -R ${{ steps.user.outputs.user }}:staff $(${{ env.PYTHON_BINARY }} -c "import site; print(site.USER_BASE)") || true
          ln -sf $(which ${{ env.PYTHON_BINARY }}) /usr/local/bin/${{ env.PYTHON_BINARY }} || true

      - name: Install dependencies
        run: |
          echo "Skipping dependency installation - will fail during build if truly required"
        shell: bash

      - name: Checkout finch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          submodules: recursive

      - name: Make & install Finch
        run: |
          chown -R ${{ steps.user.outputs.user }}:staff "$GITHUB_WORKSPACE"
          su ec2-user -c "cd $GITHUB_WORKSPACE && make clean && GOOS=darwin make FINCH_OS_IMAGE_LOCATION_ROOT=/Applications/Finch && make install PREFIX=Applications/Finch"
          su ec2-user -c "ls -lah /Applications/Finch"

      # The wait loop has no exit condition of its own, so bound the step
      # instead of letting a VM that never starts consume the whole job timeout.
      - name: Initializing Finch VM
        timeout-minutes: 15
        run: |
          su ec2-user -c 'finch vm init'
          su ec2-user -c 'while ! finch vm status | grep -q "Running"; do echo "Waiting for VM..."; sleep 5; done'

      - name: Verify Finch socket
        run: |
          if su ec2-user -c 'curl -s --unix-socket /Applications/Finch/lima/data/finch/sock/finch.sock http://localhost/version' > /dev/null; then
            echo "✓ Finch daemon is accessible"
          else
            echo "✗ Finch daemon connection failed"
            ls -la /Applications/Finch/lima/data/finch/sock/ || echo "Socket directory not found"
            exit 1
          fi

      # Only warns when a docker binary is still reachable; it does not fail the job.
      - name: Ensure Docker is not available
        run: |
          sudo rm -f /usr/local/bin/docker /opt/homebrew/bin/docker || true
          if su ec2-user -c 'which docker' > /dev/null 2>&1; then
            echo "WARNING: Docker is still accessible"
          else
            echo "SUCCESS: Docker is not accessible - SAM CLI will use Finch"
          fi
        shell: bash

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df # v4.2.1
        with:
          role-to-assume: ${{ secrets.SAMCLI_VM_ROLE }}
          role-session-name: samcli-${{ matrix.test_type }}-tests
          aws-region: ${{ secrets.SAMCLI_REGION }}
          role-duration-seconds: 14400

      - name: Pre-test AWS resource cleanup
        timeout-minutes: 10
        run: ./scripts/cleanup-aws-resources.sh

      # Fail here if the release lookup does not yield a tag: an empty ref
      # would make the checkout below fall back to the default branch, and a
      # literal "null" would fail there with a far less obvious error.
      - name: Get latest SAM CLI tag
        id: sam-tag
        run: |
          TAG=$(curl -fsSL https://api.github.com/repos/aws/aws-sam-cli/releases/latest | jq -r .tag_name)
          if [ -z "$TAG" ] || [ "$TAG" = "null" ]; then
            echo "::error::Could not determine the latest aws-sam-cli release tag"
            exit 1
          fi
          echo "Using SAM CLI tag: $TAG"
          echo "tag=$TAG" >> "$GITHUB_OUTPUT"

      # Pinned to the same commit as the Finch checkout above.
      - name: Checkout SAM CLI
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          repository: aws/aws-sam-cli
          submodules: recursive
          path: aws-sam-cli
          ref: ${{ steps.sam-tag.outputs.tag }}

      # Moves the checkout into ec2-user's home and installs it in editable
      # mode, which provides the `samdev` entry point verified on the last line.
      - name: Set up SAM CLI from source
        run: |
          sudo rm -rf /Users/ec2-user/aws-sam-cli || true
          sudo mv aws-sam-cli /Users/ec2-user/aws-sam-cli
          sudo chown -R ec2-user:staff /Users/ec2-user/aws-sam-cli

          su ec2-user -c 'cd /Users/ec2-user/aws-sam-cli && ${{ env.PYTHON_BINARY }} -m pip install --upgrade pip --user'
          su ec2-user -c 'cd /Users/ec2-user/aws-sam-cli && SAM_CLI_DEV=1 ${{ env.PYTHON_BINARY }} -m pip install -e ".[dev]" --user'
          # Pin chardet to avoid 6.0.0 which breaks requests compatibility (released Feb 22, 2026)
          su ec2-user -c '${{ env.PYTHON_BINARY }} -m pip install "chardet<6.0.0" --user'
          su ec2-user -c "/Users/ec2-user/Library/Python/${{ env.PYTHON_VERSION }}/bin/samdev --version"
        shell: bash

      - name: Run ${{ matrix.test_type }} tests
        run: ./scripts/samcli-vm/run-${{ matrix.test_type }}-tests.sh

  # Always runs after the test matrix: tears down Finch on the runner and
  # sweeps AWS resources once more.
  cleanup:
    runs-on: codebuild-finch-macos-arm64-2-instance-${{ github.run_id }}-${{ github.run_attempt }}
    needs: samcli-vm-test
    if: always()
    steps:
      # Every command is best effort so one failure does not skip the rest.
      - name: Final cleanup
        run: |
          su ec2-user -c 'finch vm stop' || true
          su ec2-user -c 'finch vm remove -f' || true
          sudo pkill -f socket_vmnet || true
          sudo pkill -f finch-daemon || true
          sudo rm -rf /private/var/run/finch-lima/*.sock || true
          sudo rm -rf /Applications/Finch/lima/data/finch/_cache || true
          sudo rm -rf /tmp/finch-* || true

      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df # v4.2.1
        with:
          role-to-assume: ${{ secrets.SAMCLI_VM_ROLE }}
          role-session-name: cleanup
          aws-region: ${{ secrets.SAMCLI_REGION }}

      - name: Comprehensive AWS resource cleanup
        timeout-minutes: 10
        run: ./scripts/cleanup-aws-resources.sh
62 changes: 62 additions & 0 deletions scripts/cleanup-aws-resources.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash
# Best-effort cleanup of AWS resources left behind by SAM CLI test runs.
set +e # Continue on failures

# Default to us-east-1 if not set. Exported so that AWS CLI invocations which
# do not pass --region explicitly still resolve the same region.
export AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}"

# Function to safely run AWS commands with retries.
# Arguments: the command line, as one string or as several words. The words
#            are joined with spaces and evaluated with `eval`, so any quoting
#            needed inside the command is the caller's responsibility.
# Returns:   0 as soon as one attempt succeeds; 1 after every attempt failed.
safe_aws_command() {
  local max_attempts=3
  local attempt=1
  # "$*" joins the arguments into a single string explicitly.
  local command="$*"

  while [ "$attempt" -le "$max_attempts" ]; do
    if eval "$command"; then
      return 0
    fi
    echo "Attempt $attempt/$max_attempts failed: $command"
    # Back off only when another attempt will actually follow.
    if [ "$attempt" -lt "$max_attempts" ]; then
      sleep 5
    fi
    attempt=$((attempt + 1))
  done

  echo "Command failed after $max_attempts attempts: $command"
  return 1
}

# Clean up S3 buckets from SAM CLI test stacks.
# For every CloudFormation stack whose name contains one of TEST_PATTERNS, the
# buckets found in the stack outputs and stack resources are emptied; the
# buckets themselves are not deleted.
# NOTE(review): patterns such as "test-" are substring matches and may catch
# stacks that were not created by these tests — confirm the account is
# dedicated to this workflow.
echo "=== Cleaning S3 buckets ==="
TEST_PATTERNS=("sam-app" "test-" "integration-test" "samcli" "aws-sam-cli-managed")

for pattern in "${TEST_PATTERNS[@]}"; do
  STACKS=$(aws cloudformation list-stacks --region "$AWS_DEFAULT_REGION" --stack-status-filter CREATE_COMPLETE UPDATE_COMPLETE ROLLBACK_COMPLETE UPDATE_ROLLBACK_COMPLETE --query "StackSummaries[?contains(StackName, '$pattern')].[StackName]" --output text 2>/dev/null || true)

  # Stack names contain no whitespace, so word splitting yields one name each.
  for stack in $STACKS; do
    echo "Processing stack: $stack"

    # Get S3 buckets from stack
    BUCKET_NAMES=$(aws cloudformation describe-stacks --stack-name "$stack" --region "$AWS_DEFAULT_REGION" --query 'Stacks[0].Outputs[?contains(OutputKey, `Bucket`) || contains(OutputKey, `bucket`)].OutputValue' --output text 2>/dev/null || true)
    RESOURCE_BUCKETS=$(aws cloudformation describe-stack-resources --stack-name "$stack" --region "$AWS_DEFAULT_REGION" --query 'StackResources[?ResourceType==`AWS::S3::Bucket`].PhysicalResourceId' --output text 2>/dev/null || true)

    # Empty buckets (don't delete them)
    for bucket in $BUCKET_NAMES $RESOURCE_BUCKETS; do
      # --output text prints "None" for null values; skip those and blanks.
      if [ -z "$bucket" ] || [ "$bucket" = "None" ]; then
        continue
      fi

      echo "Emptying S3 bucket: $bucket"
      if ! aws s3api head-bucket --bucket "$bucket" 2>/dev/null; then
        echo "Skipping bucket (not found or not accessible): $bucket"
        continue
      fi

      # Report the real outcome instead of claiming success unconditionally.
      if safe_aws_command "aws s3 rm s3://$bucket --recursive --quiet"; then
        echo "✅ Emptied bucket: $bucket"
      else
        echo "⚠️ Failed to empty bucket: $bucket"
      fi
    done
  done
done

# Clean up ECR repositories.
# Force-deletes every repository whose name contains one of ECR_PATTERNS.
# NOTE(review): "test-" is a broad substring match — confirm no unrelated
# repositories in this account can match it.
echo "=== Cleaning ECR repositories ==="
ECR_PATTERNS=("sam-app" "test-" "integration-test")
for pattern in "${ECR_PATTERNS[@]}"; do
  REPOS=$(aws ecr describe-repositories --region "$AWS_DEFAULT_REGION" --query "repositories[?contains(repositoryName, '$pattern')].repositoryName" --output text 2>/dev/null || true)
  for repo in $REPOS; do
    echo "Deleting ECR repository: $repo"
    # Best effort: log a failed deletion and carry on with the next repository.
    if ! safe_aws_command "aws ecr delete-repository --repository-name '$repo' --force --region $AWS_DEFAULT_REGION"; then
      echo "⚠️ Failed to delete ECR repository: $repo"
    fi
  done
done

echo "✅ Cleanup completed"
42 changes: 42 additions & 0 deletions scripts/samcli-vm/run-invoke-tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
# Runs the SAM CLI `local invoke` integration tests as ec2-user, then hands the
# captured output and the list of expected failures to validate-test-results.sh.
# Reads from the environment: PYTHON_VERSION, PYTHON_BINARY, DOCKER_HOST,
# AWS_DEFAULT_REGION, BY_CANARY, SAM_CLI_DEV, SAM_CLI_TELEMETRY, GITHUB_WORKSPACE.
set -e

OUTPUT_FILE=/tmp/invoke_test_output.txt
EXPECTED_FAILURES_FILE=expected_invoke_failures.txt

echo "=== INVOKE TESTS - Started at $(date) ==="
touch "$OUTPUT_FILE"
chown ec2-user:staff "$OUTPUT_FILE"

# The command string is expanded by this shell before su runs it, so the values
# above are baked in. `|| true` keeps going after test failures; the pass/fail
# decision is made by the validation step at the end.
su ec2-user -c "
  cd /Users/ec2-user/aws-sam-cli && \
  export PATH='/Users/ec2-user/Library/Python/$PYTHON_VERSION/bin:$PATH' && \
  export DOCKER_HOST='$DOCKER_HOST' && \
  export AWS_REGION='${AWS_DEFAULT_REGION:-us-east-1}' && \
  AWS_DEFAULT_REGION='${AWS_DEFAULT_REGION:-us-east-1}' \
  BY_CANARY='$BY_CANARY' \
  SAM_CLI_DEV='$SAM_CLI_DEV' \
  SAM_CLI_TELEMETRY='$SAM_CLI_TELEMETRY' \
  '$PYTHON_BINARY' -m pytest tests/integration/local/invoke -k 'not Terraform' -v --tb=short
" 2>&1 | tee "$OUTPUT_FILE" || true
echo "=== INVOKE TESTS - Finished at $(date) ==="

# test_invoke_with_error_during_image_build: Build error message differs from expected.
# test_invoke_with_timeout_set_X_TimeoutFunction: Returns timeout message instead of empty string,
#   but matches actual Lambda service behavior.
# test_building_new_rapid_image_removes_old_rapid_images: Cannot remove images with same digest,
#   Docker creates different IDs for each.
# test_caching_two_layers and test_caching_two_layers_with_layer_cache_env_set: error due to sequential
#   test runs within invoke. Work when run in isolation and locally.
# test_successful_invoke: Related to symlink mount errors due to permissions. Works locally.
# test_invoke_returns_execpted_results_2_HelloWorldServerlessWithFunctionNameRefFunction: Module import error.
cat > "$EXPECTED_FAILURES_FILE" << 'EOF'
test_invoke_with_error_during_image_build
test_invoke_with_timeout_set_0_TimeoutFunction
test_invoke_with_timeout_set_1_TimeoutFunctionWithParameter
test_invoke_with_timeout_set_2_TimeoutFunctionWithStringParameter
test_building_new_rapid_image_removes_old_rapid_images
test_caching_two_layers
test_caching_two_layers_with_layer_cache_env_set
test_successful_invoke
test_invoke_returns_execpted_results_2_HelloWorldServerlessWithFunctionNameRefFunction
EOF

# Validate test results (path quoted so a workspace containing spaces works)
"$GITHUB_WORKSPACE/scripts/validate-test-results.sh" "$OUTPUT_FILE" "$EXPECTED_FAILURES_FILE" "Invoke tests"
32 changes: 32 additions & 0 deletions scripts/samcli-vm/run-package-tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# Runs the SAM CLI image-packaging integration tests as ec2-user, then hands
# the captured output and the list of expected failures to
# validate-test-results.sh.
# Reads from the environment: PYTHON_VERSION, PYTHON_BINARY, DOCKER_HOST,
# AWS_DEFAULT_REGION, BY_CANARY, SAM_CLI_DEV, SAM_CLI_TELEMETRY, GITHUB_WORKSPACE.
set -e

OUTPUT_FILE=/tmp/package_test_output.txt
EXPECTED_FAILURES_FILE=expected_package_failures.txt

echo "=== PACKAGE TESTS - Started at $(date) ==="
touch "$OUTPUT_FILE"
chown ec2-user:staff "$OUTPUT_FILE"

# The command string is expanded by this shell before su runs it.
# AWS_EC2_METADATA_DISABLED is exported once, which already covers the pytest
# process. `|| true` keeps going after test failures; the pass/fail decision is
# made by the validation step at the end.
su ec2-user -c "
  cd /Users/ec2-user/aws-sam-cli && \
  export PATH='/Users/ec2-user/Library/Python/$PYTHON_VERSION/bin:$PATH' && \
  export DOCKER_HOST='$DOCKER_HOST' && \
  export AWS_REGION='${AWS_DEFAULT_REGION:-us-east-1}' && \
  export AWS_DEFAULT_REGION='${AWS_DEFAULT_REGION:-us-east-1}' && \
  export AWS_EC2_METADATA_DISABLED=true && \
  BY_CANARY='$BY_CANARY' \
  SAM_CLI_DEV='$SAM_CLI_DEV' \
  SAM_CLI_TELEMETRY='$SAM_CLI_TELEMETRY' \
  '$PYTHON_BINARY' -m pytest tests/integration/package/test_package_command_image.py -v --tb=short
" 2>&1 | tee "$OUTPUT_FILE" || true
echo "=== PACKAGE TESTS - Finished at $(date) ==="

# test_package_with_deep_nested_template_image: Expects Docker-specific push stream pattern.
# test_package_template_with_image_repositories_nested_stack_x: Push API stream differs from Docker.
# test_package_with_loadable_image_archive_0_template_image_load_yaml: Docker imports by digest,
#   Finch imports as "overlayfs:" tag causing image info lookup to fail.
cat > "$EXPECTED_FAILURES_FILE" << 'EOF'
test_package_with_deep_nested_template_image
test_package_template_with_image_repositories_nested_stack
test_package_with_loadable_image_archive_0_template_image_load_yaml
EOF

# Validate test results (path quoted so a workspace containing spaces works)
"$GITHUB_WORKSPACE/scripts/validate-test-results.sh" "$OUTPUT_FILE" "$EXPECTED_FAILURES_FILE" "Package tests"
33 changes: 33 additions & 0 deletions scripts/samcli-vm/run-start-api-tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
# Runs the SAM CLI `local start-api` integration tests as ec2-user, then hands
# the captured output and the list of expected failures to
# validate-test-results.sh.
# Reads from the environment: PYTHON_VERSION, PYTHON_BINARY, DOCKER_HOST,
# AWS_DEFAULT_REGION, BY_CANARY, SAM_CLI_DEV, SAM_CLI_TELEMETRY, GITHUB_WORKSPACE.
set -e

OUTPUT_FILE=/tmp/start_api_test_output.txt
EXPECTED_FAILURES_FILE=expected_start_api_failures.txt

echo "=== START-API TESTS - Started at $(date) ==="
touch "$OUTPUT_FILE"
chown ec2-user:staff "$OUTPUT_FILE"

# Run tests with live output.
# The command string is expanded by this shell before su runs it. The steps are
# chained with &&, so pytest only starts if raising the open-file limit worked.
# `|| true` keeps going after failures; the pass/fail decision is made by the
# validation step at the end.
su ec2-user -c "
  cd /Users/ec2-user/aws-sam-cli && \
  export PATH='/Users/ec2-user/Library/Python/$PYTHON_VERSION/bin:$PATH' && \
  export DOCKER_HOST='$DOCKER_HOST' && \
  ulimit -n 65536 && \
  export AWS_REGION='${AWS_DEFAULT_REGION:-us-east-1}' && \
  AWS_DEFAULT_REGION='${AWS_DEFAULT_REGION:-us-east-1}' \
  BY_CANARY='$BY_CANARY' \
  SAM_CLI_DEV='$SAM_CLI_DEV' \
  SAM_CLI_TELEMETRY='$SAM_CLI_TELEMETRY' \
  '$PYTHON_BINARY' -m pytest tests/integration/local/start_api -k 'not Terraform' -v --tb=short
" 2>&1 | tee "$OUTPUT_FILE" || true
echo "=== START-API TESTS - Finished at $(date) ==="

# test_can_invoke_lambda_layer_successfully: Uses random port, fails occasionally.
#   Only 1 test of 386 total, acceptable failure rate.
# test_changed_code_got_observed_and_loaded: Fails due to race conditions with containers.
#   Fails infrequently, but is of note.
cat > "$EXPECTED_FAILURES_FILE" << 'EOF'
test_can_invoke_lambda_layer_successfully
test_changed_code_got_observed_and_loaded
EOF

# Validate test results (path quoted so a workspace containing spaces works)
"$GITHUB_WORKSPACE/scripts/validate-test-results.sh" "$OUTPUT_FILE" "$EXPECTED_FAILURES_FILE" "Start-API tests"
Loading