Skip to content

Commit 147be1b

Browse files
authored
ci: troubleshoot hanging test (runfinch#340)
ci: fix daemon injection and samcli-vm error handling Signed-off-by: ayush-panta <[email protected]>
1 parent d8d5f8a commit 147be1b

17 files changed

+272
-407
lines changed

.github/workflows/finch-vm-test.yaml

Lines changed: 18 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ on:
1212
- '**.md'
1313
workflow_dispatch:
1414
env:
15-
GO_VERSION: '1.23.8'
15+
GO_VERSION: '1.24.x'
1616
jobs:
1717
mac-test-e2e:
1818
runs-on: codebuild-finch-daemon-arm64-2-instance-${{ github.run_id }}-${{ github.run_attempt }}
@@ -65,57 +65,34 @@ jobs:
6565
su ec2-user -c 'source /Users/ec2-user/.brewrc && brew install lz4 automake autoconf libtool yq'
6666
shell: bash
6767

68-
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
68+
- name: Checkout mainline finch repo
69+
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
6970
with:
70-
# We need to get all the git tags to make version injection work. See VERSION in Makefile for more detail.
71+
ref: main
72+
repository: runfinch/finch
7173
fetch-depth: 0
7274
persist-credentials: false
7375
submodules: recursive
7476

75-
- name: Configure workspace for ec2-user
76-
run: |
77-
# Ensure workspace is properly owned by ec2-user
78-
chown -R ec2-user:staff ${{ github.workspace }}
79-
80-
# Install Finch
81-
- name: Install Finch
82-
run: |
83-
echo "Installing Finch as ec2-user..."
84-
85-
# Run brew with custom environment
86-
su ec2-user -c 'source /Users/ec2-user/.brewrc && brew install finch --cask'
87-
88-
# Verify installation
89-
su ec2-user -c 'source /Users/ec2-user/.brewrc && brew list | grep finch || echo "finch not installed"'
90-
mkdir -p /private/var/run/finch-lima
91-
cat /etc/passwd
92-
chown ec2-user:daemon /private/var/run/finch-lima
93-
shell: bash
94-
95-
# Build binaries
96-
- name: Build binaries
97-
run: |
98-
echo "Building cross architecture binaries..."
99-
su ec2-user -c 'cd ${{ github.workspace }} && STATIC=1 GOPROXY=direct GOOS=linux GOARCH=$(GOARCH) make'
100-
su ec2-user -c 'finch vm remove -f'
101-
cp -f ${{ github.workspace }}/bin/finch-daemon /Applications/Finch/finch-daemon/finch-daemon
102-
shell: bash
77+
- name: Checkout finch-daemon PR
78+
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
79+
with:
80+
ref: ${{ github.head_ref }}
81+
fetch-depth: 0
82+
persist-credentials: false
83+
submodules: recursive
84+
path: finch-daemon-pr
10385

104-
# Initialize VM and check version
105-
- name: Check Finch version
106-
run: |
107-
echo "Initializing VM and checking version..."
108-
su ec2-user -c 'finch vm init'
109-
sleep 5 # Wait for services to be ready
110-
echo "Checking Finch version..."
111-
su ec2-user -c 'LIMA_HOME=/Applications/Finch/lima/data /Applications/Finch/lima/bin/limactl shell finch curl --unix-socket /var/run/finch.sock -X GET http:/v1.43/version'
112-
shell: bash
86+
- name: Build and setup Finch VM
87+
run: ./finch-daemon-pr/scripts/build-and-setup-finch-vm.sh
11388

11489
# Run e2e tests
11590
- name: Run e2e tests
11691
run: |
11792
echo "Running e2e tests..."
118-
su ec2-user -c 'make test-e2e-inside-vm'
93+
94+
su ec2-user -c 'finch version' || true
95+
su ec2-user -c 'cd ${{ github.workspace }}/finch-daemon-pr && make test-e2e-inside-vm'
11996
shell: bash
12097

12198
# Cleanup

.github/workflows/samcli-direct.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
name: samcli-direct
22

33
on:
4+
pull_request:
5+
types:
6+
- closed
47
schedule:
58
- cron: '0 8 * * *'
69
workflow_dispatch:
@@ -15,6 +18,7 @@ permissions:
1518

1619
jobs:
1720
samcli-direct-test:
21+
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true
1822
runs-on: ubuntu-latest
1923
timeout-minutes: 30 # start-api is the longest at ~ 20 minutes
2024
strategy:
@@ -48,7 +52,7 @@ jobs:
4852
with:
4953
go-version: ${{ env.GO_VERSION }}
5054

51-
# from aws/aws-sam-cli/setup.py: python_requires=">=3.9, <=4.0, !=4.0
55+
# from aws/aws-sam-cli/setup.py: python_requires=">=3.9, <=4.0, !=4.0"
5256
- name: Set up Python
5357
uses: actions/setup-python@v4
5458
with:

.github/workflows/samcli-vm.yaml

Lines changed: 58 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ jobs:
2828
- name: Clean macOS runner workspace
2929
run: |
3030
rm -rf ${{ github.workspace }}/*
31+
# Clean up any leftover Finch VM state
32+
su ec2-user -c 'finch vm remove -f' || true
33+
sudo pkill -f socket_vmnet || true
34+
sudo rm -rf /private/var/run/finch-lima/*.sock || true
35+
sudo rm -rf /Applications/Finch/lima/data/finch/_cache || true
36+
# Clean up containers and images via Finch CLI
37+
su ec2-user -c 'finch container prune -f' || true
38+
su ec2-user -c 'finch image prune -a -f' || true
3139
3240
- name: Configure Git for ec2-user
3341
run: |
@@ -85,49 +93,26 @@ jobs:
8593
su ec2-user -c 'source /Users/ec2-user/.brewrc && brew install lz4 automake autoconf libtool yq'
8694
shell: bash
8795

88-
- name: Checkout finch-daemon repo
89-
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
96+
- name: Checkout mainline finch repo
97+
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
9098
with:
99+
ref: main
100+
repository: runfinch/finch
91101
fetch-depth: 0
92102
persist-credentials: false
93103
submodules: recursive
94104

95-
- name: Configure workspace for ec2-user
96-
run: |
97-
chown -R ec2-user:staff ${{ github.workspace }}
98-
99-
- name: Install Finch
100-
run: |
101-
echo "Installing Finch as ec2-user..."
102-
su ec2-user -c 'source /Users/ec2-user/.brewrc && brew install finch --cask'
103-
su ec2-user -c 'source /Users/ec2-user/.brewrc && brew list | grep finch || echo "finch not installed"'
104-
mkdir -p /private/var/run/finch-lima
105-
cat /etc/passwd
106-
chown ec2-user:daemon /private/var/run/finch-lima
107-
shell: bash
108-
109-
- name: Build binaries
110-
run: |
111-
echo "Building cross architecture binaries..."
112-
su ec2-user -c 'cd ${{ github.workspace }} && STATIC=1 GOPROXY=direct GOOS=linux GOARCH=arm64 make'
113-
su ec2-user -c 'finch vm remove -f' || true
114-
cp -f ${{ github.workspace }}/bin/finch-daemon /Applications/Finch/finch-daemon/finch-daemon
115-
# Restart finch-daemon with new binary
116-
su ec2-user -c 'finch vm stop' || true
117-
su ec2-user -c 'finch vm start' || true
118-
shell: bash
105+
- name: Checkout finch-daemon PR
106+
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
107+
with:
108+
ref: ${{ github.head_ref }}
109+
fetch-depth: 0
110+
persist-credentials: false
111+
submodules: recursive
112+
path: finch-daemon-pr
119113

120-
- name: Check Finch version
121-
run: |
122-
echo "Initializing VM and checking version..."
123-
# Clean up any leftover network state
124-
sudo pkill -f socket_vmnet || true
125-
sudo rm -f /private/var/run/finch-lima/*.sock || true
126-
su ec2-user -c 'finch vm init'
127-
sleep 5 # Wait for services to be ready
128-
echo "Checking Finch version..."
129-
su ec2-user -c 'LIMA_HOME=/Applications/Finch/lima/data /Applications/Finch/lima/bin/limactl shell finch curl --unix-socket /var/run/finch.sock -X GET http:/v1.43/version'
130-
shell: bash
114+
- name: Build and setup Finch VM
115+
run: ./finch-daemon-pr/scripts/build-and-setup-finch-vm.sh
131116

132117
- name: Configure AWS credentials
133118
uses: aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df # v4.2.1
@@ -169,58 +154,56 @@ jobs:
169154
shell: bash
170155

171156
- name: Run unit tests
172-
continue-on-error: true
173-
run: |
174-
./scripts/samcli-vm/run-unit-tests.sh
175-
echo "UNIT_EXIT_CODE=$(cat /tmp/unit_exit_code 2>/dev/null || echo 1)" >> $GITHUB_ENV
176-
177-
- name: Run sync tests
178-
continue-on-error: true
179-
run: |
180-
./scripts/samcli-vm/run-sync-tests.sh
181-
echo "SYNC_EXIT_CODE=$(cat /tmp/sync_exit_code 2>/dev/null || echo 1)" >> $GITHUB_ENV
182-
183-
- name: Run package tests
184-
continue-on-error: true
185-
run: |
186-
./scripts/samcli-vm/run-package-tests.sh
187-
echo "PACKAGE_EXIT_CODE=$(cat /tmp/package_exit_code 2>/dev/null || echo 1)" >> $GITHUB_ENV
188-
189-
- name: Run start-api tests
190-
continue-on-error: true
191-
run: |
192-
./scripts/samcli-vm/run-start-api-tests.sh
193-
echo "START_API_EXIT_CODE=$(cat /tmp/start_api_exit_code 2>/dev/null || echo 1)" >> $GITHUB_ENV
194-
195-
- name: Run start-lambda tests
196-
continue-on-error: true
197-
run: |
198-
./scripts/samcli-vm/run-start-lambda-tests.sh
199-
echo "START_LAMBDA_EXIT_CODE=$(cat /tmp/start_lambda_exit_code 2>/dev/null || echo 1)" >> $GITHUB_ENV
157+
timeout-minutes: 30
158+
run: ./finch-daemon-pr/scripts/samcli-vm/run-unit-tests.sh
200159

201160
- name: Patch SAM CLI for Docker image cleanup
202-
continue-on-error: true
203161
run: |
204162
# Apply git patch to handle ImageNotFound exceptions for all Docker tests
205-
su ec2-user -c 'cd /Users/ec2-user/aws-sam-cli && git apply ${{ github.workspace }}/scripts/samcli-vm/invoke-teardown.patch'
206-
echo "PATCH_EXIT_CODE=$?" >> $GITHUB_ENV
163+
su ec2-user -c 'cd /Users/ec2-user/aws-sam-cli && git apply ${{ github.workspace }}/finch-daemon-pr/scripts/samcli-vm/invoke-teardown.patch'
207164
shell: bash
208165

209166
- name: Run invoke tests
210-
continue-on-error: true
211-
run: |
212-
./scripts/samcli-vm/run-invoke-tests.sh
213-
echo "INVOKE_EXIT_CODE=$(cat /tmp/invoke_exit_code 2>/dev/null || echo 1)" >> $GITHUB_ENV
167+
timeout-minutes: 40
168+
run: ./finch-daemon-pr/scripts/samcli-vm/run-invoke-tests.sh
169+
170+
- name: Run start-api tests
171+
timeout-minutes: 70
172+
run: ./finch-daemon-pr/scripts/samcli-vm/run-start-api-tests.sh
173+
174+
- name: Run sync tests
175+
timeout-minutes: 20
176+
run: ./finch-daemon-pr/scripts/samcli-vm/run-sync-tests.sh
214177

215-
- name: Check test results
216-
run: ./scripts/samcli-vm/check-test-results.sh
178+
- name: Run package tests
179+
timeout-minutes: 10
180+
run: ./finch-daemon-pr/scripts/samcli-vm/run-package-tests.sh
181+
182+
- name: Run start-lambda tests
183+
timeout-minutes: 30
184+
run: ./finch-daemon-pr/scripts/samcli-vm/run-start-lambda-tests.sh
217185

218186
# ensuring resources are clean post-test
219187
cleanup:
220-
runs-on: ubuntu-latest
188+
runs-on: codebuild-finch-daemon-arm64-2-instance-${{ github.run_id }}-${{ github.run_attempt }}
221189
needs: samcli-vm-test
222190
if: always()
223191
steps:
192+
- name: Final cleanup
193+
run: |
194+
# Stop and remove VM
195+
su ec2-user -c 'finch vm stop' || true
196+
su ec2-user -c 'finch vm remove -f' || true
197+
198+
# Clean up processes and sockets
199+
sudo pkill -f socket_vmnet || true
200+
sudo pkill -f finch-daemon || true
201+
sudo rm -rf /private/var/run/finch-lima/*.sock || true
202+
203+
# Clean up cache and temporary files
204+
sudo rm -rf /Applications/Finch/lima/data/finch/_cache || true
205+
sudo rm -rf /tmp/finch-* || true
206+
224207
- name: Checkout repository
225208
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 (pinned to a full commit SHA like the other checkout steps)
226209

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/bin/bash
2+
set -e
3+
4+
echo "=== Building and Setting up Finch VM ==="
5+
6+
# Configure workspace permissions
7+
echo "Configuring workspace permissions..."
8+
chown -R ec2-user:staff "$GITHUB_WORKSPACE"
9+
10+
# Build and Install Finch from upstream
11+
echo "Building Finch from upstream..."
12+
su ec2-user -c "cd $GITHUB_WORKSPACE && make clean && make FINCH_OS_IMAGE_LOCATION_ROOT=/Applications/Finch && make install PREFIX=Applications/Finch"
13+
14+
# Build finch-daemon from PR and inject into VM
15+
echo "Building finch-daemon from PR..."
16+
su ec2-user -c "cd $GITHUB_WORKSPACE/finch-daemon-pr && STATIC=1 GOPROXY=direct GOOS=linux GOARCH=\$(go env GOARCH) make"
17+
su ec2-user -c 'finch vm remove -f' || true # best-effort under set -e: a runner with no existing VM must not abort the setup
18+
su ec2-user -c "cp $GITHUB_WORKSPACE/finch-daemon-pr/bin/finch-daemon /Applications/Finch/finch-daemon/finch-daemon"
19+
20+
# Check Finch version and initialize VM
21+
echo "Initializing VM and checking version..."
22+
# Clean up any leftover network state
23+
sudo pkill -f socket_vmnet || true
24+
sudo rm -f /private/var/run/finch-lima/*.sock || true
25+
26+
su ec2-user -c 'finch vm init'
27+
sleep 10 # Wait for services to be ready
28+
29+
echo "Checking Finch version..."
30+
su ec2-user -c 'LIMA_HOME=/Applications/Finch/lima/data /Applications/Finch/lima/bin/limactl shell finch curl --unix-socket /var/run/finch.sock -X GET http:/v1.43/version'
31+
32+
echo "Verifying Docker daemon is accessible..."
33+
su ec2-user -c 'finch info' || echo "Finch info failed"
34+
su ec2-user -c 'finch version' || echo "Finch version failed"
35+
36+
echo "✅ Finch VM build and setup complete"

scripts/samcli-direct/run-invoke-tests.sh

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,6 @@ cd aws-sam-cli
55

66
python -m pytest tests/integration/local/invoke -k 'not Terraform' -v --tb=short > invoke_output.txt 2>&1 || true
77

8-
echo ""
9-
echo "=== PASSES ==="
10-
grep "PASSED" invoke_output.txt || echo "No passes found"
11-
12-
echo ""
13-
echo "=== FAILURES ==="
14-
grep "FAILED" invoke_output.txt || echo "No failures found"
15-
168
# test_invoke_with_error_during_image_build: Build error message differs from expected.
179
# test_invoke_with_timeout_set_X_TimeoutFunction: Returns timeout message instead of empty string,
1810
# but matches actual Lambda service behavior.
@@ -26,23 +18,5 @@ test_invoke_with_timeout_set_2_TimeoutFunctionWithStringParameter
2618
test_building_new_rapid_image_removes_old_rapid_images
2719
EOF
2820

29-
# Extract actual failures
30-
grep "FAILED" invoke_output.txt | grep -o "test_[^[:space:]]*" > actual_invoke_failures.txt || true
31-
32-
# Find unexpected failures
33-
UNEXPECTED=$(grep -v -f expected_invoke_failures.txt actual_invoke_failures.txt 2>/dev/null || true)
34-
35-
if [ -n "$UNEXPECTED" ]; then
36-
echo "❌ Unexpected failures found:"
37-
echo "$UNEXPECTED"
38-
echo ""
39-
echo "=== FULL OUTPUT FOR DEBUGGING ==="
40-
cat invoke_output.txt || echo "No output file found"
41-
exit 1
42-
else
43-
echo "✅ All failures were expected."
44-
fi
45-
46-
echo ""
47-
echo "=== PYTEST SUMMARY ==="
48-
grep -E "=+ .*(failed|passed|skipped|deselected).* =+$" invoke_output.txt | tail -1 || echo "No pytest summary found"
21+
# Validate test results
22+
"$(dirname "$0")/../validate-test-results.sh" invoke_output.txt expected_invoke_failures.txt "Invoke tests" # quoted so a script path containing whitespace is not word-split

0 commit comments

Comments
 (0)