# Test Tutorial Agents #64

# Workflow that runs every tutorial agent under examples/tutorials against
# an AgentEx server started locally with Docker Compose.
name: Test Tutorial Agents

# Manual trigger only: the workflow runs when started via workflow_dispatch.
on:
  workflow_dispatch:

# Least privilege: the jobs only check out code and exchange artifacts, and
# no step passes the workflow token to a script.
# NOTE(review): confirm nothing invoked by run_agent_test.sh expects broader
# token scopes before merging.
permissions:
  contents: read

jobs:
find-tutorials:
  # Discovers tutorial directories (those containing a manifest.yaml) and
  # publishes them as a compact JSON array for the downstream test matrix.
  runs-on: ubuntu-latest
  outputs:
    tutorials: ${{ steps.get-tutorials.outputs.tutorials }}
  steps:
    - name: Checkout agentex-python repo
      uses: actions/checkout@v4

    - name: Find all tutorials
      id: get-tutorials
      run: |
        cd examples/tutorials

        # Single source of truth for the exclusion filter, so the
        # "Excluded tutorials" report below always matches what was
        # actually filtered out.
        # NOTE(review): this excludes every path containing "temporal";
        # earlier comments/logging referred only to 10_temporal/050_, 070_
        # and 080_ - confirm which scope is intended.
        exclude_pattern='temporal'

        # Every directory holding a manifest.yaml is a tutorial; strip the
        # leading "./" so the paths are relative to examples/tutorials.
        all_tutorials=$(find . -name "manifest.yaml" -exec dirname {} \; | sort | sed 's|^\./||')

        # grep exits 1 when it selects no lines; "|| true" keeps the step
        # (which runs with errexit) alive when a list ends up empty.
        filtered_tutorials=$(echo "$all_tutorials" | grep -v -E "$exclude_pattern" || true)
        excluded_tutorials=$(echo "$all_tutorials" | grep -E "$exclude_pattern" || true)

        # Convert the newline-separated list to a JSON array, dropping
        # empty entries.
        tutorials=$(echo "$filtered_tutorials" | jq -R -s -c 'split("\n") | map(select(length > 0))')
        echo "tutorials=$tutorials" >> "$GITHUB_OUTPUT"

        # Count non-empty lines; unlike "wc -l" this reports 0 for an
        # empty list.
        count_lines() { printf '%s' "$1" | grep -c . || true; }

        echo "All tutorials found: $(count_lines "$all_tutorials")"
        echo "Filtered tutorials: $(count_lines "$filtered_tutorials")"
        echo "Excluded tutorials:"
        if [ -n "$excluded_tutorials" ]; then
          echo "$excluded_tutorials"
        else
          echo " (none matched exclusion pattern)"
        fi
        echo "Final tutorial list: $tutorials"
test-tutorial:
  # Runs one matrix job per discovered tutorial: starts an AgentEx server
  # with Docker Compose, builds the SDK, runs the tutorial's test script and
  # uploads a small pass/fail record for the summary job.
  needs: find-tutorials
  # Skip the job outright when discovery produced no tutorials; an empty
  # matrix would otherwise make the workflow run error out.
  if: needs.find-tutorials.outputs.tutorials != '[]'
  runs-on: ubuntu-latest
  timeout-minutes: 15
  strategy:
    fail-fast: false
    matrix:
      tutorial: ${{ fromJson(needs.find-tutorials.outputs.tutorials) }}
  name: test-${{ matrix.tutorial }}
  env:
    # Expose the matrix value through the environment instead of splicing
    # it into shell scripts, so unusual directory names cannot alter the
    # commands being run.
    MATRIX_TUTORIAL: ${{ matrix.tutorial }}
  steps:
    - name: Checkout agentex-python repo
      uses: actions/checkout@v4

    - name: Install UV
      run: |
        curl -LsSf https://astral.sh/uv/install.sh | sh
        echo "$HOME/.local/bin" >> "$GITHUB_PATH"

    - name: Pull latest AgentEx image
      run: |
        echo "🐳 Pulling latest Scale AgentEx Docker image..."
        docker pull ghcr.io/scaleapi/scale-agentex/agentex:latest
        echo "✅ Successfully pulled AgentEx Docker image"

    - name: Checkout scale-agentex repo
      uses: actions/checkout@v4
      with:
        repository: scaleapi/scale-agentex
        path: scale-agentex

    - name: Configure Docker Compose for pulled image and host networking
      run: |
        cd scale-agentex/agentex
        echo "🔧 Configuring AgentEx container to use pulled image and host networking..."
        # Install yq for YAML manipulation.
        # NOTE(review): this downloads the unpinned "latest" release with
        # no checksum verification; consider pinning a version.
        sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
        sudo chmod +x /usr/local/bin/yq
        # Override to use pulled image instead of building
        yq eval '.services.agentex.image = "ghcr.io/scaleapi/scale-agentex/agentex:latest"' -i docker-compose.yml
        yq eval 'del(.services.agentex.build)' -i docker-compose.yml
        # Add extra_hosts to agentex service to make host.docker.internal work
        yq eval '.services.agentex.extra_hosts = ["host.docker.internal:host-gateway"]' -i docker-compose.yml
        echo "✅ Configured docker-compose to use pulled image with host access"

    - name: Start AgentEx Server
      run: |
        cd scale-agentex/agentex
        echo "🚀 Starting AgentEx server and dependencies..."
        # Start all services
        docker compose up -d

        echo "⏳ Waiting for dependencies to be healthy..."
        # Match the literal "(healthy)" status: a bare "healthy" would also
        # match "(unhealthy)". This is a best-effort gate (any one healthy
        # service satisfies it); the readiness check below is authoritative.
        deps_healthy=false
        for i in {1..30}; do
          if docker compose ps | grep -qF "(healthy)"; then
            echo "✅ Dependencies are healthy"
            deps_healthy=true
            break
          fi
          echo " Attempt $i/30: Waiting for services..."
          sleep 5
        done
        if [ "$deps_healthy" != "true" ]; then
          echo "⚠️ No service reported healthy; continuing to the AgentEx readiness check"
        fi

        # Wait specifically for AgentEx server to be ready
        echo "⏳ Waiting for AgentEx server to be ready..."
        # NOTE(review): curl runs without --fail, so any HTTP response
        # (including an error status) counts as ready - confirm that
        # /health returns 2xx before tightening this.
        server_ready=false
        for i in {1..30}; do
          if curl -s --max-time 5 http://localhost:5003/health >/dev/null 2>&1; then
            echo "✅ AgentEx server is ready"
            server_ready=true
            break
          fi
          echo " Attempt $i/30: Waiting for AgentEx server..."
          sleep 5
        done

        # Fail fast with diagnostics rather than letting the tutorial test
        # run against a server that never came up.
        if [ "$server_ready" != "true" ]; then
          echo "❌ AgentEx server did not become ready after 30 attempts"
          docker compose ps || true
          docker compose logs --tail=100 || true
          exit 1
        fi

    - name: Build AgentEx SDK
      run: |
        echo "🔨 Building AgentEx SDK wheel..."
        uv build
        echo "✅ SDK built successfully"
        ls -la dist/

    - name: Test Tutorial
      id: run-test
      working-directory: ./examples/tutorials
      env:
        OPENAI_API_KEY: ${{ secrets.TUTORIAL_OPENAI_API_KEY }}
        # Use non-privileged port for temporal worker health checks
        HEALTH_CHECK_PORT: "8080"
      run: |
        echo "Testing tutorial: $MATRIX_TUTORIAL"
        AGENTEX_API_BASE_URL="http://localhost:5003" \
          ./run_agent_test.sh --build-cli "$MATRIX_TUTORIAL"

    - name: Print agent logs on failure
      if: failure()
      working-directory: ./examples/tutorials
      run: |
        echo "🚨 Test failed for tutorial: $MATRIX_TUTORIAL"
        echo "📋 Printing agent logs..."
        # Look for agent log files in the tutorial directory
        if find "$MATRIX_TUTORIAL" -name "*.log" -type f 2>/dev/null | grep -q .; then
          echo "Found agent log files:"
          find "$MATRIX_TUTORIAL" -name "*.log" -type f -exec echo "=== {} ===" \; -exec cat {} \;
        else
          echo "No .log files found, checking for other common log locations..."
        fi
        # Check for any output files or dumps. The name tests are grouped
        # with \( \) so that -type and -exec apply to every alternative;
        # ungrouped, -exec binds only to the last -name.
        if find "$MATRIX_TUTORIAL" -type f \( -name "agent_output*" -o -name "debug*" -o -name "*.out" \) 2>/dev/null | grep -q .; then
          echo "Found other output files:"
          find "$MATRIX_TUTORIAL" -type f \( -name "agent_output*" -o -name "debug*" -o -name "*.out" \) -exec echo "=== {} ===" \; -exec cat {} \;
        fi
        # List python processes that are still running. The [p] bracket
        # keeps grep from matching its own command line, so the fallback
        # message can actually be reached.
        echo "🔍 Checking for running python processes..."
        ps aux | grep '[p]ython' || echo "No python processes found"

    - name: Record test result
      id: test-result
      if: always()
      env:
        RUN_TEST_OUTCOME: ${{ steps.run-test.outcome }}
      run: |
        # Create results directory
        mkdir -p test-results
        # Anything other than a successful test step (failure, skipped,
        # cancelled) is recorded as failed.
        if [ "$RUN_TEST_OUTCOME" = "success" ]; then
          result="passed"
        else
          result="failed"
        fi
        {
          echo "result=$result"
          echo "tutorial=$MATRIX_TUTORIAL"
        } >> "$GITHUB_OUTPUT"
        # Save result to file for artifact upload, using a filename-safe
        # form of the tutorial path ('/' becomes '_', spaces are removed).
        safe_name=$(echo "$MATRIX_TUTORIAL" | tr '/' '_' | tr -d ' ')
        echo "$result" > "test-results/result-${safe_name}.txt"
        echo "$MATRIX_TUTORIAL" > "test-results/tutorial-${safe_name}.txt"

    - name: Upload test result
      if: always()
      uses: actions/upload-artifact@v4
      with:
        # The matrix job index keeps artifact names unique per tutorial.
        name: test-result-${{ strategy.job-index }}
        path: test-results/
        retention-days: 1
test-summary:
  # Aggregates the per-tutorial result artifacts into a Markdown table and
  # statistics on the run's step summary. Runs even when upstream jobs
  # failed or were cancelled.
  if: always()
  needs: [find-tutorials, test-tutorial]
  runs-on: ubuntu-latest
  name: Test Summary
  steps:
    - name: Download all test results
      uses: actions/download-artifact@v4
      with:
        pattern: test-result-*
        path: all-results/
        merge-multiple: true
      # Missing artifacts are handled by the summary script below.
      continue-on-error: true

    - name: Generate Test Summary
      env:
        # Passed through the environment rather than spliced into the
        # script, so quotes in the values cannot break or alter the shell.
        TUTORIALS_JSON: ${{ needs.find-tutorials.outputs.tutorials }}
        OVERALL_RESULT: ${{ needs.test-tutorial.result }}
      run: |
        {
          echo "# 🧪 Tutorial Tests Summary"
          echo ""
        } >> "$GITHUB_STEP_SUMMARY"

        # Initialize counters
        passed_count=0
        failed_count=0
        skipped_count=0
        total_count=0

        # Tutorials that were supposed to run; fall back to an empty list
        # when discovery produced no output so jq always gets valid JSON.
        tutorials="${TUTORIALS_JSON:-[]}"

        have_results=false
        if [ -d "all-results" ] && [ -n "$(ls -A all-results 2>/dev/null)" ]; then
          have_results=true
        fi

        if [ "$have_results" = "true" ]; then
          echo "📊 Processing individual test results from artifacts..."
          {
            echo "## Test Results"
            echo ""
            echo "| Tutorial | Status | Result |"
            echo "|----------|--------|--------|"
          } >> "$GITHUB_STEP_SUMMARY"

          # Each result-<safe_name>.txt is paired with a
          # tutorial-<safe_name>.txt holding the original tutorial path.
          for result_file in all-results/result-*.txt; do
            [ -f "$result_file" ] || continue
            safe_name=$(basename "$result_file" .txt)
            safe_name="${safe_name#result-}"
            tutorial_file="all-results/tutorial-${safe_name}.txt"
            [ -f "$tutorial_file" ] || continue

            tutorial_name=$(cat "$tutorial_file")
            result=$(cat "$result_file")
            total_count=$((total_count + 1))
            if [ "$result" = "passed" ]; then
              echo "| \`$tutorial_name\` | ✅ | Passed |" >> "$GITHUB_STEP_SUMMARY"
              passed_count=$((passed_count + 1))
            else
              echo "| \`$tutorial_name\` | ❌ | Failed |" >> "$GITHUB_STEP_SUMMARY"
              failed_count=$((failed_count + 1))
            fi
          done

          # Check for any tutorials that didn't have results
          # (skipped/cancelled). The loop reads from process substitution
          # instead of a pipe: a piped "while" runs in a subshell, which
          # would discard the counter updates made inside it.
          while IFS= read -r expected_tutorial; do
            safe_expected=$(echo "$expected_tutorial" | tr '/' '_' | tr -d ' ')
            if [ ! -f "all-results/result-${safe_expected}.txt" ]; then
              echo "| \`$expected_tutorial\` | ⏭️ | Skipped/Cancelled |" >> "$GITHUB_STEP_SUMMARY"
              skipped_count=$((skipped_count + 1))
              total_count=$((total_count + 1))
            fi
          done < <(echo "$tutorials" | jq -r '.[]')
        else
          # NOTE(review): this fallback goes to the job log only, not the
          # step summary, although it uses Markdown emphasis - confirm
          # whether it should also be appended to the summary.
          echo "⚠️ No individual test results found. This could mean:"
          echo "- Test jobs were cancelled before completion"
          echo "- Artifacts failed to upload"
          echo "- No tutorials were found to test"
          echo ""
          overall_result="$OVERALL_RESULT"
          echo "Overall job status: **$overall_result**"
          if [[ "$overall_result" == "success" ]]; then
            echo "✅ All tests appear to have passed based on job status."
          elif [[ "$overall_result" == "failure" ]]; then
            echo "❌ Some tests appear to have failed based on job status."
            echo ""
            echo "💡 **Tip:** Check individual job logs for specific failure details."
          elif [[ "$overall_result" == "cancelled" ]]; then
            echo "⏭️ Tests were cancelled."
          else
            echo "❓ Test status is unclear: $overall_result"
          fi
          # Don't show detailed breakdown when we don't have individual results
          tutorial_count=$(echo "$tutorials" | jq -r 'length')
          echo ""
          echo "Expected tutorial count: $tutorial_count"
        fi

        # Only show detailed statistics if we have individual results
        if [ "$have_results" = "true" ]; then
          {
            echo ""
            echo "## Summary Statistics"
            echo ""
            echo "- **Total Tests:** $total_count"
            echo "- **Passed:** $passed_count ✅"
            echo "- **Failed:** $failed_count ❌"
            echo "- **Skipped:** $skipped_count ⏭️"
            echo ""
            if [ "$failed_count" -gt 0 ]; then
              echo "⚠️ **Some tests failed.** Check individual job logs for details."
              echo ""
              echo "💡 **Tip:** Look for the 'Print agent logs on failure' step in failed jobs for debugging information."
            elif [ "$passed_count" -gt 0 ] && [ "$skipped_count" -eq 0 ]; then
              echo "🎉 **All tests passed!**"
            elif [ "$passed_count" -gt 0 ]; then
              # Do not claim a full pass while some tutorials never
              # produced a result.
              echo "⚠️ **No tests failed, but some were skipped or cancelled.**"
            else
              echo "ℹ️ **Tests were cancelled or skipped.**"
            fi
          } >> "$GITHUB_STEP_SUMMARY"
        fi