Test Tutorial Agents #65

Workflow file for this run

.github/workflows/agentex-tutorials-test.yml at cdb7eac

	# Workflow that runs the tutorial agents found in this repository as tests.
	name: Test Tutorial Agents

	# The only trigger is a manual dispatch (UI / API); nothing runs on push or PR.
	on: workflow_dispatch

	# Job definitions follow; each job key belongs one level under this mapping.
	jobs:
	find-tutorials:
	  # Discovers the tutorial directories under examples/tutorials and publishes
	  # them as a compact JSON array in the `tutorials` job output, which the
	  # test job consumes as its matrix.
	  runs-on: ubuntu-latest
	  outputs:
	    tutorials: ${{ steps.get-tutorials.outputs.tutorials }}
	  steps:
	    - name: Checkout agentex-python repo
	      uses: actions/checkout@v4

	    - name: Find all tutorials
	      id: get-tutorials
	      run: |
	        cd examples/tutorials

	        # A tutorial is any directory containing a manifest.yaml; strip the
	        # leading "./" so the paths are relative to examples/tutorials.
	        all_tutorials=$(find . -name "manifest.yaml" -exec dirname {} \; | sort | sed 's|^\./||')

	        # Single source of truth for the exclusion so the filter and the
	        # "Excluded tutorials" report below can never disagree. This keeps
	        # the existing behaviour: every path matching "temporal" is skipped.
	        exclude_pattern="(temporal)"

	        # grep exits 1 when it selects no lines; `|| true` keeps that from
	        # aborting the step (run steps use `bash -e` by default).
	        filtered_tutorials=$(echo "$all_tutorials" | grep -v -E "$exclude_pattern" || true)
	        excluded_tutorials=$(echo "$all_tutorials" | grep -E "$exclude_pattern" || true)

	        # Convert the newline-separated list to a JSON array, dropping blanks
	        tutorials=$(echo "$filtered_tutorials" | jq -R -s -c 'split("\n") | map(select(length > 0))')

	        echo "tutorials=$tutorials" >> "$GITHUB_OUTPUT"

	        # Count non-empty lines (`wc -l` would report 1 for an empty list)
	        echo "All tutorials found: $(echo "$all_tutorials" | grep -c . || true)"
	        echo "Filtered tutorials: $(echo "$filtered_tutorials" | grep -c . || true)"
	        echo "Excluded tutorials:"
	        if [ -n "$excluded_tutorials" ]; then
	          echo "$excluded_tutorials"
	        else
	          echo " (none matched exclusion pattern)"
	        fi
	        echo "Final tutorial list: $tutorials"

	test-tutorial:
	  # Runs one matrix job per discovered tutorial: starts an AgentEx server via
	  # docker compose, builds the SDK, runs the tutorial's test script, and
	  # uploads a small pass/fail artifact that the summary job aggregates.
	  needs: find-tutorials
	  # An empty matrix is a workflow error, so skip the job when nothing was found.
	  if: needs.find-tutorials.outputs.tutorials != '[]'
	  runs-on: ubuntu-latest
	  timeout-minutes: 15
	  strategy:
	    matrix:
	      tutorial: ${{ fromJson(needs.find-tutorials.outputs.tutorials) }}
	    fail-fast: false
	  name: test-${{ matrix.tutorial }}

	  steps:
	    - name: Checkout agentex-python repo
	      uses: actions/checkout@v4

	    - name: Install UV
	      run: |
	        curl -LsSf https://astral.sh/uv/install.sh | sh
	        echo "$HOME/.local/bin" >> "$GITHUB_PATH"

	    - name: Pull latest AgentEx image
	      run: |
	        echo "🐳 Pulling latest Scale AgentEx Docker image..."
	        docker pull ghcr.io/scaleapi/scale-agentex/agentex:latest
	        echo "✅ Successfully pulled AgentEx Docker image"

	    - name: Checkout scale-agentex repo
	      uses: actions/checkout@v4
	      with:
	        repository: scaleapi/scale-agentex
	        path: scale-agentex

	    - name: Configure Docker Compose for pulled image and host networking
	      run: |
	        cd scale-agentex/agentex
	        echo "🔧 Configuring AgentEx container to use pulled image and host networking..."

	        # Install yq for YAML manipulation
	        sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
	        sudo chmod +x /usr/local/bin/yq

	        # Override to use pulled image instead of building
	        yq eval '.services.agentex.image = "ghcr.io/scaleapi/scale-agentex/agentex:latest"' -i docker-compose.yml
	        yq eval 'del(.services.agentex.build)' -i docker-compose.yml

	        # Add extra_hosts to agentex service to make host.docker.internal work
	        yq eval '.services.agentex.extra_hosts = ["host.docker.internal:host-gateway"]' -i docker-compose.yml

	        echo "✅ Configured docker-compose to use pulled image with host access"

	    - name: Start AgentEx Server
	      run: |
	        cd scale-agentex/agentex
	        echo "🚀 Starting AgentEx server and dependencies..."

	        # Start all services
	        docker compose up -d

	        echo "⏳ Waiting for dependencies to be healthy..."

	        # Match "(healthy)" with the parenthesis so "(unhealthy)" is not
	        # mistaken for a healthy service.
	        deps_healthy=false
	        for i in {1..30}; do
	          if docker compose ps | grep -q "(healthy)"; then
	            echo "✅ Dependencies are healthy"
	            deps_healthy=true
	            break
	          fi
	          echo " Attempt $i/30: Waiting for services..."
	          sleep 5
	        done
	        if [ "$deps_healthy" != "true" ]; then
	          # Not fatal on its own: the server readiness check below is the gate.
	          echo "⚠️ No service reported healthy after 30 attempts"
	          docker compose ps
	        fi

	        # Wait specifically for AgentEx server to be ready
	        echo "⏳ Waiting for AgentEx server to be ready..."
	        server_ready=false
	        for i in {1..30}; do
	          if curl -s --max-time 5 http://localhost:5003/health >/dev/null 2>&1; then
	            echo "✅ AgentEx server is ready"
	            server_ready=true
	            break
	          fi
	          echo " Attempt $i/30: Waiting for AgentEx server..."
	          sleep 5
	        done

	        # Fail here, with container state and logs, instead of letting the
	        # tutorial test fail later with a less obvious connection error.
	        if [ "$server_ready" != "true" ]; then
	          echo "❌ AgentEx server did not become ready after 30 attempts"
	          docker compose ps
	          docker compose logs --tail=100
	          exit 1
	        fi

	    - name: Build AgentEx SDK
	      run: |
	        echo "🔨 Building AgentEx SDK wheel..."
	        uv build
	        echo "✅ SDK built successfully"
	        ls -la dist/

	    - name: Test Tutorial
	      id: run-test
	      working-directory: ./examples/tutorials
	      env:
	        OPENAI_API_KEY: ${{ secrets.TUTORIAL_OPENAI_API_KEY }}
	        HEALTH_CHECK_PORT: 8080 # Use non-privileged port for temporal worker health checks
	        # Passed through the environment rather than spliced into the script,
	        # so an unusual directory name cannot alter the shell command.
	        MATRIX_TUTORIAL: ${{ matrix.tutorial }}
	      run: |
	        echo "Testing tutorial: $MATRIX_TUTORIAL"
	        AGENTEX_API_BASE_URL="http://localhost:5003" \
	          ./run_agent_test.sh --build-cli "$MATRIX_TUTORIAL"

	    - name: Print agent logs on failure
	      if: failure()
	      working-directory: ./examples/tutorials
	      env:
	        MATRIX_TUTORIAL: ${{ matrix.tutorial }}
	      run: |
	        echo "🚨 Test failed for tutorial: $MATRIX_TUTORIAL"
	        echo "📋 Printing agent logs..."

	        # Look for agent log files in the tutorial directory
	        if find "$MATRIX_TUTORIAL" -name "*.log" -type f 2>/dev/null | grep -q .; then
	          echo "Found agent log files:"
	          find "$MATRIX_TUTORIAL" -name "*.log" -type f -exec echo "=== {} ===" \; -exec cat {} \;
	        else
	          echo "No .log files found, checking for other common log locations..."
	        fi

	        # Check for any output files or dumps. The -name alternatives are
	        # grouped so -type f and -exec apply to all of them, not just the
	        # last one (-a binds tighter than -o in find expressions).
	        if find "$MATRIX_TUTORIAL" -type f \( -name "agent_output" -o -name "debug" -o -name "*.out" \) 2>/dev/null | grep -q .; then
	          echo "Found other output files:"
	          find "$MATRIX_TUTORIAL" -type f \( -name "agent_output" -o -name "debug" -o -name "*.out" \) -exec echo "=== {} ===" \; -exec cat {} \;
	        fi

	        # List python processes that are still running; the [p] bracket keeps
	        # grep from matching its own command line.
	        echo "🔍 Checking for running python processes..."
	        ps aux | grep '[p]ython' || echo "No python processes found"

	    - name: Record test result
	      id: test-result
	      if: always()
	      env:
	        MATRIX_TUTORIAL: ${{ matrix.tutorial }}
	        RUN_TEST_OUTCOME: ${{ steps.run-test.outcome }}
	      run: |
	        # Create results directory
	        mkdir -p test-results

	        # Anything other than an explicit success (failure, skipped,
	        # cancelled) is recorded as failed.
	        if [ "$RUN_TEST_OUTCOME" = "success" ]; then
	          result="passed"
	        else
	          result="failed"
	        fi

	        # Create a safe name from the tutorial path; it is used for the
	        # result files and for the artifact name.
	        safe_name=$(echo "$MATRIX_TUTORIAL" | tr '/' '_' | tr -d ' ')

	        {
	          echo "result=$result"
	          echo "tutorial=$MATRIX_TUTORIAL"
	          echo "safe_name=$safe_name"
	        } >> "$GITHUB_OUTPUT"

	        # Save result to file for artifact upload
	        echo "$result" > "test-results/result-${safe_name}.txt"
	        echo "$MATRIX_TUTORIAL" > "test-results/tutorial-${safe_name}.txt"

	    - name: Upload test result
	      if: always()
	      uses: actions/upload-artifact@v4
	      with:
	        # Artifact names may not contain "/", so use the sanitized name
	        # rather than the raw tutorial path.
	        name: test-result-${{ steps.test-result.outputs.safe_name }}
	        path: test-results/
	        retention-days: 1

	test-summary:
	  # Aggregates the per-tutorial result artifacts into the run's step summary.
	  # Always runs, so a summary is produced even when test jobs fail or are
	  # cancelled.
	  if: always()
	  needs: [find-tutorials, test-tutorial]
	  runs-on: ubuntu-latest
	  name: Test Summary
	  steps:
	    - name: Download all test results
	      uses: actions/download-artifact@v4
	      with:
	        pattern: test-result-*
	        path: all-results/
	        merge-multiple: true
	      continue-on-error: true

	    - name: Generate Test Summary
	      env:
	        # Passed through the environment rather than spliced into the script,
	        # so quotes in the JSON cannot break or alter the shell command.
	        TUTORIALS_JSON: ${{ needs.find-tutorials.outputs.tutorials }}
	        OVERALL_RESULT: ${{ needs.test-tutorial.result }}
	      run: |
	        echo "# 🧪 Tutorial Tests Summary" >> "$GITHUB_STEP_SUMMARY"
	        echo "" >> "$GITHUB_STEP_SUMMARY"

	        # Initialize counters
	        passed_count=0
	        failed_count=0
	        skipped_count=0
	        total_count=0

	        # All tutorials that were supposed to run (JSON array)
	        tutorials="$TUTORIALS_JSON"

	        if [ -d "all-results" ] && [ "$(ls -A all-results 2>/dev/null)" ]; then
	          echo "📊 Processing individual test results from artifacts..."

	          echo "## Test Results" >> "$GITHUB_STEP_SUMMARY"
	          echo "" >> "$GITHUB_STEP_SUMMARY"
	          echo "| Tutorial | Status | Result |" >> "$GITHUB_STEP_SUMMARY"
	          echo "|----------|--------|--------|" >> "$GITHUB_STEP_SUMMARY"

	          # Process each result file
	          for result_file in all-results/result-*.txt; do
	            # Skips the literal glob when nothing matched
	            [ -f "$result_file" ] || continue

	            # Extract the safe name from the filename
	            safe_name=$(basename "$result_file" .txt | sed 's/^result-//')

	            # The matching tutorial-<safe_name>.txt holds the original path
	            tutorial_file="all-results/tutorial-${safe_name}.txt"
	            [ -f "$tutorial_file" ] || continue

	            tutorial_name=$(cat "$tutorial_file")
	            result=$(cat "$result_file")

	            total_count=$((total_count + 1))

	            if [ "$result" = "passed" ]; then
	              echo "| \`$tutorial_name\` | ✅ | Passed |" >> "$GITHUB_STEP_SUMMARY"
	              passed_count=$((passed_count + 1))
	            else
	              echo "| \`$tutorial_name\` | ❌ | Failed |" >> "$GITHUB_STEP_SUMMARY"
	              failed_count=$((failed_count + 1))
	            fi
	          done

	          # Check for any tutorials that didn't have results (skipped/cancelled).
	          # The loop reads from process substitution instead of a pipe: a
	          # piped `while` runs in a subshell and would lose the counter updates.
	          while IFS= read -r expected_tutorial; do
	            safe_expected=$(echo "$expected_tutorial" | tr '/' '_' | tr -d ' ')
	            if [ ! -f "all-results/result-${safe_expected}.txt" ]; then
	              echo "| \`$expected_tutorial\` | ⏭️ | Skipped/Cancelled |" >> "$GITHUB_STEP_SUMMARY"
	              skipped_count=$((skipped_count + 1))
	              total_count=$((total_count + 1))
	            fi
	          done < <(echo "$tutorials" | jq -r '.[]')

	          # Detailed statistics are only shown when individual results exist
	          echo "" >> "$GITHUB_STEP_SUMMARY"
	          echo "## Summary Statistics" >> "$GITHUB_STEP_SUMMARY"
	          echo "" >> "$GITHUB_STEP_SUMMARY"
	          echo "- Total Tests: $total_count" >> "$GITHUB_STEP_SUMMARY"
	          echo "- Passed: $passed_count ✅" >> "$GITHUB_STEP_SUMMARY"
	          echo "- Failed: $failed_count ❌" >> "$GITHUB_STEP_SUMMARY"
	          echo "- Skipped: $skipped_count ⏭️" >> "$GITHUB_STEP_SUMMARY"
	          echo "" >> "$GITHUB_STEP_SUMMARY"

	          if [ "$failed_count" -eq 0 ] && [ "$passed_count" -gt 0 ]; then
	            echo "🎉 All tests passed!" >> "$GITHUB_STEP_SUMMARY"
	          elif [ "$failed_count" -gt 0 ]; then
	            echo "⚠️ Some tests failed. Check individual job logs for details." >> "$GITHUB_STEP_SUMMARY"
	            echo "" >> "$GITHUB_STEP_SUMMARY"
	            echo "💡 Tip: Look for the 'Print agent logs on failure' step in failed jobs for debugging information." >> "$GITHUB_STEP_SUMMARY"
	          else
	            echo "ℹ️ Tests were cancelled or skipped." >> "$GITHUB_STEP_SUMMARY"
	          fi

	        else
	          # No artifacts: fall back to the aggregate result of the matrix job.
	          # This branch writes to the job log only, not to the step summary.
	          echo "⚠️ No individual test results found. This could mean:"
	          echo "- Test jobs were cancelled before completion"
	          echo "- Artifacts failed to upload"
	          echo "- No tutorials were found to test"
	          echo ""

	          overall_result="$OVERALL_RESULT"
	          echo "Overall job status: $overall_result"

	          if [[ "$overall_result" == "success" ]]; then
	            echo "✅ All tests appear to have passed based on job status."
	          elif [[ "$overall_result" == "failure" ]]; then
	            echo "❌ Some tests appear to have failed based on job status."
	            echo ""
	            echo "💡 Tip: Check individual job logs for specific failure details."
	          elif [[ "$overall_result" == "cancelled" ]]; then
	            echo "⏭️ Tests were cancelled."
	          else
	            echo "❓ Test status is unclear: $overall_result"
	          fi

	          # Don't show detailed breakdown when we don't have individual results
	          tutorial_count=$(echo "$tutorials" | jq -r '. | length')
	          echo ""
	          echo "Expected tutorial count: $tutorial_count"
	        fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Test Tutorial Agents #65

Workflow file

Test Tutorial Agents #65

Uh oh!

Jobs

Run details

Workflow file for this run