# Workflow file captured from the GitHub Actions run page for PR #25
# ("Fm/stg 474 more tests"). The content below is the workflow definition
# for that run.
name: Test Suite

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]
    # Label events are included so label-gated jobs below re-evaluate
    # when a test-* label is added or removed.
    types: [opened, synchronize, reopened, labeled, unlabeled]
  # schedule:
  #   # Run tests daily at 6 AM UTC
  #   - cron: '0 6 * * *'
  # NOTE(review): several jobs below check `github.event_name == 'schedule'`,
  # but the schedule trigger is commented out, so those branches never fire
  # until it is re-enabled.
jobs:
  # Fast unit tests across all supported Python versions; every other job
  # in this workflow depends on this one.
  test-unit:
    name: Unit Tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so "3.10" is not parsed as the float 3.1.
        python-version: ["3.9", "3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt', '**/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          # Install jsonschema for schema validation tests
          pip install jsonschema
          # Install temporary Google GenAI wheel
          pip install temp/google_genai-1.14.0-py3-none-any.whl
      - name: Run unit tests
        run: |
          pytest tests/unit/ -v \
            --cov=stagehand \
            --cov-report=xml \
            --cov-report=term-missing \
            --junit-xml=junit-unit-${{ matrix.python-version }}.xml
      - name: Upload unit test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: unit-test-results-${{ matrix.python-version }}
          path: junit-unit-${{ matrix.python-version }}.xml
      - name: Upload coverage data
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: coverage-data-${{ matrix.python-version }}
          path: |
            .coverage
            coverage.xml
      # Only one matrix leg uploads to Codecov to avoid duplicate reports.
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        if: matrix.python-version == '3.11'
        with:
          file: ./coverage.xml
          flags: unit
          name: unit-tests
test-integration-local:
name: Integration Tests (Local)
runs-on: ubuntu-latest
needs: test-unit
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y xvfb
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
pip install jsonschema
# Install temporary Google GenAI wheel
pip install temp/google_genai-1.14.0-py3-none-any.whl
# Install Playwright browsers for integration tests
playwright install chromium
playwright install-deps chromium
- name: Run local integration tests
run: |
# Run integration tests marked as 'local' and not 'slow'
xvfb-run -a pytest tests/integration/ -v \
--cov=stagehand \
--cov-report=xml \
--junit-xml=junit-integration-local.xml \
-m "local and not slow" \
--tb=short \
--maxfail=5
env:
MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || 'mock-openai-key' }}
DISPLAY: ":99"
- name: Upload integration test results
uses: actions/upload-artifact@v4
if: always()
with:
name: integration-test-results-local
path: junit-integration-local.xml
- name: Upload coverage data
uses: actions/upload-artifact@v4
if: always()
with:
name: coverage-data-integration-local
path: |
.coverage
coverage.xml
test-integration-slow:
name: Integration Tests (Slow)
runs-on: ubuntu-latest
needs: test-unit
if: |
contains(github.event.pull_request.labels.*.name, 'test-slow') ||
contains(github.event.pull_request.labels.*.name, 'slow') ||
github.event_name == 'schedule'
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y xvfb
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
pip install jsonschema
# Install temporary Google GenAI wheel
pip install temp/google_genai-1.14.0-py3-none-any.whl
# Install Playwright browsers for integration tests
playwright install chromium
playwright install-deps chromium
- name: Run slow integration tests
run: |
# Run integration tests marked as 'slow' and 'local'
xvfb-run -a pytest tests/integration/ -v \
--cov=stagehand \
--cov-report=xml \
--junit-xml=junit-integration-slow.xml \
-m "slow and local" \
--tb=short \
--maxfail=3
env:
MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || 'mock-openai-key' }}
DISPLAY: ":99"
- name: Upload slow test results
uses: actions/upload-artifact@v4
if: always()
with:
name: integration-test-results-slow
path: junit-integration-slow.xml
test-browserbase:
name: Browserbase Integration Tests
runs-on: ubuntu-latest
needs: test-unit
if: |
github.event_name == 'schedule' ||
contains(github.event.pull_request.labels.*.name, 'test-browserbase') ||
contains(github.event.pull_request.labels.*.name, 'browserbase')
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
pip install jsonschema
# Install temporary Google GenAI wheel
pip install temp/google_genai-1.14.0-py3-none-any.whl
- name: Run Browserbase tests
run: |
pytest tests/ -v \
--cov=stagehand \
--cov-report=xml \
--junit-xml=junit-browserbase.xml \
-m "browserbase" \
--tb=short
env:
BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
MODEL_API_KEY: ${{ secrets.MODEL_API_KEY }}
STAGEHAND_API_URL: ${{ secrets.STAGEHAND_API_URL }}
- name: Upload Browserbase test results
uses: actions/upload-artifact@v4
if: always()
with:
name: browserbase-test-results
path: junit-browserbase.xml
test-performance:
name: Performance Tests
runs-on: ubuntu-latest
needs: test-unit
if: |
github.event_name == 'schedule' ||
contains(github.event.pull_request.labels.*.name, 'test-performance') ||
contains(github.event.pull_request.labels.*.name, 'performance')
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
pip install jsonschema
# Install temporary Google GenAI wheel
pip install temp/google_genai-1.14.0-py3-none-any.whl
playwright install chromium
- name: Run performance tests
run: |
pytest tests/performance/ -v \
--junit-xml=junit-performance.xml \
-m "performance" \
--tb=short
env:
MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
- name: Upload performance test results
uses: actions/upload-artifact@v4
if: always()
with:
name: performance-test-results
path: junit-performance.xml
smoke-tests:
name: Smoke Tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
pip install jsonschema
# Install temporary Google GenAI wheel
pip install temp/google_genai-1.14.0-py3-none-any.whl
- name: Run smoke tests
run: |
pytest tests/ -v \
--junit-xml=junit-smoke.xml \
-m "smoke" \
--tb=line \
--maxfail=5
- name: Upload smoke test results
uses: actions/upload-artifact@v4
if: always()
with:
name: smoke-test-results
path: junit-smoke.xml
test-llm:
name: LLM Integration Tests
runs-on: ubuntu-latest
needs: test-unit
if: |
contains(github.event.pull_request.labels.*.name, 'test-llm') ||
contains(github.event.pull_request.labels.*.name, 'llm')
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
pip install jsonschema
# Install temporary Google GenAI wheel
pip install temp/google_genai-1.14.0-py3-none-any.whl
- name: Run LLM tests
run: |
pytest tests/ -v \
--cov=stagehand \
--cov-report=xml \
--junit-xml=junit-llm.xml \
-m "llm" \
--tb=short
env:
MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || 'mock-openai-key' }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY || 'mock-anthropic-key' }}
- name: Upload LLM test results
uses: actions/upload-artifact@v4
if: always()
with:
name: llm-test-results
path: junit-llm.xml
test-e2e:
name: End-to-End Tests
runs-on: ubuntu-latest
needs: test-unit
if: |
contains(github.event.pull_request.labels.*.name, 'test-e2e') ||
contains(github.event.pull_request.labels.*.name, 'e2e')
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
pip install jsonschema
# Install temporary Google GenAI wheel
pip install temp/google_genai-1.14.0-py3-none-any.whl
playwright install chromium
- name: Run E2E tests
run: |
pytest tests/ -v \
--cov=stagehand \
--cov-report=xml \
--junit-xml=junit-e2e.xml \
-m "e2e" \
--tb=short
env:
BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY || 'mock-api-key' }}
BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID || 'mock-project-id' }}
MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
STAGEHAND_API_URL: ${{ secrets.STAGEHAND_API_URL || 'http://localhost:3000' }}
- name: Upload E2E test results
uses: actions/upload-artifact@v4
if: always()
with:
name: e2e-test-results
path: junit-e2e.xml
test-slow:
name: Slow Tests
runs-on: ubuntu-latest
needs: test-unit
if: |
contains(github.event.pull_request.labels.*.name, 'test-slow') ||
contains(github.event.pull_request.labels.*.name, 'slow')
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
pip install jsonschema
# Install temporary Google GenAI wheel
pip install temp/google_genai-1.14.0-py3-none-any.whl
playwright install chromium
- name: Run slow tests
run: |
pytest tests/ -v \
--cov=stagehand \
--cov-report=xml \
--junit-xml=junit-slow.xml \
-m "slow" \
--tb=short
env:
BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY || 'mock-api-key' }}
BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID || 'mock-project-id' }}
MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
- name: Upload slow test results
uses: actions/upload-artifact@v4
if: always()
with:
name: slow-test-results
path: junit-slow.xml
test-all:
name: Complete Test Suite
runs-on: ubuntu-latest
needs: test-unit
if: |
contains(github.event.pull_request.labels.*.name, 'test-all') ||
contains(github.event.pull_request.labels.*.name, 'full-test')
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
pip install jsonschema
# Install temporary Google GenAI wheel
pip install temp/google_genai-1.14.0-py3-none-any.whl
playwright install chromium
- name: Run complete test suite
run: |
pytest tests/ -v \
--cov=stagehand \
--cov-report=xml \
--cov-report=html \
--junit-xml=junit-all.xml \
--maxfail=10 \
--tb=short
env:
BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
MODEL_API_KEY: ${{ secrets.MODEL_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
STAGEHAND_API_URL: ${{ secrets.STAGEHAND_API_URL }}
- name: Upload complete test results
uses: actions/upload-artifact@v4
if: always()
with:
name: complete-test-results
path: |
junit-all.xml
htmlcov/
coverage-report:
name: Coverage Report
runs-on: ubuntu-latest
needs: [test-unit, test-integration-local]
if: always() && (needs.test-unit.result == 'success')
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install coverage[toml] codecov
- name: Download coverage artifacts
uses: actions/download-artifact@v4
with:
pattern: coverage-data-*
path: coverage-reports/
- name: Combine coverage reports
run: |
# List downloaded artifacts for debugging
echo "Downloaded coverage artifacts:"
find coverage-reports/ -name ".coverage*" -o -name "coverage.xml" | sort || echo "No coverage files found"
# Find and combine coverage files
COVERAGE_FILES=$(find coverage-reports/ -name ".coverage" -type f 2>/dev/null | head -10)
if [ -n "$COVERAGE_FILES" ]; then
echo "Found coverage files:"
echo "$COVERAGE_FILES"
# Copy coverage files to current directory for combining
for file in $COVERAGE_FILES; do
cp "$file" ".coverage.$(basename $(dirname $file))"
done
# Combine coverage files
coverage combine .coverage.* || echo "Failed to combine coverage files"
coverage report --show-missing || echo "No coverage data to report"
coverage html || echo "No coverage data for HTML report"
coverage xml || echo "No coverage data for XML report"
else
echo "No .coverage files found to combine"
# Create minimal coverage.xml to prevent downstream failures
echo '<?xml version="1.0" encoding="UTF-8"?><coverage version="0" timestamp="0" lines-valid="0" lines-covered="0" line-rate="0"></coverage>' > coverage.xml
fi
- name: Upload combined coverage
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
name: combined-coverage
- name: Upload coverage HTML report
uses: actions/upload-artifact@v4
with:
name: coverage-html-report
path: htmlcov/
test-summary:
name: Test Summary
runs-on: ubuntu-latest
needs: [test-unit, test-integration-local, smoke-tests]
if: always()
steps:
- name: Download all test artifacts
uses: actions/download-artifact@v4
with:
path: test-results/
- name: Generate test summary
run: |
echo "## Test Results Summary" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
# Count test files
UNIT_TESTS=$(find test-results/ -name "junit-unit-*.xml" | wc -l)
INTEGRATION_TESTS=$(find test-results/ -name "junit-integration-*.xml" | wc -l)
echo "- Unit test configurations: $UNIT_TESTS" >> $GITHUB_STEP_SUMMARY
echo "- Integration test categories: $INTEGRATION_TESTS" >> $GITHUB_STEP_SUMMARY
# Check for optional test runs
if [ -f test-results/*/junit-browserbase.xml ]; then
echo "- Browserbase tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
else
echo "- Browserbase tests: ⏭️ Skipped (add 'test-browserbase' label to run)" >> $GITHUB_STEP_SUMMARY
fi
if [ -f test-results/*/junit-performance.xml ]; then
echo "- Performance tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
else
echo "- Performance tests: ⏭️ Skipped (add 'test-performance' label to run)" >> $GITHUB_STEP_SUMMARY
fi
if [ -f test-results/*/junit-llm.xml ]; then
echo "- LLM tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
else
echo "- LLM tests: ⏭️ Skipped (add 'test-llm' label to run)" >> $GITHUB_STEP_SUMMARY
fi
if [ -f test-results/*/junit-e2e.xml ]; then
echo "- E2E tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
else
echo "- E2E tests: ⏭️ Skipped (add 'test-e2e' label to run)" >> $GITHUB_STEP_SUMMARY
fi
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Available Test Labels" >> $GITHUB_STEP_SUMMARY
echo "- \`test-browserbase\` - Run Browserbase integration tests" >> $GITHUB_STEP_SUMMARY
echo "- \`test-performance\` - Run performance and load tests" >> $GITHUB_STEP_SUMMARY
echo "- \`test-llm\` - Run LLM integration tests" >> $GITHUB_STEP_SUMMARY
echo "- \`test-e2e\` - Run end-to-end workflow tests" >> $GITHUB_STEP_SUMMARY
echo "- \`test-slow\` - Run all slow-marked tests" >> $GITHUB_STEP_SUMMARY
echo "- \`test-all\` - Run complete test suite" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "Detailed results are available in the artifacts section." >> $GITHUB_STEP_SUMMARY