55 branches : [ main, develop ]
66 pull_request :
77 branches : [ main, develop ]
8+ types : [opened, synchronize, reopened, labeled, unlabeled]
89 # schedule:
910 # # Run tests daily at 6 AM UTC
1011 # - cron: '0 6 * * *'
@@ -140,7 +141,10 @@ jobs:
140141 name : Browserbase Integration Tests
141142 runs-on : ubuntu-latest
142143 needs : test-unit
143- if : github.event_name == 'schedule' || contains(github.event.head_commit.message, '[test-browserbase]')
144+ if : |
145+ github.event_name == 'schedule' ||
146+ contains(github.event.pull_request.labels.*.name, 'test-browserbase') ||
147+ contains(github.event.pull_request.labels.*.name, 'browserbase')
144148
145149 steps :
146150 - uses : actions/checkout@v4
@@ -183,7 +187,10 @@ jobs:
183187 name : Performance Tests
184188 runs-on : ubuntu-latest
185189 needs : test-unit
186- if : github.event_name == 'schedule' || contains(github.event.head_commit.message, '[test-performance]')
190+ if : |
191+ github.event_name == 'schedule' ||
192+ contains(github.event.pull_request.labels.*.name, 'test-performance') ||
193+ contains(github.event.pull_request.labels.*.name, 'performance')
187194
188195 steps :
189196 - uses : actions/checkout@v4
@@ -253,6 +260,192 @@ jobs:
253260 name : smoke-test-results
254261 path : junit-smoke.xml
255262
# LLM integration tests (one entry of the workflow's `jobs:` mapping).
# Opt-in: runs only when the pull request carries the `test-llm` or `llm`
# label, and only after the `test-unit` job has succeeded.
test-llm:
  name: LLM Integration Tests
  runs-on: ubuntu-latest
  needs: test-unit
  if: >-
    contains(github.event.pull_request.labels.*.name, 'test-llm') ||
    contains(github.event.pull_request.labels.*.name, 'llm')

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v4
      with:
        # Quoted so the version is read as a string, not a float.
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"
        pip install jsonschema
        # Install temporary Google GenAI wheel
        pip install temp/google_genai-1.14.0-py3-none-any.whl

    - name: Run LLM tests
      # Selects only tests marked `llm`; writes a JUnit report for upload.
      run: |
        pytest tests/ -v \
          --cov=stagehand \
          --cov-report=xml \
          --junit-xml=junit-llm.xml \
          -m "llm" \
          --tb=short
      env:
        # Each key falls back to a placeholder when the secret is not set.
        MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || 'mock-openai-key' }}
        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY || 'mock-anthropic-key' }}

    - name: Upload LLM test results
      uses: actions/upload-artifact@v4
      # always(): publish the report even when the test step failed.
      if: always()
      with:
        name: llm-test-results
        path: junit-llm.xml
306+
# End-to-end tests (one entry of the workflow's `jobs:` mapping).
# Opt-in: runs only when the pull request carries the `test-e2e` or `e2e`
# label, and only after the `test-unit` job has succeeded.
test-e2e:
  name: End-to-End Tests
  runs-on: ubuntu-latest
  needs: test-unit
  if: >-
    contains(github.event.pull_request.labels.*.name, 'test-e2e') ||
    contains(github.event.pull_request.labels.*.name, 'e2e')

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v4
      with:
        # Quoted so the version is read as a string, not a float.
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"
        pip install jsonschema
        # Install temporary Google GenAI wheel
        pip install temp/google_genai-1.14.0-py3-none-any.whl
        playwright install chromium

    - name: Run E2E tests
      # Selects only tests marked `e2e`; writes a JUnit report for upload.
      run: |
        pytest tests/ -v \
          --cov=stagehand \
          --cov-report=xml \
          --junit-xml=junit-e2e.xml \
          -m "e2e" \
          --tb=short
      env:
        # Each value falls back to a placeholder when the secret is not set.
        BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY || 'mock-api-key' }}
        BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID || 'mock-project-id' }}
        MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
        STAGEHAND_API_URL: ${{ secrets.STAGEHAND_API_URL || 'http://localhost:3000' }}

    - name: Upload E2E test results
      uses: actions/upload-artifact@v4
      # always(): publish the report even when the test step failed.
      if: always()
      with:
        name: e2e-test-results
        path: junit-e2e.xml
352+
# Slow-marked tests (one entry of the workflow's `jobs:` mapping).
# Opt-in: runs only when the pull request carries the `test-slow` or `slow`
# label, and only after the `test-unit` job has succeeded.
test-slow:
  name: Slow Tests
  runs-on: ubuntu-latest
  needs: test-unit
  if: >-
    contains(github.event.pull_request.labels.*.name, 'test-slow') ||
    contains(github.event.pull_request.labels.*.name, 'slow')

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v4
      with:
        # Quoted so the version is read as a string, not a float.
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"
        pip install jsonschema
        # Install temporary Google GenAI wheel
        pip install temp/google_genai-1.14.0-py3-none-any.whl
        playwright install chromium

    - name: Run slow tests
      # Selects only tests marked `slow`; writes a JUnit report for upload.
      run: |
        pytest tests/ -v \
          --cov=stagehand \
          --cov-report=xml \
          --junit-xml=junit-slow.xml \
          -m "slow" \
          --tb=short
      env:
        # Each value falls back to a placeholder when the secret is not set.
        BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY || 'mock-api-key' }}
        BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID || 'mock-project-id' }}
        MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}

    - name: Upload slow test results
      uses: actions/upload-artifact@v4
      # always(): publish the report even when the test step failed.
      if: always()
      with:
        name: slow-test-results
        path: junit-slow.xml
397+
# Complete test suite (one entry of the workflow's `jobs:` mapping).
# Opt-in: runs only when the pull request carries the `test-all` or
# `full-test` label, and only after the `test-unit` job has succeeded.
test-all:
  name: Complete Test Suite
  runs-on: ubuntu-latest
  needs: test-unit
  if: >-
    contains(github.event.pull_request.labels.*.name, 'test-all') ||
    contains(github.event.pull_request.labels.*.name, 'full-test')

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v4
      with:
        # Quoted so the version is read as a string, not a float.
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"
        pip install jsonschema
        # Install temporary Google GenAI wheel
        pip install temp/google_genai-1.14.0-py3-none-any.whl
        playwright install chromium

    - name: Run complete test suite
      # No `-m` marker filter: everything collected under tests/ is run.
      # --maxfail=10 aborts the session after ten failures.
      run: |
        pytest tests/ -v \
          --cov=stagehand \
          --cov-report=xml \
          --cov-report=html \
          --junit-xml=junit-all.xml \
          --maxfail=10 \
          --tb=short
      env:
        # No placeholder fallback is supplied here; an unset secret
        # resolves to an empty value.
        BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
        BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
        MODEL_API_KEY: ${{ secrets.MODEL_API_KEY }}
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        STAGEHAND_API_URL: ${{ secrets.STAGEHAND_API_URL }}

    - name: Upload complete test results
      uses: actions/upload-artifact@v4
      # always(): publish the reports even when the test step failed.
      if: always()
      with:
        name: complete-test-results
        # JUnit report plus the HTML coverage directory.
        path: |
          junit-all.xml
          htmlcov/
448+
256449 coverage-report :
257450 name : Coverage Report
258451 runs-on : ubuntu-latest
@@ -342,12 +535,38 @@ jobs:
342535 echo "- Unit test configurations: $UNIT_TESTS" >> $GITHUB_STEP_SUMMARY
343536 echo "- Integration test categories: $INTEGRATION_TESTS" >> $GITHUB_STEP_SUMMARY
344537
345- # Check for test failures
346- if [ -f test-results/*/junit-*.xml ]; then
347- echo "- Test artifacts generated successfully ✅" >> $GITHUB_STEP_SUMMARY
538+ # Check for optional test runs
539+ if [ -f test-results/*/junit-browserbase.xml ]; then
540+ echo "- Browserbase tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
541+ else
542+ echo "- Browserbase tests: ⏭️ Skipped (add 'test-browserbase' label to run)" >> $GITHUB_STEP_SUMMARY
543+ fi
544+
545+ if [ -f test-results/*/junit-performance.xml ]; then
546+ echo "- Performance tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
547+ else
548+ echo "- Performance tests: ⏭️ Skipped (add 'test-performance' label to run)" >> $GITHUB_STEP_SUMMARY
549+ fi
550+
551+ if [ -f test-results/*/junit-llm.xml ]; then
552+ echo "- LLM tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
348553 else
349- echo "- Test artifacts missing ❌ " >> $GITHUB_STEP_SUMMARY
554+ echo "- LLM tests: ⏭️ Skipped (add 'test-llm' label to run) " >> $GITHUB_STEP_SUMMARY
350555 fi
351556
557+ if [ -f test-results/*/junit-e2e.xml ]; then
558+ echo "- E2E tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
559+ else
560+ echo "- E2E tests: ⏭️ Skipped (add 'test-e2e' label to run)" >> $GITHUB_STEP_SUMMARY
561+ fi
562+
563+ echo "" >> $GITHUB_STEP_SUMMARY
564+ echo "### Available Test Labels" >> $GITHUB_STEP_SUMMARY
565+ echo "- \`test-browserbase\` - Run Browserbase integration tests" >> $GITHUB_STEP_SUMMARY
566+ echo "- \`test-performance\` - Run performance and load tests" >> $GITHUB_STEP_SUMMARY
567+ echo "- \`test-llm\` - Run LLM integration tests" >> $GITHUB_STEP_SUMMARY
568+ echo "- \`test-e2e\` - Run end-to-end workflow tests" >> $GITHUB_STEP_SUMMARY
569+ echo "- \`test-slow\` - Run all slow-marked tests" >> $GITHUB_STEP_SUMMARY
570+ echo "- \`test-all\` - Run complete test suite" >> $GITHUB_STEP_SUMMARY
352571 echo "" >> $GITHUB_STEP_SUMMARY
353572 echo "Detailed results are available in the artifacts section." >> $GITHUB_STEP_SUMMARY
0 commit comments