5
5
branches : [ main, develop ]
6
6
pull_request :
7
7
branches : [ main, develop ]
8
+ types : [opened, synchronize, reopened, labeled, unlabeled]
8
9
# schedule:
9
10
# # Run tests daily at 6 AM UTC
10
11
# - cron: '0 6 * * *'
@@ -140,7 +141,10 @@ jobs:
140
141
name : Browserbase Integration Tests
141
142
runs-on : ubuntu-latest
142
143
needs : test-unit
143
- if : github.event_name == 'schedule' || contains(github.event.head_commit.message, '[test-browserbase]')
144
+ if : |
145
+ github.event_name == 'schedule' ||
146
+ contains(github.event.pull_request.labels.*.name, 'test-browserbase') ||
147
+ contains(github.event.pull_request.labels.*.name, 'browserbase')
144
148
145
149
steps :
146
150
- uses : actions/checkout@v4
@@ -183,7 +187,10 @@ jobs:
183
187
name : Performance Tests
184
188
runs-on : ubuntu-latest
185
189
needs : test-unit
186
- if : github.event_name == 'schedule' || contains(github.event.head_commit.message, '[test-performance]')
190
+ if : |
191
+ github.event_name == 'schedule' ||
192
+ contains(github.event.pull_request.labels.*.name, 'test-performance') ||
193
+ contains(github.event.pull_request.labels.*.name, 'performance')
187
194
188
195
steps :
189
196
- uses : actions/checkout@v4
@@ -253,6 +260,192 @@ jobs:
253
260
name : smoke-test-results
254
261
path : junit-smoke.xml
255
262
263
+ test-llm :
264
+ name : LLM Integration Tests
265
+ runs-on : ubuntu-latest
266
+ needs : test-unit
267
+ if : |
268
+ contains(github.event.pull_request.labels.*.name, 'test-llm') ||
269
+ contains(github.event.pull_request.labels.*.name, 'llm')
270
+
271
+ steps :
272
+ - uses : actions/checkout@v4
273
+
274
+ - name : Set up Python 3.11
275
+ uses : actions/setup-python@v4
276
+ with :
277
+ python-version : "3.11"  # quoted so YAML reads a string, not the float 3.11
278
+
279
+ - name : Install dependencies
280
+ run : |
281
+ python -m pip install --upgrade pip
282
+ pip install -e ".[dev]"
283
+ pip install jsonschema
284
+ # Install temporary Google GenAI wheel
285
+ pip install temp/google_genai-1.14.0-py3-none-any.whl
286
+
287
+ - name : Run LLM tests
288
+ run : |
289
+ pytest tests/ -v \
290
+ --cov=stagehand \
291
+ --cov-report=xml \
292
+ --junit-xml=junit-llm.xml \
293
+ -m "llm" \
294
+ --tb=short
295
+ env :
296
+ MODEL_API_KEY : ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
297
+ OPENAI_API_KEY : ${{ secrets.OPENAI_API_KEY || 'mock-openai-key' }}
298
+ ANTHROPIC_API_KEY : ${{ secrets.ANTHROPIC_API_KEY || 'mock-anthropic-key' }}
299
+
300
+ - name : Upload LLM test results
301
+ uses : actions/upload-artifact@v4
302
+ if : always()
303
+ with :
304
+ name : llm-test-results
305
+ path : junit-llm.xml
306
+
307
+ test-e2e :
308
+ name : End-to-End Tests
309
+ runs-on : ubuntu-latest
310
+ needs : test-unit
311
+ if : |
312
+ contains(github.event.pull_request.labels.*.name, 'test-e2e') ||
313
+ contains(github.event.pull_request.labels.*.name, 'e2e')
314
+
315
+ steps :
316
+ - uses : actions/checkout@v4
317
+
318
+ - name : Set up Python 3.11
319
+ uses : actions/setup-python@v4
320
+ with :
321
+ python-version : "3.11"  # quoted so YAML reads a string, not the float 3.11
322
+
323
+ - name : Install dependencies
324
+ run : |
325
+ python -m pip install --upgrade pip
326
+ pip install -e ".[dev]"
327
+ pip install jsonschema
328
+ # Install temporary Google GenAI wheel
329
+ pip install temp/google_genai-1.14.0-py3-none-any.whl
330
+ playwright install chromium
331
+
332
+ - name : Run E2E tests
333
+ run : |
334
+ pytest tests/ -v \
335
+ --cov=stagehand \
336
+ --cov-report=xml \
337
+ --junit-xml=junit-e2e.xml \
338
+ -m "e2e" \
339
+ --tb=short
340
+ env :
341
+ BROWSERBASE_API_KEY : ${{ secrets.BROWSERBASE_API_KEY || 'mock-api-key' }}
342
+ BROWSERBASE_PROJECT_ID : ${{ secrets.BROWSERBASE_PROJECT_ID || 'mock-project-id' }}
343
+ MODEL_API_KEY : ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
344
+ STAGEHAND_API_URL : ${{ secrets.STAGEHAND_API_URL || 'http://localhost:3000' }}
345
+
346
+ - name : Upload E2E test results
347
+ uses : actions/upload-artifact@v4
348
+ if : always()
349
+ with :
350
+ name : e2e-test-results
351
+ path : junit-e2e.xml
352
+
353
+ test-slow :
354
+ name : Slow Tests
355
+ runs-on : ubuntu-latest
356
+ needs : test-unit
357
+ if : |
358
+ contains(github.event.pull_request.labels.*.name, 'test-slow') ||
359
+ contains(github.event.pull_request.labels.*.name, 'slow')
360
+
361
+ steps :
362
+ - uses : actions/checkout@v4
363
+
364
+ - name : Set up Python 3.11
365
+ uses : actions/setup-python@v4
366
+ with :
367
+ python-version : "3.11"  # quoted so YAML reads a string, not the float 3.11
368
+
369
+ - name : Install dependencies
370
+ run : |
371
+ python -m pip install --upgrade pip
372
+ pip install -e ".[dev]"
373
+ pip install jsonschema
374
+ # Install temporary Google GenAI wheel
375
+ pip install temp/google_genai-1.14.0-py3-none-any.whl
376
+ playwright install chromium
377
+
378
+ - name : Run slow tests
379
+ run : |
380
+ pytest tests/ -v \
381
+ --cov=stagehand \
382
+ --cov-report=xml \
383
+ --junit-xml=junit-slow.xml \
384
+ -m "slow" \
385
+ --tb=short
386
+ env :
387
+ BROWSERBASE_API_KEY : ${{ secrets.BROWSERBASE_API_KEY || 'mock-api-key' }}
388
+ BROWSERBASE_PROJECT_ID : ${{ secrets.BROWSERBASE_PROJECT_ID || 'mock-project-id' }}
389
+ MODEL_API_KEY : ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
390
+
391
+ - name : Upload slow test results
392
+ uses : actions/upload-artifact@v4
393
+ if : always()
394
+ with :
395
+ name : slow-test-results
396
+ path : junit-slow.xml
397
+
398
+ test-all :
399
+ name : Complete Test Suite
400
+ runs-on : ubuntu-latest
401
+ needs : test-unit
402
+ if : |
403
+ contains(github.event.pull_request.labels.*.name, 'test-all') ||
404
+ contains(github.event.pull_request.labels.*.name, 'full-test')
405
+
406
+ steps :
407
+ - uses : actions/checkout@v4
408
+
409
+ - name : Set up Python 3.11
410
+ uses : actions/setup-python@v4
411
+ with :
412
+ python-version : "3.11"  # quoted so YAML reads a string, not the float 3.11
413
+
414
+ - name : Install dependencies
415
+ run : |
416
+ python -m pip install --upgrade pip
417
+ pip install -e ".[dev]"
418
+ pip install jsonschema
419
+ # Install temporary Google GenAI wheel
420
+ pip install temp/google_genai-1.14.0-py3-none-any.whl
421
+ playwright install chromium
422
+
423
+ - name : Run complete test suite
424
+ run : |
425
+ pytest tests/ -v \
426
+ --cov=stagehand \
427
+ --cov-report=xml \
428
+ --cov-report=html \
429
+ --junit-xml=junit-all.xml \
430
+ --maxfail=10 \
431
+ --tb=short
432
+ env :
433
+ BROWSERBASE_API_KEY : ${{ secrets.BROWSERBASE_API_KEY }}
434
+ BROWSERBASE_PROJECT_ID : ${{ secrets.BROWSERBASE_PROJECT_ID }}
435
+ MODEL_API_KEY : ${{ secrets.MODEL_API_KEY }}
436
+ OPENAI_API_KEY : ${{ secrets.OPENAI_API_KEY }}
437
+ ANTHROPIC_API_KEY : ${{ secrets.ANTHROPIC_API_KEY }}
438
+ STAGEHAND_API_URL : ${{ secrets.STAGEHAND_API_URL }}
439
+
440
+ - name : Upload complete test results
441
+ uses : actions/upload-artifact@v4
442
+ if : always()
443
+ with :
444
+ name : complete-test-results
445
+ path : |
446
+ junit-all.xml
447
+ htmlcov/
448
+
256
449
coverage-report :
257
450
name : Coverage Report
258
451
runs-on : ubuntu-latest
@@ -342,12 +535,38 @@ jobs:
342
535
echo "- Unit test configurations: $UNIT_TESTS" >> $GITHUB_STEP_SUMMARY
343
536
echo "- Integration test categories: $INTEGRATION_TESTS" >> $GITHUB_STEP_SUMMARY
344
537
345
- # Check for test failures
346
- if [ -f test-results/*/junit-*.xml ]; then
347
- echo "- Test artifacts generated successfully ✅" >> $GITHUB_STEP_SUMMARY
538
+ # Check for optional test runs
539
+ if ls test-results/*/junit-browserbase.xml > /dev/null 2>&1; then  # ls is true for 1+ glob matches; `[ -f glob ]` errors when several match
540
+ echo "- Browserbase tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
541
+ else
542
+ echo "- Browserbase tests: ⏭️ Skipped (add 'test-browserbase' label to run)" >> $GITHUB_STEP_SUMMARY
543
+ fi
544
+
545
+ if ls test-results/*/junit-performance.xml > /dev/null 2>&1; then  # ls is true for 1+ glob matches; `[ -f glob ]` errors when several match
546
+ echo "- Performance tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
547
+ else
548
+ echo "- Performance tests: ⏭️ Skipped (add 'test-performance' label to run)" >> $GITHUB_STEP_SUMMARY
549
+ fi
550
+
551
+ if ls test-results/*/junit-llm.xml > /dev/null 2>&1; then  # ls is true for 1+ glob matches; `[ -f glob ]` errors when several match
552
+ echo "- LLM tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
348
553
else
349
- echo "- Test artifacts missing ❌ " >> $GITHUB_STEP_SUMMARY
554
+ echo "- LLM tests: ⏭️ Skipped (add 'test-llm' label to run) " >> $GITHUB_STEP_SUMMARY
350
555
fi
351
556
557
+ if ls test-results/*/junit-e2e.xml > /dev/null 2>&1; then  # ls is true for 1+ glob matches; `[ -f glob ]` errors when several match
558
+ echo "- E2E tests: ✅ Executed" >> $GITHUB_STEP_SUMMARY
559
+ else
560
+ echo "- E2E tests: ⏭️ Skipped (add 'test-e2e' label to run)" >> $GITHUB_STEP_SUMMARY
561
+ fi
562
+
563
+ echo "" >> $GITHUB_STEP_SUMMARY
564
+ echo "### Available Test Labels" >> $GITHUB_STEP_SUMMARY
565
+ echo "- \`test-browserbase\` - Run Browserbase integration tests" >> $GITHUB_STEP_SUMMARY
566
+ echo "- \`test-performance\` - Run performance and load tests" >> $GITHUB_STEP_SUMMARY
567
+ echo "- \`test-llm\` - Run LLM integration tests" >> $GITHUB_STEP_SUMMARY
568
+ echo "- \`test-e2e\` - Run end-to-end workflow tests" >> $GITHUB_STEP_SUMMARY
569
+ echo "- \`test-slow\` - Run all slow-marked tests" >> $GITHUB_STEP_SUMMARY
570
+ echo "- \`test-all\` - Run complete test suite" >> $GITHUB_STEP_SUMMARY
352
571
echo "" >> $GITHUB_STEP_SUMMARY
353
572
echo "Detailed results are available in the artifacts section." >> $GITHUB_STEP_SUMMARY
0 commit comments