# GitHub Actions CI workflow (recovered from the web-UI "Workflow file for this run" view).
# Run context: "feat: Support Prohibited Domains (#3115)" — run #1.
name: test

# Least-privilege token grants for all jobs in this workflow.
permissions:
  actions: read
  contents: write
  pull-requests: write # Allow writing comments on PRs
  issues: write # Allow writing comments on issues
  statuses: write # Allow writing statuses on PRs
  discussions: write

# Cancel in-progress runs when a new commit is pushed to the same branch/PR.
# PRs group by PR number; pushes fall back to the ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

# NOTE: generic YAML 1.1 parsers read the bare `on` key as boolean true;
# GitHub's own loader handles it, so it stays unquoted here.
on:
  push:
    branches:
      - main
      - stable
      - 'releases/**'
    tags:
      - '*'
  pull_request:
  workflow_dispatch:
jobs:
  # Discover tests/ci/test_*.py and publish the module names as a JSON array
  # that the `tests` job consumes as its matrix.
  find_tests:
    runs-on: ubuntu-latest
    timeout-minutes: 5 # Prevent hanging
    outputs:
      # e.g. ["test_browser", "test_tools", "test_browser_session", "test_tab_management", ...]
      TEST_FILENAMES: ${{ steps.lsgrep.outputs.TEST_FILENAMES }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Force fresh checkout to avoid any caching issues
          fetch-depth: 1
      - id: lsgrep
        # Build the JSON matrix from the filenames on disk.
        # https://code.dblock.org/2021/09/03/generating-task-matrix-by-looping-over-repo-files-with-github-actions.html
        run: |
          echo "πŸ” Discovering test files at $(date)"
          echo "Git commit: $(git rev-parse HEAD)"
          echo "Git branch: $(git branch --show-current)"
          echo ""
          TEST_FILENAMES="$(ls tests/ci/test_*.py | sed 's|^tests/ci/||' | sed 's|\.py$||' | jq -R -s -c 'split("\n")[:-1]')"
          echo "TEST_FILENAMES=${TEST_FILENAMES}" >> "$GITHUB_OUTPUT"
          echo "πŸ“‹ Test matrix: $TEST_FILENAMES"
      - name: Check that at least one test file is found
        run: |
          if [ -z "${{ steps.lsgrep.outputs.TEST_FILENAMES }}" ]; then
            echo "Failed to find any test_*.py files in tests/ci/ folder!" >&2
            exit 1
          fi

  # One matrix job per discovered test module.
  tests:
    needs: find_tests
    runs-on: ubuntu-latest
    timeout-minutes: 10 # Prevent individual tests from hanging
    env:
      IN_DOCKER: 'True'
      ANONYMIZED_TELEMETRY: 'false'
      BROWSER_USE_LOGGING_LEVEL: 'DEBUG'
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
      GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
    strategy:
      matrix:
        # Autodiscovered from tests/ci/test_*.py by find_tests, e.g.:
        # - test_browser
        # - test_tools
        # - test_browser_session
        # - test_tab_management
        # ... and more
        # The sentinel fallback makes discovery failures visible instead of
        # silently running an empty matrix.
        test_filename: ${{ fromJson(needs.find_tests.outputs.TEST_FILENAMES || '["FAILED_TO_DISCOVER_TESTS"]') }}
    name: ${{ matrix.test_filename }}
    steps:
      - name: Check that the previous step managed to find some test files for us to run
        run: |
          if [[ "${{ matrix.test_filename }}" == "FAILED_TO_DISCOVER_TESTS" ]]; then
            echo "Failed to get list of test files in tests/ci/test_*.py from find_tests job" >&2
            exit 1
          fi
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          activate-environment: true
      - run: uv sync --dev --all-extras
      - name: Cache chromium binaries
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/ms-playwright
          key: ${{ runner.os }}-${{ runner.arch }}-chromium-browser
      - name: Install Chromium browser
        run: uvx playwright install chromium --with-deps --no-shell
      - name: Cache browser-use extensions
        uses: actions/cache@v4
        with:
          path: |
            ~/.config/browseruse/extensions
          key: ${{ runner.os }}-browseruse-extensions-${{ hashFiles('browser_use/browser/profile.py') }}
          restore-keys: |
            ${{ runner.os }}-browseruse-extensions-
      - name: Check if test file exists and run it
        # A matrix entry can go stale if the file was renamed/removed between
        # discovery and checkout; treat that as a skip, not a failure.
        run: |
          TEST_FILE="tests/ci/${{ matrix.test_filename }}.py"
          if [ -f "$TEST_FILE" ]; then
            echo "βœ… Running test file: $TEST_FILE"
            pytest "$TEST_FILE"
          else
            echo "❌ Test file not found: $TEST_FILE"
            echo "This file may have been renamed or removed. Current test files:"
            ls -1 tests/ci/test_*.py | sed 's|tests/ci/||' | sed 's|\.py$||' | sort
            echo ""
            echo "Skipping this test job since the file no longer exists."
            exit 0 # Exit successfully to not fail the entire workflow
          fi

  # Run the agent-task evaluation harness, summarize the score, and post/update
  # a results comment on the PR.
  evaluate-tasks:
    runs-on: ubuntu-latest
    timeout-minutes: 30 # Prevent hanging, consistent with the other jobs
    env:
      IN_DOCKER: 'true'
      BROWSER_USE_CLOUD_SYNC: 'false'
      ANONYMIZED_TELEMETRY: 'false'
      BROWSER_USE_LOGGING_LEVEL: 'DEBUG'
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
      GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          activate-environment: true
      - run: uv sync --dev --all-extras
      - name: Cache chromium binaries
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/ms-playwright
          key: ${{ runner.os }}-${{ runner.arch }}-chromium-browser
      - name: Install Chromium browser
        run: uvx playwright install chromium --with-deps --no-shell
      - name: Cache browser-use extensions
        uses: actions/cache@v4
        with:
          path: |
            ~/.config/browseruse/extensions
          key: ${{ runner.os }}-browseruse-extensions-${{ hashFiles('browser_use/browser/profile.py') }}
          restore-keys: |
            ${{ runner.os }}-browseruse-extensions-
      - name: Run agent tasks evaluation and capture score
        id: eval
        # evaluate_tasks.py prints PASSED=/TOTAL=/DETAILED_RESULTS= lines;
        # export them via GITHUB_ENV for the steps below.
        run: |
          python tests/ci/evaluate_tasks.py > result.txt
          cat result.txt
          echo "PASSED=$(grep '^PASSED=' result.txt | cut -d= -f2)" >> "$GITHUB_ENV"
          echo "TOTAL=$(grep '^TOTAL=' result.txt | cut -d= -f2)" >> "$GITHUB_ENV"
          echo "DETAILED_RESULTS=$(grep '^DETAILED_RESULTS=' result.txt | cut -d= -f2-)" >> "$GITHUB_ENV"
      - name: Print agent evaluation summary
        run: |
          echo "Agent tasks passed: $PASSED / $TOTAL"
      - name: Write agent evaluation summary to workflow overview
        run: |
          if [ "$PASSED" = "$TOTAL" ]; then
            COLOR="green"
          else
            COLOR="yellow"
          fi
          echo "<h2>Agent Tasks Score: <span style='color:$COLOR;'>$PASSED/$TOTAL</span></h2>" >> "$GITHUB_STEP_SUMMARY"
      - name: Comment PR with agent evaluation results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        continue-on-error: true # Comment failure must not fail the evaluation
        with:
          script: |
            const passed = parseInt(process.env.PASSED, 10);
            const total = parseInt(process.env.TOTAL, 10);
            const detailedResults = JSON.parse(process.env.DETAILED_RESULTS);
            const score = `${passed}/${total}`;
            const percentage = Math.round((passed / total) * 100);

            // Fail the workflow if 0% pass rate
            if (percentage === 0) {
              core.setFailed(`Evaluation failed: 0% pass rate (${passed}/${total})`);
            }

            // Create detailed table
            let tableRows = '';
            detailedResults.forEach(result => {
              const emoji = result.success ? 'βœ…' : '❌';
              const status = result.success ? 'Pass' : 'Fail';
              tableRows += `| ${result.task} | ${emoji} ${status} | ${result.reason} |\n`;
            });

            // Template-literal lines are flush with the block-scalar indent so the
            // markdown has no leading spaces; blank lines around the table are
            // required for it to render inside <details>.
            const comment = `## Agent Task Evaluation Results: ${score} (${percentage}%)

            <details>
            <summary>View detailed results</summary>

            | Task | Result | Reason |
            |------|--------|--------|
            ${tableRows}
            Check the [evaluate-tasks job](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for detailed task execution logs.

            </details>`;

            // Find existing comment to update or create new one
            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
            });
            const botComment = comments.find(comment =>
              comment.user.type === 'Bot' &&
              comment.body.includes('Agent Task Evaluation Results')
            );
            if (botComment) {
              // Update existing comment
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: comment
              });
            } else {
              // Create new comment
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: comment
              });
            }