Significant-Gravitas · ntindle · Dec 26, 2025 · Jan 18, 2026 · Jan 18, 2026 · Jan 18, 2026
@@ -0,0 +1,10 @@
+{
+  "permissions": {
+    "allow": [
+      "Read", "Grep", "Glob",
+      "Bash(ls:*)", "Bash(cat:*)", "Bash(grep:*)", "Bash(find:*)",
+      "Bash(git status:*)", "Bash(git diff:*)", "Bash(git log:*)", "Bash(git worktree:*)",
+      "Bash(tmux:*)", "Bash(sleep:*)", "Bash(branchlet:*)"
+    ]
+  }
+}
@@ -6,11 +6,15 @@ on:
     paths:
       - '.github/workflows/classic-autogpt-ci.yml'
       - 'classic/original_autogpt/**'
+      - 'classic/direct_benchmark/**'  # changes under direct_benchmark and forge also trigger this workflow
+      - 'classic/forge/**'
   pull_request:
     branches: [ master, dev, release-* ]
     paths:
       - '.github/workflows/classic-autogpt-ci.yml'
       - 'classic/original_autogpt/**'
+      - 'classic/direct_benchmark/**'  # same path filters as the other trigger's list; keep the two in sync
+      - 'classic/forge/**'
 
 concurrency:
   group: ${{ format('classic-autogpt-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
@@ -19,47 +23,22 @@ concurrency:
 defaults:
   run:
     shell: bash
-    working-directory: classic/original_autogpt
+    working-directory: classic  # default for run steps; the MinIO step overrides it with '.'
 
 jobs:
   test:
     permissions:
       contents: read
     timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.10"]
-        platform-os: [ubuntu, macos, macos-arm64, windows]
-    runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
+    runs-on: ubuntu-latest  # the MinIO step in this job starts a Docker container
 
     steps:
-      # Quite slow on macOS (2~4 minutes to set up Docker)
-      # - name: Set up Docker (macOS)
-      #   if: runner.os == 'macOS'
-      #   uses: crazy-max/ghaction-setup-docker@v3
-
-      - name: Start MinIO service (Linux)
-        if: runner.os == 'Linux'
+      - name: Start MinIO service  # published on port 9000; the test step points S3_ENDPOINT_URL at it
         working-directory: '.'
         run: |
           docker pull minio/minio:edge-cicd
           docker run -d -p 9000:9000 minio/minio:edge-cicd
 
-      - name: Start MinIO service (macOS)
-        if: runner.os == 'macOS'
-        working-directory: ${{ runner.temp }}
-        run: |
-          brew install minio/stable/minio
-          mkdir data
-          minio server ./data &
-
-      # No MinIO on Windows:
-      # - Windows doesn't support running Linux Docker containers
-      # - It doesn't seem possible to start background processes on Windows. They are
-      #   killed after the step returns.
-      #   See: https://github.com/actions/runner/issues/598#issuecomment-2011890429
-
       - name: Checkout repository
         uses: actions/checkout@v4
         with:
@@ -71,41 +50,23 @@ jobs:
           git config --global user.name "Auto-GPT-Bot"
           git config --global user.email "github-bot@agpt.co"
 
-      - name: Set up Python ${{ matrix.python-version }}
+      - name: Set up Python 3.12
         uses: actions/setup-python@v5
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: "3.12"  # quoted so YAML keeps it a string rather than a float
 
       - id: get_date
         name: Get date
         run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
 
       - name: Set up Python dependency cache
-        # On Windows, unpacking cached dependencies takes longer than just installing them
-        if: runner.os != 'Windows'
         uses: actions/cache@v4
         with:
-          path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
-          key: poetry-${{ runner.os }}-${{ hashFiles('classic/original_autogpt/poetry.lock') }}
-
-      - name: Install Poetry (Unix)
-        if: runner.os != 'Windows'
-        run: |
-          curl -sSL https://install.python-poetry.org | python3 -
-
-          if [ "${{ runner.os }}" = "macOS" ]; then
-            PATH="$HOME/.local/bin:$PATH"
-            echo "$HOME/.local/bin" >> $GITHUB_PATH
-          fi
-
-      - name: Install Poetry (Windows)
-        if: runner.os == 'Windows'
-        shell: pwsh
-        run: |
-          (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
+          path: ~/.cache/pypoetry  # Poetry cache directory on the Linux runner
+          key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}  # key changes whenever classic/poetry.lock changes
 
-          $env:PATH += ";$env:APPDATA\Python\Scripts"
-          echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
+      - name: Install Poetry  # single install path; the job runs only on ubuntu-latest
+        run: curl -sSL https://install.python-poetry.org | python3 -
 
       - name: Install Python dependencies
         run: poetry install
@@ -116,12 +77,13 @@ jobs:
             --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \
             --numprocesses=logical --durations=10 \
             --junitxml=junit.xml -o junit_family=legacy \
-            tests/unit tests/integration
+            original_autogpt/tests/unit original_autogpt/tests/integration
         env:
           CI: true
           PLAIN_OUTPUT: True
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          S3_ENDPOINT_URL: ${{ runner.os != 'Windows' && 'http://127.0.0.1:9000' || '' }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}  # repository secret, passed the same way as the OpenAI key
+          S3_ENDPOINT_URL: http://127.0.0.1:9000  # the MinIO container this job publishes on port 9000
           AWS_ACCESS_KEY_ID: minioadmin
           AWS_SECRET_ACCESS_KEY: minioadmin
 
@@ -135,11 +97,11 @@ jobs:
         uses: codecov/codecov-action@v5
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
-          flags: autogpt-agent,${{ runner.os }}
+          flags: autogpt-agent  # Codecov flag attached to this upload
 
       - name: Upload logs to artifact
         if: always()
         uses: actions/upload-artifact@v4
         with:
           name: test-logs
-          path: classic/original_autogpt/logs/
+          path: classic/logs/  # NOTE(review): assumes the test run writes its logs under classic/logs — confirm
@@ -11,9 +11,6 @@ on:
       - 'classic/original_autogpt/**'
       - 'classic/forge/**'
-      - 'classic/benchmark/**'
-      - 'classic/run'
-      - 'classic/cli.py'
-      - 'classic/setup.py'
+      - 'classic/direct_benchmark/**'  # this workflow runs direct-benchmark, so its sources must trigger it
       - '!**/*.md'
   pull_request:
     branches: [ master, dev, release-* ]
@@ -22,9 +19,6 @@ on:
       - 'classic/original_autogpt/**'
       - 'classic/forge/**'
-      - 'classic/benchmark/**'
-      - 'classic/run'
-      - 'classic/cli.py'
-      - 'classic/setup.py'
+      - 'classic/direct_benchmark/**'  # this workflow runs direct-benchmark, so its sources must trigger it
       - '!**/*.md'
 
 defaults:
@@ -35,13 +29,9 @@ defaults:
 jobs:
   serve-agent-protocol:
     runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        agent-name: [ original_autogpt ]
-      fail-fast: false
     timeout-minutes: 20
     env:
-      min-python-version: '3.10'
+      min-python-version: '3.12'  # read by the Python setup step as env.min-python-version
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -55,22 +45,22 @@ jobs:
           python-version: ${{ env.min-python-version }}
 
       - name: Install Poetry
-        working-directory: ./classic/${{ matrix.agent-name }}/
         run: |
           curl -sSL https://install.python-poetry.org | python -
 
-      - name: Run regression tests
+      - name: Install dependencies
+        run: poetry install
+
+      - name: Run smoke tests with direct-benchmark  # one strategy, one model, two tests (see the flags in the command)
         run: |
-          ./run agent start ${{ matrix.agent-name }}
-          cd ${{ matrix.agent-name }}
-          poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0
-          poetry run agbenchmark --test=WriteFile
+          poetry run direct-benchmark run \
+            --strategies one_shot \
+            --models claude \
+            --tests ReadFile,WriteFile \
+            --json
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          AGENT_NAME: ${{ matrix.agent-name }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}  # presumably the key for the claude model chosen via --models; confirm
           REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
-          HELICONE_CACHE_ENABLED: false
-          HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
-          REPORTS_FOLDER: ${{ format('../../reports/{0}', matrix.agent-name) }}
-          TELEMETRY_ENVIRONMENT: autogpt-ci
-          TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}
+          NONINTERACTIVE_MODE: "true"
+          CI: "true"  # quoted like NONINTERACTIVE_MODE; env values reach the process as strings