Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
ea521ee
wip: add support for new openai models (non-working)
ntindle Dec 26, 2025
ae2cc97
feat(classic): add modern Anthropic models and fix deprecated API
ntindle Jan 18, 2026
fd66be2
chore(classic): remove unneeded files and add CLAUDE.md docs
ntindle Jan 18, 2026
ef8a6d2
feat(classic): make AutoGPT installable and runnable from any directory
ntindle Jan 18, 2026
7a20de8
chore: add .autogpt/ to gitignore
ntindle Jan 18, 2026
8f3291b
feat(classic): add workspace permissions system for agent commands
ntindle Jan 18, 2026
0adbc0b
fix(classic): update CI for removed frontend and helper scripts
ntindle Jan 18, 2026
4c264b7
feat(classic): add TodoComponent with LLM-powered decomposition
ntindle Jan 19, 2026
9e96d11
feat(classic): add utility components for agent capabilities
ntindle Jan 19, 2026
3f65da0
feat(classic): add new exception types for enhanced error handling
ntindle Jan 19, 2026
b9113be
feat(classic): enhance existing components with new capabilities
ntindle Jan 19, 2026
cacc897
feat(classic): improve AutoGPT configuration and setup
ntindle Jan 19, 2026
8fc174c
refactor(classic): simplify log format by removing timestamps
ntindle Jan 19, 2026
6fbd208
chore: ignore .claude/settings.local.json in all directories
ntindle Jan 19, 2026
0a65df5
fix(classic): always use native tool calling, fix N/A command loop
ntindle Jan 19, 2026
46aabab
feat(classic): upgrade to Python 3.12+ with CI testing on 3.12, 3.13,…
ntindle Jan 19, 2026
94b6f74
feat(classic): add multiple prompt strategies for agent reasoning
ntindle Jan 19, 2026
f0641c2
fix(classic): auto-advance plan steps in Plan-Execute strategy
ntindle Jan 19, 2026
699fffb
feat(classic): add Rich interactive selector for command approval
ntindle Jan 19, 2026
864c5a7
fix(classic): approve+feedback now executes command then sends feedback
ntindle Jan 19, 2026
44182af
feat(classic): add strategy benchmark test harness for CI
ntindle Jan 19, 2026
18edeae
fix(classic): fix linting and formatting errors across codebase
ntindle Jan 19, 2026
515504c
fix(classic): resolve pyright type errors in original_autogpt
ntindle Jan 19, 2026
3040f39
feat(forge): modernize web search with tiered provider system
ntindle Jan 19, 2026
e0784f8
refactor(forge): simplify deeply nested error handling in Anthropic p…
ntindle Jan 19, 2026
cda9572
feat(forge): add lightweight web fetch component
ntindle Jan 19, 2026
013f728
feat(forge): improve tool call error feedback for LLM self-correction
ntindle Jan 19, 2026
bead811
docs(classic): add workspace, settings, and permissions documentation
ntindle Jan 19, 2026
49f56b4
feat(classic): enhance strategy benchmark harness with model comparis…
ntindle Jan 20, 2026
32f68d5
feat(classic): add failure analysis tool and improve benchmark output
ntindle Jan 20, 2026
acb320d
feat(classic): add noninteractive mode env var and benchmark config l…
ntindle Jan 20, 2026
804430e
refactor(classic): migrate from agbenchmark to direct_benchmark harness
ntindle Jan 20, 2026
e477150
Merge branch 'dev' into make-old-work
ntindle Jan 20, 2026
ab95077
refactor(forge): remove VCR cassettes, use real API calls with skip f…
ntindle Jan 20, 2026
0a616d9
feat(direct_benchmark): add step-level logging with colored prefixes
ntindle Jan 20, 2026
ffe9325
feat(direct_benchmark): multi-panel UI with copy-paste completion blocks
ntindle Jan 20, 2026
9108b21
fix(direct_benchmark): parallel execution and always show completion …
ntindle Jan 20, 2026
7db962b
feat(direct_benchmark): dynamic column layout up to 10 wide
ntindle Jan 20, 2026
4eeb6ee
feat(direct_benchmark): add CI mode for non-interactive environments
ntindle Jan 20, 2026
a347bed
feat(direct_benchmark): add incremental resume and selective reset
ntindle Jan 20, 2026
d591f36
fix(direct_benchmark): track cost from LLM provider
ntindle Jan 20, 2026
634bff8
refactor(forge): replace Selenium with Playwright for web browsing
ntindle Jan 20, 2026
00e02a4
feat(direct_benchmark): add run ID to completion blocks
ntindle Jan 20, 2026
f07dff1
fix(direct_benchmark): add pytest dependency for challenge evaluation
ntindle Jan 20, 2026
0e65785
fix(direct_benchmark): don't mark timed-out challenges as passed
ntindle Jan 20, 2026
89003a5
feat(direct_benchmark): show "would have passed" for timed-out challe…
ntindle Jan 20, 2026
572c3f5
refactor(classic): consolidate Poetry projects into single pyproject.…
ntindle Jan 20, 2026
b849eaf
feat(direct_benchmark): enable shell command execution with safety de…
ntindle Jan 20, 2026
c1031b2
ci(classic): update CI workflows for consolidated Poetry project
ntindle Jan 20, 2026
6f27834
feat(classic): add sub-agent architecture and LATS/multi-agent debate…
ntindle Jan 20, 2026
60fdee1
fix(classic): resolve linting and formatting issues for CI compliance
ntindle Jan 20, 2026
bd10da1
ci: update pre-commit hooks for consolidated classic Poetry project
ntindle Jan 20, 2026
98cde46
style(classic): fix import sorting with isort
ntindle Jan 20, 2026
5373a6e
style(classic): fix code formatting with black
ntindle Jan 20, 2026
b32bfca
chore: remove test.db from tracking
ntindle Jan 20, 2026
aeec0ce
chore: add test.db to gitignore
ntindle Jan 20, 2026
7d6375f
style(classic): fix flake8 line length issue
ntindle Jan 20, 2026
a4d7b01
fix(classic): resolve all pyright type errors
ntindle Jan 20, 2026
5e22a18
chore: add classic benchmark reports and workspaces to gitignore
ntindle Jan 20, 2026
326554d
style(classic): update black to 24.10.0 and reformat
ntindle Jan 20, 2026
a67d475
fix(classic): handle parallel tool calls in action history
ntindle Jan 21, 2026
6faabef
fix(classic): always recreate Docker containers for code execution
ntindle Jan 21, 2026
57fbab5
feat(classic): add external benchmark adapters for GAIA, SWE-bench, a…
ntindle Jan 21, 2026
ff076b1
feat(classic): add platform blocks component for classic agents
ntindle Jan 21, 2026
68b9bd0
refactor(classic): use platform API for blocks instead of local loading
ntindle Jan 22, 2026
114856c
refactor(classic): improve prompt strategies with both general and co…
ntindle Jan 22, 2026
7dd181f
feat(classic): make CWD the default agent workspace for CLI mode
ntindle Jan 22, 2026
c671af8
feat(classic): add platform_blocks to Agent, enable via PLATFORM_API_KEY
ntindle Jan 22, 2026
0040636
fix(permissions): update wildcard handling for command patterns
ntindle Jan 26, 2026
791e1d8
fix(classic): resolve CI lint, type, and test failures
ntindle Jan 29, 2026
d8d87f2
Merge branch 'dev' into make-old-work
ntindle Jan 30, 2026
b3f3595
feat(classic): add interactive config command to CLI
ntindle Feb 3, 2026
60f506a
feat(classic): add Agent Skills (SKILL.md) support
ntindle Feb 4, 2026
6210b32
fix(classic): ensure user feedback on denied commands reaches the agent
ntindle Feb 4, 2026
f56abce
fix(classic): convert mid-conversation system messages to user messages
ntindle Feb 11, 2026
ac7de17
Merge remote-tracking branch 'origin/dev' into make-old-work
ntindle Feb 11, 2026
711f0da
fix(classic): fix CI failures - install Playwright and auto-detect model
ntindle Feb 12, 2026
1480183
fix(classic): fix flake8 line too long
ntindle Feb 12, 2026
24b38f2
fix(classic): skip S3 tests in CI due to MinIO compatibility issues
ntindle Feb 12, 2026
053b92e
fix(classic): add ANTHROPIC_API_KEY to AutoGPT CI workflow
ntindle Feb 12, 2026
9622ba8
fix(classic): use tmp_path for bulletin tests instead of hardcoded paths
ntindle Feb 12, 2026
d437e75
fix(classic): fix bulletin test - mock web to return content when tes…
ntindle Feb 12, 2026
b075495
Merge branch 'dev' into make-old-work
ntindle Feb 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
10 changes: 10 additions & 0 deletions .claude/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
  "permissions": {
    "allow": [
      "Read",
      "Grep",
      "Glob",
      "Bash(ls:*)",
      "Bash(cat:*)",
      "Bash(grep:*)",
      "Bash(find:*)",
      "Bash(git status:*)",
      "Bash(git diff:*)",
      "Bash(git log:*)",
      "Bash(git worktree:*)",
      "Bash(tmux:*)",
      "Bash(sleep:*)",
      "Bash(branchlet:*)"
    ]
  }
}
74 changes: 18 additions & 56 deletions .github/workflows/classic-autogpt-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@ on:
paths:
- '.github/workflows/classic-autogpt-ci.yml'
- 'classic/original_autogpt/**'
- 'classic/direct_benchmark/**'
- 'classic/forge/**'
pull_request:
branches: [ master, dev, release-* ]
paths:
- '.github/workflows/classic-autogpt-ci.yml'
- 'classic/original_autogpt/**'
- 'classic/direct_benchmark/**'
- 'classic/forge/**'

concurrency:
group: ${{ format('classic-autogpt-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
Expand All @@ -19,47 +23,22 @@ concurrency:
defaults:
run:
shell: bash
working-directory: classic/original_autogpt
working-directory: classic

jobs:
test:
permissions:
contents: read
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
platform-os: [ubuntu, macos, macos-arm64, windows]
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
runs-on: ubuntu-latest

steps:
# Quite slow on macOS (2~4 minutes to set up Docker)
# - name: Set up Docker (macOS)
# if: runner.os == 'macOS'
# uses: crazy-max/ghaction-setup-docker@v3

- name: Start MinIO service (Linux)
if: runner.os == 'Linux'
- name: Start MinIO service
working-directory: '.'
run: |
docker pull minio/minio:edge-cicd
docker run -d -p 9000:9000 minio/minio:edge-cicd

- name: Start MinIO service (macOS)
if: runner.os == 'macOS'
working-directory: ${{ runner.temp }}
run: |
brew install minio/stable/minio
mkdir data
minio server ./data &

# No MinIO on Windows:
# - Windows doesn't support running Linux Docker containers
# - It doesn't seem possible to start background processes on Windows. They are
# killed after the step returns.
# See: https://github.com/actions/runner/issues/598#issuecomment-2011890429

- name: Checkout repository
uses: actions/checkout@v4
with:
Expand All @@ -71,41 +50,23 @@ jobs:
git config --global user.name "Auto-GPT-Bot"
git config --global user.email "github-bot@agpt.co"

- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
python-version: "3.12"

- id: get_date
name: Get date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

- name: Set up Python dependency cache
# On Windows, unpacking cached dependencies takes longer than just installing them
if: runner.os != 'Windows'
uses: actions/cache@v4
with:
path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
key: poetry-${{ runner.os }}-${{ hashFiles('classic/original_autogpt/poetry.lock') }}

- name: Install Poetry (Unix)
if: runner.os != 'Windows'
run: |
curl -sSL https://install.python-poetry.org | python3 -

if [ "${{ runner.os }}" = "macOS" ]; then
PATH="$HOME/.local/bin:$PATH"
echo "$HOME/.local/bin" >> $GITHUB_PATH
fi

- name: Install Poetry (Windows)
if: runner.os == 'Windows'
shell: pwsh
run: |
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}

$env:PATH += ";$env:APPDATA\Python\Scripts"
echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
- name: Install Poetry
run: curl -sSL https://install.python-poetry.org | python3 -

- name: Install Python dependencies
run: poetry install
Expand All @@ -116,12 +77,13 @@ jobs:
--cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \
--numprocesses=logical --durations=10 \
--junitxml=junit.xml -o junit_family=legacy \
tests/unit tests/integration
original_autogpt/tests/unit original_autogpt/tests/integration
env:
CI: true
PLAIN_OUTPUT: True
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
S3_ENDPOINT_URL: ${{ runner.os != 'Windows' && 'http://127.0.0.1:9000' || '' }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
S3_ENDPOINT_URL: http://127.0.0.1:9000
AWS_ACCESS_KEY_ID: minioadmin
AWS_SECRET_ACCESS_KEY: minioadmin

Expand All @@ -135,11 +97,11 @@ jobs:
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
flags: autogpt-agent,${{ runner.os }}
flags: autogpt-agent

- name: Upload logs to artifact
if: always()
uses: actions/upload-artifact@v4
with:
name: test-logs
path: classic/original_autogpt/logs/
path: classic/logs/
36 changes: 13 additions & 23 deletions .github/workflows/classic-autogpts-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@ on:
- 'classic/original_autogpt/**'
- 'classic/forge/**'
- 'classic/benchmark/**'
- 'classic/run'
- 'classic/cli.py'
- 'classic/setup.py'
- '!**/*.md'
pull_request:
branches: [ master, dev, release-* ]
Expand All @@ -22,9 +19,6 @@ on:
- 'classic/original_autogpt/**'
- 'classic/forge/**'
- 'classic/benchmark/**'
- 'classic/run'
- 'classic/cli.py'
- 'classic/setup.py'
- '!**/*.md'

defaults:
Expand All @@ -35,13 +29,9 @@ defaults:
jobs:
serve-agent-protocol:
runs-on: ubuntu-latest
strategy:
matrix:
agent-name: [ original_autogpt ]
fail-fast: false
timeout-minutes: 20
env:
min-python-version: '3.10'
min-python-version: '3.12'
steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand All @@ -55,22 +45,22 @@ jobs:
python-version: ${{ env.min-python-version }}

- name: Install Poetry
working-directory: ./classic/${{ matrix.agent-name }}/
run: |
curl -sSL https://install.python-poetry.org | python -

- name: Run regression tests
- name: Install dependencies
run: poetry install

- name: Run smoke tests with direct-benchmark
run: |
./run agent start ${{ matrix.agent-name }}
cd ${{ matrix.agent-name }}
poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0
poetry run agbenchmark --test=WriteFile
poetry run direct-benchmark run \
--strategies one_shot \
--models claude \
--tests ReadFile,WriteFile \
--json
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AGENT_NAME: ${{ matrix.agent-name }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
HELICONE_CACHE_ENABLED: false
HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
REPORTS_FOLDER: ${{ format('../../reports/{0}', matrix.agent-name) }}
TELEMETRY_ENVIRONMENT: autogpt-ci
TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}
NONINTERACTIVE_MODE: "true"
CI: true
Loading
Loading