diff --git a/conductor/tracks.md b/conductor/tracks.md
new file mode 100644
index 000000000..f627ae228
--- /dev/null
+++ b/conductor/tracks.md
@@ -0,0 +1,15 @@
+# Conductor Tracks
+
+## [x] Phase 1: Critical Fixes & Stabilization
+- [x] 1.1 Fix Docker Build Failure (`dbus-uuidgen`)
+- [x] 1.2 Fix UI Proxy Crash (`Missing "target" option`)
+
+## [x] Phase 2: Performance Optimization
+- [x] 2.1 Optimize Shared Memory for Desktop Container
+
+## [x] Phase 3: Feature Expansion
+- [x] 3.1 Unified Model Adapter Pattern
+
+## [x] Phase 4: Reliability & Intelligence
+- [x] 4.1 "Self-Healing" Loop Detection
+- [x] 4.2 Structured "Playbooks" (Backend Support)
\ No newline at end of file
diff --git a/packages/bytebot-agent/prisma/schema.prisma b/packages/bytebot-agent/prisma/schema.prisma
index ae03635bd..ecf42b2ef 100644
--- a/packages/bytebot-agent/prisma/schema.prisma
+++ b/packages/bytebot-agent/prisma/schema.prisma
@@ -80,6 +80,16 @@ model Summary {
   childSummaries Summary[] @relation("SummaryHierarchy")
 }
 
+model Playbook {
+  id                String   @id @default(uuid())
+  name              String
+  description       String?
+  promptTemplate    String
+  requiredVariables String[] // Array of variable names like ["month", "year"]
+  createdAt         DateTime @default(now())
+  updatedAt         DateTime @updatedAt
+}
+
 model Message {
   id String @id @default(uuid())
   // Content field follows Anthropic's content blocks structure
diff --git a/packages/bytebot-agent/src/agent/agent.processor.ts b/packages/bytebot-agent/src/agent/agent.processor.ts
index c48912fae..b41e580e7 100644
--- a/packages/bytebot-agent/src/agent/agent.processor.ts
+++ b/packages/bytebot-agent/src/agent/agent.processor.ts
@@ -40,6 +40,8 @@ import { SummariesService } from '../summaries/summaries.service';
 import { handleComputerToolUse } from './agent.computer-use';
 import { ProxyService } from '../proxy/proxy.service';
 
+import { ConfigService } from '@nestjs/config';
+
 @Injectable()
 export class AgentProcessor {
   private readonly logger = new Logger(AgentProcessor.name);
@@ -47,6 +49,7 @@ export class AgentProcessor {
   private isProcessing = false;
   private abortController: AbortController | null = null;
   private services: Record<string, BytebotAgentService> = {};
+  private recentActions: string[] = [];
 
   constructor(
     private readonly tasksService: TasksService,
@@ -57,6 +60,7 @@
     private readonly googleService: GoogleService,
     private readonly proxyService: ProxyService,
     private readonly inputCaptureService: InputCaptureService,
+    private readonly configService: ConfigService,
   ) {
     this.services = {
       anthropic: this.anthropicService,
@@ -99,6 +103,7 @@
     if (this.currentTaskId === taskId && this.isProcessing) {
       this.logger.log(`Task resume event received for task ID: ${taskId}`);
       this.abortController = new AbortController();
+      this.recentActions = []; // Reset loop detection on resume
 
       void this.runIteration(taskId);
     }
@@ -122,6 +127,7 @@
     this.isProcessing = true;
     this.currentTaskId = taskId;
     this.abortController = new AbortController();
+    this.recentActions = []; // Reset loop detection
 
     // Kick off the first iteration without blocking the caller
     void this.runIteration(taskId);
@@ -185,7 +191,24 @@
       const model = task.model as unknown as BytebotAgentModel;
       let agentResponse: BytebotAgentResponse;
 
-      const service = this.services[model.provider];
+      let service = this.services[model.provider];
+
+      // Check for forced proxy usage
+      if (this.configService.get('LLM_FORCE_PROXY', false)) {
+        service = this.proxyService;
+      }
+
+      // Fallback for unknown providers if enabled
+      if (
+        !service &&
+        this.configService.get('LLM_USE_PROXY_FALLBACK', true)
+      ) {
+        this.logger.log(
+          `Using proxy service for unknown provider: ${model.provider}`,
+        );
+        service = this.proxyService;
+      }
+
       if (!service) {
         this.logger.warn(
           `No service found for model provider: ${model.provider}`,
@@ -308,6 +331,31 @@
       for (const block of messageContentBlocks) {
         if (isComputerToolUseContentBlock(block)) {
+          // Loop Detection Logic
+          const actionSignature = `${block.name}:${JSON.stringify(block.input)}`;
+          this.recentActions.push(actionSignature);
+          if (this.recentActions.length > 5) {
+            this.recentActions.shift();
+          }
+
+          // Check for exact repetition of the last 3 actions
+          if (
+            this.recentActions.length >= 3 &&
+            this.recentActions[this.recentActions.length - 1] ===
+              this.recentActions[this.recentActions.length - 2] &&
+            this.recentActions[this.recentActions.length - 2] ===
+              this.recentActions[this.recentActions.length - 3]
+          ) {
+            this.logger.warn(
+              `Loop detected for task ${taskId}. Action repeated 3 times: ${block.name}`,
+            );
+            // In the future, we can inject a "system" message telling the LLM to stop.
+            // For now, we interrupt the loop to prevent infinite billing/hanging.
+            throw new Error(
+              'Self-Correction: Loop detected. The agent is repeating the same action.',
+            );
+          }
+
           const result = await handleComputerToolUse(block, this.logger);
           generatedToolResults.push(result);
         }
diff --git a/packages/bytebot-agent/src/app.module.ts b/packages/bytebot-agent/src/app.module.ts
index 95f84a442..1aefb250d 100644
--- a/packages/bytebot-agent/src/app.module.ts
+++ b/packages/bytebot-agent/src/app.module.ts
@@ -13,6 +13,7 @@ import { ScheduleModule } from '@nestjs/schedule';
 import { EventEmitterModule } from '@nestjs/event-emitter';
 import { SummariesModule } from './summaries/summaries.modue';
 import { ProxyModule } from './proxy/proxy.module';
+import { PlaybooksModule } from './playbooks/playbooks.module';
 
 @Module({
   imports: [
@@ -29,6 +30,7 @@ import { ProxyModule } from './proxy/proxy.module';
     OpenAIModule,
     GoogleModule,
     ProxyModule,
+    PlaybooksModule,
     PrismaModule,
   ],
   controllers: [AppController],
diff --git a/packages/bytebot-agent/src/config.ts b/packages/bytebot-agent/src/config.ts
new file mode 100644
index 000000000..28bd3d1b3
--- /dev/null
+++ b/packages/bytebot-agent/src/config.ts
@@ -0,0 +1,19 @@
+import { BytebotAgentModel } from './agent/agent.types';
+
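+// Illustrative LLM_CONFIG_JSON value (a JSON array of BytebotAgentModel entries).
+// The field names below assume the interface exposes `provider`, `name`, and
+// `title`; adjust if the definition in agent.types differs.
+//
+//   LLM_CONFIG_JSON='[{"provider":"openrouter","name":"openrouter/x-ai/grok-beta","title":"Grok (OpenRouter)"}]'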
+export const getCustomModels = (): BytebotAgentModel[] => {
+  const llmConfigJson = process.env.LLM_CONFIG_JSON;
+  if (!llmConfigJson) {
+    return [];
+  }
+
+  try {
+    const customModels = JSON.parse(llmConfigJson);
+    if (Array.isArray(customModels)) {
+      return customModels as BytebotAgentModel[];
+    }
+    return [];
+  } catch (error) {
+    console.error('Failed to parse LLM_CONFIG_JSON', error);
+    return [];
+  }
+};
diff --git a/packages/bytebot-agent/src/playbooks/playbooks.controller.ts b/packages/bytebot-agent/src/playbooks/playbooks.controller.ts
new file mode 100644
index 000000000..de41e5e82
--- /dev/null
+++ b/packages/bytebot-agent/src/playbooks/playbooks.controller.ts
@@ -0,0 +1,63 @@
+import {
+  Controller,
+  Get,
+  Post,
+  Body,
+  Param,
+  Delete,
+  Put,
+  NotFoundException,
+} from '@nestjs/common';
+import { PlaybooksService } from './playbooks.service';
+import { Playbook } from '@prisma/client';
+
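+// Illustrative request shape, matching the DTO below; the {{month}}-style
+// placeholders are only an example - template substitution is not implemented
+// by this controller.
+//
+//   POST /playbooks
+//   {
+//     "name": "Monthly expense report",
+//     "promptTemplate": "Prepare the expense report for {{month}} {{year}}",
+//     "requiredVariables": ["month", "year"]
+//   }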
+@Controller('playbooks')
+export class PlaybooksController {
+  constructor(private readonly playbooksService: PlaybooksService) {}
+
+  @Get()
+  findAll(): Promise<Playbook[]> {
+    return this.playbooksService.findAll();
+  }
+
+  @Get(':id')
+  async findOne(@Param('id') id: string): Promise<Playbook> {
+    const playbook = await this.playbooksService.findOne(id);
+    if (!playbook) {
+      throw new NotFoundException(`Playbook with ID ${id} not found`);
+    }
+    return playbook;
+  }
+
+  @Post()
+  create(
+    @Body()
+    createPlaybookDto: {
+      name: string;
+      description?: string;
+      promptTemplate: string;
+      requiredVariables: string[];
+    },
+  ): Promise<Playbook> {
+    return this.playbooksService.create(createPlaybookDto);
+  }
+
+  @Put(':id')
+  update(
+    @Param('id') id: string,
+    @Body()
+    updatePlaybookDto: {
+      name?: string;
+      description?: string;
+      promptTemplate?: string;
+      requiredVariables?: string[];
+    },
+  ): Promise<Playbook> {
+    return this.playbooksService.update(id, updatePlaybookDto);
+  }
+
+  @Delete(':id')
+  remove(@Param('id') id: string): Promise<Playbook> {
+    return this.playbooksService.remove(id);
+  }
+}
diff --git a/packages/bytebot-agent/src/playbooks/playbooks.module.ts b/packages/bytebot-agent/src/playbooks/playbooks.module.ts
new file mode 100644
index 000000000..1e86fa442
--- /dev/null
+++ b/packages/bytebot-agent/src/playbooks/playbooks.module.ts
@@ -0,0 +1,12 @@
+import { Module } from '@nestjs/common';
+import { PlaybooksService } from './playbooks.service';
+import { PlaybooksController } from './playbooks.controller';
+import { PrismaModule } from '../prisma/prisma.module';
+
+@Module({
+  imports: [PrismaModule],
+  controllers: [PlaybooksController],
+  providers: [PlaybooksService],
+  exports: [PlaybooksService],
+})
+export class PlaybooksModule {}
diff --git a/packages/bytebot-agent/src/playbooks/playbooks.service.ts b/packages/bytebot-agent/src/playbooks/playbooks.service.ts
new file mode 100644
index 000000000..524374daa
--- /dev/null
+++ b/packages/bytebot-agent/src/playbooks/playbooks.service.ts
@@ -0,0 +1,52 @@
+import { Injectable } from '@nestjs/common';
+import { PrismaService } from '../prisma/prisma.service';
+import { Playbook } from '@prisma/client';
+
+@Injectable()
+export class PlaybooksService {
+  constructor(private readonly prisma: PrismaService) {}
+
+  async findAll(): Promise<Playbook[]> {
+    return this.prisma.playbook.findMany({
+      orderBy: { createdAt: 'desc' },
+    });
+  }
+
+  async findOne(id: string): Promise<Playbook | null> {
+    return this.prisma.playbook.findUnique({
+      where: { id },
+    });
+  }
+
+  async create(data: {
+    name: string;
+    description?: string;
+    promptTemplate: string;
+    requiredVariables: string[];
+  }): Promise<Playbook> {
+    return this.prisma.playbook.create({
+      data,
+    });
+  }
+
+  async update(
+    id: string,
+    data: {
+      name?: string;
+      description?: string;
+      promptTemplate?: string;
+      requiredVariables?: string[];
+    },
+  ): Promise<Playbook> {
+    return this.prisma.playbook.update({
+      where: { id },
+      data,
+    });
+  }
+
+  async remove(id: string): Promise<Playbook> {
+    return this.prisma.playbook.delete({
+      where: { id },
+    });
+  }
+}
diff --git a/packages/bytebot-agent/src/tasks/tasks.controller.ts b/packages/bytebot-agent/src/tasks/tasks.controller.ts
index 982c4a4f1..5ed675679 100644
--- a/packages/bytebot-agent/src/tasks/tasks.controller.ts
+++ b/packages/bytebot-agent/src/tasks/tasks.controller.ts
@@ -19,6 +19,7 @@ import { ANTHROPIC_MODELS } from '../anthropic/anthropic.constants';
 import { OPENAI_MODELS } from '../openai/openai.constants';
 import { GOOGLE_MODELS } from '../google/google.constants';
 import { BytebotAgentModel } from 'src/agent/agent.types';
+import { getCustomModels } from '../config';
 
 const geminiApiKey = process.env.GEMINI_API_KEY;
 const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
@@ -30,6 +31,7 @@ const models = [
   ...(anthropicApiKey ? ANTHROPIC_MODELS : []),
   ...(openaiApiKey ? OPENAI_MODELS : []),
   ...(geminiApiKey ? GOOGLE_MODELS : []),
+  ...getCustomModels(),
 ];
 
 @Controller('tasks')
diff --git a/packages/bytebot-ui/server.ts b/packages/bytebot-ui/server.ts
index c942f7799..0b64b09b4 100644
--- a/packages/bytebot-ui/server.ts
+++ b/packages/bytebot-ui/server.ts
@@ -13,9 +13,17 @@ const hostname = process.env.HOSTNAME || "localhost";
 const port = parseInt(process.env.PORT || "9992", 10);
 
 // Backend URLs
-const BYTEBOT_AGENT_BASE_URL = process.env.BYTEBOT_AGENT_BASE_URL;
+const BYTEBOT_AGENT_BASE_URL =
+  process.env.BYTEBOT_AGENT_BASE_URL || "http://bytebot-agent:9991";
 const BYTEBOT_DESKTOP_VNC_URL = process.env.BYTEBOT_DESKTOP_VNC_URL;
 
+if (!BYTEBOT_AGENT_BASE_URL) {
+  console.error(
+    "CRITICAL: BYTEBOT_AGENT_BASE_URL environment variable is missing.",
+  );
+  process.exit(1);
+}
+
 const app = next({ dev, hostname, port });
 
 app
diff --git a/plan.md b/plan.md
new file mode 100755
index 000000000..f55c268bb
--- /dev/null
+++ b/plan.md
@@ -0,0 +1,178 @@
+file:Contribution Plan:contribution_plan.md
+
+# Contribution Plan: Bytebot Stabilization & Enhancement
+
+## Overview
+
+**Repository:** `bytebot-ai/bytebot`
+
+**Goal:** Stabilize the build process, fix critical runtime errors, optimize performance, and expand model support.
+
+This plan details the steps to resolve the currently reported blocking issues (Docker build failures, UI crashes) and lays out a roadmap for high-demand feature requests (Universal Model Support).
+
+---
+
+## Phase 1: Critical Fixes & Stabilization
+
+**Objective:** Resolve build-time and boot-time failures that prevent users from deploying the application.
+
+### 1.1 Fix Docker Build Failure (`dbus-uuidgen`)
+
+**Issue:** #172 - `dbus-uuidgen: not found` during `docker build`.
+
+**Severity:** Critical (blocks deployment).
+
+**Root Cause:** The `bytebot-desktop` Dockerfile attempts to generate a machine ID using `dbus-uuidgen` without having the `dbus` package installed in the base image.
+
+**Task Description:**
+Modify `packages/bytebot-desktop/Dockerfile` to ensure `dbus` is installed before the machine ID generation step.
+
+**Technical Implementation Detail:**
+
+```dockerfile
+# File: packages/bytebot-desktop/Dockerfile
+
+# Locate the apt-get install section
+RUN apt-get update && apt-get install -y \
+    # ... existing packages ...
+    dbus \
+    # ...
+    && rm -rf /var/lib/apt/lists/*
+
+# The existing command will now succeed:
+# RUN mkdir -p /run/dbus && dbus-uuidgen --ensure=/etc/machine-id
+
+```
+
+**Verification/Testing:**
+
+* Run `docker compose build bytebot-desktop`.
+* Verify the build completes without the `exit code: 127` error.
+
+### 1.2 Fix UI Proxy Crash (`Missing "target" option`)
+
+**Issue:** #156 - `[HPM] Missing "target" option` in `bytebot-ui`.
+
+**Severity:** High (UI fails to start).
+
+**Root Cause:** The `http-proxy-middleware` in `packages/bytebot-ui/server.ts` is initialized with an undefined `target` because the environment variable (likely `AGENT_URL` or `API_BASE_URL`) is not correctly passed or read.
+
+**Task Description:**
+
+1. Harden `server.ts` to provide a fallback target or a descriptive error.
+2. Update `docker/docker-compose.yml` to explicitly pass `AGENT_URL` to the UI service.
+
+**Technical Implementation Detail:**
+
+```typescript
+// File: packages/bytebot-ui/server.ts
+
+// Current logic likely resembles:
+// app.use('/api', createProxyMiddleware({ target: process.env.AGENT_URL, ... }));
+
+// Proposed fix:
+const proxyTarget = process.env.AGENT_URL || process.env.API_BASE_URL || 'http://bytebot-agent:3000';
+
+if (!proxyTarget) {
+  console.error("CRITICAL: AGENT_URL environment variable is missing.");
+  process.exit(1);
+}
+
+app.use('/api', createProxyMiddleware({
+  target: proxyTarget,
+  changeOrigin: true,
+  // ...
+}));
+
+```
+
+```yaml
+# File: docker/docker-compose.yml
+services:
+  bytebot-ui:
+    environment:
+      - AGENT_URL=http://bytebot-agent:3000
+      # ...
+
+```
+
+**Verification/Testing:**
+
+* Run `docker compose up bytebot-ui`.
+* Confirm from the startup logs that the proxy target resolved to `http://bytebot-agent:3000`.
+* Verify the UI is accessible at `http://localhost:9992`.
+
+---
+
+## Phase 2: Performance Optimization
+
+**Objective:** Address user reports of "sluggish" task execution and VNC delays (Issue #150).
+
+### 2.1 Optimize Shared Memory for Desktop Container
+
+**Issue:** High VNC latency and browser crashing.
+
+**Root Cause:** Docker caps `/dev/shm` at 64 MB by default; Chrome/Firefox and the VNC session inside the `bytebot-desktop` container exhaust it, which causes poor rendering performance and crashes.
+
+**Task Description:**
+Increase the `shm_size` for the `bytebot-desktop` service in the compose file.
+
+**Technical Implementation Detail:**
+
+```yaml
+# File: docker/docker-compose.yml
+services:
+  bytebot-desktop:
+    image: ghcr.io/bytebot-ai/bytebot-desktop:edge
+    shm_size: '2gb' # Critical for browser/VNC performance
+    # ...
+
+```
+
+**Verification/Testing:**
+
+* Deploy the stack.
+* Open the "Desktop" view in the UI.
+* Visually verify that dragging windows or scrolling in Firefox is smoother.
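+* Optionally, check the limit from inside the container with `docker compose exec bytebot-desktop df -h /dev/shm`; it should report roughly 2.0G instead of the 64M default.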
+
+---
+
+## Phase 3: Feature Expansion
+
+**Objective:** Implement "Universal Model Support" to allow users to bring any LLM (OpenRouter, Grok, local Ollama) without code changes.
+
+### 3.1 Unified Model Adapter Pattern
+
+**Issue:** #144 - Users want to use providers other than the hardcoded defaults (OpenAI/Claude).
+
+**Task Description:**
+
+Refactor the agent's model service to use a configuration-driven Adapter pattern. Instead of `if (provider === 'openai')`, use a map of strategy classes.
+
+**Technical Implementation Detail:**
+
+1. **Interface Definition:**
+```typescript
+interface LLMProvider {
+  generateResponse(prompt: string, images?: string[]): Promise<string>;
+}
+
+```
+
+2. **Configuration:**
+Update `packages/bytebot-agent/src/config.ts` to read a generic `LLM_CONFIG_JSON` env var, allowing users to define custom endpoints and model names (see the example after this list).
+3. **LiteLLM Integration:**
+Fully leverage the existing `bytebot-llm-proxy` (LiteLLM) by making the agent default to routing *all* requests through the proxy when a specific flag is set, rather than maintaining separate client libraries for each vendor.
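+
+For illustration, the two pieces could be wired together in `docker/docker-compose.yml` roughly as follows. This is a sketch rather than the final wiring: the model entry fields (`provider`, `name`, `title`) assume the current `BytebotAgentModel` shape, the model identifier is only an example, and `LLM_FORCE_PROXY` / `LLM_USE_PROXY_FALLBACK` correspond to the flags read in `agent.processor.ts`.
+
+```yaml
+# File: docker/docker-compose.yml (illustrative sketch)
+services:
+  bytebot-agent:
+    environment:
+      # Adds a custom model to the list returned by the tasks controller
+      - 'LLM_CONFIG_JSON=[{"provider":"openrouter","name":"openrouter/x-ai/grok-beta","title":"Grok (OpenRouter)"}]'
+      # Unknown providers (e.g. "openrouter") fall back to the LiteLLM proxy
+      - LLM_USE_PROXY_FALLBACK=true
+      # Or force every request through the proxy regardless of provider
+      # - LLM_FORCE_PROXY=true
+
+```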
+
+**Verification/Testing:**
+
+* Configure `LLM_PROVIDER=openrouter` and `OPENROUTER_API_KEY=...`.
+* Run a simple task ("Open Calculator").
+* Verify in the logs that the request was routed through the generic provider adapter.
+
+The analysis of `bytebot-ai/bytebot` reveals two critical blocking issues that should be addressed immediately: a Docker build failure caused by a missing `dbus` dependency (Issue #172) and a UI startup crash due to a missing proxy configuration (Issue #156).
+
+I have generated a comprehensive **Contribution Plan** (see the plan above) that prioritizes these fixes to stabilize the project. Following stabilization, the plan outlines performance optimizations for the VNC environment and a roadmap for a "Universal Model Adapter" to support the community's request for more LLM providers.
+
+For a deeper dive into the "First-Ever AI Operating System" claims and a visual overview of Bytebot's architecture, you might find this video helpful: [ByteBot OS: First-Ever AI Operating System IS INSANE!](https://www.youtube.com/watch?v=UxoDxG7bah4).
\ No newline at end of file