Skip to content

Commit 99dd1a8

Browse files
committed
Switch desktop execution to Sonnet 4.5
1 parent 5267695 commit 99dd1a8

File tree

4 files changed

+146 -13 lines changed

4 files changed

+146 -13 lines changed

packages/bytebot-agent/src/proxy/proxy.service.spec.ts

Lines changed: 129 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -252,6 +252,77 @@ describe('ProxyService endpoint failover', () => {
252252
expect(localCreate).toHaveBeenCalledTimes(0);
253253
});
254254

255+
it('treats claude-sonnet-4-5 as desktop-vision for endpoint ordering', async () => {
256+
const eventEmitter = { emit: jest.fn() };
257+
const llmResilienceService = makeResilience(eventEmitter);
258+
259+
const configService = {
260+
get: jest.fn((key: string) => {
261+
const map: Record<string, string> = {
262+
BYTEBOT_LLM_PROXY_URL: 'http://local-proxy:4000',
263+
BYTEBOT_LLM_PROXY_ENDPOINTS:
264+
'http://local-proxy:4000,http://global-proxy:4000',
265+
// Desktop-vision ordering is global-first
266+
BYTEBOT_LLM_PROXY_DESKTOP_VISION_ENDPOINTS:
267+
'http://global-proxy:4000,http://local-proxy:4000',
268+
BYTEBOT_LLM_PROXY_API_KEY: 'dummy',
269+
BYTEBOT_LLM_PROXY_ENDPOINT_PREFLIGHT_ENABLED: 'false',
270+
};
271+
return map[key] ?? '';
272+
}),
273+
} as any;
274+
275+
const localCreate = jest.fn(async () => {
276+
const error = new Error('connect ECONNREFUSED 10.0.0.1:4000');
277+
(error as any).code = 'ECONNREFUSED';
278+
throw error;
279+
});
280+
const globalCreate = jest.fn(async () => {
281+
return {
282+
model: 'claude-sonnet-4-5',
283+
choices: [{ message: { content: 'ok' } }],
284+
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
285+
};
286+
});
287+
288+
class TestProxyService extends ProxyService {
289+
protected override createOpenAIClient(baseURL: string): any {
290+
if (baseURL.includes('local-proxy')) {
291+
return { chat: { completions: { create: localCreate } } };
292+
}
293+
return { chat: { completions: { create: globalCreate } } };
294+
}
295+
}
296+
297+
const service = new TestProxyService(
298+
configService,
299+
llmResilienceService,
300+
eventEmitter as any,
301+
);
302+
303+
const messages = [
304+
{
305+
id: 'm1',
306+
createdAt: new Date(),
307+
updatedAt: new Date(),
308+
taskId: 't1',
309+
summaryId: null,
310+
role: Role.USER,
311+
content: [{ type: MessageContentType.Text, text: 'hello' }],
312+
},
313+
] as any;
314+
315+
await service.generateMessage(
316+
'system',
317+
messages,
318+
'claude-sonnet-4-5',
319+
{ useTools: false },
320+
);
321+
322+
expect(globalCreate).toHaveBeenCalledTimes(1);
323+
expect(localCreate).toHaveBeenCalledTimes(0);
324+
});
325+
255326
it('disables LiteLLM caching for desktop-vision model requests', async () => {
256327
const eventEmitter = { emit: jest.fn() };
257328
const llmResilienceService = makeResilience(eventEmitter);
@@ -310,6 +381,64 @@ describe('ProxyService endpoint failover', () => {
310381
expect(create.mock.calls[0][0].cache).toEqual({ 'no-cache': true });
311382
});
312383

384+
it('disables LiteLLM caching for claude-sonnet-4-5 desktop-vision requests', async () => {
385+
const eventEmitter = { emit: jest.fn() };
386+
const llmResilienceService = makeResilience(eventEmitter);
387+
388+
const configService = {
389+
get: jest.fn((key: string) => {
390+
const map: Record<string, string> = {
391+
BYTEBOT_LLM_PROXY_URL: 'http://proxy:4000',
392+
BYTEBOT_LLM_PROXY_ENDPOINTS: 'http://proxy:4000',
393+
BYTEBOT_LLM_PROXY_DESKTOP_VISION_ENDPOINTS: 'http://proxy:4000',
394+
BYTEBOT_LLM_PROXY_API_KEY: 'dummy',
395+
BYTEBOT_LLM_PROXY_ENDPOINT_PREFLIGHT_ENABLED: 'false',
396+
};
397+
return map[key] ?? '';
398+
}),
399+
} as any;
400+
401+
const create = jest.fn(async (request: any) => {
402+
return {
403+
model: 'claude-sonnet-4-5',
404+
choices: [{ message: { content: 'ok' } }],
405+
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
406+
__request: request,
407+
};
408+
});
409+
410+
class TestProxyService extends ProxyService {
411+
protected override createOpenAIClient(): any {
412+
return { chat: { completions: { create } } };
413+
}
414+
}
415+
416+
const service = new TestProxyService(
417+
configService,
418+
llmResilienceService,
419+
eventEmitter as any,
420+
);
421+
422+
const messages = [
423+
{
424+
id: 'm1',
425+
createdAt: new Date(),
426+
updatedAt: new Date(),
427+
taskId: 't1',
428+
summaryId: null,
429+
role: Role.USER,
430+
content: [{ type: MessageContentType.Text, text: 'hello' }],
431+
},
432+
] as any;
433+
434+
await service.generateMessage('system', messages, 'claude-sonnet-4-5', {
435+
useTools: false,
436+
});
437+
438+
expect(create).toHaveBeenCalledTimes(1);
439+
expect(create.mock.calls[0][0].cache).toEqual({ 'no-cache': true });
440+
});
441+
313442
it('does not replay Thinking blocks into Chat Completions history', async () => {
314443
const eventEmitter = { emit: jest.fn() };
315444
const llmResilienceService = makeResilience(eventEmitter);

packages/bytebot-agent/src/proxy/proxy.service.ts

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -325,8 +325,12 @@ export class ProxyService implements BytebotAgentService {
325325
const trimmed = (model || '').trim();
326326
return (
327327
trimmed === 'desktop-vision' ||
328+
trimmed === 'claude-sonnet-4-5' ||
329+
trimmed === 'claude-opus-4-5' ||
328330
trimmed === 'qwen3-vl-32b' ||
329-
trimmed.endsWith('/qwen3-vl-32b')
331+
trimmed.endsWith('/qwen3-vl-32b') ||
332+
trimmed.endsWith('/claude-sonnet-4-5-20250929') ||
333+
trimmed.endsWith('/claude-opus-4-5-20251101')
330334
);
331335
}
332336

packages/bytebot-temporal-worker/src/activities/planning.activities.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
*
1414
* Model Flow (Phase 13):
1515
* - Planning (planGoal, refinePlan): Uses PLANNING_MODEL (default: gpt-oss-120b)
16-
* - Execution (via activities): Uses EXECUTION_MODEL (default: qwen3-vl-32b when available)
16+
* - Execution (via activities): Uses EXECUTION_MODEL (default: claude-sonnet-4-5)
1717
* - Fallback: Uses FALLBACK_MODEL (default: claude-sonnet-4-5 - verified in LiteLLM)
1818
*
1919
* Agent Paradigm (Phase 13.3):
@@ -57,10 +57,10 @@ const LLM_API_KEY = process.env.LLM_API_KEY ?? process.env.OPENAI_API_KEY ?? '';
5757
const PLANNING_TIMEOUT_MS = parseInt(process.env.PLANNING_TIMEOUT_MS ?? '120000', 10);
5858

5959
// Phase 13: In-house model configuration with proper fallback
60-
// Use gpt-oss-120b for planning (oversight), qwen3-vl-32b for execution (when available)
60+
// Use gpt-oss-120b for planning (oversight), claude-sonnet-4-5 for execution
6161
// Fallback uses claude-sonnet-4-5 which is configured in LiteLLM
6262
const PLANNING_MODEL = process.env.PLANNING_MODEL ?? 'gpt-oss-120b';
63-
const EXECUTION_MODEL = process.env.EXECUTION_MODEL ?? 'qwen3-vl-32b';
63+
const EXECUTION_MODEL = process.env.EXECUTION_MODEL ?? 'claude-sonnet-4-5';
6464
// Phase 13: Fixed fallback model - gpt-4 was not in LiteLLM config
6565
// Using claude-sonnet-4-5 as fallback (reliable, available in LiteLLM)
6666
const FALLBACK_MODEL = process.env.FALLBACK_MODEL ?? 'claude-sonnet-4-5';

packages/bytebot-workflow-orchestrator/src/services/task-dispatch.service.ts

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,9 @@ import { hasDesktopExecutionTool } from '../contracts/planner-tools';
4949
// - gpt-oss-120b is running (1/1 replicas) with 131K context window
5050
// - 120B parameter model provides superior reasoning for browser automation
5151
//
52-
// Routes tasks to appropriate in-house models based on execution surface:
52+
// Routes tasks to appropriate in-house/external models based on execution surface:
5353
// - Browser tasks (requiresDesktop: false) → gpt-oss-120b (high-capability reasoning)
54-
// - Desktop tasks (requiresDesktop: true) → desktop-vision (vision-capable model group with fallbacks)
54+
// - Desktop tasks (requiresDesktop: true) → claude-sonnet-4-5 (vision-capable model)
5555
// - Fallback → claude-sonnet-4-5 (external API for complex reasoning)
5656
//
5757
// Model configuration for bytebot-agent's task schema
@@ -63,18 +63,18 @@ const BROWSER_TASK_MODEL = {
6363
};
6464

6565
const DESKTOP_TASK_MODEL = {
66-
// IMPORTANT: Use the model group alias (not a single backend) to ensure fallbacks are always enabled.
67-
// See: docs/bytebot/contracts/VISION_MODEL_GROUPS.md
68-
name: 'desktop-vision',
69-
title: 'desktop-vision',
66+
// Phase 15: Switch desktop execution to Sonnet 4.5 for reliability.
67+
// Note: Proxy layer treats claude-sonnet-4-5 as a desktop-vision model for endpoint ordering + no-cache.
68+
name: 'claude-sonnet-4-5',
69+
title: 'claude-sonnet-4-5',
7070
provider: 'proxy',
71-
contextWindow: 32000,
71+
contextWindow: 128000,
7272
};
7373

7474
// Fallback model for tasks that don't specify execution surface
7575
// or when in-house models fail
7676
const FALLBACK_TASK_MODEL = {
77-
name: 'anthropic/claude-sonnet-4-5-20250929',
77+
name: 'claude-sonnet-4-5',
7878
title: 'claude-sonnet-4-5',
7979
provider: 'proxy',
8080
contextWindow: 128000,
@@ -335,7 +335,7 @@ export class TaskDispatchService implements OnModuleInit {
335335
*/
336336
private selectModelForTask(requiresDesktop?: boolean): typeof BROWSER_TASK_MODEL {
337337
if (requiresDesktop === true) {
338-
this.logger.debug('Selected DESKTOP_TASK_MODEL (desktop-vision) for desktop task');
338+
this.logger.debug('Selected DESKTOP_TASK_MODEL (claude-sonnet-4-5) for desktop task');
339339
return DESKTOP_TASK_MODEL;
340340
}
341341

0 commit comments

Comments
 (0)