From 0e644f102ff0b9e74213f502c9c41cda7e7776e5 Mon Sep 17 00:00:00 2001 From: cte Date: Mon, 24 Feb 2025 12:48:55 -0800 Subject: [PATCH 1/5] Default to Claude 3.7 where appropriate --- .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- src/core/Cline.ts | 2 +- src/core/webview/ClineProvider.ts | 50 +++---------------- src/shared/api.ts | 24 +++++---- src/test/suite/index.ts | 2 +- .../components/settings/GlamaModelPicker.tsx | 2 +- .../settings/OpenRouterModelPicker.tsx | 2 +- .../settings/RequestyModelPicker.tsx | 2 +- .../src/components/settings/SettingsView.tsx | 2 +- 9 files changed, 26 insertions(+), 62 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 501180c3d5..dc66b4f390 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -33,7 +33,7 @@ body: id: model attributes: label: Which Model are you using? - description: Please specify the model you're using (e.g. Claude 3.5 Sonnet) + description: Please specify the model you're using (e.g. Claude 3.7 Sonnet) validations: required: true - type: textarea diff --git a/src/core/Cline.ts b/src/core/Cline.ts index 12cf062406..f1f5e41b33 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -2792,7 +2792,7 @@ export class Cline { "mistake_limit_reached", this.api.getModel().id.includes("claude") ? `This may indicate a failure in his thought process or inability to use a tool properly, which can be mitigated with some user guidance (e.g. "Try breaking down the task into smaller steps").` - : "Roo Code uses complex prompts and iterative task execution that may be challenging for less capable models. For best results, it's recommended to use Claude 3.5 Sonnet for its advanced agentic coding capabilities.", + : "Roo Code uses complex prompts and iterative task execution that may be challenging for less capable models. For best results, it's recommended to use Claude 3.7 Sonnet for its advanced agentic coding capabilities.", ) if (response === "messageResponse") { userContent.push( diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 6790224eca..b4819d9683 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1900,23 +1900,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { } const response = await axios.get("https://router.requesty.ai/v1/models", config) - /* - { - "id": "anthropic/claude-3-5-sonnet-20240620", - "object": "model", - "created": 1738243330, - "owned_by": "system", - "input_price": 0.000003, - "caching_price": 0.00000375, - "cached_price": 3E-7, - "output_price": 0.000015, - "max_output_tokens": 8192, - "context_window": 200000, - "supports_caching": true, - "description": "Anthropic's most intelligent model. Highest level of intelligence and capability" - }, - } - */ + if (response.data) { const rawModels = response.data.data const parsePrice = (price: any) => { @@ -2116,34 +2100,10 @@ export class ClineProvider implements vscode.WebviewViewProvider { ) const models: Record = {} + try { const response = await axios.get("https://openrouter.ai/api/v1/models") - /* - { - "id": "anthropic/claude-3.5-sonnet", - "name": "Anthropic: Claude 3.5 Sonnet", - "created": 1718841600, - "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal", - "context_length": 200000, - "architecture": { - "modality": "text+image-\u003Etext", - "tokenizer": "Claude", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000003", - "completion": "0.000015", - "image": "0.0048", - "request": "0" - }, - "top_provider": { - "context_length": 200000, - "max_completion_tokens": 8192, - "is_moderated": true - }, - "per_request_limits": null - }, - */ + if (response.data?.data) { const rawModels = response.data.data const parsePrice = (price: any) => { @@ -2152,6 +2112,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { } return undefined } + for (const rawModel of rawModels) { const modelInfo: ModelInfo = { maxTokens: rawModel.top_provider?.max_completion_tokens, @@ -2164,9 +2125,10 @@ export class ClineProvider implements vscode.WebviewViewProvider { } switch (rawModel.id) { + case "anthropic/claude-3.7-sonnet": case "anthropic/claude-3.5-sonnet": case "anthropic/claude-3.5-sonnet:beta": - // NOTE: this needs to be synced with api.ts/openrouter default model info + // NOTE: this needs to be synced with api.ts/openrouter default model info. modelInfo.supportsComputerUse = true modelInfo.supportsPromptCache = true modelInfo.cacheWritesPrice = 3.75 diff --git a/src/shared/api.ts b/src/shared/api.ts index 23fe60696c..4619d2930d 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -93,7 +93,7 @@ export interface ModelInfo { // Anthropic // https://docs.anthropic.com/en/docs/about-claude/models export type AnthropicModelId = keyof typeof anthropicModels -export const anthropicDefaultModelId: AnthropicModelId = "claude-3-5-sonnet-20241022" +export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219" export const anthropicModels = { "claude-3-7-sonnet-20250219": { maxTokens: 64_000, @@ -355,9 +355,9 @@ export const bedrockModels = { // Glama // https://glama.ai/models -export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet" +export const glamaDefaultModelId = "anthropic/claude-3-7-sonnet" export const glamaDefaultModelInfo: ModelInfo = { - maxTokens: 8192, + maxTokens: 64_000, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, @@ -367,11 +367,14 @@ export const glamaDefaultModelInfo: ModelInfo = { cacheWritesPrice: 3.75, cacheReadsPrice: 0.3, description: - "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._", + "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", } +// Requesty +// https://requesty.ai/router-2 +export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest" export const requestyDefaultModelInfo: ModelInfo = { - maxTokens: 8192, + maxTokens: 64_000, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, @@ -381,15 +384,14 @@ export const requestyDefaultModelInfo: ModelInfo = { cacheWritesPrice: 3.75, cacheReadsPrice: 0.3, description: - "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._", + "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", } -export const requestyDefaultModelId = "anthropic/claude-3-5-sonnet" // OpenRouter // https://openrouter.ai/models?order=newest&supported_parameters=tools -export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels +export const openRouterDefaultModelId = "anthropic/claude-3.7-sonnet" export const openRouterDefaultModelInfo: ModelInfo = { - maxTokens: 8192, + maxTokens: 64_000, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, @@ -399,13 +401,13 @@ export const openRouterDefaultModelInfo: ModelInfo = { cacheWritesPrice: 3.75, cacheReadsPrice: 0.3, description: - "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._", + "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", } // Vertex AI // https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude export type VertexModelId = keyof typeof vertexModels -export const vertexDefaultModelId: VertexModelId = "claude-3-5-sonnet-v2@20241022" +export const vertexDefaultModelId: VertexModelId = "claude-3-7-sonnet@20250219" export const vertexModels = { "claude-3-7-sonnet@20250219": { maxTokens: 8192, diff --git a/src/test/suite/index.ts b/src/test/suite/index.ts index ffb8de7473..540be7cef8 100644 --- a/src/test/suite/index.ts +++ b/src/test/suite/index.ts @@ -39,7 +39,7 @@ export async function run(): Promise { : await globalThis.extension.activate() globalThis.provider = globalThis.api.sidebarProvider await globalThis.provider.updateGlobalState("apiProvider", "openrouter") - await globalThis.provider.updateGlobalState("openRouterModelId", "anthropic/claude-3.5-sonnet") + await globalThis.provider.updateGlobalState("openRouterModelId", "anthropic/claude-3.7-sonnet") await globalThis.provider.storeSecret( "openRouterApiKey", process.env.OPENROUTER_API_KEY || "sk-or-v1-fake-api-key", diff --git a/webview-ui/src/components/settings/GlamaModelPicker.tsx b/webview-ui/src/components/settings/GlamaModelPicker.tsx index cb813a0d05..37e326d8f8 100644 --- a/webview-ui/src/components/settings/GlamaModelPicker.tsx +++ b/webview-ui/src/components/settings/GlamaModelPicker.tsx @@ -10,6 +10,6 @@ export const GlamaModelPicker = () => ( refreshMessageType="refreshGlamaModels" serviceName="Glama" serviceUrl="https://glama.ai/models" - recommendedModel="anthropic/claude-3-5-sonnet" + recommendedModel="anthropic/claude-3-7-sonnet" /> ) diff --git a/webview-ui/src/components/settings/OpenRouterModelPicker.tsx b/webview-ui/src/components/settings/OpenRouterModelPicker.tsx index 9111407cd6..c773478e54 100644 --- a/webview-ui/src/components/settings/OpenRouterModelPicker.tsx +++ b/webview-ui/src/components/settings/OpenRouterModelPicker.tsx @@ -10,6 +10,6 @@ export const OpenRouterModelPicker = () => ( refreshMessageType="refreshOpenRouterModels" serviceName="OpenRouter" serviceUrl="https://openrouter.ai/models" - recommendedModel="anthropic/claude-3.5-sonnet:beta" + recommendedModel="anthropic/claude-3.7-sonnet" /> ) diff --git a/webview-ui/src/components/settings/RequestyModelPicker.tsx b/webview-ui/src/components/settings/RequestyModelPicker.tsx index e0759a43ba..c65067068a 100644 --- a/webview-ui/src/components/settings/RequestyModelPicker.tsx +++ b/webview-ui/src/components/settings/RequestyModelPicker.tsx @@ -16,7 +16,7 @@ export const RequestyModelPicker = () => { }} serviceName="Requesty" serviceUrl="https://requesty.ai" - recommendedModel="anthropic/claude-3-5-sonnet-latest" + recommendedModel="anthropic/claude-3-7-sonnet-latest" /> ) } diff --git a/webview-ui/src/components/settings/SettingsView.tsx b/webview-ui/src/components/settings/SettingsView.tsx index 495bf49bd7..0d80580b49 100644 --- a/webview-ui/src/components/settings/SettingsView.tsx +++ b/webview-ui/src/components/settings/SettingsView.tsx @@ -765,7 +765,7 @@ const SettingsView = forwardRef(({ onDone }, color: "var(--vscode-descriptionForeground)", }}> When enabled, Roo will be able to edit files more quickly and will automatically reject - truncated full-file writes. Works best with the latest Claude 3.5 Sonnet model. + truncated full-file writes. Works best with the latest Claude 3.7 Sonnet model.

{diffEnabled && ( From d94067aba8b375fa14b76b04a0bb2e5774a43a06 Mon Sep 17 00:00:00 2001 From: cte Date: Mon, 24 Feb 2025 12:54:05 -0800 Subject: [PATCH 2/5] Revert maxTokens change for now --- src/api/providers/openrouter.ts | 1 + src/shared/api.ts | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index af087226eb..eb9e819d77 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -107,6 +107,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler { // (models usually default to max tokens allowed) let maxTokens: number | undefined switch (this.getModel().id) { + case "anthropic/claude-3.7-sonnet": case "anthropic/claude-3.5-sonnet": case "anthropic/claude-3.5-sonnet:beta": case "anthropic/claude-3.5-sonnet-20240620": diff --git a/src/shared/api.ts b/src/shared/api.ts index 4619d2930d..056a40c49e 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -96,7 +96,7 @@ export type AnthropicModelId = keyof typeof anthropicModels export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219" export const anthropicModels = { "claude-3-7-sonnet-20250219": { - maxTokens: 64_000, + maxTokens: 8192, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, @@ -357,7 +357,7 @@ export const bedrockModels = { // https://glama.ai/models export const glamaDefaultModelId = "anthropic/claude-3-7-sonnet" export const glamaDefaultModelInfo: ModelInfo = { - maxTokens: 64_000, + maxTokens: 8192, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, @@ -374,7 +374,7 @@ export const glamaDefaultModelInfo: ModelInfo = { // https://requesty.ai/router-2 export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest" export const requestyDefaultModelInfo: ModelInfo = { - maxTokens: 64_000, + maxTokens: 8192, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, @@ -391,7 +391,7 @@ export const requestyDefaultModelInfo: ModelInfo = { // https://openrouter.ai/models?order=newest&supported_parameters=tools export const openRouterDefaultModelId = "anthropic/claude-3.7-sonnet" export const openRouterDefaultModelInfo: ModelInfo = { - maxTokens: 64_000, + maxTokens: 8192, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, From a130e656f1ecf7e04cb5af2352b844d2f80e4908 Mon Sep 17 00:00:00 2001 From: cte Date: Mon, 24 Feb 2025 13:07:00 -0800 Subject: [PATCH 3/5] Fix tests --- src/api/providers/__tests__/glama.test.ts | 12 +++++++----- src/api/providers/__tests__/openrouter.test.ts | 4 +++- src/api/providers/__tests__/vertex.test.ts | 7 +++++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/api/providers/__tests__/glama.test.ts b/src/api/providers/__tests__/glama.test.ts index c3fc90e32b..5e017ccd0a 100644 --- a/src/api/providers/__tests__/glama.test.ts +++ b/src/api/providers/__tests__/glama.test.ts @@ -1,9 +1,11 @@ -import { GlamaHandler } from "../glama" -import { ApiHandlerOptions } from "../../../shared/api" -import OpenAI from "openai" +// npx jest src/api/providers/__tests__/glama.test.ts + import { Anthropic } from "@anthropic-ai/sdk" import axios from "axios" +import { GlamaHandler } from "../glama" +import { ApiHandlerOptions } from "../../../shared/api" + // Mock OpenAI client const mockCreate = jest.fn() const mockWithResponse = jest.fn() @@ -71,8 +73,8 @@ describe("GlamaHandler", () => { beforeEach(() => { mockOptions = { - apiModelId: "anthropic/claude-3-5-sonnet", - glamaModelId: "anthropic/claude-3-5-sonnet", + apiModelId: "anthropic/claude-3-7-sonnet", + glamaModelId: "anthropic/claude-3-7-sonnet", glamaApiKey: "test-api-key", } handler = new GlamaHandler(mockOptions) diff --git a/src/api/providers/__tests__/openrouter.test.ts b/src/api/providers/__tests__/openrouter.test.ts index 18f81ce2fd..aabd7f71a8 100644 --- a/src/api/providers/__tests__/openrouter.test.ts +++ b/src/api/providers/__tests__/openrouter.test.ts @@ -1,3 +1,5 @@ +// npx jest src/api/providers/__tests__/openrouter.test.ts + import { OpenRouterHandler } from "../openrouter" import { ApiHandlerOptions, ModelInfo } from "../../../shared/api" import OpenAI from "openai" @@ -55,7 +57,7 @@ describe("OpenRouterHandler", () => { const handler = new OpenRouterHandler({}) const result = handler.getModel() - expect(result.id).toBe("anthropic/claude-3.5-sonnet:beta") + expect(result.id).toBe("anthropic/claude-3.7-sonnet") expect(result.info.supportsPromptCache).toBe(true) }) diff --git a/src/api/providers/__tests__/vertex.test.ts b/src/api/providers/__tests__/vertex.test.ts index a51033af2d..ebe60ba0c6 100644 --- a/src/api/providers/__tests__/vertex.test.ts +++ b/src/api/providers/__tests__/vertex.test.ts @@ -1,7 +1,10 @@ -import { VertexHandler } from "../vertex" +// npx jest src/api/providers/__tests__/vertex.test.ts + import { Anthropic } from "@anthropic-ai/sdk" import { AnthropicVertex } from "@anthropic-ai/vertex-sdk" +import { VertexHandler } from "../vertex" + // Mock Vertex SDK jest.mock("@anthropic-ai/vertex-sdk", () => ({ AnthropicVertex: jest.fn().mockImplementation(() => ({ @@ -289,7 +292,7 @@ describe("VertexHandler", () => { vertexRegion: "us-central1", }) const modelInfo = invalidHandler.getModel() - expect(modelInfo.id).toBe("claude-3-5-sonnet-v2@20241022") // Default model + expect(modelInfo.id).toBe("claude-3-7-sonnet@20250219") // Default model }) }) }) From 1d1f5c9c3b4e5523f483ef16187c92df48bdb254 Mon Sep 17 00:00:00 2001 From: cte Date: Mon, 24 Feb 2025 13:43:47 -0800 Subject: [PATCH 4/5] Integration test cleanup --- src/test/suite/index.ts | 17 ++-- src/test/suite/modes.test.ts | 152 ++++++++++++++++++----------------- src/test/suite/task.test.ts | 58 ++++++------- 3 files changed, 114 insertions(+), 113 deletions(-) diff --git a/src/test/suite/index.ts b/src/test/suite/index.ts index 540be7cef8..cc487b0bf7 100644 --- a/src/test/suite/index.ts +++ b/src/test/suite/index.ts @@ -13,23 +13,23 @@ declare global { } export async function run(): Promise { - // Create the mocha test const mocha = new Mocha({ ui: "tdd", - timeout: 600000, // 10 minutes to compensate for time communicating with LLM while running in GHA + timeout: 600000, // 10 minutes to compensate for time communicating with LLM while running in GHA. }) const testsRoot = path.resolve(__dirname, "..") try { - // Find all test files + // Find all test files. const files = await glob("**/**.test.js", { cwd: testsRoot }) - // Add files to the test suite + // Add files to the test suite. files.forEach((f: string) => mocha.addFile(path.resolve(testsRoot, f))) - //Set up global extension, api, provider, and panel + // Set up global extension, api, provider, and panel. globalThis.extension = vscode.extensions.getExtension("RooVeterinaryInc.roo-cline") + if (!globalThis.extension) { throw new Error("Extension not found") } @@ -37,9 +37,12 @@ export async function run(): Promise { globalThis.api = globalThis.extension.isActive ? globalThis.extension.exports : await globalThis.extension.activate() + globalThis.provider = globalThis.api.sidebarProvider + await globalThis.provider.updateGlobalState("apiProvider", "openrouter") - await globalThis.provider.updateGlobalState("openRouterModelId", "anthropic/claude-3.7-sonnet") + await globalThis.provider.updateGlobalState("openRouterModelId", "anthropic/claude-3.5-sonnet") + await globalThis.provider.storeSecret( "openRouterApiKey", process.env.OPENROUTER_API_KEY || "sk-or-v1-fake-api-key", @@ -71,7 +74,7 @@ export async function run(): Promise { await new Promise((resolve) => setTimeout(resolve, interval)) } - // Run the mocha test + // Run the mocha test. return new Promise((resolve, reject) => { try { mocha.run((failures: number) => { diff --git a/src/test/suite/modes.test.ts b/src/test/suite/modes.test.ts index 2fe0eaa597..b94e71d110 100644 --- a/src/test/suite/modes.test.ts +++ b/src/test/suite/modes.test.ts @@ -1,101 +1,105 @@ import * as assert from "assert" -import * as vscode from "vscode" suite("Roo Code Modes", () => { test("Should handle switching modes correctly", async function () { const timeout = 30000 const interval = 1000 + const testPrompt = "For each mode (Code, Architect, Ask) respond with the mode name and what it specializes in after switching to that mode, do not start with the current mode, be sure to say 'I AM DONE' after the task is complete" + if (!globalThis.extension) { assert.fail("Extension not found") } - try { - let startTime = Date.now() - - // Ensure the webview is launched. - while (Date.now() - startTime < timeout) { - if (globalThis.provider.viewLaunched) { - break - } + let startTime = Date.now() - await new Promise((resolve) => setTimeout(resolve, interval)) + // Ensure the webview is launched. + while (Date.now() - startTime < timeout) { + if (globalThis.provider.viewLaunched) { + break } - await globalThis.provider.updateGlobalState("mode", "Ask") - await globalThis.provider.updateGlobalState("alwaysAllowModeSwitch", true) - await globalThis.provider.updateGlobalState("autoApprovalEnabled", true) + await new Promise((resolve) => setTimeout(resolve, interval)) + } - // Start a new task. - await globalThis.api.startNewTask(testPrompt) + await globalThis.provider.updateGlobalState("mode", "Ask") + await globalThis.provider.updateGlobalState("alwaysAllowModeSwitch", true) + await globalThis.provider.updateGlobalState("autoApprovalEnabled", true) - // Wait for task to appear in history with tokens. - startTime = Date.now() + // Start a new task. + await globalThis.api.startNewTask(testPrompt) - while (Date.now() - startTime < timeout) { - const messages = globalThis.provider.messages + // Wait for task to appear in history with tokens. + startTime = Date.now() - if ( - messages.some( - ({ type, text }) => - type === "say" && text?.includes("I AM DONE") && !text?.includes("be sure to say"), - ) - ) { - break - } + while (Date.now() - startTime < timeout) { + const messages = globalThis.provider.messages - await new Promise((resolve) => setTimeout(resolve, interval)) - } - if (globalThis.provider.messages.length === 0) { - assert.fail("No messages received") + if ( + messages.some( + ({ type, text }) => + type === "say" && text?.includes("I AM DONE") && !text?.includes("be sure to say"), + ) + ) { + break } - //Log the messages to the console - globalThis.provider.messages.forEach(({ type, text }) => { - if (type === "say") { - console.log(text) - } - }) - - //Start Grading Portion of test to grade the response from 1 to 10 - await globalThis.provider.updateGlobalState("mode", "Ask") - let output = globalThis.provider.messages.map(({ type, text }) => (type === "say" ? text : "")).join("\n") - await globalThis.api.startNewTask( - `Given this prompt: ${testPrompt} grade the response from 1 to 10 in the format of "Grade: (1-10)": ${output} \n Be sure to say 'I AM DONE GRADING' after the task is complete`, - ) - - startTime = Date.now() - - while (Date.now() - startTime < timeout) { - const messages = globalThis.provider.messages - - if ( - messages.some( - ({ type, text }) => - type === "say" && text?.includes("I AM DONE GRADING") && !text?.includes("be sure to say"), - ) - ) { - break - } - - await new Promise((resolve) => setTimeout(resolve, interval)) + await new Promise((resolve) => setTimeout(resolve, interval)) + } + + if (globalThis.provider.messages.length === 0) { + assert.fail("No messages received") + } + + // Log the messages to the console. + globalThis.provider.messages.forEach(({ type, text }) => { + if (type === "say") { + console.log(text) } - if (globalThis.provider.messages.length === 0) { - assert.fail("No messages received") + }) + + // Start Grading Portion of test to grade the response from 1 to 10. + await globalThis.provider.updateGlobalState("mode", "Ask") + let output = globalThis.provider.messages.map(({ type, text }) => (type === "say" ? text : "")).join("\n") + + await globalThis.api.startNewTask( + `Given this prompt: ${testPrompt} grade the response from 1 to 10 in the format of "Grade: (1-10)": ${output} \n Be sure to say 'I AM DONE GRADING' after the task is complete`, + ) + + startTime = Date.now() + + while (Date.now() - startTime < timeout) { + const messages = globalThis.provider.messages + + if ( + messages.some( + ({ type, text }) => + type === "say" && text?.includes("I AM DONE GRADING") && !text?.includes("be sure to say"), + ) + ) { + break } - globalThis.provider.messages.forEach(({ type, text }) => { - if (type === "say" && text?.includes("Grade:")) { - console.log(text) - } - }) - const gradeMessage = globalThis.provider.messages.find( - ({ type, text }) => type === "say" && !text?.includes("Grade: (1-10)") && text?.includes("Grade:"), - )?.text - const gradeMatch = gradeMessage?.match(/Grade: (\d+)/) - const gradeNum = gradeMatch ? parseInt(gradeMatch[1]) : undefined - assert.ok(gradeNum !== undefined && gradeNum >= 7 && gradeNum <= 10, "Grade must be between 7 and 10") - } finally { + + await new Promise((resolve) => setTimeout(resolve, interval)) + } + + if (globalThis.provider.messages.length === 0) { + assert.fail("No messages received") } + + globalThis.provider.messages.forEach(({ type, text }) => { + if (type === "say" && text?.includes("Grade:")) { + console.log(text) + } + }) + + const gradeMessage = globalThis.provider.messages.find( + ({ type, text }) => type === "say" && !text?.includes("Grade: (1-10)") && text?.includes("Grade:"), + )?.text + + const gradeMatch = gradeMessage?.match(/Grade: (\d+)/) + const gradeNum = gradeMatch ? parseInt(gradeMatch[1]) : undefined + assert.ok(gradeNum !== undefined && gradeNum >= 7 && gradeNum <= 10, "Grade must be between 7 and 10") }) }) diff --git a/src/test/suite/task.test.ts b/src/test/suite/task.test.ts index 2d34bc78ff..6bdedcde00 100644 --- a/src/test/suite/task.test.ts +++ b/src/test/suite/task.test.ts @@ -1,5 +1,4 @@ import * as assert from "assert" -import * as vscode from "vscode" suite("Roo Code Task", () => { test("Should handle prompt and response correctly", async function () { @@ -10,48 +9,43 @@ suite("Roo Code Task", () => { assert.fail("Extension not found") } - try { - // Ensure the webview is launched. - let startTime = Date.now() + // Ensure the webview is launched. + let startTime = Date.now() - while (Date.now() - startTime < timeout) { - if (globalThis.provider.viewLaunched) { - break - } - - await new Promise((resolve) => setTimeout(resolve, interval)) + while (Date.now() - startTime < timeout) { + if (globalThis.provider.viewLaunched) { + break } - await globalThis.provider.updateGlobalState("mode", "Code") - await globalThis.provider.updateGlobalState("alwaysAllowModeSwitch", true) - await globalThis.provider.updateGlobalState("autoApprovalEnabled", true) + await new Promise((resolve) => setTimeout(resolve, interval)) + } - await globalThis.api.startNewTask("Hello world, what is your name? Respond with 'My name is ...'") + await globalThis.provider.updateGlobalState("mode", "Code") + await globalThis.provider.updateGlobalState("alwaysAllowModeSwitch", true) + await globalThis.provider.updateGlobalState("autoApprovalEnabled", true) - // Wait for task to appear in history with tokens. - startTime = Date.now() + await globalThis.api.startNewTask("Hello world, what is your name? Respond with 'My name is ...'") - while (Date.now() - startTime < timeout) { - const messages = globalThis.provider.messages + // Wait for task to appear in history with tokens. + startTime = Date.now() - if (messages.some(({ type, text }) => type === "say" && text?.includes("My name is Roo"))) { - break - } + while (Date.now() - startTime < timeout) { + const messages = globalThis.provider.messages - await new Promise((resolve) => setTimeout(resolve, interval)) + if (messages.some(({ type, text }) => type === "say" && text?.includes("My name is Roo"))) { + break } - if (globalThis.provider.messages.length === 0) { - assert.fail("No messages received") - } + await new Promise((resolve) => setTimeout(resolve, interval)) + } - assert.ok( - globalThis.provider.messages.some( - ({ type, text }) => type === "say" && text?.includes("My name is Roo"), - ), - "Did not receive expected response containing 'My name is Roo'", - ) - } finally { + if (globalThis.provider.messages.length === 0) { + assert.fail("No messages received") } + + assert.ok( + globalThis.provider.messages.some(({ type, text }) => type === "say" && text?.includes("My name is Roo")), + "Did not receive expected response containing 'My name is Roo'", + ) }) }) From b24a3355036f6f6e25bdfe01c26133110ede725e Mon Sep 17 00:00:00 2001 From: cte Date: Mon, 24 Feb 2025 13:48:47 -0800 Subject: [PATCH 5/5] Set maxTokens to 64K for Anthropic / 3.7 Sonnet --- src/shared/api.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared/api.ts b/src/shared/api.ts index 056a40c49e..3a550891f5 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -96,7 +96,7 @@ export type AnthropicModelId = keyof typeof anthropicModels export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219" export const anthropicModels = { "claude-3-7-sonnet-20250219": { - maxTokens: 8192, + maxTokens: 64_000, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true,