From df0b82117f4dc6d2f4adf3112ab7a9e03a22cfdf Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 17 Apr 2025 16:20:30 -0700 Subject: [PATCH 1/9] Record tool usage in Cline class --- src/core/Cline.ts | 21 +++++++++++- src/core/__tests__/CodeActionProvider.test.ts | 3 ++ src/core/__tests__/EditorUtils.test.ts | 3 ++ src/core/__tests__/mode-validator.test.ts | 2 ++ .../read-file-maxReadFileLine.test.ts | 9 +++-- src/core/__tests__/read-file-tool.test.ts | 2 ++ src/core/__tests__/read-file-xml.test.ts | 5 ++- .../__tests__/executeCommandTool.test.ts | 4 ++- src/core/tools/accessMcpResourceTool.ts | 19 +++++++++-- src/core/tools/appendToFileTool.ts | 25 ++++++++++++-- src/core/tools/applyDiffTool.ts | 33 +++++++++++++++---- src/core/tools/askFollowupQuestionTool.ts | 13 ++++---- src/core/tools/attemptCompletionTool.ts | 12 +++---- src/core/tools/browserActionTool.ts | 30 ++++++++++++++--- src/core/tools/executeCommandTool.ts | 9 ++++- src/core/tools/fetchInstructionsTool.ts | 27 +++++++-------- src/core/tools/insertContentTool.ts | 13 +++++--- src/core/tools/listCodeDefinitionNamesTool.ts | 22 ++++++++----- src/core/tools/listFilesTool.ts | 21 +++++++----- src/core/tools/newTaskTool.ts | 9 +++++ src/core/tools/readFileTool.ts | 16 ++++++--- src/core/tools/searchAndReplaceTool.ts | 17 +++++++--- src/core/tools/searchFilesTool.ts | 23 ++++++++----- src/core/tools/switchModeTool.ts | 23 +++++++++---- src/core/tools/useMcpToolTool.ts | 26 +++++++++++---- src/core/tools/writeToFileTool.ts | 27 ++++++++++++++- src/shared/tools.ts | 14 ++++---- 27 files changed, 322 insertions(+), 106 deletions(-) diff --git a/src/core/Cline.ts b/src/core/Cline.ts index 69278bd125..7401002cfb 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -39,7 +39,7 @@ import { GlobalFileNames } from "../shared/globalFileNames" import { defaultModeSlug, getModeBySlug, getFullModeDetails, isToolAllowedForMode } from "../shared/modes" import { EXPERIMENT_IDS, experiments as Experiments, ExperimentId } 
from "../shared/experiments" import { formatLanguage } from "../shared/language" -import { ToolParamName, ToolName, ToolResponse } from "../shared/tools" +import { ToolParamName, ToolName, ToolResponse, ToolUsage } from "../shared/tools" // services import { UrlContentFetcher } from "../services/browser/UrlContentFetcher" @@ -189,6 +189,9 @@ export class Cline extends EventEmitter { private didAlreadyUseTool = false private didCompleteReadingStream = false + // metrics + private toolUsage: ToolUsage = {} + constructor({ provider, apiConfiguration, @@ -2693,4 +2696,20 @@ export class Cline extends EventEmitter { public getFileContextTracker(): FileContextTracker { return this.fileContextTracker } + + // Metrics + + public recordToolUsage({ toolName, success = true }: { toolName: ToolName; success?: boolean }): ToolUsage { + if (!this.toolUsage[toolName]) { + this.toolUsage[toolName] = { attempts: 0, failures: 0 } + } + + this.toolUsage[toolName].attempts++ + + if (!success) { + this.toolUsage[toolName].failures++ + } + + return this.toolUsage + } } diff --git a/src/core/__tests__/CodeActionProvider.test.ts b/src/core/__tests__/CodeActionProvider.test.ts index 6ea2adf894..be462e1e06 100644 --- a/src/core/__tests__/CodeActionProvider.test.ts +++ b/src/core/__tests__/CodeActionProvider.test.ts @@ -1,4 +1,7 @@ +// npx jest src/core/__tests__/CodeActionProvider.test.ts + import * as vscode from "vscode" + import { CodeActionProvider, ACTION_NAMES } from "../CodeActionProvider" import { EditorUtils } from "../EditorUtils" diff --git a/src/core/__tests__/EditorUtils.test.ts b/src/core/__tests__/EditorUtils.test.ts index 1a01838693..44b079fcd1 100644 --- a/src/core/__tests__/EditorUtils.test.ts +++ b/src/core/__tests__/EditorUtils.test.ts @@ -1,4 +1,7 @@ +// npx jest src/core/__tests__/EditorUtils.test.ts + import * as vscode from "vscode" + import { EditorUtils } from "../EditorUtils" // Use simple classes to simulate VSCode's Range and Position behavior. 
diff --git a/src/core/__tests__/mode-validator.test.ts b/src/core/__tests__/mode-validator.test.ts index 66b23ff2ed..72c08d0028 100644 --- a/src/core/__tests__/mode-validator.test.ts +++ b/src/core/__tests__/mode-validator.test.ts @@ -1,3 +1,5 @@ +// npx jest src/core/__tests__/mode-validator.test.ts + import { isToolAllowedForMode, getModeConfig, modes, ModeConfig } from "../../shared/modes" import { TOOL_GROUPS } from "../../shared/tools" import { validateToolUse } from "../mode-validator" diff --git a/src/core/__tests__/read-file-maxReadFileLine.test.ts b/src/core/__tests__/read-file-maxReadFileLine.test.ts index 3a3f7e97bb..0d5c9436d1 100644 --- a/src/core/__tests__/read-file-maxReadFileLine.test.ts +++ b/src/core/__tests__/read-file-maxReadFileLine.test.ts @@ -1,11 +1,13 @@ +// npx jest src/core/__tests__/read-file-maxReadFileLine.test.ts + import * as path from "path" import { countFileLines } from "../../integrations/misc/line-counter" import { readLines } from "../../integrations/misc/read-lines" -import { extractTextFromFile, addLineNumbers } from "../../integrations/misc/extract-text" +import { extractTextFromFile } from "../../integrations/misc/extract-text" import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter" import { isBinaryFile } from "isbinaryfile" -import { ReadFileToolUse } from "../../shared/tools" +import { ReadFileToolUse, ToolUsage } from "../../shared/tools" // Mock dependencies jest.mock("../../integrations/misc/line-counter") @@ -69,7 +71,6 @@ describe("read_file tool with maxReadFileLine setting", () => { const mockedCountFileLines = countFileLines as jest.MockedFunction const mockedReadLines = readLines as jest.MockedFunction const mockedExtractTextFromFile = extractTextFromFile as jest.MockedFunction - const mockedAddLineNumbers = addLineNumbers as jest.MockedFunction const mockedParseSourceCodeDefinitionsForFile = parseSourceCodeDefinitionsForFile as jest.MockedFunction< typeof parseSourceCodeDefinitionsForFile 
> @@ -126,6 +127,8 @@ describe("read_file tool with maxReadFileLine setting", () => { trackFileContext: jest.fn().mockResolvedValue(undefined), }) + mockCline.recordToolUsage = jest.fn().mockReturnValue({} as ToolUsage) + // Reset tool result toolResult = undefined }) diff --git a/src/core/__tests__/read-file-tool.test.ts b/src/core/__tests__/read-file-tool.test.ts index c410159d4e..151b6df2bc 100644 --- a/src/core/__tests__/read-file-tool.test.ts +++ b/src/core/__tests__/read-file-tool.test.ts @@ -1,3 +1,5 @@ +// npx jest src/core/__tests__/read-file-tool.test.ts + import * as path from "path" import { countFileLines } from "../../integrations/misc/line-counter" import { readLines } from "../../integrations/misc/read-lines" diff --git a/src/core/__tests__/read-file-xml.test.ts b/src/core/__tests__/read-file-xml.test.ts index 46ca065514..6b5a5c0037 100644 --- a/src/core/__tests__/read-file-xml.test.ts +++ b/src/core/__tests__/read-file-xml.test.ts @@ -1,3 +1,5 @@ +// npx jest src/core/__tests__/read-file-xml.test.ts + import * as path from "path" import { countFileLines } from "../../integrations/misc/line-counter" @@ -5,7 +7,7 @@ import { readLines } from "../../integrations/misc/read-lines" import { extractTextFromFile } from "../../integrations/misc/extract-text" import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter" import { isBinaryFile } from "isbinaryfile" -import { ReadFileToolUse } from "../../shared/tools" +import { ReadFileToolUse, ToolUsage } from "../../shared/tools" // Mock dependencies jest.mock("../../integrations/misc/line-counter") @@ -118,6 +120,7 @@ describe("read_file tool XML output structure", () => { mockCline.getFileContextTracker = jest.fn().mockReturnValue({ trackFileContext: jest.fn().mockResolvedValue(undefined), }) + mockCline.recordToolUsage = jest.fn().mockReturnValue({} as ToolUsage) // Reset tool result toolResult = undefined diff --git a/src/core/tools/__tests__/executeCommandTool.test.ts 
b/src/core/tools/__tests__/executeCommandTool.test.ts index 859d79ad7f..1d85831d57 100644 --- a/src/core/tools/__tests__/executeCommandTool.test.ts +++ b/src/core/tools/__tests__/executeCommandTool.test.ts @@ -5,7 +5,7 @@ import { describe, expect, it, jest, beforeEach } from "@jest/globals" import { executeCommandTool } from "../executeCommandTool" import { Cline } from "../../Cline" import { formatResponse } from "../../prompts/responses" -import { ToolUse, AskApproval, HandleError, PushToolResult, RemoveClosingTag } from "../../../shared/tools" +import { ToolUse, AskApproval, HandleError, PushToolResult, RemoveClosingTag, ToolUsage } from "../../../shared/tools" // Mock dependencies jest.mock("../../Cline") @@ -40,6 +40,8 @@ describe("executeCommandTool", () => { // @ts-expect-error - Jest mock function type issues validateCommand: jest.fn().mockReturnValue(null), }, + // @ts-expect-error - Jest mock function type issues + recordToolUsage: jest.fn().mockReturnValue({} as ToolUsage), } // @ts-expect-error - Jest mock function type issues diff --git a/src/core/tools/accessMcpResourceTool.ts b/src/core/tools/accessMcpResourceTool.ts index ced110f8b6..0832d8ddac 100644 --- a/src/core/tools/accessMcpResourceTool.ts +++ b/src/core/tools/accessMcpResourceTool.ts @@ -13,6 +13,7 @@ export async function accessMcpResourceTool( ) { const server_name: string | undefined = block.params.server_name const uri: string | undefined = block.params.uri + try { if (block.partial) { const partialMessage = JSON.stringify({ @@ -20,32 +21,42 @@ export async function accessMcpResourceTool( serverName: removeClosingTag("server_name", server_name), uri: removeClosingTag("uri", uri), } satisfies ClineAskUseMcpServer) + await cline.ask("use_mcp_server", partialMessage, block.partial).catch(() => {}) return } else { if (!server_name) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "access_mcp_resource", success: false }) pushToolResult(await 
cline.sayAndCreateMissingParamError("access_mcp_resource", "server_name")) return } + if (!uri) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "access_mcp_resource", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("access_mcp_resource", "uri")) return } + cline.consecutiveMistakeCount = 0 + const completeMessage = JSON.stringify({ type: "access_mcp_resource", serverName: server_name, uri, } satisfies ClineAskUseMcpServer) + const didApprove = await askApproval("use_mcp_server", completeMessage) + if (!didApprove) { return } - // now execute the tool + + // Now execute the tool await cline.say("mcp_server_request_started") const resourceResult = await cline.providerRef.deref()?.getMcpHub()?.readResource(server_name, uri) + const resourceResultPretty = resourceResult?.contents .map((item) => { @@ -57,15 +68,19 @@ export async function accessMcpResourceTool( .filter(Boolean) .join("\n\n") || "(Empty response)" - // handle images (image must contain mimetype and blob) + // Handle images (image must contain mimetype and blob) let images: string[] = [] + resourceResult?.contents.forEach((item) => { if (item.mimeType?.startsWith("image") && item.blob) { images.push(item.blob) } }) + await cline.say("mcp_server_response", resourceResultPretty, images) pushToolResult(formatResponse.toolResult(resourceResultPretty, images)) + cline.recordToolUsage({ toolName: "access_mcp_resource" }) + return } } catch (error) { diff --git a/src/core/tools/appendToFileTool.ts b/src/core/tools/appendToFileTool.ts index a812677ae8..882d6401c6 100644 --- a/src/core/tools/appendToFileTool.ts +++ b/src/core/tools/appendToFileTool.ts @@ -23,11 +23,13 @@ export async function appendToFileTool( ) { const relPath: string | undefined = block.params.path let newContent: string | undefined = block.params.content + if (!relPath || !newContent) { return } const accessAllowed = cline.rooIgnoreController?.validateAccess(relPath) + if (!accessAllowed) { await 
cline.say("rooignore_error", relPath) pushToolResult(formatResponse.toolError(formatResponse.rooIgnoreError(relPath))) @@ -48,6 +50,7 @@ export async function appendToFileTool( if (newContent.startsWith("```")) { newContent = newContent.split("\n").slice(1).join("\n").trim() } + if (newContent.endsWith("```")) { newContent = newContent.split("\n").slice(0, -1).join("\n").trim() } @@ -68,36 +71,44 @@ export async function appendToFileTool( try { if (block.partial) { - // update gui message + // Update GUI message const partialMessage = JSON.stringify(sharedMessageProps) await cline.ask("tool", partialMessage, block.partial).catch(() => {}) - // update editor + + // Update editor if (!cline.diffViewProvider.isEditing) { await cline.diffViewProvider.open(relPath) } + // If file exists, append newContent to existing content if (fileExists && cline.diffViewProvider.originalContent) { newContent = cline.diffViewProvider.originalContent + "\n" + newContent } - // editor is open, stream content in + + // Editor is open, stream content in await cline.diffViewProvider.update( everyLineHasLineNumbers(newContent) ? stripLineNumbers(newContent) : newContent, false, ) + return } else { if (!relPath) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "append_to_file", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("append_to_file", "path")) await cline.diffViewProvider.reset() return } + if (!newContent) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "append_to_file", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("append_to_file", "content")) await cline.diffViewProvider.reset() return } + cline.consecutiveMistakeCount = 0 if (!cline.diffViewProvider.isEditing) { @@ -125,17 +136,21 @@ export async function appendToFileTool( ? 
formatResponse.createPrettyPatch(relPath, cline.diffViewProvider.originalContent, newContent) : undefined, } satisfies ClineSayTool) + const didApprove = await askApproval("tool", completeMessage) + if (!didApprove) { await cline.diffViewProvider.revertChanges() return } + const { newProblemsMessage, userEdits, finalContent } = await cline.diffViewProvider.saveChanges() // Track file edit operation if (relPath) { await cline.getFileContextTracker().trackFileContext(relPath, "roo_edited" as RecordSource) } + cline.didEditFile = true if (userEdits) { @@ -147,6 +162,7 @@ export async function appendToFileTool( diff: userEdits, } satisfies ClineSayTool), ) + pushToolResult( `The user made the following updates to your content:\n\n${userEdits}\n\n` + `The updated content, which includes both your original modifications and the user's edits, has been successfully saved to ${relPath.toPosix()}. Here is the full, updated content of the file, including line numbers:\n\n` + @@ -162,7 +178,10 @@ export async function appendToFileTool( } else { pushToolResult(`The content was successfully appended to ${relPath.toPosix()}.${newProblemsMessage}`) } + + cline.recordToolUsage({ toolName: "append_to_file" }) await cline.diffViewProvider.reset() + return } } catch (error) { diff --git a/src/core/tools/applyDiffTool.ts b/src/core/tools/applyDiffTool.ts index 433d23a42b..b1f7740201 100644 --- a/src/core/tools/applyDiffTool.ts +++ b/src/core/tools/applyDiffTool.ts @@ -35,33 +35,36 @@ export async function applyDiffTool( try { if (block.partial) { - // update gui message + // Update GUI message let toolProgressStatus + if (cline.diffStrategy && cline.diffStrategy.getProgressStatus) { toolProgressStatus = cline.diffStrategy.getProgressStatus(block) } const partialMessage = JSON.stringify(sharedMessageProps) - await cline.ask("tool", partialMessage, block.partial, toolProgressStatus).catch(() => {}) return } else { if (!relPath) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ 
toolName: "apply_diff", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("apply_diff", "path")) return } + if (!diffContent) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "apply_diff", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("apply_diff", "diff")) return } const accessAllowed = cline.rooIgnoreController?.validateAccess(relPath) + if (!accessAllowed) { await cline.say("rooignore_error", relPath) pushToolResult(formatResponse.toolError(formatResponse.rooIgnoreError(relPath))) - return } @@ -87,14 +90,15 @@ export async function applyDiffTool( success: false, error: "No diff strategy available", } + let partResults = "" if (!diffResult.success) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "apply_diff", success: false }) const currentCount = (cline.consecutiveMistakeCountForApplyDiff.get(relPath) || 0) + 1 cline.consecutiveMistakeCountForApplyDiff.set(relPath, currentCount) let formattedError = "" - telemetryService.captureDiffApplicationError(cline.taskId, currentCount) if (diffResult.failParts && diffResult.failParts.length > 0) { @@ -102,14 +106,18 @@ export async function applyDiffTool( if (failPart.success) { continue } + const errorDetails = failPart.details ? JSON.stringify(failPart.details, null, 2) : "" + formattedError = `\n${ failPart.error }${errorDetails ? `\n\nDetails:\n${errorDetails}` : ""}\n` + partResults += formattedError } } else { const errorDetails = diffResult.details ? JSON.stringify(diffResult.details, null, 2) : "" + formattedError = `Unable to apply diff to file: ${absolutePath}\n\n\n${ diffResult.error }${errorDetails ? 
`\n\nDetails:\n${errorDetails}` : ""}\n` @@ -118,12 +126,14 @@ export async function applyDiffTool( if (currentCount >= 2) { await cline.say("diff_error", formattedError) } + pushToolResult(formattedError) return } cline.consecutiveMistakeCount = 0 cline.consecutiveMistakeCountForApplyDiff.delete(relPath) + // Show diff view before asking for approval cline.diffViewProvider.editType = "modify" await cline.diffViewProvider.open(relPath) @@ -136,26 +146,33 @@ export async function applyDiffTool( } satisfies ClineSayTool) let toolProgressStatus + if (cline.diffStrategy && cline.diffStrategy.getProgressStatus) { toolProgressStatus = cline.diffStrategy.getProgressStatus(block, diffResult) } const didApprove = await askApproval("tool", completeMessage, toolProgressStatus) + if (!didApprove) { - await cline.diffViewProvider.revertChanges() // cline likely handles closing the diff view + await cline.diffViewProvider.revertChanges() // Cline likely handles closing the diff view return } const { newProblemsMessage, userEdits, finalContent } = await cline.diffViewProvider.saveChanges() + // Track file edit operation if (relPath) { await cline.getFileContextTracker().trackFileContext(relPath, "roo_edited" as RecordSource) } - cline.didEditFile = true // used to determine if we should wait for busy terminal to update before sending api request + + // Used to determine if we should wait for busy terminal to update before sending api request + cline.didEditFile = true let partFailHint = "" + if (diffResult.failParts && diffResult.failParts.length > 0) { partFailHint = `But unable to apply all diff parts to file: ${absolutePath}, silently use tool to check newest file version and re-apply diffs\n` } + if (userEdits) { await cline.say( "user_feedback_diff", @@ -165,6 +182,7 @@ export async function applyDiffTool( diff: userEdits, } satisfies ClineSayTool), ) + pushToolResult( `The user made the following updates to your content:\n\n${userEdits}\n\n` + partFailHint + @@ -183,7 
+201,10 @@ export async function applyDiffTool( `Changes successfully applied to ${relPath.toPosix()}:\n\n${newProblemsMessage}\n` + partFailHint, ) } + + cline.recordToolUsage({ toolName: "apply_diff" }) await cline.diffViewProvider.reset() + return } } catch (error) { diff --git a/src/core/tools/askFollowupQuestionTool.ts b/src/core/tools/askFollowupQuestionTool.ts index 2e7263ad58..31a8cc5a70 100644 --- a/src/core/tools/askFollowupQuestionTool.ts +++ b/src/core/tools/askFollowupQuestionTool.ts @@ -13,6 +13,7 @@ export async function askFollowupQuestionTool( ) { const question: string | undefined = block.params.question const follow_up: string | undefined = block.params.follow_up + try { if (block.partial) { await cline.ask("followup", removeClosingTag("question", question), block.partial).catch(() => {}) @@ -24,9 +25,7 @@ export async function askFollowupQuestionTool( return } - type Suggest = { - answer: string - } + type Suggest = { answer: string } let follow_up_json = { question, @@ -39,11 +38,10 @@ export async function askFollowupQuestionTool( } try { - parsedSuggest = parseXml(follow_up, ["suggest"]) as { - suggest: Suggest[] | Suggest - } + parsedSuggest = parseXml(follow_up, ["suggest"]) as { suggest: Suggest[] | Suggest } } catch (error) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "ask_followup_question", success: false }) await cline.say("error", `Failed to parse operations: ${error.message}`) pushToolResult(formatResponse.toolError("Invalid operations xml format")) return @@ -57,10 +55,11 @@ export async function askFollowupQuestionTool( } cline.consecutiveMistakeCount = 0 - const { text, images } = await cline.ask("followup", JSON.stringify(follow_up_json), false) await cline.say("user_feedback", text ?? 
"", images) pushToolResult(formatResponse.toolResult(`\n${text}\n`, images)) + cline.recordToolUsage({ toolName: "ask_followup_question" }) + return } } catch (error) { diff --git a/src/core/tools/attemptCompletionTool.ts b/src/core/tools/attemptCompletionTool.ts index 891673969e..0dd458adbf 100644 --- a/src/core/tools/attemptCompletionTool.ts +++ b/src/core/tools/attemptCompletionTool.ts @@ -26,8 +26,10 @@ export async function attemptCompletionTool( ) { const result: string | undefined = block.params.result const command: string | undefined = block.params.command + try { const lastMessage = cline.clineMessages.at(-1) + if (block.partial) { if (command) { // the attempt_completion text is done, now we're getting command @@ -55,6 +57,7 @@ export async function attemptCompletionTool( } else { if (!result) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "attempt_completion", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("attempt_completion", "result")) return } @@ -136,13 +139,10 @@ export async function attemptCompletionTool( }) toolResults.push(...formatResponse.imageBlocks(images)) - - cline.userMessageContent.push({ - type: "text", - text: `${toolDescription()} Result:`, - }) - + cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:` }) cline.userMessageContent.push(...toolResults) + cline.recordToolUsage({ toolName: "attempt_completion" }) + return } } catch (error) { diff --git a/src/core/tools/browserActionTool.ts b/src/core/tools/browserActionTool.ts index c3f02821c1..bdc15b9c41 100644 --- a/src/core/tools/browserActionTool.ts +++ b/src/core/tools/browserActionTool.ts @@ -21,14 +21,17 @@ export async function browserActionTool( const coordinate: string | undefined = block.params.coordinate const text: string | undefined = block.params.text const size: string | undefined = block.params.size + if (!action || !browserActions.includes(action)) { // checking for action to ensure it is 
complete and valid if (!block.partial) { // if the block is complete and we don't have a valid action cline is a mistake cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "browser_action", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "action")) await cline.browserSession.closeBrowser() } + return } @@ -52,51 +55,63 @@ export async function browserActionTool( } else { // Initialize with empty object to avoid "used before assigned" errors let browserActionResult: BrowserActionResult = {} + if (action === "launch") { if (!url) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "browser_action", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "url")) await cline.browserSession.closeBrowser() return } + cline.consecutiveMistakeCount = 0 const didApprove = await askApproval("browser_action_launch", url) + if (!didApprove) { return } - // NOTE: it's okay that we call cline message since the partial inspect_site is finished streaming. The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array. For example the api_req_finished message would interfere with the partial message, so we needed to remove that. - // await cline.say("inspect_site_result", "") // no result, starts the loading spinner waiting for result - await cline.say("browser_action_result", "") // starts loading spinner - + // NOTE: It's okay that we call cline message since the partial inspect_site is finished streaming. + // The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array. + // For example the api_req_finished message would interfere with the partial message, so we needed to remove that. 
+ // await cline.say("inspect_site_result", "") // No result, starts the loading spinner waiting for result + await cline.say("browser_action_result", "") // Starts loading spinner await cline.browserSession.launchBrowser() browserActionResult = await cline.browserSession.navigateToUrl(url) } else { if (action === "click" || action === "hover") { if (!coordinate) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "browser_action", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "coordinate")) await cline.browserSession.closeBrowser() return // can't be within an inner switch } } + if (action === "type") { if (!text) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "browser_action", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "text")) await cline.browserSession.closeBrowser() return } } + if (action === "resize") { if (!size) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "browser_action", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "size")) await cline.browserSession.closeBrowser() return } } + cline.consecutiveMistakeCount = 0 + await cline.say( "browser_action", JSON.stringify({ @@ -107,6 +122,7 @@ export async function browserActionTool( undefined, false, ) + switch (action) { case "click": browserActionResult = await cline.browserSession.click(coordinate!) @@ -141,6 +157,7 @@ export async function browserActionTool( case "scroll_up": case "resize": await cline.say("browser_action_result", JSON.stringify(browserActionResult)) + pushToolResult( formatResponse.toolResult( `The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${ @@ -149,6 +166,7 @@ export async function browserActionTool( browserActionResult?.screenshot ? 
[browserActionResult.screenshot] : [], ), ) + break case "close": pushToolResult( @@ -156,8 +174,12 @@ export async function browserActionTool( `The browser has been closed. You may now proceed to using other tools.`, ), ) + break } + + cline.recordToolUsage({ toolName: "browser_action" }) + return } } catch (error) { diff --git a/src/core/tools/executeCommandTool.ts b/src/core/tools/executeCommandTool.ts index 8c54200bd7..592ab25787 100644 --- a/src/core/tools/executeCommandTool.ts +++ b/src/core/tools/executeCommandTool.ts @@ -13,6 +13,7 @@ export async function executeCommandTool( ) { let command: string | undefined = block.params.command const customCwd: string | undefined = block.params.cwd + try { if (block.partial) { await cline.ask("command", removeClosingTag("command", command), block.partial).catch(() => {}) @@ -20,6 +21,7 @@ export async function executeCommandTool( } else { if (!command) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "execute_command", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("execute_command", "command")) return } @@ -28,7 +30,6 @@ export async function executeCommandTool( if (ignoredFileAttemptedToAccess) { await cline.say("rooignore_error", ignoredFileAttemptedToAccess) pushToolResult(formatResponse.toolError(formatResponse.rooIgnoreError(ignoredFileAttemptedToAccess))) - return } @@ -38,14 +39,20 @@ export async function executeCommandTool( cline.consecutiveMistakeCount = 0 const didApprove = await askApproval("command", command) + if (!didApprove) { return } + const [userRejected, result] = await cline.executeCommandTool(command, customCwd) + if (userRejected) { cline.didRejectTool = true } + pushToolResult(result) + cline.recordToolUsage({ toolName: "execute_command" }) + return } } catch (error) { diff --git a/src/core/tools/fetchInstructionsTool.ts b/src/core/tools/fetchInstructionsTool.ts index eaa27737e9..5bdefdd316 100644 --- a/src/core/tools/fetchInstructionsTool.ts 
+++ b/src/core/tools/fetchInstructionsTool.ts @@ -12,50 +12,51 @@ export async function fetchInstructionsTool( pushToolResult: PushToolResult, ) { const task: string | undefined = block.params.task - const sharedMessageProps: ClineSayTool = { - tool: "fetchInstructions", - content: task, - } + const sharedMessageProps: ClineSayTool = { tool: "fetchInstructions", content: task } + try { if (block.partial) { - const partialMessage = JSON.stringify({ - ...sharedMessageProps, - content: undefined, - } satisfies ClineSayTool) + const partialMessage = JSON.stringify({ ...sharedMessageProps, content: undefined } satisfies ClineSayTool) await cline.ask("tool", partialMessage, block.partial).catch(() => {}) return } else { if (!task) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "fetch_instructions", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("fetch_instructions", "task")) return } cline.consecutiveMistakeCount = 0 - const completeMessage = JSON.stringify({ - ...sharedMessageProps, - content: task, - } satisfies ClineSayTool) + const completeMessage = JSON.stringify({ ...sharedMessageProps, content: task } satisfies ClineSayTool) const didApprove = await askApproval("tool", completeMessage) + if (!didApprove) { return } - // now fetch the content and provide it to the agent. + // Now fetch the content and provide it to the agent. 
const provider = cline.providerRef.deref() const mcpHub = provider?.getMcpHub() + if (!mcpHub) { throw new Error("MCP hub not available") } + const diffStrategy = cline.diffStrategy const context = provider?.context const content = await fetchInstructions(task, { mcpHub, diffStrategy, context }) + if (!content) { pushToolResult(formatResponse.toolError(`Invalid instructions request: ${task}`)) return } + pushToolResult(content) + cline.recordToolUsage({ toolName: "fetch_instructions" }) + + return } } catch (error) { await handleError("fetch instructions", error) diff --git a/src/core/tools/insertContentTool.ts b/src/core/tools/insertContentTool.ts index f05407f502..e55155aeab 100644 --- a/src/core/tools/insertContentTool.ts +++ b/src/core/tools/insertContentTool.ts @@ -37,12 +37,14 @@ export async function insertContentTool( // Validate required parameters if (!relPath) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "insert_content", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("insert_content", "path")) return } if (!operations) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "insert_content", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("insert_content", "operations")) return } @@ -52,6 +54,7 @@ export async function insertContentTool( if (!fileExists) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "insert_content", success: false }) const formattedError = `File does not exist at path: ${absolutePath}\n\n\nThe specified file could not be found. 
Please verify the file path and try again.\n` await cline.say("error", formattedError) pushToolResult(formattedError) @@ -70,6 +73,7 @@ export async function insertContentTool( } } catch (error) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "insert_content", success: false }) await cline.say("error", `Failed to parse operations JSON: ${error.message}`) pushToolResult(formatResponse.toolError("Invalid operations JSON format")) return @@ -112,10 +116,7 @@ export async function insertContentTool( await cline.diffViewProvider.update(updatedContent, true) - const completeMessage = JSON.stringify({ - ...sharedMessageProps, - diff, - } satisfies ClineSayTool) + const completeMessage = JSON.stringify({ ...sharedMessageProps, diff } satisfies ClineSayTool) const didApprove = await cline .ask("tool", completeMessage, false) @@ -133,6 +134,7 @@ export async function insertContentTool( if (relPath) { await cline.getFileContextTracker().trackFileContext(relPath, "roo_edited" as RecordSource) } + cline.didEditFile = true if (!userEdits) { @@ -149,6 +151,7 @@ export async function insertContentTool( console.debug("[DEBUG] User made edits, sending feedback diff:", userFeedbackDiff) await cline.say("user_feedback_diff", userFeedbackDiff) + pushToolResult( `The user made the following updates to your content:\n\n${userEdits}\n\n` + `The updated content, which includes both your original modifications and the user's edits, has been successfully saved to ${relPath.toPosix()}. Here is the full, updated content of the file:\n\n` + @@ -159,6 +162,8 @@ export async function insertContentTool( `3. 
If the user's edits have addressed part of the task or changed the requirements, adjust your approach accordingly.` + `${newProblemsMessage}`, ) + + cline.recordToolUsage({ toolName: "insert_content" }) await cline.diffViewProvider.reset() } catch (error) { handleError("insert content", error) diff --git a/src/core/tools/listCodeDefinitionNamesTool.ts b/src/core/tools/listCodeDefinitionNamesTool.ts index 8487367e2b..7e4fad5bf8 100644 --- a/src/core/tools/listCodeDefinitionNamesTool.ts +++ b/src/core/tools/listCodeDefinitionNamesTool.ts @@ -17,29 +17,33 @@ export async function listCodeDefinitionNamesTool( removeClosingTag: RemoveClosingTag, ) { const relPath: string | undefined = block.params.path + const sharedMessageProps: ClineSayTool = { tool: "listCodeDefinitionNames", path: getReadablePath(cline.cwd, removeClosingTag("path", relPath)), } + try { if (block.partial) { - const partialMessage = JSON.stringify({ - ...sharedMessageProps, - content: "", - } satisfies ClineSayTool) + const partialMessage = JSON.stringify({ ...sharedMessageProps, content: "" } satisfies ClineSayTool) await cline.ask("tool", partialMessage, block.partial).catch(() => {}) return } else { if (!relPath) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "list_code_definition_names", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("list_code_definition_names", "path")) return } + cline.consecutiveMistakeCount = 0 + const absolutePath = path.resolve(cline.cwd, relPath) let result: string + try { const stats = await fs.stat(absolutePath) + if (stats.isFile()) { const fileResult = await parseSourceCodeDefinitionsForFile(absolutePath, cline.rooIgnoreController) result = fileResult ?? "No source code definitions found in cline file." 
@@ -51,18 +55,20 @@ export async function listCodeDefinitionNamesTool( } catch { result = `${absolutePath}: does not exist or cannot be accessed.` } - const completeMessage = JSON.stringify({ - ...sharedMessageProps, - content: result, - } satisfies ClineSayTool) + + const completeMessage = JSON.stringify({ ...sharedMessageProps, content: result } satisfies ClineSayTool) const didApprove = await askApproval("tool", completeMessage) + if (!didApprove) { return } + if (relPath) { await cline.getFileContextTracker().trackFileContext(relPath, "read_tool" as RecordSource) } + pushToolResult(result) + cline.recordToolUsage({ toolName: "list_code_definition_names" }) return } } catch (error) { diff --git a/src/core/tools/listFilesTool.ts b/src/core/tools/listFilesTool.ts index a010191f75..b9e1592ec0 100644 --- a/src/core/tools/listFilesTool.ts +++ b/src/core/tools/listFilesTool.ts @@ -21,6 +21,7 @@ import { ToolUse, AskApproval, HandleError, PushToolResult, RemoveClosingTag } f * conversation. * @param removeClosingTag - A function that removes a closing tag from a string. */ + export async function listFilesTool( cline: Cline, block: ToolUse, @@ -32,28 +33,31 @@ export async function listFilesTool( const relDirPath: string | undefined = block.params.path const recursiveRaw: string | undefined = block.params.recursive const recursive = recursiveRaw?.toLowerCase() === "true" + const sharedMessageProps: ClineSayTool = { tool: !recursive ? 
"listFilesTopLevel" : "listFilesRecursive", path: getReadablePath(cline.cwd, removeClosingTag("path", relDirPath)), } + try { if (block.partial) { - const partialMessage = JSON.stringify({ - ...sharedMessageProps, - content: "", - } satisfies ClineSayTool) + const partialMessage = JSON.stringify({ ...sharedMessageProps, content: "" } satisfies ClineSayTool) await cline.ask("tool", partialMessage, block.partial).catch(() => {}) return } else { if (!relDirPath) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "list_files", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("list_files", "path")) return } + cline.consecutiveMistakeCount = 0 + const absolutePath = path.resolve(cline.cwd, relDirPath) const [files, didHitLimit] = await listFiles(absolutePath, recursive, 200) const { showRooIgnoredFiles = true } = (await cline.providerRef.deref()?.getState()) ?? {} + const result = formatResponse.formatFilesList( absolutePath, files, @@ -61,15 +65,16 @@ export async function listFilesTool( cline.rooIgnoreController, showRooIgnoredFiles, ) - const completeMessage = JSON.stringify({ - ...sharedMessageProps, - content: result, - } satisfies ClineSayTool) + + const completeMessage = JSON.stringify({ ...sharedMessageProps, content: result } satisfies ClineSayTool) const didApprove = await askApproval("tool", completeMessage) + if (!didApprove) { return } + pushToolResult(result) + cline.recordToolUsage({ toolName: "list_files" }) } } catch (error) { await handleError("listing files", error) diff --git a/src/core/tools/newTaskTool.ts b/src/core/tools/newTaskTool.ts index d6c94dd838..e299f09737 100644 --- a/src/core/tools/newTaskTool.ts +++ b/src/core/tools/newTaskTool.ts @@ -15,6 +15,7 @@ export async function newTaskTool( ) { const mode: string | undefined = block.params.mode const message: string | undefined = block.params.message + try { if (block.partial) { const partialMessage = JSON.stringify({ @@ -22,23 +23,29 @@ export async 
function newTaskTool( mode: removeClosingTag("mode", mode), message: removeClosingTag("message", message), }) + await cline.ask("tool", partialMessage, block.partial).catch(() => {}) return } else { if (!mode) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "new_task", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("new_task", "mode")) return } + if (!message) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "new_task", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("new_task", "message")) return } + cline.consecutiveMistakeCount = 0 // Verify the mode exists const targetMode = getModeBySlug(mode, (await cline.providerRef.deref()?.getState())?.customModes) + if (!targetMode) { pushToolResult(formatResponse.toolError(`Invalid mode: ${mode}`)) return @@ -49,6 +56,7 @@ export async function newTaskTool( mode: targetMode.name, content: message, }) + const didApprove = await askApproval("tool", toolMessage) if (!didApprove) { @@ -74,6 +82,7 @@ export async function newTaskTool( cline.emit("taskSpawned", newCline.taskId) pushToolResult(`Successfully created new task in ${targetMode.name} mode with message: ${message}`) + cline.recordToolUsage({ toolName: "new_task" }) // Set the isPaused flag to true so the parent // task can wait for the sub-task to finish. 
diff --git a/src/core/tools/readFileTool.ts b/src/core/tools/readFileTool.ts index 022ec4321c..1d8b23ca0e 100644 --- a/src/core/tools/readFileTool.ts +++ b/src/core/tools/readFileTool.ts @@ -37,15 +37,13 @@ export async function readFileTool( } try { if (block.partial) { - const partialMessage = JSON.stringify({ - ...sharedMessageProps, - content: undefined, - } satisfies ClineSayTool) + const partialMessage = JSON.stringify({ ...sharedMessageProps, content: undefined } satisfies ClineSayTool) await cline.ask("tool", partialMessage, block.partial).catch(() => {}) return } else { if (!relPath) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "read_file", success: false }) const errorMsg = await cline.sayAndCreateMissingParamError("read_file", "path") pushToolResult(`${errorMsg}`) return @@ -67,6 +65,7 @@ export async function readFileTool( // Parse start_line if provided if (startLineStr) { startLine = parseInt(startLineStr) + if (isNaN(startLine)) { // Invalid start_line cline.consecutiveMistakeCount++ @@ -74,6 +73,7 @@ export async function readFileTool( pushToolResult(`${relPath}Invalid start_line value`) return } + startLine -= 1 // Convert to 0-based index } @@ -94,6 +94,7 @@ export async function readFileTool( } const accessAllowed = cline.rooIgnoreController?.validateAccess(relPath) + if (!accessAllowed) { await cline.say("rooignore_error", relPath) const errorMsg = formatResponse.rooIgnoreError(relPath) @@ -103,6 +104,7 @@ export async function readFileTool( // Create line snippet description for approval message let lineSnippet = "" + if (isFullRead) { // No snippet for full read } else if (startLine !== undefined && endLine !== undefined) { @@ -127,12 +129,14 @@ export async function readFileTool( } satisfies ClineSayTool) const didApprove = await askApproval("tool", completeMessage) + if (!didApprove) { return } // Count total lines in the file let totalLines = 0 + try { totalLines = await countFileLines(absolutePath) } catch (error) 
{ @@ -163,6 +167,7 @@ export async function readFileTool( content = res[0].length > 0 ? addLineNumbers(res[0]) : "" const result = res[1] + if (result) { sourceCodeDef = `${result}` } @@ -211,9 +216,11 @@ export async function readFileTool( else { // For non-range reads, always show line range let lines = totalLines + if (maxReadFileLine >= 0 && totalLines > maxReadFileLine) { lines = maxReadFileLine } + const lineRangeAttr = ` lines="1-${lines}"` // Maintain exact format expected by tests @@ -228,6 +235,7 @@ export async function readFileTool( // Format the result into the required XML structure const xmlResult = `${relPath}\n${contentTag}${xmlInfo}` pushToolResult(xmlResult) + cline.recordToolUsage({ toolName: "read_file" }) } } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error) diff --git a/src/core/tools/searchAndReplaceTool.ts b/src/core/tools/searchAndReplaceTool.ts index 7b88405e37..89cd623795 100644 --- a/src/core/tools/searchAndReplaceTool.ts +++ b/src/core/tools/searchAndReplaceTool.ts @@ -32,16 +32,20 @@ export async function searchAndReplaceTool( path: removeClosingTag("path", relPath), operations: removeClosingTag("operations", operations), }) + await cline.ask("tool", partialMessage, block.partial).catch(() => {}) return } else { if (!relPath) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "search_and_replace", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("search_and_replace", "path")) return } + if (!operations) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "search_and_replace", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("search_and_replace", "operations")) return } @@ -69,6 +73,7 @@ export async function searchAndReplaceTool( try { parsedOperations = JSON.parse(operations) + if (!Array.isArray(parsedOperations)) { throw new Error("Operations must be an array") } @@ -132,18 +137,16 @@ export async function 
searchAndReplaceTool( await cline.diffViewProvider.update(newContent, true) cline.diffViewProvider.scrollToFirstDiff() - const completeMessage = JSON.stringify({ - ...sharedMessageProps, - diff: diff, - } satisfies ClineSayTool) - + const completeMessage = JSON.stringify({ ...sharedMessageProps, diff: diff } satisfies ClineSayTool) const didApprove = await askApproval("tool", completeMessage) + if (!didApprove) { await cline.diffViewProvider.revertChanges() // cline likely handles closing the diff view return } const { newProblemsMessage, userEdits, finalContent } = await cline.diffViewProvider.saveChanges() + if (relPath) { await cline.getFileContextTracker().trackFileContext(relPath, "roo_edited" as RecordSource) } @@ -158,6 +161,7 @@ export async function searchAndReplaceTool( diff: userEdits, } satisfies ClineSayTool), ) + pushToolResult( `The user made the following updates to your content:\n\n${userEdits}\n\n` + `The updated content, which includes both your original modifications and the user's edits, has been successfully saved to ${relPath.toPosix()}. 
Here is the full, updated content of the file, including line numbers:\n\n` + @@ -171,7 +175,10 @@ export async function searchAndReplaceTool( } else { pushToolResult(`Changes successfully applied to ${relPath.toPosix()}:\n\n${newProblemsMessage}`) } + + cline.recordToolUsage({ toolName: "search_and_replace" }) await cline.diffViewProvider.reset() + return } } catch (error) { diff --git a/src/core/tools/searchFilesTool.ts b/src/core/tools/searchFilesTool.ts index 3cf651a0db..3c1b09b6a4 100644 --- a/src/core/tools/searchFilesTool.ts +++ b/src/core/tools/searchFilesTool.ts @@ -17,33 +17,38 @@ export async function searchFilesTool( const relDirPath: string | undefined = block.params.path const regex: string | undefined = block.params.regex const filePattern: string | undefined = block.params.file_pattern + const sharedMessageProps: ClineSayTool = { tool: "searchFiles", path: getReadablePath(cline.cwd, removeClosingTag("path", relDirPath)), regex: removeClosingTag("regex", regex), filePattern: removeClosingTag("file_pattern", filePattern), } + try { if (block.partial) { - const partialMessage = JSON.stringify({ - ...sharedMessageProps, - content: "", - } satisfies ClineSayTool) + const partialMessage = JSON.stringify({ ...sharedMessageProps, content: "" } satisfies ClineSayTool) await cline.ask("tool", partialMessage, block.partial).catch(() => {}) return } else { if (!relDirPath) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "search_files", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("search_files", "path")) return } + if (!regex) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "search_files", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("search_files", "regex")) return } + cline.consecutiveMistakeCount = 0 + const absolutePath = path.resolve(cline.cwd, relDirPath) + const results = await regexSearchFiles( cline.cwd, absolutePath, @@ -51,15 +56,17 @@ export async 
function searchFilesTool( filePattern, cline.rooIgnoreController, ) - const completeMessage = JSON.stringify({ - ...sharedMessageProps, - content: results, - } satisfies ClineSayTool) + + const completeMessage = JSON.stringify({ ...sharedMessageProps, content: results } satisfies ClineSayTool) const didApprove = await askApproval("tool", completeMessage) + if (!didApprove) { return } + pushToolResult(results) + cline.recordToolUsage({ toolName: "search_files" }) + return } } catch (error) { diff --git a/src/core/tools/switchModeTool.ts b/src/core/tools/switchModeTool.ts index 595eb04290..0d0da1de39 100644 --- a/src/core/tools/switchModeTool.ts +++ b/src/core/tools/switchModeTool.ts @@ -15,6 +15,7 @@ export async function switchModeTool( ) { const mode_slug: string | undefined = block.params.mode_slug const reason: string | undefined = block.params.reason + try { if (block.partial) { const partialMessage = JSON.stringify({ @@ -22,49 +23,57 @@ export async function switchModeTool( mode: removeClosingTag("mode_slug", mode_slug), reason: removeClosingTag("reason", reason), }) + await cline.ask("tool", partialMessage, block.partial).catch(() => {}) return } else { if (!mode_slug) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "switch_mode", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("switch_mode", "mode_slug")) return } + cline.consecutiveMistakeCount = 0 // Verify the mode exists const targetMode = getModeBySlug(mode_slug, (await cline.providerRef.deref()?.getState())?.customModes) + if (!targetMode) { + cline.recordToolUsage({ toolName: "switch_mode", success: false }) pushToolResult(formatResponse.toolError(`Invalid mode: ${mode_slug}`)) return } // Check if already in requested mode const currentMode = (await cline.providerRef.deref()?.getState())?.mode ?? 
defaultModeSlug + if (currentMode === mode_slug) { + cline.recordToolUsage({ toolName: "switch_mode", success: false }) pushToolResult(`Already in ${targetMode.name} mode.`) return } - const completeMessage = JSON.stringify({ - tool: "switchMode", - mode: mode_slug, - reason, - }) - + const completeMessage = JSON.stringify({ tool: "switchMode", mode: mode_slug, reason }) const didApprove = await askApproval("tool", completeMessage) + if (!didApprove) { return } // Switch the mode using shared handler await cline.providerRef.deref()?.handleModeSwitch(mode_slug) + pushToolResult( `Successfully switched from ${getModeBySlug(currentMode)?.name ?? currentMode} mode to ${ targetMode.name } mode${reason ? ` because: ${reason}` : ""}.`, ) - await delay(500) // delay to allow mode change to take effect before next tool is executed + + cline.recordToolUsage({ toolName: "switch_mode" }) + + await delay(500) // Delay to allow mode change to take effect before next tool is executed + return } } catch (error) { diff --git a/src/core/tools/useMcpToolTool.ts b/src/core/tools/useMcpToolTool.ts index f89a2938b7..04a400371c 100644 --- a/src/core/tools/useMcpToolTool.ts +++ b/src/core/tools/useMcpToolTool.ts @@ -22,51 +22,60 @@ export async function useMcpToolTool( toolName: removeClosingTag("tool_name", tool_name), arguments: removeClosingTag("arguments", mcp_arguments), } satisfies ClineAskUseMcpServer) + await cline.ask("use_mcp_server", partialMessage, block.partial).catch(() => {}) return } else { if (!server_name) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "use_mcp_tool", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("use_mcp_tool", "server_name")) return } + if (!tool_name) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "use_mcp_tool", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("use_mcp_tool", "tool_name")) return } - // arguments are optional, but if they are provided
they must be valid JSON - // if (!mcp_arguments) { - // cline.consecutiveMistakeCount++ - // pushToolResult(await cline.sayAndCreateMissingParamError("use_mcp_tool", "arguments")) - // return - // } + let parsedArguments: Record | undefined + if (mcp_arguments) { try { parsedArguments = JSON.parse(mcp_arguments) } catch (error) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "use_mcp_tool", success: false }) await cline.say("error", `Roo tried to use ${tool_name} with an invalid JSON argument. Retrying...`) + pushToolResult( formatResponse.toolError(formatResponse.invalidMcpToolArgumentError(server_name, tool_name)), ) + return } } + cline.consecutiveMistakeCount = 0 + const completeMessage = JSON.stringify({ type: "use_mcp_tool", serverName: server_name, toolName: tool_name, arguments: mcp_arguments, } satisfies ClineAskUseMcpServer) + const didApprove = await askApproval("use_mcp_server", completeMessage) + if (!didApprove) { return } - // now execute the tool + + // Now execute the tool await cline.say("mcp_server_request_started") // same as browser_action_result + const toolResult = await cline.providerRef .deref() ?.getMcpHub() @@ -88,8 +97,11 @@ export async function useMcpToolTool( }) .filter(Boolean) .join("\n\n") || "(No response)" + await cline.say("mcp_server_response", toolResultPretty) pushToolResult(formatResponse.toolResult(toolResultPretty)) + cline.recordToolUsage({ toolName: "use_mcp_tool" }) + return } } catch (error) { diff --git a/src/core/tools/writeToFileTool.ts b/src/core/tools/writeToFileTool.ts index 89dd010254..cf2ced16b5 100644 --- a/src/core/tools/writeToFileTool.ts +++ b/src/core/tools/writeToFileTool.ts @@ -25,6 +25,7 @@ export async function writeToFileTool( const relPath: string | undefined = block.params.path let newContent: string | undefined = block.params.content let predictedLineCount: number | undefined = parseInt(block.params.line_count ?? 
"0") + if (!relPath || !newContent) { // checking for newContent ensure relPath is complete // wait so we can determine if it's a new file or editing an existing file @@ -32,15 +33,16 @@ export async function writeToFileTool( } const accessAllowed = cline.rooIgnoreController?.validateAccess(relPath) + if (!accessAllowed) { await cline.say("rooignore_error", relPath) pushToolResult(formatResponse.toolError(formatResponse.rooIgnoreError(relPath))) - return } // Check if file exists using cached map or fs.access let fileExists: boolean + if (cline.diffViewProvider.editType !== undefined) { fileExists = cline.diffViewProvider.editType === "modify" } else { @@ -54,6 +56,7 @@ export async function writeToFileTool( // cline handles cases where it includes language specifiers like ```python ```js newContent = newContent.split("\n").slice(1).join("\n").trim() } + if (newContent.endsWith("```")) { newContent = newContent.split("\n").slice(0, -1).join("\n").trim() } @@ -71,41 +74,51 @@ export async function writeToFileTool( path: getReadablePath(cline.cwd, removeClosingTag("path", relPath)), isOutsideWorkspace, } + try { if (block.partial) { // update gui message const partialMessage = JSON.stringify(sharedMessageProps) await cline.ask("tool", partialMessage, block.partial).catch(() => {}) + // update editor if (!cline.diffViewProvider.isEditing) { // open the editor and prepare to stream content in await cline.diffViewProvider.open(relPath) } + // editor is open, stream content in await cline.diffViewProvider.update( everyLineHasLineNumbers(newContent) ? 
stripLineNumbers(newContent) : newContent, false, ) + return } else { if (!relPath) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "write_to_file", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("write_to_file", "path")) await cline.diffViewProvider.reset() return } + if (!newContent) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "write_to_file", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("write_to_file", "content")) await cline.diffViewProvider.reset() return } + if (!predictedLineCount) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "write_to_file", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("write_to_file", "line_count")) await cline.diffViewProvider.reset() return } + cline.consecutiveMistakeCount = 0 // if isEditingFile false, that means we have the full contents of the file already. @@ -117,10 +130,12 @@ export async function writeToFileTool( await cline.ask("tool", partialMessage, true).catch(() => {}) // sending true for partial even though it's not a partial, cline shows the edit row before the content is streamed into the editor await cline.diffViewProvider.open(relPath) } + await cline.diffViewProvider.update( everyLineHasLineNumbers(newContent) ? stripLineNumbers(newContent) : newContent, true, ) + await delay(300) // wait for diff view to update cline.diffViewProvider.scrollToFirstDiff() @@ -128,6 +143,7 @@ export async function writeToFileTool( if (detectCodeOmission(cline.diffViewProvider.originalContent || "", newContent, predictedLineCount)) { if (cline.diffStrategy) { await cline.diffViewProvider.revertChanges() + pushToolResult( formatResponse.toolError( `Content appears to be truncated (file has ${ @@ -161,18 +177,23 @@ export async function writeToFileTool( ? 
formatResponse.createPrettyPatch(relPath, cline.diffViewProvider.originalContent, newContent) : undefined, } satisfies ClineSayTool) + const didApprove = await askApproval("tool", completeMessage) + if (!didApprove) { await cline.diffViewProvider.revertChanges() return } + const { newProblemsMessage, userEdits, finalContent } = await cline.diffViewProvider.saveChanges() // Track file edit operation if (relPath) { await cline.getFileContextTracker().trackFileContext(relPath, "roo_edited" as RecordSource) } + cline.didEditFile = true // used to determine if we should wait for busy terminal to update before sending api request + if (userEdits) { await cline.say( "user_feedback_diff", @@ -182,6 +203,7 @@ export async function writeToFileTool( diff: userEdits, } satisfies ClineSayTool), ) + pushToolResult( `The user made the following updates to your content:\n\n${userEdits}\n\n` + `The updated content, which includes both your original modifications and the user's edits, has been successfully saved to ${relPath.toPosix()}. 
Here is the full, updated content of the file, including line numbers:\n\n` + @@ -197,7 +219,10 @@ export async function writeToFileTool( } else { pushToolResult(`The content was successfully saved to ${relPath.toPosix()}.${newProblemsMessage}`) } + + cline.recordToolUsage({ toolName: "write_to_file" }) await cline.diffViewProvider.reset() + return } } catch (error) { diff --git a/src/shared/tools.ts b/src/shared/tools.ts index 7dd12893a3..65f8a16c88 100644 --- a/src/shared/tools.ts +++ b/src/shared/tools.ts @@ -167,12 +167,14 @@ export interface NewTaskToolUse extends ToolUse { params: Partial, "mode" | "message">> } -export type ToolUsage = Record< - ToolName, - { - attempts: number - failures: number - } +export type ToolUsage = Partial< + Record< + ToolName, + { + attempts: number + failures: number + } + > > // Define tool group configuration From 1eac22b2f7e795d9f70bf43d39db4b16edcb8127 Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 17 Apr 2025 16:20:46 -0700 Subject: [PATCH 2/9] Remove unused import --- src/core/__tests__/Cline.test.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/__tests__/Cline.test.ts b/src/core/__tests__/Cline.test.ts index 90e365caf1..90e26655a8 100644 --- a/src/core/__tests__/Cline.test.ts +++ b/src/core/__tests__/Cline.test.ts @@ -3,7 +3,6 @@ import * as os from "os" import * as path from "path" -import pWaitFor from "p-wait-for" import * as vscode from "vscode" import { Anthropic } from "@anthropic-ai/sdk" From b56ae14feac97d7260ddaf36b8daf82645b02bfc Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 17 Apr 2025 16:53:51 -0700 Subject: [PATCH 3/9] Persist toolUsage in taskMetrics --- evals/apps/cli/src/index.ts | 7 +- .../db/drizzle/0003_familiar_miss_america.sql | 1 + .../db/drizzle/meta/0003_snapshot.json | 296 ++++++++++++++++++ evals/packages/db/drizzle/meta/_journal.json | 7 + evals/packages/db/src/schema.ts | 7 +- evals/packages/types/src/roo-code.ts | 45 ++- src/core/Cline.ts | 38 +-- 
.../read-file-maxReadFileLine.test.ts | 4 +- src/core/__tests__/read-file-xml.test.ts | 3 +- .../parse-assistant-message.ts | 3 +- src/core/mode-validator.ts | 2 +- src/core/prompts/tools/index.ts | 12 +- .../__tests__/executeCommandTool.test.ts | 4 +- src/core/tools/attemptCompletionTool.ts | 6 +- src/exports/api.ts | 6 +- src/exports/roo-code.d.ts | 6 + src/exports/types.ts | 6 + src/schemas/index.ts | 41 ++- src/shared/tools.ts | 35 +-- 19 files changed, 454 insertions(+), 75 deletions(-) create mode 100644 evals/packages/db/drizzle/0003_familiar_miss_america.sql create mode 100644 evals/packages/db/drizzle/meta/0003_snapshot.json diff --git a/evals/apps/cli/src/index.ts b/evals/apps/cli/src/index.ts index 61c0a019f6..d552092fa5 100644 --- a/evals/apps/cli/src/index.ts +++ b/evals/apps/cli/src/index.ts @@ -275,7 +275,12 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server }) } - if (eventName === RooCodeEventName.TaskCompleted || eventName === RooCodeEventName.TaskAborted) { + if (eventName === RooCodeEventName.TaskCompleted && taskMetricsId) { + const toolUsage = payload[2] + await updateTaskMetrics(taskMetricsId, { toolUsage }) + } + + if (eventName === RooCodeEventName.TaskAborted || eventName === RooCodeEventName.TaskCompleted) { taskFinishedAt = Date.now() await updateTask(task.id, { finishedAt: new Date() }) } diff --git a/evals/packages/db/drizzle/0003_familiar_miss_america.sql b/evals/packages/db/drizzle/0003_familiar_miss_america.sql new file mode 100644 index 0000000000..5b6d3a5140 --- /dev/null +++ b/evals/packages/db/drizzle/0003_familiar_miss_america.sql @@ -0,0 +1 @@ +ALTER TABLE `taskMetrics` ADD `toolUsage` blob; \ No newline at end of file diff --git a/evals/packages/db/drizzle/meta/0003_snapshot.json b/evals/packages/db/drizzle/meta/0003_snapshot.json new file mode 100644 index 0000000000..e3f02413bf --- /dev/null +++ b/evals/packages/db/drizzle/meta/0003_snapshot.json @@ -0,0 +1,296 @@ +{ + "version": "6", + 
"dialect": "sqlite", + "id": "a7a893e2-373a-4706-bcd4-772e2525db62", + "prevId": "f49d9b0b-fda9-467a-9adb-c941d6cbf7ce", + "tables": { + "runs": { + "name": "runs", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "taskMetricsId": { + "name": "taskMetricsId", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "settings": { + "name": "settings", + "type": "blob", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "pid": { + "name": "pid", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "socketPath": { + "name": "socketPath", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "concurrency": { + "name": "concurrency", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": 2 + }, + "passed": { + "name": "passed", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": 0 + }, + "failed": { + "name": "failed", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": 0 + }, + "createdAt": { + "name": "createdAt", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "runs_taskMetricsId_taskMetrics_id_fk": { + "name": "runs_taskMetricsId_taskMetrics_id_fk", + "tableFrom": "runs", + "tableTo": "taskMetrics", + "columnsFrom": ["taskMetricsId"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + 
"uniqueConstraints": {}, + "checkConstraints": {} + }, + "taskMetrics": { + "name": "taskMetrics", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "tokensIn": { + "name": "tokensIn", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "tokensOut": { + "name": "tokensOut", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "tokensContext": { + "name": "tokensContext", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "cacheWrites": { + "name": "cacheWrites", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "cacheReads": { + "name": "cacheReads", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "cost": { + "name": "cost", + "type": "real", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "duration": { + "name": "duration", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "toolUsage": { + "name": "toolUsage", + "type": "blob", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "tasks": { + "name": "tasks", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "runId": { + "name": "runId", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "taskMetricsId": { + "name": "taskMetricsId", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "language": { 
+ "name": "language", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "exercise": { + "name": "exercise", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "passed": { + "name": "passed", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "startedAt": { + "name": "startedAt", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "finishedAt": { + "name": "finishedAt", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": { + "tasks_language_exercise_idx": { + "name": "tasks_language_exercise_idx", + "columns": ["runId", "language", "exercise"], + "isUnique": true + } + }, + "foreignKeys": { + "tasks_runId_runs_id_fk": { + "name": "tasks_runId_runs_id_fk", + "tableFrom": "tasks", + "tableTo": "runs", + "columnsFrom": ["runId"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + }, + "tasks_taskMetricsId_taskMetrics_id_fk": { + "name": "tasks_taskMetricsId_taskMetrics_id_fk", + "tableFrom": "tasks", + "tableTo": "taskMetrics", + "columnsFrom": ["taskMetricsId"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + } + }, + "views": {}, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + }, + "internal": { + "indexes": {} + } +} diff --git a/evals/packages/db/drizzle/meta/_journal.json b/evals/packages/db/drizzle/meta/_journal.json index c35d084ff7..2291a395f3 100644 --- a/evals/packages/db/drizzle/meta/_journal.json +++ b/evals/packages/db/drizzle/meta/_journal.json @@ -22,6 +22,13 @@ "when": 1743698195142, "tag": "0002_white_flatman", 
"breakpoints": true + }, + { + "idx": 3, + "version": "6", + "when": 1744933023667, + "tag": "0003_familiar_miss_america", + "breakpoints": true } ] } diff --git a/evals/packages/db/src/schema.ts b/evals/packages/db/src/schema.ts index f2fa86a826..5add8460c2 100644 --- a/evals/packages/db/src/schema.ts +++ b/evals/packages/db/src/schema.ts @@ -2,7 +2,7 @@ import { sqliteTable, text, real, integer, blob, uniqueIndex } from "drizzle-orm import { relations } from "drizzle-orm" import { createInsertSchema } from "drizzle-zod" -import { RooCodeSettings, exerciseLanguages, rooCodeSettingsSchema } from "@evals/types" +import { RooCodeSettings, ToolUsage, exerciseLanguages, rooCodeSettingsSchema, toolUsageSchema } from "@evals/types" /** * runs @@ -84,12 +84,15 @@ export const taskMetrics = sqliteTable("taskMetrics", { cacheReads: integer({ mode: "number" }).notNull(), cost: real().notNull(), duration: integer({ mode: "number" }).notNull(), + toolUsage: blob({ mode: "json" }).$type(), createdAt: integer({ mode: "timestamp" }).notNull(), }) export type TaskMetrics = typeof taskMetrics.$inferSelect -export const insertTaskMetricsSchema = createInsertSchema(taskMetrics).omit({ id: true, createdAt: true }) +export const insertTaskMetricsSchema = createInsertSchema(taskMetrics) + .omit({ id: true, createdAt: true }) + .extend({ toolUsage: toolUsageSchema.optional() }) export type InsertTaskMetrics = Omit diff --git a/evals/packages/types/src/roo-code.ts b/evals/packages/types/src/roo-code.ts index fc87247ee4..ebac66fb17 100644 --- a/evals/packages/types/src/roo-code.ts +++ b/evals/packages/types/src/roo-code.ts @@ -802,6 +802,49 @@ export const tokenUsageSchema = z.object({ export type TokenUsage = z.infer +/** + * ToolName + */ + +export const toolNames = [ + "execute_command", + "read_file", + "write_to_file", + "append_to_file", + "apply_diff", + "insert_content", + "search_and_replace", + "search_files", + "list_files", + "list_code_definition_names", + "browser_action", + 
"use_mcp_tool", + "access_mcp_resource", + "ask_followup_question", + "attempt_completion", + "switch_mode", + "new_task", + "fetch_instructions", +] as const + +export const toolNamesSchema = z.enum(toolNames) + +export type ToolName = z.infer + +/** + * ToolUsage + */ + +export const toolUsageSchema = z.record( + toolNamesSchema, + z.object({ + attempts: z.number(), + failures: z.number(), + }), +) + +export type ToolUsage = z.infer + /** * RooCodeEvent */ @@ -837,7 +880,7 @@ export const rooCodeEventsSchema = z.object({ [RooCodeEventName.TaskAskResponded]: z.tuple([z.string()]), [RooCodeEventName.TaskAborted]: z.tuple([z.string()]), [RooCodeEventName.TaskSpawned]: z.tuple([z.string(), z.string()]), - [RooCodeEventName.TaskCompleted]: z.tuple([z.string(), tokenUsageSchema]), + [RooCodeEventName.TaskCompleted]: z.tuple([z.string(), tokenUsageSchema, toolUsageSchema]), [RooCodeEventName.TaskTokenUsageUpdated]: z.tuple([z.string(), tokenUsageSchema]), }) diff --git a/src/core/Cline.ts b/src/core/Cline.ts index 7401002cfb..1b06517b79 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -13,7 +13,7 @@ import { serializeError } from "serialize-error" import * as vscode from "vscode" // schemas -import { TokenUsage } from "../schemas" +import { TokenUsage, ToolUsage, ToolName } from "../schemas" // api import { ApiHandler, buildApiHandler } from "../api" @@ -39,7 +39,7 @@ import { GlobalFileNames } from "../shared/globalFileNames" import { defaultModeSlug, getModeBySlug, getFullModeDetails, isToolAllowedForMode } from "../shared/modes" import { EXPERIMENT_IDS, experiments as Experiments, ExperimentId } from "../shared/experiments" import { formatLanguage } from "../shared/language" -import { ToolParamName, ToolName, ToolResponse, ToolUsage } from "../shared/tools" +import { ToolParamName, ToolResponse } from "../shared/tools" // services import { UrlContentFetcher } from "../services/browser/UrlContentFetcher" @@ -106,8 +106,8 @@ export type ClineEvents = { 
taskAskResponded: [] taskAborted: [] taskSpawned: [taskId: string] - taskCompleted: [taskId: string, usage: TokenUsage] - taskTokenUsageUpdated: [taskId: string, usage: TokenUsage] + taskCompleted: [taskId: string, tokenUsage: TokenUsage, toolUsage: ToolUsage] + taskTokenUsageUpdated: [taskId: string, tokenUsage: TokenUsage] } export type ClineOptions = { @@ -369,19 +369,15 @@ export class Cline extends EventEmitter { this.emit("message", { action: "updated", message: partialMessage }) } - getTokenUsage() { - const usage = getApiMetrics(combineApiRequests(combineCommandSequences(this.clineMessages.slice(1)))) - this.emit("taskTokenUsageUpdated", this.taskId, usage) - return usage - } - private async saveClineMessages() { try { const taskDir = await this.ensureTaskDirectoryExists() const filePath = path.join(taskDir, GlobalFileNames.uiMessages) await fs.writeFile(filePath, JSON.stringify(this.clineMessages)) - // combined as they are in ChatView - const apiMetrics = this.getTokenUsage() + + const tokenUsage = this.getTokenUsage() + this.emit("taskTokenUsageUpdated", this.taskId, tokenUsage) + const taskMessage = this.clineMessages[0] // first message is always the task say const lastRelevantMessage = this.clineMessages[ @@ -406,11 +402,11 @@ export class Cline extends EventEmitter { number: this.taskNumber, ts: lastRelevantMessage.ts, task: taskMessage.text ?? 
"", - tokensIn: apiMetrics.totalTokensIn, - tokensOut: apiMetrics.totalTokensOut, - cacheWrites: apiMetrics.totalCacheWrites, - cacheReads: apiMetrics.totalCacheReads, - totalCost: apiMetrics.totalCost, + tokensIn: tokenUsage.totalTokensIn, + tokensOut: tokenUsage.totalTokensOut, + cacheWrites: tokenUsage.totalCacheWrites, + cacheReads: tokenUsage.totalCacheReads, + totalCost: tokenUsage.totalCost, size: taskDirSize, workspace: this.cwd, }) @@ -2699,7 +2695,11 @@ export class Cline extends EventEmitter { // Metrics - public recordToolUsage({ toolName, success = true }: { toolName: ToolName; success?: boolean }): ToolUsage { + public getTokenUsage() { + return getApiMetrics(combineApiRequests(combineCommandSequences(this.clineMessages.slice(1)))) + } + + public recordToolUsage({ toolName, success = true }: { toolName: ToolName; success?: boolean }) { if (!this.toolUsage[toolName]) { this.toolUsage[toolName] = { attempts: 0, failures: 0 } } @@ -2709,7 +2709,9 @@ export class Cline extends EventEmitter { if (!success) { this.toolUsage[toolName].failures++ } + } + public getToolUsage() { return this.toolUsage } } diff --git a/src/core/__tests__/read-file-maxReadFileLine.test.ts b/src/core/__tests__/read-file-maxReadFileLine.test.ts index 0d5c9436d1..4d9f9e1cfa 100644 --- a/src/core/__tests__/read-file-maxReadFileLine.test.ts +++ b/src/core/__tests__/read-file-maxReadFileLine.test.ts @@ -7,7 +7,8 @@ import { readLines } from "../../integrations/misc/read-lines" import { extractTextFromFile } from "../../integrations/misc/extract-text" import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter" import { isBinaryFile } from "isbinaryfile" -import { ReadFileToolUse, ToolUsage } from "../../shared/tools" +import { ReadFileToolUse } from "../../shared/tools" +import { ToolUsage } from "../../schemas" // Mock dependencies jest.mock("../../integrations/misc/line-counter") @@ -126,7 +127,6 @@ describe("read_file tool with maxReadFileLine setting", () => { 
mockCline.getFileContextTracker = jest.fn().mockReturnValue({ trackFileContext: jest.fn().mockResolvedValue(undefined), }) - mockCline.recordToolUsage = jest.fn().mockReturnValue({} as ToolUsage) // Reset tool result diff --git a/src/core/__tests__/read-file-xml.test.ts b/src/core/__tests__/read-file-xml.test.ts index 6b5a5c0037..c995003a1a 100644 --- a/src/core/__tests__/read-file-xml.test.ts +++ b/src/core/__tests__/read-file-xml.test.ts @@ -7,7 +7,8 @@ import { readLines } from "../../integrations/misc/read-lines" import { extractTextFromFile } from "../../integrations/misc/extract-text" import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter" import { isBinaryFile } from "isbinaryfile" -import { ReadFileToolUse, ToolUsage } from "../../shared/tools" +import { ReadFileToolUse } from "../../shared/tools" +import { ToolUsage } from "../../schemas" // Mock dependencies jest.mock("../../integrations/misc/line-counter") diff --git a/src/core/assistant-message/parse-assistant-message.ts b/src/core/assistant-message/parse-assistant-message.ts index aa97873701..0cac4dfb98 100644 --- a/src/core/assistant-message/parse-assistant-message.ts +++ b/src/core/assistant-message/parse-assistant-message.ts @@ -1,4 +1,5 @@ -import { TextContent, ToolUse, ToolParamName, toolParamNames, toolNames, ToolName } from "../../shared/tools" +import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools" +import { toolNames, ToolName } from "../../schemas" export type AssistantMessageContent = TextContent | ToolUse diff --git a/src/core/mode-validator.ts b/src/core/mode-validator.ts index 8a9ac881c7..4c5e8fbf7f 100644 --- a/src/core/mode-validator.ts +++ b/src/core/mode-validator.ts @@ -1,4 +1,4 @@ -import { ToolName } from "../shared/tools" +import { ToolName } from "../schemas" import { Mode, isToolAllowedForMode, ModeConfig } from "../shared/modes" export function validateToolUse( diff --git a/src/core/prompts/tools/index.ts 
b/src/core/prompts/tools/index.ts index 642b9fd652..adc6cff613 100644 --- a/src/core/prompts/tools/index.ts +++ b/src/core/prompts/tools/index.ts @@ -1,3 +1,10 @@ +import { ToolName } from "../../../schemas" +import { TOOL_GROUPS, ALWAYS_AVAILABLE_TOOLS } from "../../../shared/tools" +import { DiffStrategy } from "../../diff/DiffStrategy" +import { McpHub } from "../../../services/mcp/McpHub" +import { Mode, ModeConfig, getModeConfig, isToolAllowedForMode, getGroupName } from "../../../shared/modes" + +import { ToolArgs } from "./types" import { getExecuteCommandDescription } from "./execute-command" import { getReadFileDescription } from "./read-file" import { getFetchInstructionsDescription } from "./fetch-instructions" @@ -15,11 +22,6 @@ import { getUseMcpToolDescription } from "./use-mcp-tool" import { getAccessMcpResourceDescription } from "./access-mcp-resource" import { getSwitchModeDescription } from "./switch-mode" import { getNewTaskDescription } from "./new-task" -import { DiffStrategy } from "../../diff/DiffStrategy" -import { McpHub } from "../../../services/mcp/McpHub" -import { Mode, ModeConfig, getModeConfig, isToolAllowedForMode, getGroupName } from "../../../shared/modes" -import { ToolName, TOOL_GROUPS, ALWAYS_AVAILABLE_TOOLS } from "../../../shared/tools" -import { ToolArgs } from "./types" // Map of tool names to their description functions const toolDescriptionMap: Record string | undefined> = { diff --git a/src/core/tools/__tests__/executeCommandTool.test.ts b/src/core/tools/__tests__/executeCommandTool.test.ts index 1d85831d57..408c45f994 100644 --- a/src/core/tools/__tests__/executeCommandTool.test.ts +++ b/src/core/tools/__tests__/executeCommandTool.test.ts @@ -5,7 +5,8 @@ import { describe, expect, it, jest, beforeEach } from "@jest/globals" import { executeCommandTool } from "../executeCommandTool" import { Cline } from "../../Cline" import { formatResponse } from "../../prompts/responses" -import { ToolUse, AskApproval, HandleError, 
PushToolResult, RemoveClosingTag, ToolUsage } from "../../../shared/tools" +import { ToolUse, AskApproval, HandleError, PushToolResult, RemoveClosingTag } from "../../../shared/tools" +import { ToolUsage } from "../../../schemas" // Mock dependencies jest.mock("../../Cline") @@ -40,7 +41,6 @@ describe("executeCommandTool", () => { // @ts-expect-error - Jest mock function type issues validateCommand: jest.fn().mockReturnValue(null), }, - // @ts-expect-error - Jest mock function type issues recordToolUsage: jest.fn().mockReturnValue({} as ToolUsage), } diff --git a/src/core/tools/attemptCompletionTool.ts b/src/core/tools/attemptCompletionTool.ts index 0dd458adbf..ac2051cf9c 100644 --- a/src/core/tools/attemptCompletionTool.ts +++ b/src/core/tools/attemptCompletionTool.ts @@ -45,7 +45,7 @@ export async function attemptCompletionTool( await cline.say("completion_result", removeClosingTag("result", result), undefined, false) telemetryService.captureTaskCompleted(cline.taskId) - cline.emit("taskCompleted", cline.taskId, cline.getTokenUsage()) + cline.emit("taskCompleted", cline.taskId, cline.getTokenUsage(), cline.getToolUsage()) await cline.ask("command", removeClosingTag("command", command), block.partial).catch(() => {}) } @@ -71,7 +71,7 @@ export async function attemptCompletionTool( // Haven't sent a command message yet so first send completion_result then command. await cline.say("completion_result", result, undefined, false) telemetryService.captureTaskCompleted(cline.taskId) - cline.emit("taskCompleted", cline.taskId, cline.getTokenUsage()) + cline.emit("taskCompleted", cline.taskId, cline.getTokenUsage(), cline.getToolUsage()) } // Complete command message. 
@@ -94,7 +94,7 @@ export async function attemptCompletionTool( } else { await cline.say("completion_result", result, undefined, false) telemetryService.captureTaskCompleted(cline.taskId) - cline.emit("taskCompleted", cline.taskId, cline.getTokenUsage()) + cline.emit("taskCompleted", cline.taskId, cline.getTokenUsage(), cline.getToolUsage()) } if (cline.parentTask) { diff --git a/src/exports/api.ts b/src/exports/api.ts index 2da90a84a5..47464ff00f 100644 --- a/src/exports/api.ts +++ b/src/exports/api.ts @@ -296,12 +296,12 @@ export class API extends EventEmitter implements RooCodeAPI { this.taskMap.delete(cline.taskId) }) - cline.on("taskCompleted", async (_, usage) => { - this.emit(RooCodeEventName.TaskCompleted, cline.taskId, usage) + cline.on("taskCompleted", async (_, tokenUsage, toolUsage) => { + this.emit(RooCodeEventName.TaskCompleted, cline.taskId, tokenUsage, toolUsage) this.taskMap.delete(cline.taskId) await this.fileLog( - `[${new Date().toISOString()}] taskCompleted -> ${cline.taskId} | ${JSON.stringify(usage, null, 2)}\n`, + `[${new Date().toISOString()}] taskCompleted -> ${cline.taskId} | ${JSON.stringify(tokenUsage, null, 2)} | ${JSON.stringify(toolUsage, null, 2)}\n`, ) }) diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 7c7439f538..ed02d336d9 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -521,6 +521,12 @@ type RooCodeEvents = { totalCost: number contextTokens: number }, + { + [x: string]: { + attempts: number + failures: number + } + }, ] taskTokenUsageUpdated: [ string, diff --git a/src/exports/types.ts b/src/exports/types.ts index 9d91b1e1f7..09a7fd6f78 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -530,6 +530,12 @@ type RooCodeEvents = { totalCost: number contextTokens: number }, + { + [x: string]: { + attempts: number + failures: number + } + }, ] taskTokenUsageUpdated: [ string, diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 259d968c00..1b8a63fab5 100644 --- 
a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -818,6 +818,45 @@ export const tokenUsageSchema = z.object({ export type TokenUsage = z.infer +export const toolNames = [ + "execute_command", + "read_file", + "write_to_file", + "append_to_file", + "apply_diff", + "insert_content", + "search_and_replace", + "search_files", + "list_files", + "list_code_definition_names", + "browser_action", + "use_mcp_tool", + "access_mcp_resource", + "ask_followup_question", + "attempt_completion", + "switch_mode", + "new_task", + "fetch_instructions", +] as const + +export const toolNamesSchema = z.enum(toolNames) + +export type ToolName = z.infer + +/** + * ToolUsage + */ + +export const toolUsageSchema = z.record( + toolNamesSchema, + z.object({ + attempts: z.number(), + failures: z.number(), + }), +) + +export type ToolUsage = z.infer + /** * RooCodeEvent */ @@ -852,7 +891,7 @@ export const rooCodeEventsSchema = z.object({ [RooCodeEventName.TaskAskResponded]: z.tuple([z.string()]), [RooCodeEventName.TaskAborted]: z.tuple([z.string()]), [RooCodeEventName.TaskSpawned]: z.tuple([z.string(), z.string()]), - [RooCodeEventName.TaskCompleted]: z.tuple([z.string(), tokenUsageSchema]), + [RooCodeEventName.TaskCompleted]: z.tuple([z.string(), tokenUsageSchema, toolUsageSchema]), [RooCodeEventName.TaskTokenUsageUpdated]: z.tuple([z.string(), tokenUsageSchema]), }) diff --git a/src/shared/tools.ts b/src/shared/tools.ts index 65f8a16c88..858bf591d3 100644 --- a/src/shared/tools.ts +++ b/src/shared/tools.ts @@ -1,6 +1,6 @@ import { Anthropic } from "@anthropic-ai/sdk" -import { ClineAsk, ToolProgressStatus, ToolGroup } from "../schemas" +import { ClineAsk, ToolProgressStatus, ToolGroup, ToolName } from "../schemas" export type ToolResponse = string | Array @@ -26,29 +26,6 @@ export interface TextContent { partial: boolean } -export const toolNames = [ - "execute_command", - "read_file", - "write_to_file", - "append_to_file", - "apply_diff", - "insert_content", - "search_and_replace", - 
"search_files", - "list_files", - "list_code_definition_names", - "browser_action", - "use_mcp_tool", - "access_mcp_resource", - "ask_followup_question", - "attempt_completion", - "switch_mode", - "new_task", - "fetch_instructions", -] as const - -export type ToolName = (typeof toolNames)[number] - export const toolParamNames = [ "command", "path", @@ -167,16 +144,6 @@ export interface NewTaskToolUse extends ToolUse { params: Partial, "mode" | "message">> } -export type ToolUsage = Partial< - Record< - ToolName, - { - attempts: number - failures: number - } - > -> - // Define tool group configuration export type ToolGroupConfig = { tools: readonly string[] From bb8a04899bc06c8003f0c5a753b3456f36191a6f Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 17 Apr 2025 17:02:34 -0700 Subject: [PATCH 4/9] Update experiments --- evals/packages/types/src/roo-code.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/evals/packages/types/src/roo-code.ts b/evals/packages/types/src/roo-code.ts index ebac66fb17..bb525f71b4 100644 --- a/evals/packages/types/src/roo-code.ts +++ b/evals/packages/types/src/roo-code.ts @@ -271,7 +271,7 @@ export type CustomSupportPrompts = z.infer * ExperimentId */ -export const experimentIds = ["search_and_replace", "insert_content", "powerSteering"] as const +export const experimentIds = ["search_and_replace", "insert_content", "powerSteering", "append_to_file"] as const export const experimentIdsSchema = z.enum(experimentIds) @@ -285,6 +285,7 @@ const experimentsSchema = z.object({ search_and_replace: z.boolean(), insert_content: z.boolean(), powerSteering: z.boolean(), + append_to_file: z.boolean(), }) export type Experiments = z.infer From aa20a814f8ee2b82a0e54b45a81bbd3d004d449e Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 17 Apr 2025 21:45:24 -0700 Subject: [PATCH 5/9] Add tests --- evals/packages/db/.gitignore | 1 + .../db/drizzle/0003_familiar_miss_america.sql | 1 - .../db/drizzle/0003_sweet_chimera.sql | 1 + 
.../db/drizzle/meta/0003_snapshot.json | 4 +- evals/packages/db/drizzle/meta/_journal.json | 4 +- evals/packages/db/package.json | 5 +- .../db/src/queries/__tests__/runs.spec.ts | 87 +++++++++++++++++++ evals/packages/db/src/queries/runs.ts | 22 ++++- evals/packages/db/src/schema.ts | 2 +- evals/packages/db/tsconfig.json | 3 + evals/packages/db/vitest.config.ts | 7 ++ evals/packages/db/vitest.setup.ts | 20 +++++ evals/packages/types/src/roo-code-defaults.ts | 1 + evals/pnpm-lock.yaml | 6 ++ evals/turbo.json | 4 +- 15 files changed, 155 insertions(+), 13 deletions(-) create mode 100644 evals/packages/db/.gitignore delete mode 100644 evals/packages/db/drizzle/0003_familiar_miss_america.sql create mode 100644 evals/packages/db/drizzle/0003_sweet_chimera.sql create mode 100644 evals/packages/db/src/queries/__tests__/runs.spec.ts create mode 100644 evals/packages/db/vitest.config.ts create mode 100644 evals/packages/db/vitest.setup.ts diff --git a/evals/packages/db/.gitignore b/evals/packages/db/.gitignore new file mode 100644 index 0000000000..c370cb644f --- /dev/null +++ b/evals/packages/db/.gitignore @@ -0,0 +1 @@ +test.db diff --git a/evals/packages/db/drizzle/0003_familiar_miss_america.sql b/evals/packages/db/drizzle/0003_familiar_miss_america.sql deleted file mode 100644 index 5b6d3a5140..0000000000 --- a/evals/packages/db/drizzle/0003_familiar_miss_america.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE `taskMetrics` ADD `toolUsage` blob; \ No newline at end of file diff --git a/evals/packages/db/drizzle/0003_sweet_chimera.sql b/evals/packages/db/drizzle/0003_sweet_chimera.sql new file mode 100644 index 0000000000..7248ec01df --- /dev/null +++ b/evals/packages/db/drizzle/0003_sweet_chimera.sql @@ -0,0 +1 @@ +ALTER TABLE `taskMetrics` ADD `toolUsage` text; \ No newline at end of file diff --git a/evals/packages/db/drizzle/meta/0003_snapshot.json b/evals/packages/db/drizzle/meta/0003_snapshot.json index e3f02413bf..0b7fa5b94d 100644 --- 
a/evals/packages/db/drizzle/meta/0003_snapshot.json +++ b/evals/packages/db/drizzle/meta/0003_snapshot.json @@ -1,7 +1,7 @@ { "version": "6", "dialect": "sqlite", - "id": "a7a893e2-373a-4706-bcd4-772e2525db62", + "id": "61d48d20-f662-445d-9962-cf9cb165cbe7", "prevId": "f49d9b0b-fda9-467a-9adb-c941d6cbf7ce", "tables": { "runs": { @@ -165,7 +165,7 @@ }, "toolUsage": { "name": "toolUsage", - "type": "blob", + "type": "text", "primaryKey": false, "notNull": false, "autoincrement": false diff --git a/evals/packages/db/drizzle/meta/_journal.json b/evals/packages/db/drizzle/meta/_journal.json index 2291a395f3..d40254559a 100644 --- a/evals/packages/db/drizzle/meta/_journal.json +++ b/evals/packages/db/drizzle/meta/_journal.json @@ -26,8 +26,8 @@ { "idx": 3, "version": "6", - "when": 1744933023667, - "tag": "0003_familiar_miss_america", + "when": 1744950664129, + "tag": "0003_sweet_chimera", "breakpoints": true } ] diff --git a/evals/packages/db/package.json b/evals/packages/db/package.json index 833750e7d5..ffc298ea01 100644 --- a/evals/packages/db/package.json +++ b/evals/packages/db/package.json @@ -6,6 +6,7 @@ "scripts": { "lint": "eslint src/**/*.ts --max-warnings=0", "check-types": "tsc --noEmit", + "test": "vitest --globals --run", "format": "prettier --write src", "drizzle-kit": "dotenvx run -f ../../.env -- tsx node_modules/drizzle-kit/bin.cjs", "db:generate": "pnpm drizzle-kit generate", @@ -29,6 +30,8 @@ "devDependencies": { "@evals/eslint-config": "workspace:^", "@evals/typescript-config": "workspace:^", - "drizzle-kit": "^0.30.5" + "drizzle-kit": "^0.30.5", + "execa": "^9.5.2", + "vitest": "^3.0.9" } } diff --git a/evals/packages/db/src/queries/__tests__/runs.spec.ts b/evals/packages/db/src/queries/__tests__/runs.spec.ts new file mode 100644 index 0000000000..1b340893d5 --- /dev/null +++ b/evals/packages/db/src/queries/__tests__/runs.spec.ts @@ -0,0 +1,87 @@ +import { createRun, finishRun } from "../runs.js" +import { createTask } from "../tasks.js" +import { 
createTaskMetrics } from "../taskMetrics.js" + +describe("finishRun", async () => { + it("aggregates task metrics, including tool usage", async () => { + const run = await createRun({ model: "gpt-4.1-mini", socketPath: "/tmp/roo.sock" }) + + await createTask({ + runId: run.id, + taskMetricsId: ( + await createTaskMetrics({ + duration: 45_000, + tokensIn: 100_000, + tokensOut: 2_000, + tokensContext: 102_000, + cacheWrites: 0, + cacheReads: 0, + cost: 0.05, + toolUsage: { + read_file: { + attempts: 3, + failures: 0, + }, + apply_diff: { + attempts: 3, + failures: 1, + }, + }, + }) + ).id, + language: "go", + exercise: "go/say", + passed: true, + startedAt: new Date(), + finishedAt: new Date(), + }) + + await createTask({ + runId: run.id, + taskMetricsId: ( + await createTaskMetrics({ + duration: 30_000, + tokensIn: 75_000, + tokensOut: 1_000, + tokensContext: 76_000, + cacheWrites: 0, + cacheReads: 0, + cost: 0.04, + toolUsage: { + read_file: { + attempts: 3, + failures: 0, + }, + apply_diff: { + attempts: 2, + failures: 0, + }, + }, + }) + ).id, + language: "go", + exercise: "go/octal", + passed: true, + startedAt: new Date(), + finishedAt: new Date(), + }) + + const { taskMetrics } = await finishRun(run.id) + + expect(taskMetrics).toEqual({ + id: expect.any(Number), + tokensIn: 175000, + tokensOut: 3000, + tokensContext: 178000, + cacheWrites: 0, + cacheReads: 0, + cost: 0.09, + duration: 75000, + toolUsage: { + read_file: { attempts: 6, failures: 0 }, + apply_diff: { attempts: 5, failures: 1 }, + }, + createdAt: expect.any(Date), + }) + }) +}) diff --git a/evals/packages/db/src/queries/runs.ts b/evals/packages/db/src/queries/runs.ts index 88d446f284..1a4f6d4c57 100644 --- a/evals/packages/db/src/queries/runs.ts +++ b/evals/packages/db/src/queries/runs.ts @@ -1,10 +1,13 @@ import { desc, eq, inArray, sql, sum } from "drizzle-orm" +import { ToolUsage } from "@evals/types" + import { RecordNotFoundError, RecordNotCreatedError } from "./errors.js" import type { 
InsertRun, UpdateRun } from "../schema.js" import { insertRunSchema, schema } from "../schema.js" import { db } from "../db.js" import { createTaskMetrics } from "./taskMetrics.js" +import { getTasks } from "./tasks.js" const table = schema.runs @@ -71,17 +74,30 @@ export const finishRun = async (runId: number) => { throw new RecordNotFoundError() } + const tasks = await getTasks(runId) + + const toolUsage = tasks.reduce((acc, task) => { + Object.entries(task.taskMetrics?.toolUsage || {}).forEach(([key, { attempts, failures }]) => { + const tool = key as keyof ToolUsage + acc[tool] ??= { attempts: 0, failures: 0 } + acc[tool].attempts += attempts + acc[tool].failures += failures + }) + + return acc + }, {} as ToolUsage) + const { passed, failed, ...rest } = values - const taskMetrics = await createTaskMetrics(rest) + const taskMetrics = await createTaskMetrics({ ...rest, toolUsage }) await updateRun(runId, { taskMetricsId: taskMetrics.id, passed, failed }) - const run = await db.query.runs.findFirst({ where: eq(table.id, runId), with: { taskMetrics: true } }) + const run = await findRun(runId) if (!run) { throw new RecordNotFoundError() } - return run + return { ...run, taskMetrics } } export const deleteRun = async (runId: number) => { diff --git a/evals/packages/db/src/schema.ts b/evals/packages/db/src/schema.ts index 5add8460c2..902bb91a42 100644 --- a/evals/packages/db/src/schema.ts +++ b/evals/packages/db/src/schema.ts @@ -84,7 +84,7 @@ export const taskMetrics = sqliteTable("taskMetrics", { cacheReads: integer({ mode: "number" }).notNull(), cost: real().notNull(), duration: integer({ mode: "number" }).notNull(), - toolUsage: blob({ mode: "json" }).$type(), + toolUsage: text({ mode: "json" }).$type(), createdAt: integer({ mode: "timestamp" }).notNull(), }) diff --git a/evals/packages/db/tsconfig.json b/evals/packages/db/tsconfig.json index 48fa99573e..e23679a84c 100644 --- a/evals/packages/db/tsconfig.json +++ b/evals/packages/db/tsconfig.json @@ -1,5 +1,8 @@ 
{ "extends": "@evals/typescript-config/base.json", + "compilerOptions": { + "types": ["vitest/globals"] + }, "include": ["src"], "exclude": ["node_modules"] } diff --git a/evals/packages/db/vitest.config.ts b/evals/packages/db/vitest.config.ts new file mode 100644 index 0000000000..e8586252d2 --- /dev/null +++ b/evals/packages/db/vitest.config.ts @@ -0,0 +1,7 @@ +import { defineConfig } from "vitest/config" + +export default defineConfig({ + test: { + globalSetup: ["./vitest.setup.ts"], + }, +}) diff --git a/evals/packages/db/vitest.setup.ts b/evals/packages/db/vitest.setup.ts new file mode 100644 index 0000000000..c296ef6cf1 --- /dev/null +++ b/evals/packages/db/vitest.setup.ts @@ -0,0 +1,20 @@ +import fs from "node:fs/promises" +import path from "node:path" + +import { execa } from "execa" + +const TEST_DB_PATH = path.join(process.cwd(), "test.db") + +export default async function () { + const exists = await fs.stat(TEST_DB_PATH).catch(() => false) + + if (exists) { + await fs.unlink(TEST_DB_PATH) + } + + await execa({ + env: { BENCHMARKS_DB_PATH: `file:${TEST_DB_PATH}` }, + })`pnpm db:push` + + process.env.BENCHMARKS_DB_PATH = `file:${TEST_DB_PATH}` +} diff --git a/evals/packages/types/src/roo-code-defaults.ts b/evals/packages/types/src/roo-code-defaults.ts index e02bda5d38..596a5810ae 100644 --- a/evals/packages/types/src/roo-code-defaults.ts +++ b/evals/packages/types/src/roo-code-defaults.ts @@ -59,6 +59,7 @@ export const rooCodeDefaults: RooCodeSettings = { search_and_replace: false, insert_content: false, powerSteering: false, + append_to_file: false, }, language: "en", diff --git a/evals/pnpm-lock.yaml b/evals/pnpm-lock.yaml index c1f145099a..ef2171d29d 100644 --- a/evals/pnpm-lock.yaml +++ b/evals/pnpm-lock.yaml @@ -274,6 +274,12 @@ importers: drizzle-kit: specifier: ^0.30.5 version: 0.30.5 + execa: + specifier: ^9.5.2 + version: 9.5.2 + vitest: + specifier: ^3.0.9 + version: 3.0.9(@types/node@20.17.24)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3) 
packages/ipc: dependencies: diff --git a/evals/turbo.json b/evals/turbo.json index 5f567ac63b..5692ec9065 100644 --- a/evals/turbo.json +++ b/evals/turbo.json @@ -15,9 +15,7 @@ ], "tasks": { "lint": {}, - "check-types": { - "dependsOn": [] - }, + "check-types": {}, "test": {}, "format": {}, "dev": { From b7e36abb74762dd62e668be3ff113ffc4d200497 Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 17 Apr 2025 21:48:53 -0700 Subject: [PATCH 6/9] PR feedback --- .../db/src/queries/__tests__/{runs.spec.ts => runs.test.ts} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename evals/packages/db/src/queries/__tests__/{runs.spec.ts => runs.test.ts} (98%) diff --git a/evals/packages/db/src/queries/__tests__/runs.spec.ts b/evals/packages/db/src/queries/__tests__/runs.test.ts similarity index 98% rename from evals/packages/db/src/queries/__tests__/runs.spec.ts rename to evals/packages/db/src/queries/__tests__/runs.test.ts index 1b340893d5..9032871176 100644 --- a/evals/packages/db/src/queries/__tests__/runs.spec.ts +++ b/evals/packages/db/src/queries/__tests__/runs.test.ts @@ -2,7 +2,7 @@ import { createRun, finishRun } from "../runs.js" import { createTask } from "../tasks.js" import { createTaskMetrics } from "../taskMetrics.js" -describe("finishRun", async () => { +describe("finishRun", () => { it("aggregates task metrics, including tool usage", async () => { const run = await createRun({ model: "gpt-4.1-mini", socketPath: "/tmp/roo.sock" }) From a747ce6a39b0ed9671d1040f1a2ff993dcf1343f Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 17 Apr 2025 22:03:07 -0700 Subject: [PATCH 7/9] Add diff edit metrics --- evals/apps/web/src/app/home.tsx | 16 ++++++-- evals/apps/web/src/app/runs/[id]/run.tsx | 2 +- evals/apps/web/src/lib/format-currency.ts | 6 --- evals/apps/web/src/lib/format-duration.ts | 22 ----------- evals/apps/web/src/lib/format-tokens.ts | 15 ------- evals/apps/web/src/lib/formatters.ts | 48 +++++++++++++++++++++++ evals/apps/web/src/lib/index.ts | 3 -- 7 files 
changed, 62 insertions(+), 50 deletions(-) delete mode 100644 evals/apps/web/src/lib/format-currency.ts delete mode 100644 evals/apps/web/src/lib/format-duration.ts delete mode 100644 evals/apps/web/src/lib/format-tokens.ts create mode 100644 evals/apps/web/src/lib/formatters.ts delete mode 100644 evals/apps/web/src/lib/index.ts diff --git a/evals/apps/web/src/app/home.tsx b/evals/apps/web/src/app/home.tsx index 6ba4a34ede..90f9d02b3e 100644 --- a/evals/apps/web/src/app/home.tsx +++ b/evals/apps/web/src/app/home.tsx @@ -8,7 +8,7 @@ import { Ellipsis, Rocket } from "lucide-react" import type { Run, TaskMetrics } from "@evals/db" import { deleteRun } from "@/lib/server/runs" -import { formatCurrency, formatDuration, formatTokens } from "@/lib" +import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters" import { Button, Table, @@ -59,7 +59,8 @@ export function Home({ runs }: { runs: (Run & { taskMetrics: TaskMetrics | null Passed Failed % Correct - Tokens In / Out + Tokens In / Out + Diff Edits Cost Duration @@ -79,12 +80,21 @@ export function Home({ runs }: { runs: (Run & { taskMetrics: TaskMetrics | null {taskMetrics && ( -
+
{formatTokens(taskMetrics.tokensIn)}
/
{formatTokens(taskMetrics.tokensOut)}
)} + + {taskMetrics?.toolUsage?.apply_diff && ( +
+
{taskMetrics.toolUsage.apply_diff.attempts}
+
/
+
{formatToolUsageSuccessRate(taskMetrics.toolUsage.apply_diff)}
+
+ )} +
{taskMetrics && formatCurrency(taskMetrics.cost)} {taskMetrics && formatDuration(taskMetrics.duration)} diff --git a/evals/apps/web/src/app/runs/[id]/run.tsx b/evals/apps/web/src/app/runs/[id]/run.tsx index 84749fc916..9d5e74f98b 100644 --- a/evals/apps/web/src/app/runs/[id]/run.tsx +++ b/evals/apps/web/src/app/runs/[id]/run.tsx @@ -5,7 +5,7 @@ import { LoaderCircle } from "lucide-react" import * as db from "@evals/db" -import { formatCurrency, formatDuration, formatTokens } from "@/lib" +import { formatCurrency, formatDuration, formatTokens } from "@/lib/formatters" import { useRunStatus } from "@/hooks/use-run-status" import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui" diff --git a/evals/apps/web/src/lib/format-currency.ts b/evals/apps/web/src/lib/format-currency.ts deleted file mode 100644 index c628815951..0000000000 --- a/evals/apps/web/src/lib/format-currency.ts +++ /dev/null @@ -1,6 +0,0 @@ -const formatter = new Intl.NumberFormat("en-US", { - style: "currency", - currency: "USD", -}) - -export const formatCurrency = (amount: number) => formatter.format(amount) diff --git a/evals/apps/web/src/lib/format-duration.ts b/evals/apps/web/src/lib/format-duration.ts deleted file mode 100644 index 7de767f947..0000000000 --- a/evals/apps/web/src/lib/format-duration.ts +++ /dev/null @@ -1,22 +0,0 @@ -export const formatDuration = (durationMs: number) => { - const seconds = Math.floor(durationMs / 1000) - const hours = Math.floor(seconds / 3600) - const minutes = Math.floor((seconds % 3600) / 60) - const remainingSeconds = seconds % 60 - - const parts = [] - - if (hours > 0) { - parts.push(`${hours}h`) - } - - if (minutes > 0) { - parts.push(`${minutes}m`) - } - - if (remainingSeconds > 0 || parts.length === 0) { - parts.push(`${remainingSeconds}s`) - } - - return parts.join(" ") -} diff --git a/evals/apps/web/src/lib/format-tokens.ts b/evals/apps/web/src/lib/format-tokens.ts deleted file mode 100644 index 
c51009478a..0000000000 --- a/evals/apps/web/src/lib/format-tokens.ts +++ /dev/null @@ -1,15 +0,0 @@ -export const formatTokens = (tokens: number) => { - if (tokens < 1000) { - return tokens.toString() - } - - if (tokens < 1000000) { - return `${(tokens / 1000).toFixed(1)}k` - } - - if (tokens < 1000000000) { - return `${(tokens / 1000000).toFixed(1)}M` - } - - return `${(tokens / 1000000000).toFixed(1)}B` -} diff --git a/evals/apps/web/src/lib/formatters.ts b/evals/apps/web/src/lib/formatters.ts new file mode 100644 index 0000000000..99fafcfbf6 --- /dev/null +++ b/evals/apps/web/src/lib/formatters.ts @@ -0,0 +1,48 @@ +const formatter = new Intl.NumberFormat("en-US", { + style: "currency", + currency: "USD", +}) + +export const formatCurrency = (amount: number) => formatter.format(amount) + +export const formatDuration = (durationMs: number) => { + const seconds = Math.floor(durationMs / 1000) + const hours = Math.floor(seconds / 3600) + const minutes = Math.floor((seconds % 3600) / 60) + const remainingSeconds = seconds % 60 + + const parts = [] + + if (hours > 0) { + parts.push(`${hours}h`) + } + + if (minutes > 0) { + parts.push(`${minutes}m`) + } + + if (remainingSeconds > 0 || parts.length === 0) { + parts.push(`${remainingSeconds}s`) + } + + return parts.join(" ") +} + +export const formatTokens = (tokens: number) => { + if (tokens < 1000) { + return tokens.toString() + } + + if (tokens < 1000000) { + return `${(tokens / 1000).toFixed(1)}k` + } + + if (tokens < 1000000000) { + return `${(tokens / 1000000).toFixed(1)}M` + } + + return `${(tokens / 1000000000).toFixed(1)}B` +} + +export const formatToolUsageSuccessRate = (usage: { attempts: number; failures: number }) => + `${(((usage.attempts - usage.failures) / usage.attempts) * 100).toFixed(1)}%` diff --git a/evals/apps/web/src/lib/index.ts b/evals/apps/web/src/lib/index.ts deleted file mode 100644 index f4262c384f..0000000000 --- a/evals/apps/web/src/lib/index.ts +++ /dev/null @@ -1,3 +0,0 @@ -export { 
formatCurrency } from "./format-currency" -export { formatDuration } from "./format-duration" -export { formatTokens } from "./format-tokens" From 25c7f3fe342a0dd9c051b651ec62e5a2c52f5803 Mon Sep 17 00:00:00 2001 From: Chris Estreich Date: Thu, 17 Apr 2025 22:06:21 -0700 Subject: [PATCH 8/9] Update evals/apps/web/src/lib/formatters.ts Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> --- evals/apps/web/src/lib/formatters.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evals/apps/web/src/lib/formatters.ts b/evals/apps/web/src/lib/formatters.ts index 99fafcfbf6..207e13a5e1 100644 --- a/evals/apps/web/src/lib/formatters.ts +++ b/evals/apps/web/src/lib/formatters.ts @@ -45,4 +45,4 @@ export const formatTokens = (tokens: number) => { } export const formatToolUsageSuccessRate = (usage: { attempts: number; failures: number }) => - `${(((usage.attempts - usage.failures) / usage.attempts) * 100).toFixed(1)}%` + usage.attempts === 0 ? '0%' : `${(((usage.attempts - usage.failures) / usage.attempts) * 100).toFixed(1)}%` From a83b46c8a08282d73c62f26d9a78ac7d60672774 Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 17 Apr 2025 22:11:48 -0700 Subject: [PATCH 9/9] Missed a few spots --- src/core/tools/applyDiffTool.ts | 1 + src/core/tools/askFollowupQuestionTool.ts | 1 + src/core/tools/readFileTool.ts | 2 ++ src/core/tools/searchAndReplaceTool.ts | 2 ++ 4 files changed, 6 insertions(+) diff --git a/src/core/tools/applyDiffTool.ts b/src/core/tools/applyDiffTool.ts index b1f7740201..ca0adb9e33 100644 --- a/src/core/tools/applyDiffTool.ts +++ b/src/core/tools/applyDiffTool.ts @@ -73,6 +73,7 @@ export async function applyDiffTool( if (!fileExists) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "apply_diff", success: false }) const formattedError = `File does not exist at path: ${absolutePath}\n\n\nThe specified file could not be found. 
Please verify the file path and try again.\n` await cline.say("error", formattedError) pushToolResult(formattedError) diff --git a/src/core/tools/askFollowupQuestionTool.ts b/src/core/tools/askFollowupQuestionTool.ts index 31a8cc5a70..4bfc641137 100644 --- a/src/core/tools/askFollowupQuestionTool.ts +++ b/src/core/tools/askFollowupQuestionTool.ts @@ -21,6 +21,7 @@ export async function askFollowupQuestionTool( } else { if (!question) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "ask_followup_question", success: false }) pushToolResult(await cline.sayAndCreateMissingParamError("ask_followup_question", "question")) return } diff --git a/src/core/tools/readFileTool.ts b/src/core/tools/readFileTool.ts index 1d8b23ca0e..ca84c0876e 100644 --- a/src/core/tools/readFileTool.ts +++ b/src/core/tools/readFileTool.ts @@ -69,6 +69,7 @@ export async function readFileTool( if (isNaN(startLine)) { // Invalid start_line cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "read_file", success: false }) await cline.say("error", `Failed to parse start_line: ${startLineStr}`) pushToolResult(`${relPath}Invalid start_line value`) return @@ -84,6 +85,7 @@ export async function readFileTool( if (isNaN(endLine)) { // Invalid end_line cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "read_file", success: false }) await cline.say("error", `Failed to parse end_line: ${endLineStr}`) pushToolResult(`${relPath}Invalid end_line value`) return diff --git a/src/core/tools/searchAndReplaceTool.ts b/src/core/tools/searchAndReplaceTool.ts index 89cd623795..7443974144 100644 --- a/src/core/tools/searchAndReplaceTool.ts +++ b/src/core/tools/searchAndReplaceTool.ts @@ -55,6 +55,7 @@ export async function searchAndReplaceTool( if (!fileExists) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "search_and_replace", success: false }) const formattedError = `File does not exist at path: ${absolutePath}\n\n\nThe specified file 
could not be found. Please verify the file path and try again.\n` await cline.say("error", formattedError) pushToolResult(formattedError) @@ -79,6 +80,7 @@ export async function searchAndReplaceTool( } } catch (error) { cline.consecutiveMistakeCount++ + cline.recordToolUsage({ toolName: "search_and_replace", success: false }) await cline.say("error", `Failed to parse operations JSON: ${error.message}`) pushToolResult(formatResponse.toolError("Invalid operations JSON format")) return