From 74d6c57d2d2d8c9c978af197c3309606e958f93a Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Mon, 21 Jul 2025 09:29:29 +0000
Subject: [PATCH] fix: prevent Kimi K2 model from completing tasks without performing actions

- Add validation in attempt_completion to check if actual work was done
- Add model-specific instructions in system prompt for Kimi K2
- Track premature completion attempts via telemetry
- Add comprehensive tests for the new validation logic

Fixes #5999
---
 src/core/prompts/sections/rules.ts           |  13 +-
 src/core/prompts/system.ts                   |   5 +-
 src/core/task/Task.ts                        |   2 +
 .../__tests__/attemptCompletionTool.spec.ts  | 267 ++++++++++++++++++
 src/core/tools/attemptCompletionTool.ts      |  24 ++
 5 files changed, 309 insertions(+), 2 deletions(-)

diff --git a/src/core/prompts/sections/rules.ts b/src/core/prompts/sections/rules.ts
index 5828568ac48..b415ec1af53 100644
--- a/src/core/prompts/sections/rules.ts
+++ b/src/core/prompts/sections/rules.ts
@@ -50,6 +50,7 @@ export function getRulesSection(
 	supportsComputerUse: boolean,
 	diffStrategy?: DiffStrategy,
 	codeIndexManager?: CodeIndexManager,
+	modelId?: string,
 ): string {
 	const isCodebaseSearchAvailable =
 		codeIndexManager &&
@@ -61,11 +62,21 @@
 		? "- **CRITICAL: For ANY exploration of code you haven't examined yet in this conversation, you MUST use the `codebase_search` tool FIRST before using search_files or other file exploration tools.** This requirement applies throughout the entire conversation, not just when starting a task. The codebase_search tool uses semantic search to find relevant code based on meaning, not just keywords, making it much more effective for understanding how features are implemented. Even if you've already explored some parts of the codebase, any new area or functionality you need to understand requires using codebase_search first.\n"
 		: ""
 
+	// Add model-specific rules for Kimi K2
+	const isKimiK2 = modelId && modelId.toLowerCase().includes("kimi") && modelId.toLowerCase().includes("k2")
+	const kimiK2Rules = isKimiK2
+		? `- **CRITICAL FOR KIMI K2 MODEL**: You MUST complete the actual implementation before using attempt_completion. Simply identifying issues without fixing them is NOT acceptable. You must:
+  1. Use appropriate tools to make the necessary changes (write_to_file, apply_diff, etc.)
+  2. Verify that your changes have been applied
+  3. Only then use attempt_completion to present your completed work
+  Attempting completion without performing actual work will result in an error.\n`
+		: ""
+
 	return `====
 
 RULES
 
-- The project base directory is: ${cwd.toPosix()}
+${kimiK2Rules}- The project base directory is: ${cwd.toPosix()}
 - All file paths must be relative to this directory. However, commands may change directories in terminals, so respect working directory specified by the response to <execute_command>.
 - You cannot \`cd\` into a different directory to complete a task. You are stuck operating from '${cwd.toPosix()}', so be sure to pass in the correct 'path' parameter when using tools that require a path.
 - Do not use the ~ character or $HOME to refer to the home directory.
diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts
index 92653cafd24..20910dfe64f 100644
--- a/src/core/prompts/system.ts
+++ b/src/core/prompts/system.ts
@@ -59,6 +59,7 @@ async function generatePrompt(
 	partialReadsEnabled?: boolean,
 	settings?: Record<string, any>,
 	todoList?: TodoItem[],
+	modelId?: string,
 ): Promise<string> {
 	if (!context) {
 		throw new Error("Extension context is required for generating system prompt")
 	}
@@ -113,7 +114,7 @@ ${getCapabilitiesSection(cwd, supportsComputerUse, shouldIncludeMcp ? mcpHub : u
 
 ${modesSection}
 
-${getRulesSection(cwd, supportsComputerUse, effectiveDiffStrategy, codeIndexManager)}
+${getRulesSection(cwd, supportsComputerUse, effectiveDiffStrategy, codeIndexManager, modelId)}
 
 ${getSystemInfoSection(cwd)}
 
@@ -143,6 +144,7 @@ export const SYSTEM_PROMPT = async (
 	partialReadsEnabled?: boolean,
 	settings?: Record<string, any>,
 	todoList?: TodoItem[],
+	modelId?: string,
 ): Promise<string> => {
 	if (!context) {
 		throw new Error("Extension context is required for generating system prompt")
 	}
@@ -210,5 +212,6 @@ ${customInstructions}`
 		partialReadsEnabled,
 		settings,
 		todoList,
+		modelId,
 	)
 }
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 53b8ef5b87d..5f9ead68d2d 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -1653,6 +1653,8 @@ export class Task extends EventEmitter<ClineEvents> {
 				{
 					maxConcurrentFileReads,
 				},
+				undefined, // todoList
+				this.api.getModel().id,
 			)
 		})()
 	}
diff --git a/src/core/tools/__tests__/attemptCompletionTool.spec.ts b/src/core/tools/__tests__/attemptCompletionTool.spec.ts
index b39c1acac62..072746f2916 100644
--- a/src/core/tools/__tests__/attemptCompletionTool.spec.ts
+++ b/src/core/tools/__tests__/attemptCompletionTool.spec.ts
@@ -25,9 +25,22 @@ vi.mock("../../../shared/package", () => ({
 	},
 }))
 
+// Mock TelemetryService
+vi.mock("@roo-code/telemetry", () => ({
+	TelemetryService: {
+		instance: {
+			captureEvent: vi.fn(),
+			captureTaskCompleted: vi.fn(),
+			captureToolUsage: vi.fn(),
+			captureConsecutiveMistakeError: vi.fn(),
+		},
+	},
+}))
+
 import { attemptCompletionTool } from "../attemptCompletionTool"
 import { Task } from "../../task/Task"
 import * as vscode from "vscode"
+import { TelemetryService } from "@roo-code/telemetry"
 
 describe("attemptCompletionTool", () => {
 	let mockTask: Partial<Task>
@@ -62,6 +75,34 @@ describe("attemptCompletionTool", () => {
 			consecutiveMistakeCount: 0,
 			recordToolError: vi.fn(),
 			todoList: undefined,
+			toolUsage: {},
+			didEditFile: false,
+			api: {
+				getModel: vi.fn(() => ({
+					id: "test-model",
+					info: {
+						contextWindow: 100000,
+						supportsPromptCache: false,
+					},
+				})),
+				createMessage: vi.fn(),
+				countTokens: vi.fn(),
+			} as any,
+			say: vi.fn(),
+			emit: vi.fn(),
+			taskId: "test-task-id",
+			getTokenUsage: vi.fn(() => ({
+				totalTokensIn: 0,
+				totalTokensOut: 0,
+				totalCost: 0,
+				contextTokens: 0,
+			})),
+			parentTask: undefined,
+			providerRef: {
+				deref: vi.fn(() => undefined),
+			} as any,
+			ask: vi.fn(),
+			userMessageContent: [],
 		}
 	})
 
@@ -409,4 +450,230 @@
 			)
 		})
 	})
+
+	describe("Kimi K2 model validation", () => {
+		beforeEach(() => {
+			// Reset telemetry mocks
+			vi.mocked(TelemetryService.instance.captureToolUsage).mockClear()
+			vi.mocked(TelemetryService.instance.captureConsecutiveMistakeError).mockClear()
+		})
+
+		it("should prevent completion for Kimi K2 model when no tools have been used", async () => {
+			const block: AttemptCompletionToolUse = {
+				type: "tool_use",
+				name: "attempt_completion",
+				params: { result: "I found the issue but haven't fixed it yet" },
+				partial: false,
+			}
+
+			// Set up Kimi K2 model
+			mockTask.api = {
+				getModel: vi.fn(() => ({
+					id: "moonshotai/kimi-k2-instruct",
+					info: {
+						contextWindow: 100000,
+						supportsPromptCache: false,
+					},
+				})),
+				createMessage: vi.fn(),
+				countTokens: vi.fn(),
+			} as any
+			mockTask.toolUsage = {} // No tools used
+			mockTask.didEditFile = false
+
+			await attemptCompletionTool(
+				mockTask as Task,
+				block,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			expect(mockTask.consecutiveMistakeCount).toBe(1)
+			expect(mockTask.recordToolError).toHaveBeenCalledWith("attempt_completion")
+			expect(mockPushToolResult).toHaveBeenCalledWith(
+				expect.stringContaining("Cannot complete task without performing any actions"),
+			)
+
+			// Check telemetry was captured
+			expect(TelemetryService.instance.captureToolUsage).toHaveBeenCalledWith(
+				"test-task-id",
+				"attempt_completion",
+			)
+			expect(TelemetryService.instance.captureConsecutiveMistakeError).toHaveBeenCalledWith("test-task-id")
+		})
+
+		it("should allow completion for Kimi K2 model when tools have been used", async () => {
+			const block: AttemptCompletionToolUse = {
+				type: "tool_use",
+				name: "attempt_completion",
+				params: { result: "Fixed the issue by updating the code" },
+				partial: false,
+			}
+
+			// Set up Kimi K2 model with tools used
+			mockTask.api = {
+				getModel: vi.fn(() => ({
+					id: "moonshotai/kimi-k2-instruct",
+					info: {
+						contextWindow: 100000,
+						supportsPromptCache: false,
+					},
+				})),
+				createMessage: vi.fn(),
+				countTokens: vi.fn(),
+			} as any
+			mockTask.toolUsage = {
+				write_to_file: { attempts: 1, failures: 0 },
+				read_file: { attempts: 2, failures: 0 },
+			}
+			mockTask.didEditFile = true
+
+			await attemptCompletionTool(
+				mockTask as Task,
+				block,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			// Should not increment mistake count or record error
+			expect(mockTask.consecutiveMistakeCount).toBe(0)
+			expect(mockTask.recordToolError).not.toHaveBeenCalled()
+			expect(mockPushToolResult).not.toHaveBeenCalledWith(
+				expect.stringContaining("Cannot complete task without performing any actions"),
+			)
+
+			// Telemetry should not be captured for successful completion
+			expect(TelemetryService.instance.captureConsecutiveMistakeError).not.toHaveBeenCalled()
+		})
+
+		it("should allow completion for non-Kimi K2 models even without tools", async () => {
+			const block: AttemptCompletionToolUse = {
+				type: "tool_use",
+				name: "attempt_completion",
+				params: { result: "Task analysis complete" },
+				partial: false,
+			}
+
+			// Set up non-Kimi K2 model
+			mockTask.api = {
+				getModel: vi.fn(() => ({
+					id: "claude-3-opus",
+					info: {
+						contextWindow: 100000,
+						supportsPromptCache: false,
+					},
+				})),
+				createMessage: vi.fn(),
+				countTokens: vi.fn(),
+			} as any
+			mockTask.toolUsage = {} // No tools used
+			mockTask.didEditFile = false
+
+			await attemptCompletionTool(
+				mockTask as Task,
+				block,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			// Should not prevent completion for non-Kimi K2 models
+			expect(mockTask.consecutiveMistakeCount).toBe(0)
+			expect(mockTask.recordToolError).not.toHaveBeenCalled()
+			expect(mockPushToolResult).not.toHaveBeenCalledWith(
+				expect.stringContaining("Cannot complete task without performing any actions"),
+			)
+		})
+
+		it("should detect Kimi K2 model with case-insensitive matching", async () => {
+			const block: AttemptCompletionToolUse = {
+				type: "tool_use",
+				name: "attempt_completion",
+				params: { result: "Found the problem" },
+				partial: false,
+			}
+
+			// Set up Kimi K2 model with different casing
+			mockTask.api = {
+				getModel: vi.fn(() => ({
+					id: "KIMI-K2-Model",
+					info: {
+						contextWindow: 100000,
+						supportsPromptCache: false,
+					},
+				})),
+				createMessage: vi.fn(),
+				countTokens: vi.fn(),
+			} as any
+			mockTask.toolUsage = {} // No tools used
+			mockTask.didEditFile = false
+
+			await attemptCompletionTool(
+				mockTask as Task,
+				block,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			expect(mockTask.consecutiveMistakeCount).toBe(1)
+			expect(mockTask.recordToolError).toHaveBeenCalledWith("attempt_completion")
+			expect(mockPushToolResult).toHaveBeenCalledWith(
+				expect.stringContaining("Cannot complete task without performing any actions"),
+			)
+		})
+
+		it("should allow completion for Kimi K2 when files have been edited", async () => {
+			const block: AttemptCompletionToolUse = {
+				type: "tool_use",
+				name: "attempt_completion",
+				params: { result: "Fixed the issue" },
+				partial: false,
+			}
+
+			// Set up Kimi K2 model with file edits but no tool usage recorded
+			mockTask.api = {
+				getModel: vi.fn(() => ({
+					id: "kimi-k2-instruct",
+					info: {
+						contextWindow: 100000,
+						supportsPromptCache: false,
+					},
+				})),
+				createMessage: vi.fn(),
+				countTokens: vi.fn(),
+			} as any
+			mockTask.toolUsage = {} // No tools in usage record
+			mockTask.didEditFile = true // But files were edited
+
+			await attemptCompletionTool(
+				mockTask as Task,
+				block,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+				mockToolDescription,
+				mockAskFinishSubTaskApproval,
+			)
+
+			// Should allow completion since files were edited
+			expect(mockTask.consecutiveMistakeCount).toBe(0)
+			expect(mockTask.recordToolError).not.toHaveBeenCalled()
+		})
+	})
 })
diff --git a/src/core/tools/attemptCompletionTool.ts b/src/core/tools/attemptCompletionTool.ts
index ef7881854f6..ea60f1aa31a 100644
--- a/src/core/tools/attemptCompletionTool.ts
+++ b/src/core/tools/attemptCompletionTool.ts
@@ -49,6 +49,30 @@ export async function attemptCompletionTool(
 		return
 	}
 
+	// Check if any actual work has been done (tools used)
+	const hasUsedTools = Object.keys(cline.toolUsage).length > 0
+	const hasEditedFiles = cline.didEditFile
+
+	// Check if this is a Kimi K2 model attempting premature completion
+	const modelId = cline.api.getModel().id
+	const isKimiK2 = modelId && modelId.toLowerCase().includes("kimi") && modelId.toLowerCase().includes("k2")
+
+	if (!hasUsedTools && !hasEditedFiles && isKimiK2) {
+		cline.consecutiveMistakeCount++
+		cline.recordToolError("attempt_completion")
+
+		// Track premature completion attempts
+		TelemetryService.instance.captureToolUsage(cline.taskId, "attempt_completion")
+		TelemetryService.instance.captureConsecutiveMistakeError(cline.taskId)
+
+		pushToolResult(
+			formatResponse.toolError(
+				"Cannot complete task without performing any actions. You identified the issue but haven't implemented the fix yet. Please use the appropriate tools to make the necessary changes before attempting completion.",
+			),
+		)
+		return
+	}
+
 	try {
 		const lastMessage = cline.clineMessages.at(-1)