Skip to content

Commit 74d6c57

Browse files
committed
fix: prevent Kimi K2 model from completing tasks without performing actions
- Add validation in attempt_completion to check if actual work was done - Add model-specific instructions in system prompt for Kimi K2 - Track premature completion attempts via telemetry - Add comprehensive tests for the new validation logic Fixes #5999
1 parent 9fce90b commit 74d6c57

File tree

5 files changed

+309
-2
lines changed

5 files changed

+309
-2
lines changed

src/core/prompts/sections/rules.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ export function getRulesSection(
5050
supportsComputerUse: boolean,
5151
diffStrategy?: DiffStrategy,
5252
codeIndexManager?: CodeIndexManager,
53+
modelId?: string,
5354
): string {
5455
const isCodebaseSearchAvailable =
5556
codeIndexManager &&
@@ -61,11 +62,21 @@ export function getRulesSection(
6162
? "- **CRITICAL: For ANY exploration of code you haven't examined yet in this conversation, you MUST use the `codebase_search` tool FIRST before using search_files or other file exploration tools.** This requirement applies throughout the entire conversation, not just when starting a task. The codebase_search tool uses semantic search to find relevant code based on meaning, not just keywords, making it much more effective for understanding how features are implemented. Even if you've already explored some parts of the codebase, any new area or functionality you need to understand requires using codebase_search first.\n"
6263
: ""
6364

65+
// Add model-specific rules for Kimi K2
66+
const isKimiK2 = modelId && modelId.toLowerCase().includes("kimi") && modelId.toLowerCase().includes("k2")
67+
const kimiK2Rules = isKimiK2
68+
? `- **CRITICAL FOR KIMI K2 MODEL**: You MUST complete the actual implementation before using attempt_completion. Simply identifying issues without fixing them is NOT acceptable. You must:
69+
1. Use appropriate tools to make the necessary changes (write_to_file, apply_diff, etc.)
70+
2. Verify that your changes have been applied
71+
3. Only then use attempt_completion to present your completed work
72+
Attempting completion without performing actual work will result in an error.\n`
73+
: ""
74+
6475
return `====
6576
6677
RULES
6778
68-
- The project base directory is: ${cwd.toPosix()}
79+
${kimiK2Rules}- The project base directory is: ${cwd.toPosix()}
6980
- All file paths must be relative to this directory. However, commands may change directories in terminals, so respect working directory specified by the response to <execute_command>.
7081
- You cannot \`cd\` into a different directory to complete a task. You are stuck operating from '${cwd.toPosix()}', so be sure to pass in the correct 'path' parameter when using tools that require a path.
7182
- Do not use the ~ character or $HOME to refer to the home directory.

src/core/prompts/system.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ async function generatePrompt(
5959
partialReadsEnabled?: boolean,
6060
settings?: Record<string, any>,
6161
todoList?: TodoItem[],
62+
modelId?: string,
6263
): Promise<string> {
6364
if (!context) {
6465
throw new Error("Extension context is required for generating system prompt")
@@ -113,7 +114,7 @@ ${getCapabilitiesSection(cwd, supportsComputerUse, shouldIncludeMcp ? mcpHub : u
113114
114115
${modesSection}
115116
116-
${getRulesSection(cwd, supportsComputerUse, effectiveDiffStrategy, codeIndexManager)}
117+
${getRulesSection(cwd, supportsComputerUse, effectiveDiffStrategy, codeIndexManager, modelId)}
117118
118119
${getSystemInfoSection(cwd)}
119120
@@ -143,6 +144,7 @@ export const SYSTEM_PROMPT = async (
143144
partialReadsEnabled?: boolean,
144145
settings?: Record<string, any>,
145146
todoList?: TodoItem[],
147+
modelId?: string,
146148
): Promise<string> => {
147149
if (!context) {
148150
throw new Error("Extension context is required for generating system prompt")
@@ -210,5 +212,6 @@ ${customInstructions}`
210212
partialReadsEnabled,
211213
settings,
212214
todoList,
215+
modelId,
213216
)
214217
}

src/core/task/Task.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,6 +1653,8 @@ export class Task extends EventEmitter<ClineEvents> {
16531653
{
16541654
maxConcurrentFileReads,
16551655
},
1656+
undefined, // todoList
1657+
this.api.getModel().id,
16561658
)
16571659
})()
16581660
}

src/core/tools/__tests__/attemptCompletionTool.spec.ts

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,22 @@ vi.mock("../../../shared/package", () => ({
2525
},
2626
}))
2727

28+
// Mock TelemetryService
29+
vi.mock("@roo-code/telemetry", () => ({
30+
TelemetryService: {
31+
instance: {
32+
captureEvent: vi.fn(),
33+
captureTaskCompleted: vi.fn(),
34+
captureToolUsage: vi.fn(),
35+
captureConsecutiveMistakeError: vi.fn(),
36+
},
37+
},
38+
}))
39+
2840
import { attemptCompletionTool } from "../attemptCompletionTool"
2941
import { Task } from "../../task/Task"
3042
import * as vscode from "vscode"
43+
import { TelemetryService } from "@roo-code/telemetry"
3144

3245
describe("attemptCompletionTool", () => {
3346
let mockTask: Partial<Task>
@@ -62,6 +75,34 @@ describe("attemptCompletionTool", () => {
6275
consecutiveMistakeCount: 0,
6376
recordToolError: vi.fn(),
6477
todoList: undefined,
78+
toolUsage: {},
79+
didEditFile: false,
80+
api: {
81+
getModel: vi.fn(() => ({
82+
id: "test-model",
83+
info: {
84+
contextWindow: 100000,
85+
supportsPromptCache: false,
86+
},
87+
})),
88+
createMessage: vi.fn(),
89+
countTokens: vi.fn(),
90+
} as any,
91+
say: vi.fn(),
92+
emit: vi.fn(),
93+
taskId: "test-task-id",
94+
getTokenUsage: vi.fn(() => ({
95+
totalTokensIn: 0,
96+
totalTokensOut: 0,
97+
totalCost: 0,
98+
contextTokens: 0,
99+
})),
100+
parentTask: undefined,
101+
providerRef: {
102+
deref: vi.fn(() => undefined),
103+
} as any,
104+
ask: vi.fn(),
105+
userMessageContent: [],
65106
}
66107
})
67108

@@ -409,4 +450,230 @@ describe("attemptCompletionTool", () => {
409450
)
410451
})
411452
})
453+
454+
describe("Kimi K2 model validation", () => {
455+
beforeEach(() => {
456+
// Reset telemetry mocks
457+
vi.mocked(TelemetryService.instance.captureToolUsage).mockClear()
458+
vi.mocked(TelemetryService.instance.captureConsecutiveMistakeError).mockClear()
459+
})
460+
461+
it("should prevent completion for Kimi K2 model when no tools have been used", async () => {
462+
const block: AttemptCompletionToolUse = {
463+
type: "tool_use",
464+
name: "attempt_completion",
465+
params: { result: "I found the issue but haven't fixed it yet" },
466+
partial: false,
467+
}
468+
469+
// Set up Kimi K2 model
470+
mockTask.api = {
471+
getModel: vi.fn(() => ({
472+
id: "moonshotai/kimi-k2-instruct",
473+
info: {
474+
contextWindow: 100000,
475+
supportsPromptCache: false,
476+
},
477+
})),
478+
createMessage: vi.fn(),
479+
countTokens: vi.fn(),
480+
} as any
481+
mockTask.toolUsage = {} // No tools used
482+
mockTask.didEditFile = false
483+
484+
await attemptCompletionTool(
485+
mockTask as Task,
486+
block,
487+
mockAskApproval,
488+
mockHandleError,
489+
mockPushToolResult,
490+
mockRemoveClosingTag,
491+
mockToolDescription,
492+
mockAskFinishSubTaskApproval,
493+
)
494+
495+
expect(mockTask.consecutiveMistakeCount).toBe(1)
496+
expect(mockTask.recordToolError).toHaveBeenCalledWith("attempt_completion")
497+
expect(mockPushToolResult).toHaveBeenCalledWith(
498+
expect.stringContaining("Cannot complete task without performing any actions"),
499+
)
500+
501+
// Check telemetry was captured
502+
expect(TelemetryService.instance.captureToolUsage).toHaveBeenCalledWith(
503+
"test-task-id",
504+
"attempt_completion",
505+
)
506+
expect(TelemetryService.instance.captureConsecutiveMistakeError).toHaveBeenCalledWith("test-task-id")
507+
})
508+
509+
it("should allow completion for Kimi K2 model when tools have been used", async () => {
510+
const block: AttemptCompletionToolUse = {
511+
type: "tool_use",
512+
name: "attempt_completion",
513+
params: { result: "Fixed the issue by updating the code" },
514+
partial: false,
515+
}
516+
517+
// Set up Kimi K2 model with tools used
518+
mockTask.api = {
519+
getModel: vi.fn(() => ({
520+
id: "moonshotai/kimi-k2-instruct",
521+
info: {
522+
contextWindow: 100000,
523+
supportsPromptCache: false,
524+
},
525+
})),
526+
createMessage: vi.fn(),
527+
countTokens: vi.fn(),
528+
} as any
529+
mockTask.toolUsage = {
530+
write_to_file: { attempts: 1, failures: 0 },
531+
read_file: { attempts: 2, failures: 0 },
532+
}
533+
mockTask.didEditFile = true
534+
535+
await attemptCompletionTool(
536+
mockTask as Task,
537+
block,
538+
mockAskApproval,
539+
mockHandleError,
540+
mockPushToolResult,
541+
mockRemoveClosingTag,
542+
mockToolDescription,
543+
mockAskFinishSubTaskApproval,
544+
)
545+
546+
// Should not increment mistake count or record error
547+
expect(mockTask.consecutiveMistakeCount).toBe(0)
548+
expect(mockTask.recordToolError).not.toHaveBeenCalled()
549+
expect(mockPushToolResult).not.toHaveBeenCalledWith(
550+
expect.stringContaining("Cannot complete task without performing any actions"),
551+
)
552+
553+
// Telemetry should not be captured for successful completion
554+
expect(TelemetryService.instance.captureConsecutiveMistakeError).not.toHaveBeenCalled()
555+
})
556+
557+
it("should allow completion for non-Kimi K2 models even without tools", async () => {
558+
const block: AttemptCompletionToolUse = {
559+
type: "tool_use",
560+
name: "attempt_completion",
561+
params: { result: "Task analysis complete" },
562+
partial: false,
563+
}
564+
565+
// Set up non-Kimi K2 model
566+
mockTask.api = {
567+
getModel: vi.fn(() => ({
568+
id: "claude-3-opus",
569+
info: {
570+
contextWindow: 100000,
571+
supportsPromptCache: false,
572+
},
573+
})),
574+
createMessage: vi.fn(),
575+
countTokens: vi.fn(),
576+
} as any
577+
mockTask.toolUsage = {} // No tools used
578+
mockTask.didEditFile = false
579+
580+
await attemptCompletionTool(
581+
mockTask as Task,
582+
block,
583+
mockAskApproval,
584+
mockHandleError,
585+
mockPushToolResult,
586+
mockRemoveClosingTag,
587+
mockToolDescription,
588+
mockAskFinishSubTaskApproval,
589+
)
590+
591+
// Should not prevent completion for non-Kimi K2 models
592+
expect(mockTask.consecutiveMistakeCount).toBe(0)
593+
expect(mockTask.recordToolError).not.toHaveBeenCalled()
594+
expect(mockPushToolResult).not.toHaveBeenCalledWith(
595+
expect.stringContaining("Cannot complete task without performing any actions"),
596+
)
597+
})
598+
599+
it("should detect Kimi K2 model with case-insensitive matching", async () => {
600+
const block: AttemptCompletionToolUse = {
601+
type: "tool_use",
602+
name: "attempt_completion",
603+
params: { result: "Found the problem" },
604+
partial: false,
605+
}
606+
607+
// Set up Kimi K2 model with different casing
608+
mockTask.api = {
609+
getModel: vi.fn(() => ({
610+
id: "KIMI-K2-Model",
611+
info: {
612+
contextWindow: 100000,
613+
supportsPromptCache: false,
614+
},
615+
})),
616+
createMessage: vi.fn(),
617+
countTokens: vi.fn(),
618+
} as any
619+
mockTask.toolUsage = {} // No tools used
620+
mockTask.didEditFile = false
621+
622+
await attemptCompletionTool(
623+
mockTask as Task,
624+
block,
625+
mockAskApproval,
626+
mockHandleError,
627+
mockPushToolResult,
628+
mockRemoveClosingTag,
629+
mockToolDescription,
630+
mockAskFinishSubTaskApproval,
631+
)
632+
633+
expect(mockTask.consecutiveMistakeCount).toBe(1)
634+
expect(mockTask.recordToolError).toHaveBeenCalledWith("attempt_completion")
635+
expect(mockPushToolResult).toHaveBeenCalledWith(
636+
expect.stringContaining("Cannot complete task without performing any actions"),
637+
)
638+
})
639+
640+
it("should allow completion for Kimi K2 when files have been edited", async () => {
641+
const block: AttemptCompletionToolUse = {
642+
type: "tool_use",
643+
name: "attempt_completion",
644+
params: { result: "Fixed the issue" },
645+
partial: false,
646+
}
647+
648+
// Set up Kimi K2 model with file edits but no tool usage recorded
649+
mockTask.api = {
650+
getModel: vi.fn(() => ({
651+
id: "kimi-k2-instruct",
652+
info: {
653+
contextWindow: 100000,
654+
supportsPromptCache: false,
655+
},
656+
})),
657+
createMessage: vi.fn(),
658+
countTokens: vi.fn(),
659+
} as any
660+
mockTask.toolUsage = {} // No tools in usage record
661+
mockTask.didEditFile = true // But files were edited
662+
663+
await attemptCompletionTool(
664+
mockTask as Task,
665+
block,
666+
mockAskApproval,
667+
mockHandleError,
668+
mockPushToolResult,
669+
mockRemoveClosingTag,
670+
mockToolDescription,
671+
mockAskFinishSubTaskApproval,
672+
)
673+
674+
// Should allow completion since files were edited
675+
expect(mockTask.consecutiveMistakeCount).toBe(0)
676+
expect(mockTask.recordToolError).not.toHaveBeenCalled()
677+
})
678+
})
412679
})

src/core/tools/attemptCompletionTool.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,30 @@ export async function attemptCompletionTool(
4949
return
5050
}
5151

52+
// Check if any actual work has been done (tools used)
53+
const hasUsedTools = Object.keys(cline.toolUsage).length > 0
54+
const hasEditedFiles = cline.didEditFile
55+
56+
// Check if this is a Kimi K2 model attempting premature completion
57+
const modelId = cline.api.getModel().id
58+
const isKimiK2 = modelId && modelId.toLowerCase().includes("kimi") && modelId.toLowerCase().includes("k2")
59+
60+
if (!hasUsedTools && !hasEditedFiles && isKimiK2) {
61+
cline.consecutiveMistakeCount++
62+
cline.recordToolError("attempt_completion")
63+
64+
// Track premature completion attempts
65+
TelemetryService.instance.captureToolUsage(cline.taskId, "attempt_completion")
66+
TelemetryService.instance.captureConsecutiveMistakeError(cline.taskId)
67+
68+
pushToolResult(
69+
formatResponse.toolError(
70+
"Cannot complete task without performing any actions. You identified the issue but haven't implemented the fix yet. Please use the appropriate tools to make the necessary changes before attempting completion.",
71+
),
72+
)
73+
return
74+
}
75+
5276
try {
5377
const lastMessage = cline.clineMessages.at(-1)
5478

0 commit comments

Comments
 (0)