Skip to content

Commit 0642041

Browse files
authored
Merge pull request #4509 from Kilo-Org/roo-v3.36.6
Include changes from Roo Code v3.36.6
2 parents f826a7c + 2dd1b27 commit 0642041

File tree

297 files changed

+11417
-3853
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

297 files changed

+11417
-3853
lines changed

.changeset/polite-games-arrive.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
---
2+
"kilo-code": patch
3+
---
4+
5+
Include changes from Roo Code v3.36.6
6+
7+
- Add tool alias support for model-specific tool customization, allowing users to configure how tools are presented to different AI models (PR #9989 by @daniel-lxs)
8+
- Sanitize MCP server and tool names for API compatibility, ensuring special characters don't cause issues with API calls (PR #10054 by @daniel-lxs)
9+
- Improve auto-approve timer visibility in follow-up suggestions for better user awareness of pending actions (PR #10048 by @brunobergher)
10+
- Fix: Cancel auto-approval timeout when user starts typing, preventing accidental auto-approvals during user interaction (PR #9937 by @roomote)
11+
- Add WorkspaceTaskVisibility type for organization cloud settings to support team visibility controls (PR #10020 by @roomote)
12+
- Fix: Extract raw error message from OpenRouter metadata for clearer error reporting (PR #10039 by @daniel-lxs)
13+
- Fix: Show tool protocol dropdown for LiteLLM provider, restoring missing configuration option (PR #10053 by @daniel-lxs)
14+
- Add: GPT-5.2 model to openai-native provider (PR #10024 by @hannesrudolph)
15+
- Fix: Handle empty Gemini responses and reasoning loops to prevent infinite retries (PR #10007 by @hannesrudolph)
16+
- Fix: Add missing tool_result blocks to prevent API errors when tool results are expected (PR #10015 by @daniel-lxs)
17+
- Fix: Filter orphaned tool_results when more results than tool_uses to prevent message validation errors (PR #10027 by @daniel-lxs)
18+
- Fix: Add general API endpoints for Z.ai provider (#9879 by @richtong, PR #9894 by @roomote)
19+
- Remove: Deprecated list_code_definition_names tool (PR #10005 by @hannesrudolph)
20+
- Add error details modal with on-demand display for improved error visibility when debugging issues (PR #9985 by @roomote)
21+
- Fix: Prevent premature rawChunkTracker clearing for MCP tools, improving reliability of MCP tool streaming (PR #9993 by @daniel-lxs)
22+
- Fix: Filter out 429 rate limit errors from API error telemetry for cleaner metrics (PR #9987 by @daniel-lxs)
23+
- Fix: Correct TODO list display order in chat view to show items in proper sequence (PR #9991 by @roomote)
24+
- Refactor: Unified context-management architecture with improved UX for better context control (PR #9795 by @hannesrudolph)
25+
- Add new `search_replace` native tool for single-replacement operations with improved editing precision (PR #9918 by @hannesrudolph)
26+
- Streaming tool stats and token usage throttling for better real-time feedback during generation (PR #9926 by @hannesrudolph)
27+
- Add versioned settings support with minPluginVersion gating for Roo provider (PR #9934 by @hannesrudolph)
28+
- Make Architect mode save plans to `/plans` directory and gitignore it (PR #9944 by @brunobergher)
29+
- Add ability to save screenshots from the browser tool (PR #9963 by @mrubens)
30+
- Refactor: Decouple tools from system prompt for cleaner architecture (PR #9784 by @daniel-lxs)
31+
- Update DeepSeek models to V3.2 with new pricing (PR #9962 by @hannesrudolph)
32+
- Add minimal and medium reasoning effort levels for Gemini models (PR #9973 by @hannesrudolph)
33+
- Update xAI models catalog with latest model options (PR #9872 by @hannesrudolph)
34+
- Add DeepSeek V3-2 support for Baseten provider (PR #9861 by @AlexKer)
35+
- Tweaks to Baseten model definitions for better defaults (PR #9866 by @mrubens)
36+
- Fix: Add xhigh reasoning effort support for gpt-5.1-codex-max (#9891 by @andrewginns, PR #9900 by @andrewginns)
37+
- Fix: Add Kimi, MiniMax, and Qwen model configurations for Bedrock (#9902 by @jbearak, PR #9905 by @app/roomote)
38+
- Configure tool preferences for xAI models (PR #9923 by @hannesrudolph)
39+
- Default to using native tools when supported on OpenRouter (PR #9878 by @mrubens)
40+
- Fix: Exclude apply_diff from native tools when diffEnabled is false (#9919 by @denis-kudelin, PR #9920 by @app/roomote)
41+
- Fix: Always show tool protocol selector for openai-compatible provider (#9965 by @bozoweed, PR #9966 by @hannesrudolph)
42+
- Fix: Respect explicit supportsReasoningEffort array values for proper model configuration (PR #9970 by @hannesrudolph)
43+
- Add timeout configuration to OpenAI Compatible Provider Client (PR #9898 by @dcbartlett)
44+
- Revert default tool protocol change from xml to native for stability (PR #9956 by @mrubens)
45+
- Improve OpenAI error messages to be more useful for debugging (PR #9639 by @mrubens)
46+
- Better error logs for parseToolCall exceptions (PR #9857 by @cte)
47+
- Improve cloud job error logging for RCC provider errors (PR #9924 by @cte)
48+
- Fix: Display actual API error message instead of generic text on retry (PR #9954 by @hannesrudolph)
49+
- Add API error telemetry to OpenRouter provider for better diagnostics (PR #9953 by @daniel-lxs)
50+
- Fix: Sanitize removed/invalid API providers to prevent infinite loop (PR #9869 by @hannesrudolph)
51+
- Fix: Use foreground color for context-management icons (PR #9912 by @hannesrudolph)
52+
- Fix: Suppress 'ask promise was ignored' error in handleError (PR #9914 by @daniel-lxs)
53+
- Fix: Process finish_reason to emit tool_call_end events properly (PR #9927 by @daniel-lxs)
54+
- Fix: Add finish_reason processing to xai.ts provider (PR #9929 by @daniel-lxs)
55+
- Fix: Validate and fix tool_result IDs before API requests (PR #9952 by @daniel-lxs)
56+
- Fix: Return undefined instead of 0 for disabled API timeout (PR #9960 by @hannesrudolph)
57+
- Stop making unnecessary count_tokens requests for better performance (PR #9884 by @mrubens)
58+
- Refactor: Consolidate ThinkingBudget components and fix disable handling (PR #9930 by @hannesrudolph)
59+
- Forbid time estimates in architect mode for more focused planning (PR #9931 by @app/roomote)

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,6 @@ qdrant_storage/
6868
*.code-workspace
6969

7070
# Act Secret Files
71-
.secrets
71+
.secrets
72+
# Architect plans
73+
plans/

.roo/rules-translate/AGENTS.md

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -316,31 +316,36 @@ For each language that is missing translations:
316316
"dragFiles": "按住shift拖动文件"
317317
=======
318318
"dragFiles": "Shift+拖拽文件"
319-
>>>>>>> AFTER
319+
320+
>>>>>>> AFTER
320321
321322
<<<<<<< BEFORE
322323
"description": "启用后,Kilo Code 将能够与 MCP 服务器交互以获取高级功能。"
323324
=======
324325
"description": "启用后 Kilo Code 可与 MCP 服务交互获取高级功能。"
325-
>>>>>>> AFTER
326+
327+
>>>>>>> AFTER
326328
327329
<<<<<<< BEFORE
328330
"cannotUndo": "此操作无法撤消。"
329331
=======
330332
"cannotUndo": "此操作不可逆。"
331-
>>>>>>> AFTER
333+
334+
>>>>>>> AFTER
332335
333336
<<<<<<< BEFORE
334337
"hold shift to drag in files" → "按住shift拖动文件"
335338
=======
336339
"hold shift to drag in files" → "Shift+拖拽文件"
337-
>>>>>>> AFTER
340+
341+
>>>>>>> AFTER
338342
339343
<<<<<<< BEFORE
340344
"Double click to edit" → "双击进行编辑"
341345
=======
342346
"Double click to edit" → "双击编辑"
343-
>>>>>>> AFTER
347+
348+
>>>>>>> AFTER
344349
```
345350

346351
### Common Pitfalls

apps/kilocode-docs/docs/advanced-usage/appbuilder.md

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,26 +39,27 @@ Before using App Builder:
3939
1. Navigate to **[App Builder](https://app.kilo.ai/app-builder)** from your Kilo dashboard.
4040
2. Choose an **AI Model** for development (e.g., Grok Code Fast 1, Claude Sonnet 4.5, GPT-5.2).
4141
3. Describe your application in plain language:
42-
- What it should do
43-
- Key features and functionality
44-
- Design preferences or constraints
42+
- What it should do
43+
- Key features and functionality
44+
- Design preferences or constraints
4545
4. Watch the **live preview** update as the AI generates your app.
4646
5. Provide feedback to refine:
47-
- "Make the header sticky"
48-
- "Add a dark mode toggle"
49-
- "Connect this form to a database"
47+
- "Make the header sticky"
48+
- "Add a dark mode toggle"
49+
- "Connect this form to a database"
5050
6. When satisfied, click **Deploy** to push your app live.
5151

5252
---
5353

5454
## How App Builder Works
5555

5656
- When you describe your application:
57-
1. The AI model interprets your requirements and generates an initial implementation.
58-
2. Code is rendered in real-time in the live preview panel.
59-
3. You can interact with the preview as if it were the deployed app.
60-
4. Each refinement request triggers targeted updates to the codebase.
61-
5. The AI maintains context across your entire conversation for coherent iteration.
57+
58+
1. The AI model interprets your requirements and generates an initial implementation.
59+
2. Code is rendered in real-time in the live preview panel.
60+
3. You can interact with the preview as if it were the deployed app.
61+
4. Each refinement request triggers targeted updates to the codebase.
62+
5. The AI maintains context across your entire conversation for coherent iteration.
6263

6364
- Deployment packages your application and provisions hosting automatically.
6465

apps/web-evals/src/app/runs/[id]/run.tsx

Lines changed: 82 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ function formatLogContent(log: string): React.ReactNode[] {
242242

243243
export function Run({ run }: { run: Run }) {
244244
const runStatus = useRunStatus(run)
245-
const { tasks, tokenUsage, usageUpdatedAt, heartbeat, runners } = runStatus
245+
const { tasks, tokenUsage, toolUsage, usageUpdatedAt, heartbeat, runners } = runStatus
246246

247247
const [selectedTask, setSelectedTask] = useState<Task | null>(null)
248248
const [taskLog, setTaskLog] = useState<string | null>(null)
@@ -336,37 +336,70 @@ export function Run({ run }: { run: Run }) {
336336
)
337337

338338
const taskMetrics: Record<number, TaskMetrics> = useMemo(() => {
339+
// Reference usageUpdatedAt to trigger recomputation when Map contents change
340+
void usageUpdatedAt
339341
const metrics: Record<number, TaskMetrics> = {}
340342

341343
tasks?.forEach((task) => {
342-
const usage = tokenUsage.get(task.id)
343-
344-
if (task.finishedAt && task.taskMetrics) {
345-
metrics[task.id] = task.taskMetrics
346-
} else if (usage) {
344+
const streamingUsage = tokenUsage.get(task.id)
345+
const dbMetrics = task.taskMetrics
346+
347+
// For finished tasks, prefer DB values but fall back to streaming values
348+
// This handles race conditions during timeout where DB might not have latest data
349+
if (task.finishedAt) {
350+
// Check if DB metrics have meaningful values (not just default/empty)
351+
const dbHasData = dbMetrics && (dbMetrics.tokensIn > 0 || dbMetrics.tokensOut > 0 || dbMetrics.cost > 0)
352+
if (dbHasData) {
353+
metrics[task.id] = dbMetrics
354+
} else if (streamingUsage) {
355+
// Fall back to streaming values if DB is empty/stale
356+
metrics[task.id] = {
357+
tokensIn: streamingUsage.totalTokensIn,
358+
tokensOut: streamingUsage.totalTokensOut,
359+
tokensContext: streamingUsage.contextTokens,
360+
duration: streamingUsage.duration ?? 0,
361+
cost: streamingUsage.totalCost,
362+
}
363+
}
364+
} else if (streamingUsage) {
365+
// For running tasks, use streaming values
347366
metrics[task.id] = {
348-
tokensIn: usage.totalTokensIn,
349-
tokensOut: usage.totalTokensOut,
350-
tokensContext: usage.contextTokens,
351-
duration: usage.duration ?? 0,
352-
cost: usage.totalCost,
367+
tokensIn: streamingUsage.totalTokensIn,
368+
tokensOut: streamingUsage.totalTokensOut,
369+
tokensContext: streamingUsage.contextTokens,
370+
duration: streamingUsage.duration ?? 0,
371+
cost: streamingUsage.totalCost,
353372
}
354373
}
355374
})
356375

357376
return metrics
358-
// eslint-disable-next-line react-hooks/exhaustive-deps
359377
}, [tasks, tokenUsage, usageUpdatedAt])
360378

361379
// Collect all unique tool names from all tasks and sort by total attempts
362380
const toolColumns = useMemo<ToolName[]>(() => {
381+
// Reference usageUpdatedAt to trigger recomputation when Map contents change
382+
void usageUpdatedAt
363383
if (!tasks) return []
364384

365385
const toolTotals = new Map<ToolName, number>()
366386

367387
for (const task of tasks) {
368-
if (task.taskMetrics?.toolUsage) {
369-
for (const [toolName, usage] of Object.entries(task.taskMetrics.toolUsage)) {
388+
// Get both DB and streaming values
389+
const dbToolUsage = task.taskMetrics?.toolUsage
390+
const streamingToolUsage = toolUsage.get(task.id)
391+
392+
// For finished tasks, prefer DB values but fall back to streaming values
393+
// For running tasks, use streaming values
394+
// This handles race conditions during timeout where DB might not have latest data
395+
const taskToolUsage = task.finishedAt
396+
? dbToolUsage && Object.keys(dbToolUsage).length > 0
397+
? dbToolUsage
398+
: streamingToolUsage
399+
: streamingToolUsage
400+
401+
if (taskToolUsage) {
402+
for (const [toolName, usage] of Object.entries(taskToolUsage)) {
370403
const tool = toolName as ToolName
371404
const current = toolTotals.get(tool) ?? 0
372405
toolTotals.set(tool, current + usage.attempts)
@@ -378,10 +411,13 @@ export function Run({ run }: { run: Run }) {
378411
return Array.from(toolTotals.entries())
379412
.sort((a, b) => b[1] - a[1])
380413
.map(([name]): ToolName => name)
381-
}, [tasks])
414+
// toolUsage ref is stable; usageUpdatedAt triggers recomputation when Map contents change
415+
}, [tasks, toolUsage, usageUpdatedAt])
382416

383417
// Compute aggregate stats
384418
const stats = useMemo(() => {
419+
// Reference usageUpdatedAt to trigger recomputation when Map contents change
420+
void usageUpdatedAt
385421
if (!tasks) return null
386422

387423
const passed = tasks.filter((t) => t.passed === true).length
@@ -393,8 +429,8 @@ export function Run({ run }: { run: Run }) {
393429
let totalCost = 0
394430
let totalDuration = 0
395431

396-
// Aggregate tool usage from completed tasks
397-
const toolUsage: ToolUsage = {}
432+
// Aggregate tool usage from all tasks (both finished and running)
433+
const toolUsageAggregate: ToolUsage = {}
398434

399435
for (const task of tasks) {
400436
const metrics = taskMetrics[task.id]
@@ -405,15 +441,24 @@ export function Run({ run }: { run: Run }) {
405441
totalDuration += metrics.duration
406442
}
407443

408-
// Aggregate tool usage from finished tasks with taskMetrics
409-
if (task.finishedAt && task.taskMetrics?.toolUsage) {
410-
for (const [key, usage] of Object.entries(task.taskMetrics.toolUsage)) {
444+
// Aggregate tool usage: prefer DB values for finished tasks, fall back to streaming values
445+
// This handles race conditions during timeout where DB might not have latest data
446+
const dbToolUsage = task.taskMetrics?.toolUsage
447+
const streamingToolUsage = toolUsage.get(task.id)
448+
const taskToolUsage = task.finishedAt
449+
? dbToolUsage && Object.keys(dbToolUsage).length > 0
450+
? dbToolUsage
451+
: streamingToolUsage
452+
: streamingToolUsage
453+
454+
if (taskToolUsage) {
455+
for (const [key, usage] of Object.entries(taskToolUsage)) {
411456
const tool = key as keyof ToolUsage
412-
if (!toolUsage[tool]) {
413-
toolUsage[tool] = { attempts: 0, failures: 0 }
457+
if (!toolUsageAggregate[tool]) {
458+
toolUsageAggregate[tool] = { attempts: 0, failures: 0 }
414459
}
415-
toolUsage[tool].attempts += usage.attempts
416-
toolUsage[tool].failures += usage.failures
460+
toolUsageAggregate[tool].attempts += usage.attempts
461+
toolUsageAggregate[tool].failures += usage.failures
417462
}
418463
}
419464
}
@@ -427,13 +472,15 @@ export function Run({ run }: { run: Run }) {
427472
totalTokensOut,
428473
totalCost,
429474
totalDuration,
430-
toolUsage,
475+
toolUsage: toolUsageAggregate,
431476
}
432-
// eslint-disable-next-line react-hooks/exhaustive-deps
433-
}, [tasks, taskMetrics, tokenUsage, usageUpdatedAt])
477+
// Map refs are stable; usageUpdatedAt triggers recomputation when Map contents change
478+
}, [tasks, taskMetrics, toolUsage, usageUpdatedAt])
434479

435480
// Calculate elapsed time (wall-clock time from run creation to completion or now)
436481
const elapsedTime = useMemo(() => {
482+
// Reference usageUpdatedAt to trigger recomputation for live elapsed time updates
483+
void usageUpdatedAt
437484
if (!tasks || tasks.length === 0) return null
438485

439486
const startTime = new Date(run.createdAt).getTime()
@@ -452,7 +499,6 @@ export function Run({ run }: { run: Run }) {
452499

453500
// If still running, use current time
454501
return Date.now() - startTime
455-
// eslint-disable-next-line react-hooks/exhaustive-deps
456502
}, [tasks, run.createdAt, run.taskMetricsId, usageUpdatedAt])
457503

458504
return (
@@ -655,7 +701,14 @@ export function Run({ run }: { run: Run }) {
655701
{formatTokens(taskMetrics[task.id]!.tokensContext)}
656702
</TableCell>
657703
{toolColumns.map((toolName) => {
658-
const usage = task.taskMetrics?.toolUsage?.[toolName]
704+
// Use DB values for finished tasks, but fall back to streaming values
705+
// if DB values are missing (handles race condition during timeout)
706+
const dbUsage = task.taskMetrics?.toolUsage?.[toolName]
707+
const streamingUsage = toolUsage.get(task.id)?.[toolName]
708+
const usage = task.finishedAt
709+
? (dbUsage ?? streamingUsage)
710+
: streamingUsage
711+
659712
const successRate =
660713
usage && usage.attempts > 0
661714
? ((usage.attempts - usage.failures) / usage.attempts) * 100

apps/web-evals/src/hooks/use-run-status.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { useState, useCallback, useRef } from "react"
22
import { useQuery, keepPreviousData } from "@tanstack/react-query"
33

4-
import { type TokenUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types"
4+
import { type TokenUsage, type ToolUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types"
55
import type { Run, Task, TaskMetrics } from "@roo-code/evals"
66

77
import { getHeartbeat } from "@/actions/heartbeat"
@@ -15,6 +15,7 @@ export type RunStatus = {
1515
runners: string[] | undefined
1616
tasks: (Task & { taskMetrics: TaskMetrics | null })[] | undefined
1717
tokenUsage: Map<number, TokenUsage & { duration?: number }>
18+
toolUsage: Map<number, ToolUsage>
1819
usageUpdatedAt: number | undefined
1920
}
2021

@@ -23,6 +24,7 @@ export const useRunStatus = (run: Run): RunStatus => {
2324
const [usageUpdatedAt, setUsageUpdatedAt] = useState<number>()
2425

2526
const tokenUsage = useRef<Map<number, TokenUsage & { duration?: number }>>(new Map())
27+
const toolUsage = useRef<Map<number, ToolUsage>>(new Map())
2628
const startTimes = useRef<Map<number, number>>(new Map())
2729

2830
const { data: heartbeat } = useQuery({
@@ -78,6 +80,12 @@ export const useRunStatus = (run: Run): RunStatus => {
7880
const startTime = startTimes.current.get(taskId)
7981
const duration = startTime ? Date.now() - startTime : undefined
8082
tokenUsage.current.set(taskId, { ...payload[1], duration })
83+
84+
// Track tool usage from streaming updates
85+
if (payload[2]) {
86+
toolUsage.current.set(taskId, payload[2])
87+
}
88+
8189
setUsageUpdatedAt(Date.now())
8290
break
8391
}
@@ -96,6 +104,7 @@ export const useRunStatus = (run: Run): RunStatus => {
96104
runners,
97105
tasks,
98106
tokenUsage: tokenUsage.current,
107+
toolUsage: toolUsage.current,
99108
usageUpdatedAt,
100109
}
101110
}

0 commit comments

Comments
 (0)