Skip to content

Commit 3bf47ad

Browse files
committed
fix: Add XML auto-repair for broken tool calls from OpenAI-compatible providers
- Add XML repair utility function to fix missing opening brackets in XML tags - Add configuration option openAiXmlAutoRepair in OpenAI Compatible settings - Integrate XML repair into OpenAI and BaseOpenAiCompatibleProvider streaming - Add comprehensive tests for XML repair functionality - Add UI checkbox for enabling XML auto-repair in settings Fixes #7156
1 parent 0d90fac commit 3bf47ad

File tree

6 files changed

+350
-3
lines changed

6 files changed

+350
-3
lines changed

packages/types/src/provider-settings.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ const openAiSchema = baseProviderSettingsSchema.extend({
160160
openAiStreamingEnabled: z.boolean().optional(),
161161
openAiHostHeader: z.string().optional(), // Keep temporarily for backward compatibility during migration.
162162
openAiHeaders: z.record(z.string(), z.string()).optional(),
163+
openAiXmlAutoRepair: z.boolean().optional(), // Auto-repair broken XML tool calls from certain providers
163164
})
164165

165166
const ollamaSchema = baseProviderSettingsSchema.extend({

src/api/providers/base-openai-compatible-provider.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import type { ModelInfo } from "@roo-code/types"
66
import type { ApiHandlerOptions } from "../../shared/api"
77
import { ApiStream } from "../transform/stream"
88
import { convertToOpenAiMessages } from "../transform/openai-format"
9+
import { repairBrokenXml } from "../utils/xml-repair"
910

1011
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
1112
import { DEFAULT_HEADERS } from "./constants"
@@ -89,9 +90,12 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
8990
const delta = chunk.choices[0]?.delta
9091

9192
if (delta?.content) {
93+
// Apply XML repair if enabled for OpenAI-compatible providers
94+
const content = this.options.openAiXmlAutoRepair ? repairBrokenXml(delta.content) : delta.content
95+
9296
yield {
9397
type: "text",
94-
text: delta.content,
98+
text: content,
9599
}
96100
}
97101

src/api/providers/openai.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
import type { ApiHandlerOptions } from "../../shared/api"
1414

1515
import { XmlMatcher } from "../../utils/xml-matcher"
16+
import { repairBrokenXml } from "../utils/xml-repair"
1617

1718
import { convertToOpenAiMessages } from "../transform/openai-format"
1819
import { convertToR1Format } from "../transform/r1-format"
@@ -187,7 +188,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
187188
const delta = chunk.choices[0]?.delta ?? {}
188189

189190
if (delta.content) {
190-
for (const chunk of matcher.update(delta.content)) {
191+
// Apply XML repair if enabled
192+
const content = this.options.openAiXmlAutoRepair ? repairBrokenXml(delta.content) : delta.content
193+
194+
for (const chunk of matcher.update(content)) {
191195
yield chunk
192196
}
193197
}
@@ -362,9 +366,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
362366
for await (const chunk of stream) {
363367
const delta = chunk.choices[0]?.delta
364368
if (delta?.content) {
369+
// Apply XML repair if enabled
370+
const content = this.options.openAiXmlAutoRepair ? repairBrokenXml(delta.content) : delta.content
371+
365372
yield {
366373
type: "text",
367-
text: delta.content,
374+
text: content,
368375
}
369376
}
370377

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import { describe, it, expect } from "vitest"
2+
import { repairBrokenXml, hasBrokenXmlPattern } from "../xml-repair"
3+
4+
describe("xml-repair", () => {
5+
describe("hasBrokenXmlPattern", () => {
6+
it("should detect broken tool opening tags", () => {
7+
const brokenXml = "read_file>\n<args>\n</args>\n</read_file>"
8+
expect(hasBrokenXmlPattern(brokenXml)).toBe(true)
9+
})
10+
11+
it("should detect broken parameter opening tags", () => {
12+
const brokenXml = "<read_file>\nargs>\n</args>\n</read_file>"
13+
expect(hasBrokenXmlPattern(brokenXml)).toBe(true)
14+
})
15+
16+
it("should detect broken closing tags", () => {
17+
const brokenXml = "<read_file>\n<args>\n/args>\n/read_file>"
18+
expect(hasBrokenXmlPattern(brokenXml)).toBe(true)
19+
})
20+
21+
it("should not detect valid XML as broken", () => {
22+
const validXml = "<read_file>\n<args>\n</args>\n</read_file>"
23+
expect(hasBrokenXmlPattern(validXml)).toBe(false)
24+
})
25+
})
26+
27+
describe("repairBrokenXml", () => {
28+
it("should repair missing opening brackets for tool tags", () => {
29+
const brokenXml = "read_file>\n<args>\n</args>\n/read_file>"
30+
const expected = "<read_file>\n<args>\n</args>\n</read_file>"
31+
expect(repairBrokenXml(brokenXml)).toBe(expected)
32+
})
33+
34+
it("should repair missing opening brackets for parameter tags", () => {
35+
const brokenXml = "<read_file>\nargs>\n/args>\n</read_file>"
36+
const expected = "<read_file>\n<args>\n</args>\n</read_file>"
37+
expect(repairBrokenXml(brokenXml)).toBe(expected)
38+
})
39+
40+
it("should handle the example from the issue", () => {
41+
// This is the exact example from the issue
42+
const brokenXml = "read_file>\nargs>\n<file>\n<path>main.gopath>\n</file>\nargs>\nread_file>"
43+
const expected = "<read_file>\n<args>\n<file>\n<path>main.go</path>\n</file>\n</args>\n</read_file>"
44+
expect(repairBrokenXml(brokenXml)).toBe(expected)
45+
})
46+
47+
it("should not modify valid XML", () => {
48+
const validXml = "<write_to_file>\n<path>test.txt</path>\n</write_to_file>"
49+
expect(repairBrokenXml(validXml)).toBe(validXml)
50+
})
51+
52+
it("should handle execute_command with broken tags", () => {
53+
const brokenXml = "execute_command>\n<command>test</command>\n/execute_command>"
54+
const expected = "<execute_command>\n<command>test</command>\n</execute_command>"
55+
expect(repairBrokenXml(brokenXml)).toBe(expected)
56+
})
57+
58+
it("should handle search_files with broken tags", () => {
59+
const brokenXml = "search_files>\n<path>src</path>\nregex>pattern</regex>\n/search_files>"
60+
const expected = "<search_files>\n<path>src</path>\n<regex>pattern</regex>\n</search_files>"
61+
expect(repairBrokenXml(brokenXml)).toBe(expected)
62+
})
63+
})
64+
})

src/api/utils/xml-repair.ts

Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
/**
2+
* Utility functions for repairing broken XML tool calls from LLM providers
3+
* that don't properly format XML responses.
4+
*/
5+
6+
import { toolNames, type ToolName } from "@roo-code/types"
7+
import { toolParamNames, type ToolParamName } from "../../shared/tools"
8+
9+
/**
10+
* Attempts to repair broken XML tool calls by adding missing opening brackets
11+
* and fixing common formatting issues.
12+
*
13+
* @param brokenXml - The potentially broken XML string
14+
* @returns The repaired XML string, or the original if no repairs were needed
15+
*
16+
* @example
17+
* // Input: "read_file>\nargs>\n<file>\n<path>main.gopath>\n</file>\nargs>\nread_file>"
18+
* // Output: "<read_file>\n<args>\n<file>\n<path>main.go</path>\n</file>\n</args>\n</read_file>"
19+
*/
20+
export function repairBrokenXml(brokenXml: string): string {
21+
// Don't check for valid structure - we need to repair even partially valid XML
22+
let repairedXml = brokenXml
23+
24+
// Split into lines for easier processing
25+
const lines = repairedXml.split("\n")
26+
const repairedLines: string[] = []
27+
28+
// Create a set of all valid tag names for quick lookup
29+
const allTagNames = new Set<string>([...toolNames, ...toolParamNames])
30+
31+
// Track open tags to determine if we need opening or closing tags
32+
const openTags: string[] = []
33+
34+
for (let line of lines) {
35+
let repairedLine = line
36+
const trimmedLine = line.trim()
37+
38+
// Skip empty lines
39+
if (!trimmedLine) {
40+
repairedLines.push(repairedLine)
41+
continue
42+
}
43+
44+
// Get the indentation from the original line
45+
const indent = line.match(/^(\s*)/)?.[1] || ""
46+
47+
// Handle lines that already start with < and end with >
48+
if (trimmedLine.startsWith("<") && trimmedLine.endsWith(">")) {
49+
// Check for double brackets like <<args> or <</args>
50+
if (trimmedLine.match(/^<<|>>$/)) {
51+
repairedLine = repairedLine.replace(/<<\//g, "</").replace(/<</g, "<").replace(/>>/g, ">")
52+
}
53+
54+
// Check for special case: <path>main.gopath> should become <path>main.go</path>
55+
// This happens when content is merged with the tag name (missing </ in closing tag)
56+
// Only apply if it doesn't already have a proper closing tag
57+
if (!trimmedLine.includes("</")) {
58+
for (const tagName of allTagNames) {
59+
const mergedPattern = new RegExp(`^<${tagName}>(.+)${tagName}>$`)
60+
const mergedMatch = trimmedLine.match(mergedPattern)
61+
if (mergedMatch) {
62+
// Remove the tag name from the content
63+
const content = mergedMatch[1]
64+
const cleanContent = content.endsWith(tagName)
65+
? content.substring(0, content.length - tagName.length)
66+
: content
67+
repairedLine = `${indent}<${tagName}>${cleanContent}</${tagName}>`
68+
break
69+
}
70+
}
71+
}
72+
73+
// Track open/closed tags
74+
if (trimmedLine.startsWith("</")) {
75+
const tagName = trimmedLine.match(/^<\/([^>]+)>$/)?.[1]
76+
if (tagName) {
77+
// Remove from open tags if it matches
78+
const lastIndex = openTags.lastIndexOf(tagName)
79+
if (lastIndex >= 0) {
80+
openTags.splice(lastIndex, 1)
81+
}
82+
}
83+
} else {
84+
const tagName = trimmedLine.match(/^<([^/>]+)>$/)?.[1]
85+
if (tagName && allTagNames.has(tagName)) {
86+
openTags.push(tagName)
87+
}
88+
}
89+
90+
repairedLines.push(repairedLine)
91+
continue
92+
}
93+
94+
// Handle lines that don't start with < (missing opening bracket)
95+
let handled = false
96+
97+
// Check for patterns like "regex>pattern</regex>"
98+
// This needs to be handled before other patterns
99+
for (const tagName of allTagNames) {
100+
const contentPattern = new RegExp(`^${tagName}>(.+)</${tagName}>$`)
101+
const contentMatch = trimmedLine.match(contentPattern)
102+
if (contentMatch) {
103+
repairedLine = `${indent}<${tagName}>${contentMatch[1]}</${tagName}>`
104+
handled = true
105+
break
106+
}
107+
}
108+
109+
if (!handled) {
110+
// Check each known tag name
111+
for (const tagName of allTagNames) {
112+
// Pattern 1: "/tagName>" at start of line (missing opening bracket for closing tag)
113+
if (trimmedLine === `/${tagName}>` || trimmedLine.startsWith(`/${tagName}>`)) {
114+
repairedLine = `${indent}</${tagName}>`
115+
// Remove from open tags if it matches
116+
const lastIndex = openTags.lastIndexOf(tagName)
117+
if (lastIndex >= 0) {
118+
openTags.splice(lastIndex, 1)
119+
}
120+
handled = true
121+
break
122+
}
123+
124+
// Pattern 2: "tagName>" at start of line
125+
if (trimmedLine === `${tagName}>`) {
126+
// Check if we have this tag open - if so, it's likely a closing tag
127+
if (openTags.includes(tagName)) {
128+
repairedLine = `${indent}</${tagName}>`
129+
// Remove from open tags
130+
const lastIndex = openTags.lastIndexOf(tagName)
131+
if (lastIndex >= 0) {
132+
openTags.splice(lastIndex, 1)
133+
}
134+
} else {
135+
// It's an opening tag
136+
repairedLine = `${indent}<${tagName}>`
137+
openTags.push(tagName)
138+
}
139+
handled = true
140+
break
141+
}
142+
143+
// Pattern 3: "tagName>" with content after it
144+
if (trimmedLine.startsWith(`${tagName}>`)) {
145+
const restOfLine = trimmedLine.substring(tagName.length + 1)
146+
// Check if this is something like "main.gopath>" where content is merged with tag
147+
if (restOfLine.endsWith(`${tagName}>`)) {
148+
// Remove the tag name from the end of content
149+
const content = restOfLine.substring(0, restOfLine.length - tagName.length - 1)
150+
// Remove the tag name if it appears at the end of content (like "main.gopath" -> "main.go")
151+
const cleanContent = content.endsWith(tagName)
152+
? content.substring(0, content.length - tagName.length)
153+
: content
154+
repairedLine = `${indent}<${tagName}>${cleanContent}</${tagName}>`
155+
} else {
156+
// Just missing the opening bracket
157+
repairedLine = `${indent}<${tagName}>${restOfLine}`
158+
openTags.push(tagName)
159+
}
160+
handled = true
161+
break
162+
}
163+
}
164+
}
165+
166+
// Handle special case: content ending with "tagName>" like "main.gopath>"
167+
if (!handled) {
168+
for (const tagName of allTagNames) {
169+
// Check if line ends with "tagName>" and doesn't start with a tag
170+
if (trimmedLine.endsWith(`${tagName}>`) && !trimmedLine.startsWith("<")) {
171+
const beforeTag = trimmedLine.substring(0, trimmedLine.length - tagName.length - 1)
172+
if (beforeTag) {
173+
// Check if the content ends with the tag name (like "main.gopath")
174+
if (beforeTag.endsWith(tagName)) {
175+
const cleanContent = beforeTag.substring(0, beforeTag.length - tagName.length)
176+
repairedLine = `${indent}${cleanContent}</${tagName}>`
177+
} else {
178+
// Content doesn't end with tag name, just add closing tag
179+
repairedLine = `${indent}${beforeTag}</${tagName}>`
180+
}
181+
// Remove from open tags if it matches
182+
const lastIndex = openTags.lastIndexOf(tagName)
183+
if (lastIndex >= 0) {
184+
openTags.splice(lastIndex, 1)
185+
}
186+
handled = true
187+
break
188+
}
189+
}
190+
}
191+
}
192+
193+
repairedLines.push(repairedLine)
194+
}
195+
196+
return repairedLines.join("\n")
197+
}
198+
199+
/**
200+
* Checks if the XML has a valid structure with proper opening and closing tags
201+
*/
202+
function hasValidXmlStructure(xml: string): boolean {
203+
// Check if we have at least one valid tool opening tag
204+
const hasValidToolTag = toolNames.some(
205+
(toolName) => xml.includes(`<${toolName}>`) && xml.includes(`</${toolName}>`),
206+
)
207+
208+
return hasValidToolTag
209+
}
210+
211+
/**
212+
* Detects if a string contains broken XML patterns that need repair
213+
*
214+
* @param text - The text to check for broken XML
215+
* @returns true if broken XML patterns are detected
216+
*/
217+
export function hasBrokenXmlPattern(text: string): boolean {
218+
const lines = text.split("\n")
219+
220+
for (const line of lines) {
221+
const trimmedLine = line.trim()
222+
223+
// Check for tool names without opening brackets
224+
for (const toolName of toolNames) {
225+
// Check if line starts with toolName> (missing opening bracket)
226+
if (trimmedLine.startsWith(`${toolName}>`) || trimmedLine.startsWith(`/${toolName}>`)) {
227+
return true
228+
}
229+
// Check if line is just toolName> (likely a closing tag)
230+
if (trimmedLine === `${toolName}>`) {
231+
return true
232+
}
233+
}
234+
235+
// Check for parameter names without opening brackets
236+
for (const paramName of toolParamNames) {
237+
// Check if line starts with paramName> (missing opening bracket)
238+
if (trimmedLine.startsWith(`${paramName}>`) || trimmedLine.startsWith(`/${paramName}>`)) {
239+
return true
240+
}
241+
// Check if line is just paramName> (likely a closing tag)
242+
if (trimmedLine === `${paramName}>`) {
243+
return true
244+
}
245+
}
246+
}
247+
248+
return false
249+
}
250+
251+
/**
252+
* Configuration for XML auto-repair behavior
253+
*/
254+
export interface XmlAutoRepairConfig {
255+
/** Whether to enable automatic XML repair */
256+
enabled: boolean
257+
/** Whether to use a small model for repair (future enhancement) */
258+
useSmallModel?: boolean
259+
/** The model to use for repair if useSmallModel is true */
260+
repairModelId?: string
261+
}

0 commit comments

Comments
 (0)