|
| 1 | +/** |
| 2 | + * Utility functions for repairing broken XML tool calls from LLM providers |
| 3 | + * that don't properly format XML responses. |
| 4 | + */ |
| 5 | + |
| 6 | +import { toolNames, type ToolName } from "@roo-code/types" |
| 7 | +import { toolParamNames, type ToolParamName } from "../../shared/tools" |
| 8 | + |
| 9 | +/** |
| 10 | + * Attempts to repair broken XML tool calls by adding missing opening brackets |
| 11 | + * and fixing common formatting issues. |
| 12 | + * |
| 13 | + * @param brokenXml - The potentially broken XML string |
| 14 | + * @returns The repaired XML string, or the original if no repairs were needed |
| 15 | + * |
| 16 | + * @example |
| 17 | + * // Input: "read_file>\nargs>\n<file>\n<path>main.gopath>\n</file>\nargs>\nread_file>" |
| 18 | + * // Output: "<read_file>\n<args>\n<file>\n<path>main.go</path>\n</file>\n</args>\n</read_file>" |
| 19 | + */ |
| 20 | +export function repairBrokenXml(brokenXml: string): string { |
| 21 | + // Don't check for valid structure - we need to repair even partially valid XML |
| 22 | + let repairedXml = brokenXml |
| 23 | + |
| 24 | + // Split into lines for easier processing |
| 25 | + const lines = repairedXml.split("\n") |
| 26 | + const repairedLines: string[] = [] |
| 27 | + |
| 28 | + // Create a set of all valid tag names for quick lookup |
| 29 | + const allTagNames = new Set<string>([...toolNames, ...toolParamNames]) |
| 30 | + |
| 31 | + // Track open tags to determine if we need opening or closing tags |
| 32 | + const openTags: string[] = [] |
| 33 | + |
| 34 | + for (let line of lines) { |
| 35 | + let repairedLine = line |
| 36 | + const trimmedLine = line.trim() |
| 37 | + |
| 38 | + // Skip empty lines |
| 39 | + if (!trimmedLine) { |
| 40 | + repairedLines.push(repairedLine) |
| 41 | + continue |
| 42 | + } |
| 43 | + |
| 44 | + // Get the indentation from the original line |
| 45 | + const indent = line.match(/^(\s*)/)?.[1] || "" |
| 46 | + |
| 47 | + // Handle lines that already start with < and end with > |
| 48 | + if (trimmedLine.startsWith("<") && trimmedLine.endsWith(">")) { |
| 49 | + // Check for double brackets like <<args> or <</args> |
| 50 | + if (trimmedLine.match(/^<<|>>$/)) { |
| 51 | + repairedLine = repairedLine.replace(/<<\//g, "</").replace(/<</g, "<").replace(/>>/g, ">") |
| 52 | + } |
| 53 | + |
| 54 | + // Check for special case: <path>main.gopath> should become <path>main.go</path> |
| 55 | + // This happens when content is merged with the tag name (missing </ in closing tag) |
| 56 | + // Only apply if it doesn't already have a proper closing tag |
| 57 | + if (!trimmedLine.includes("</")) { |
| 58 | + for (const tagName of allTagNames) { |
| 59 | + const mergedPattern = new RegExp(`^<${tagName}>(.+)${tagName}>$`) |
| 60 | + const mergedMatch = trimmedLine.match(mergedPattern) |
| 61 | + if (mergedMatch) { |
| 62 | + // Remove the tag name from the content |
| 63 | + const content = mergedMatch[1] |
| 64 | + const cleanContent = content.endsWith(tagName) |
| 65 | + ? content.substring(0, content.length - tagName.length) |
| 66 | + : content |
| 67 | + repairedLine = `${indent}<${tagName}>${cleanContent}</${tagName}>` |
| 68 | + break |
| 69 | + } |
| 70 | + } |
| 71 | + } |
| 72 | + |
| 73 | + // Track open/closed tags |
| 74 | + if (trimmedLine.startsWith("</")) { |
| 75 | + const tagName = trimmedLine.match(/^<\/([^>]+)>$/)?.[1] |
| 76 | + if (tagName) { |
| 77 | + // Remove from open tags if it matches |
| 78 | + const lastIndex = openTags.lastIndexOf(tagName) |
| 79 | + if (lastIndex >= 0) { |
| 80 | + openTags.splice(lastIndex, 1) |
| 81 | + } |
| 82 | + } |
| 83 | + } else { |
| 84 | + const tagName = trimmedLine.match(/^<([^/>]+)>$/)?.[1] |
| 85 | + if (tagName && allTagNames.has(tagName)) { |
| 86 | + openTags.push(tagName) |
| 87 | + } |
| 88 | + } |
| 89 | + |
| 90 | + repairedLines.push(repairedLine) |
| 91 | + continue |
| 92 | + } |
| 93 | + |
| 94 | + // Handle lines that don't start with < (missing opening bracket) |
| 95 | + let handled = false |
| 96 | + |
| 97 | + // Check for patterns like "regex>pattern</regex>" |
| 98 | + // This needs to be handled before other patterns |
| 99 | + for (const tagName of allTagNames) { |
| 100 | + const contentPattern = new RegExp(`^${tagName}>(.+)</${tagName}>$`) |
| 101 | + const contentMatch = trimmedLine.match(contentPattern) |
| 102 | + if (contentMatch) { |
| 103 | + repairedLine = `${indent}<${tagName}>${contentMatch[1]}</${tagName}>` |
| 104 | + handled = true |
| 105 | + break |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + if (!handled) { |
| 110 | + // Check each known tag name |
| 111 | + for (const tagName of allTagNames) { |
| 112 | + // Pattern 1: "/tagName>" at start of line (missing opening bracket for closing tag) |
| 113 | + if (trimmedLine === `/${tagName}>` || trimmedLine.startsWith(`/${tagName}>`)) { |
| 114 | + repairedLine = `${indent}</${tagName}>` |
| 115 | + // Remove from open tags if it matches |
| 116 | + const lastIndex = openTags.lastIndexOf(tagName) |
| 117 | + if (lastIndex >= 0) { |
| 118 | + openTags.splice(lastIndex, 1) |
| 119 | + } |
| 120 | + handled = true |
| 121 | + break |
| 122 | + } |
| 123 | + |
| 124 | + // Pattern 2: "tagName>" at start of line |
| 125 | + if (trimmedLine === `${tagName}>`) { |
| 126 | + // Check if we have this tag open - if so, it's likely a closing tag |
| 127 | + if (openTags.includes(tagName)) { |
| 128 | + repairedLine = `${indent}</${tagName}>` |
| 129 | + // Remove from open tags |
| 130 | + const lastIndex = openTags.lastIndexOf(tagName) |
| 131 | + if (lastIndex >= 0) { |
| 132 | + openTags.splice(lastIndex, 1) |
| 133 | + } |
| 134 | + } else { |
| 135 | + // It's an opening tag |
| 136 | + repairedLine = `${indent}<${tagName}>` |
| 137 | + openTags.push(tagName) |
| 138 | + } |
| 139 | + handled = true |
| 140 | + break |
| 141 | + } |
| 142 | + |
| 143 | + // Pattern 3: "tagName>" with content after it |
| 144 | + if (trimmedLine.startsWith(`${tagName}>`)) { |
| 145 | + const restOfLine = trimmedLine.substring(tagName.length + 1) |
| 146 | + // Check if this is something like "main.gopath>" where content is merged with tag |
| 147 | + if (restOfLine.endsWith(`${tagName}>`)) { |
| 148 | + // Remove the tag name from the end of content |
| 149 | + const content = restOfLine.substring(0, restOfLine.length - tagName.length - 1) |
| 150 | + // Remove the tag name if it appears at the end of content (like "main.gopath" -> "main.go") |
| 151 | + const cleanContent = content.endsWith(tagName) |
| 152 | + ? content.substring(0, content.length - tagName.length) |
| 153 | + : content |
| 154 | + repairedLine = `${indent}<${tagName}>${cleanContent}</${tagName}>` |
| 155 | + } else { |
| 156 | + // Just missing the opening bracket |
| 157 | + repairedLine = `${indent}<${tagName}>${restOfLine}` |
| 158 | + openTags.push(tagName) |
| 159 | + } |
| 160 | + handled = true |
| 161 | + break |
| 162 | + } |
| 163 | + } |
| 164 | + } |
| 165 | + |
| 166 | + // Handle special case: content ending with "tagName>" like "main.gopath>" |
| 167 | + if (!handled) { |
| 168 | + for (const tagName of allTagNames) { |
| 169 | + // Check if line ends with "tagName>" and doesn't start with a tag |
| 170 | + if (trimmedLine.endsWith(`${tagName}>`) && !trimmedLine.startsWith("<")) { |
| 171 | + const beforeTag = trimmedLine.substring(0, trimmedLine.length - tagName.length - 1) |
| 172 | + if (beforeTag) { |
| 173 | + // Check if the content ends with the tag name (like "main.gopath") |
| 174 | + if (beforeTag.endsWith(tagName)) { |
| 175 | + const cleanContent = beforeTag.substring(0, beforeTag.length - tagName.length) |
| 176 | + repairedLine = `${indent}${cleanContent}</${tagName}>` |
| 177 | + } else { |
| 178 | + // Content doesn't end with tag name, just add closing tag |
| 179 | + repairedLine = `${indent}${beforeTag}</${tagName}>` |
| 180 | + } |
| 181 | + // Remove from open tags if it matches |
| 182 | + const lastIndex = openTags.lastIndexOf(tagName) |
| 183 | + if (lastIndex >= 0) { |
| 184 | + openTags.splice(lastIndex, 1) |
| 185 | + } |
| 186 | + handled = true |
| 187 | + break |
| 188 | + } |
| 189 | + } |
| 190 | + } |
| 191 | + } |
| 192 | + |
| 193 | + repairedLines.push(repairedLine) |
| 194 | + } |
| 195 | + |
| 196 | + return repairedLines.join("\n") |
| 197 | +} |
| 198 | + |
| 199 | +/** |
| 200 | + * Checks if the XML has a valid structure with proper opening and closing tags |
| 201 | + */ |
| 202 | +function hasValidXmlStructure(xml: string): boolean { |
| 203 | + // Check if we have at least one valid tool opening tag |
| 204 | + const hasValidToolTag = toolNames.some( |
| 205 | + (toolName) => xml.includes(`<${toolName}>`) && xml.includes(`</${toolName}>`), |
| 206 | + ) |
| 207 | + |
| 208 | + return hasValidToolTag |
| 209 | +} |
| 210 | + |
| 211 | +/** |
| 212 | + * Detects if a string contains broken XML patterns that need repair |
| 213 | + * |
| 214 | + * @param text - The text to check for broken XML |
| 215 | + * @returns true if broken XML patterns are detected |
| 216 | + */ |
| 217 | +export function hasBrokenXmlPattern(text: string): boolean { |
| 218 | + const lines = text.split("\n") |
| 219 | + |
| 220 | + for (const line of lines) { |
| 221 | + const trimmedLine = line.trim() |
| 222 | + |
| 223 | + // Check for tool names without opening brackets |
| 224 | + for (const toolName of toolNames) { |
| 225 | + // Check if line starts with toolName> (missing opening bracket) |
| 226 | + if (trimmedLine.startsWith(`${toolName}>`) || trimmedLine.startsWith(`/${toolName}>`)) { |
| 227 | + return true |
| 228 | + } |
| 229 | + // Check if line is just toolName> (likely a closing tag) |
| 230 | + if (trimmedLine === `${toolName}>`) { |
| 231 | + return true |
| 232 | + } |
| 233 | + } |
| 234 | + |
| 235 | + // Check for parameter names without opening brackets |
| 236 | + for (const paramName of toolParamNames) { |
| 237 | + // Check if line starts with paramName> (missing opening bracket) |
| 238 | + if (trimmedLine.startsWith(`${paramName}>`) || trimmedLine.startsWith(`/${paramName}>`)) { |
| 239 | + return true |
| 240 | + } |
| 241 | + // Check if line is just paramName> (likely a closing tag) |
| 242 | + if (trimmedLine === `${paramName}>`) { |
| 243 | + return true |
| 244 | + } |
| 245 | + } |
| 246 | + } |
| 247 | + |
| 248 | + return false |
| 249 | +} |
| 250 | + |
| 251 | +/** |
| 252 | + * Configuration for XML auto-repair behavior |
| 253 | + */ |
| 254 | +export interface XmlAutoRepairConfig { |
| 255 | + /** Whether to enable automatic XML repair */ |
| 256 | + enabled: boolean |
| 257 | + /** Whether to use a small model for repair (future enhancement) */ |
| 258 | + useSmallModel?: boolean |
| 259 | + /** The model to use for repair if useSmallModel is true */ |
| 260 | + repairModelId?: string |
| 261 | +} |
0 commit comments