-
Notifications
You must be signed in to change notification settings - Fork 2.6k
fix: improve XML parameter extraction for Qwen3-Coder and other models #7407
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -85,6 +85,32 @@ export class AssistantMessageParser { | |||||||||||||||||
| this.currentParamName = undefined | ||||||||||||||||||
| continue | ||||||||||||||||||
| } else { | ||||||||||||||||||
| // Check for malformed closing tags (e.g., missing slash or spaces) | ||||||||||||||||||
| // Some models might generate </path > or < /path> instead of </path> | ||||||||||||||||||
| const malformedClosingPatterns = [ | ||||||||||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Critical: This PR adds new functionality but doesn't include any tests for these malformed tag patterns. Could we add test cases like:
Suggested change
|
||||||||||||||||||
| `</ ${this.currentParamName}>`, // space after slash | ||||||||||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The malformed tag patterns are duplicated in three places (here, lines 136-140, and 197-201). Consider extracting into a helper function:
Suggested change
|
||||||||||||||||||
| `< /${this.currentParamName}>`, // space before slash | ||||||||||||||||||
| `</${this.currentParamName} >`, // space before closing bracket | ||||||||||||||||||
| `< / ${this.currentParamName} >`, // spaces everywhere | ||||||||||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Edge case consideration: What about tags with multiple spaces like |
||||||||||||||||||
| ] | ||||||||||||||||||
|
|
||||||||||||||||||
| for (const pattern of malformedClosingPatterns) { | ||||||||||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Performance consideration: We're checking 4 patterns sequentially for each tag. Would a single regex be more efficient? Something like:
Suggested change
|
||||||||||||||||||
| if (currentParamValue.endsWith(pattern)) { | ||||||||||||||||||
| // Found a malformed closing tag, treat it as valid | ||||||||||||||||||
| const paramValue = currentParamValue.slice(0, -pattern.length) | ||||||||||||||||||
| this.currentToolUse.params[this.currentParamName] = | ||||||||||||||||||
| this.currentParamName === "content" | ||||||||||||||||||
| ? paramValue.replace(/^\n/, "").replace(/\n$/, "") | ||||||||||||||||||
| : paramValue.trim() | ||||||||||||||||||
| this.currentParamName = undefined | ||||||||||||||||||
| break | ||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| if (this.currentParamName === undefined) { | ||||||||||||||||||
| continue | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // Partial param value is accumulating. | ||||||||||||||||||
| // Write the currently accumulated param content in real time | ||||||||||||||||||
| this.currentToolUse.params[this.currentParamName] = currentParamValue | ||||||||||||||||||
|
|
@@ -104,11 +130,22 @@ export class AssistantMessageParser { | |||||||||||||||||
| this.currentToolUse = undefined | ||||||||||||||||||
| continue | ||||||||||||||||||
| } else { | ||||||||||||||||||
| // Check for parameter opening tags with various formats | ||||||||||||||||||
| const possibleParamOpeningTags = toolParamNames.map((name) => `<${name}>`) | ||||||||||||||||||
| for (const paramOpeningTag of possibleParamOpeningTags) { | ||||||||||||||||||
| // Also check for malformed opening tags | ||||||||||||||||||
| const malformedOpeningTags = toolParamNames.flatMap((name) => [ | ||||||||||||||||||
| `< ${name}>`, // space after opening bracket | ||||||||||||||||||
| `<${name} >`, // space before closing bracket | ||||||||||||||||||
| `< ${name} >`, // spaces on both sides | ||||||||||||||||||
| ]) | ||||||||||||||||||
|
|
||||||||||||||||||
| const allPossibleTags = [...possibleParamOpeningTags, ...malformedOpeningTags] | ||||||||||||||||||
|
|
||||||||||||||||||
| for (const paramOpeningTag of allPossibleTags) { | ||||||||||||||||||
| if (this.accumulator.endsWith(paramOpeningTag)) { | ||||||||||||||||||
| // Start of a new parameter. | ||||||||||||||||||
| const paramName = paramOpeningTag.slice(1, -1) | ||||||||||||||||||
| // Extract the parameter name, handling spaces | ||||||||||||||||||
| const paramName = paramOpeningTag.replace(/[<>\s]/g, '') | ||||||||||||||||||
| if (!toolParamNames.includes(paramName as ToolParamName)) { | ||||||||||||||||||
| // Handle invalid parameter name gracefully | ||||||||||||||||||
| continue | ||||||||||||||||||
|
|
@@ -156,11 +193,19 @@ export class AssistantMessageParser { | |||||||||||||||||
|
|
||||||||||||||||||
| let didStartToolUse = false | ||||||||||||||||||
| const possibleToolUseOpeningTags = toolNames.map((name) => `<${name}>`) | ||||||||||||||||||
| // Also check for malformed tool opening tags | ||||||||||||||||||
| const malformedToolOpeningTags = toolNames.flatMap((name) => [ | ||||||||||||||||||
| `< ${name}>`, // space after opening bracket | ||||||||||||||||||
| `<${name} >`, // space before closing bracket | ||||||||||||||||||
| `< ${name} >`, // spaces on both sides | ||||||||||||||||||
| ]) | ||||||||||||||||||
|
|
||||||||||||||||||
| const allPossibleToolTags = [...possibleToolUseOpeningTags, ...malformedToolOpeningTags] | ||||||||||||||||||
|
|
||||||||||||||||||
| for (const toolUseOpeningTag of possibleToolUseOpeningTags) { | ||||||||||||||||||
| for (const toolUseOpeningTag of allPossibleToolTags) { | ||||||||||||||||||
| if (this.accumulator.endsWith(toolUseOpeningTag)) { | ||||||||||||||||||
| // Extract and validate the tool name | ||||||||||||||||||
| const extractedToolName = toolUseOpeningTag.slice(1, -1) | ||||||||||||||||||
| // Extract and validate the tool name, handling spaces | ||||||||||||||||||
| const extractedToolName = toolUseOpeningTag.replace(/[<>\s]/g, '') | ||||||||||||||||||
|
|
||||||||||||||||||
| // Check if the extracted tool name is valid | ||||||||||||||||||
| if (!toolNames.includes(extractedToolName as ToolName)) { | ||||||||||||||||||
|
|
||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we add a comment explaining why we're checking these patterns? Future maintainers would benefit from understanding this handles models like Qwen3-Coder that generate malformed XML.