-
Notifications
You must be signed in to change notification settings - Fork 2.5k
fix: support Magistral [THINK]...[/THINK] reasoning tags #8523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,194 @@ | ||
| import { SquareBracketMatcher } from "../square-bracket-matcher" | ||
|
|
||
| describe("SquareBracketMatcher", () => { | ||
| it("matches square bracket tags at position 0", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const chunks = [...matcher.update("[THINK]data[/THINK]"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "data", | ||
| matched: true, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles uppercase tag names", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const chunks = [...matcher.update("[THINK]reasoning content[/THINK]"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "reasoning content", | ||
| matched: true, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles lowercase tag names", () => { | ||
| const matcher = new SquareBracketMatcher("think") | ||
| const chunks = [...matcher.update("[think]reasoning content[/think]"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "reasoning content", | ||
| matched: true, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles mixed content", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const chunks = [...matcher.update("Before [THINK]reasoning[/THINK] After"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "Before ", | ||
| matched: false, | ||
| }, | ||
| { | ||
| data: "reasoning", | ||
| matched: true, | ||
| }, | ||
| { | ||
| data: " After", | ||
| matched: false, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles streaming push", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const chunks = [ | ||
| ...matcher.update("["), | ||
| ...matcher.update("THINK"), | ||
| ...matcher.update("]"), | ||
| ...matcher.update("reasoning"), | ||
| ...matcher.update(" content"), | ||
| ...matcher.update("[/"), | ||
| ...matcher.update("THINK"), | ||
| ...matcher.update("]"), | ||
| ...matcher.final(), | ||
| ] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "reasoning content", | ||
| matched: true, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles nested tags", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const chunks = [...matcher.update("[THINK]X[THINK]Y[/THINK]Z[/THINK]"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "X[THINK]Y[/THINK]Z", | ||
| matched: true, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles invalid tag format", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const chunks = [...matcher.update("[INVALID]data[/INVALID]"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "[INVALID]data[/INVALID]", | ||
| matched: false, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles unclosed tags", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const chunks = [...matcher.update("[THINK]data"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "data", | ||
| matched: true, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles wrong matching position", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const chunks = [...matcher.update("prefix[THINK]data[/THINK]"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "prefix", | ||
| matched: false, | ||
| }, | ||
| { | ||
| data: "data", | ||
| matched: true, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles multiple sequential tags", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const chunks = [...matcher.update("[THINK]first[/THINK] middle [THINK]second[/THINK]"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "first", | ||
| matched: true, | ||
| }, | ||
| { | ||
| data: " middle ", | ||
| matched: false, | ||
| }, | ||
| { | ||
| data: "second", | ||
| matched: true, | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("transforms output when transform function is provided", () => { | ||
| const matcher = new SquareBracketMatcher("THINK", (chunk) => ({ | ||
| type: chunk.matched ? "reasoning" : "text", | ||
| text: chunk.data, | ||
| })) | ||
| const chunks = [...matcher.update("Before [THINK]reasoning[/THINK] After"), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| type: "text", | ||
| text: "Before ", | ||
| }, | ||
| { | ||
| type: "reasoning", | ||
| text: "reasoning", | ||
| }, | ||
| { | ||
| type: "text", | ||
| text: " After", | ||
| }, | ||
| ]) | ||
| }) | ||
|
|
||
| it("handles Magistral-style reasoning blocks", () => { | ||
| const matcher = new SquareBracketMatcher("THINK") | ||
| const input = `Let me analyze this problem. | ||
|
|
||
| [THINK] | ||
| I need to understand what the user is asking for. | ||
| They want to fix an issue with Magistral model's reasoning tags. | ||
| The tags use square brackets instead of angle brackets. | ||
| [/THINK] | ||
|
|
||
| Based on my analysis, here's the solution...` | ||
|
|
||
| const chunks = [...matcher.update(input), ...matcher.final()] | ||
| expect(chunks).toEqual([ | ||
| { | ||
| data: "Let me analyze this problem.\n\n", | ||
| matched: false, | ||
| }, | ||
| { | ||
| data: "\nI need to understand what the user is asking for.\nThey want to fix an issue with Magistral model's reasoning tags.\nThe tags use square brackets instead of angle brackets.\n", | ||
| matched: true, | ||
| }, | ||
| { | ||
| data: "\n\nBased on my analysis, here's the solution...", | ||
| matched: false, | ||
| }, | ||
| ]) | ||
| }) | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| export interface SquareBracketMatcherResult { | ||
| matched: boolean | ||
| data: string | ||
| } | ||
|
|
||
| /** | ||
| * Matcher for square bracket tags like [THINK]...[/THINK] | ||
| * Used by models like Magistral that use square bracket syntax instead of angle brackets | ||
| */ | ||
| export class SquareBracketMatcher<Result = SquareBracketMatcherResult> { | ||
| private buffer = "" | ||
| private insideTag = false | ||
| private tagDepth = 0 | ||
| private results: Result[] = [] | ||
|
|
||
| constructor( | ||
| readonly tagName: string, | ||
| readonly transform?: (chunks: SquareBracketMatcherResult) => Result, | ||
| readonly position = 0, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [P3] Unused constructor parameter |
||
| ) {} | ||
|
|
||
| private emit(matched: boolean, data: string) { | ||
| if (!data) return | ||
| const chunk: SquareBracketMatcherResult = { matched, data } | ||
| this.results.push(this.transform ? this.transform(chunk) : (chunk as Result)) | ||
| } | ||
|
|
||
| private processComplete() { | ||
| const openTag = `[${this.tagName}]` | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [P1] Matching is case-sensitive. If a Magistral variant emits [think]...[/think], this matcher won’t detect it. Consider normalizing both buffer and tag to a common case during search (for example, use lowercased copies for index lookups while slicing from the original string), or add an optional caseInsensitive flag. |
||
| const closeTag = `[/${this.tagName}]` | ||
| let processed = false | ||
|
|
||
| while (true) { | ||
| if (!this.insideTag) { | ||
| // Look for opening tag | ||
| const openIndex = this.buffer.indexOf(openTag) | ||
| if (openIndex === -1) { | ||
| // No opening tag found | ||
| break | ||
| } | ||
|
|
||
| if (openIndex > 0) { | ||
| // Emit text before tag | ||
| this.emit(false, this.buffer.substring(0, openIndex)) | ||
| this.buffer = this.buffer.substring(openIndex) | ||
| processed = true | ||
| } | ||
|
|
||
| // Now we have opening tag at start | ||
| this.buffer = this.buffer.substring(openTag.length) | ||
| this.insideTag = true | ||
| this.tagDepth = 1 | ||
| processed = true | ||
| } else { | ||
| // Inside tag, look for closing tag | ||
| let pos = 0 | ||
| let contentStart = 0 | ||
|
|
||
| while (pos < this.buffer.length) { | ||
| const nextOpen = this.buffer.indexOf(openTag, pos) | ||
| const nextClose = this.buffer.indexOf(closeTag, pos) | ||
|
|
||
| if (nextClose === -1) { | ||
| // No closing tag found yet | ||
| break | ||
| } | ||
|
|
||
| if (nextOpen !== -1 && nextOpen < nextClose) { | ||
| // Found nested opening tag | ||
| this.tagDepth++ | ||
| pos = nextOpen + openTag.length | ||
| } else { | ||
| // Found closing tag | ||
| this.tagDepth-- | ||
| if (this.tagDepth === 0) { | ||
| // Complete match found | ||
| const content = this.buffer.substring(contentStart, nextClose) | ||
| this.emit(true, content) | ||
| this.buffer = this.buffer.substring(nextClose + closeTag.length) | ||
| this.insideTag = false | ||
| processed = true | ||
| break | ||
| } else { | ||
| // Still nested | ||
| pos = nextClose + closeTag.length | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if (this.insideTag) { | ||
| // Still inside tag, no complete match yet | ||
| break | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return processed | ||
| } | ||
|
|
||
| update(chunk: string): Result[] { | ||
| this.buffer += chunk | ||
| this.results = [] | ||
|
|
||
| // Process any complete tag pairs | ||
| this.processComplete() | ||
|
|
||
| // For streaming, only emit unmatched text if we're sure it won't be part of a tag | ||
| if (!this.insideTag && this.buffer && !this.buffer.includes("[")) { | ||
| // No potential tags, emit as text | ||
| this.emit(false, this.buffer) | ||
| this.buffer = "" | ||
| } | ||
|
|
||
| const results = this.results | ||
| this.results = [] | ||
| return results | ||
| } | ||
|
|
||
| final(chunk?: string): Result[] { | ||
| if (chunk) { | ||
| this.buffer += chunk | ||
| } | ||
|
|
||
| this.results = [] | ||
|
|
||
| // Process any remaining complete pairs | ||
| this.processComplete() | ||
|
|
||
| // Emit any remaining buffer | ||
| if (this.buffer) { | ||
| this.emit(this.insideTag, this.buffer) | ||
| this.buffer = "" | ||
| } | ||
|
|
||
| const results = this.results | ||
| this.results = [] | ||
| this.insideTag = false | ||
| this.tagDepth = 0 | ||
| return results | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The constructor accepts a 'position' parameter which is never used. Consider removing it or adding a comment to document its intended purpose.