-
Notifications
You must be signed in to change notification settings - Fork 2.6k
fix: support both <think> and <thinking> tags for LM Studio GPT-OSS models #6751
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| import { MultiTagXmlMatcher } from "../multi-tag-xml-matcher" | ||
|
|
||
| describe("MultiTagXmlMatcher", () => { | ||
| it("should match content with <think> tags", () => { | ||
| const matcher = new MultiTagXmlMatcher(["think", "thinking"]) | ||
| const input = "Before <think>This is thinking content</think> After" | ||
|
|
||
| const results = matcher.update(input) | ||
| const finalResults = matcher.final() | ||
|
|
||
| const allResults = [...results, ...finalResults] | ||
|
|
||
| // Check that we have thinking content | ||
| const thinkingBlocks = allResults.filter((r) => r.matched) | ||
| const textBlocks = allResults.filter((r) => !r.matched) | ||
|
|
||
| expect(thinkingBlocks).toContainEqual({ matched: true, data: "This is thinking content" }) | ||
| expect(textBlocks.some((b) => b.data.includes("Before"))).toBe(true) | ||
| expect(textBlocks.some((b) => b.data.includes("After"))).toBe(true) | ||
| }) | ||
|
|
||
| it("should match content with <thinking> tags", () => { | ||
| const matcher = new MultiTagXmlMatcher(["think", "thinking"]) | ||
| const input = "Before <thinking>This is thinking content</thinking> After" | ||
|
|
||
| const results = matcher.update(input) | ||
| const finalResults = matcher.final() | ||
|
|
||
| const allResults = [...results, ...finalResults] | ||
|
|
||
| // Check that we have thinking content | ||
| const thinkingBlocks = allResults.filter((r) => r.matched) | ||
| const textBlocks = allResults.filter((r) => !r.matched) | ||
|
|
||
| expect(thinkingBlocks).toContainEqual({ matched: true, data: "This is thinking content" }) | ||
| expect(textBlocks.some((b) => b.data.includes("Before"))).toBe(true) | ||
| expect(textBlocks.some((b) => b.data.includes("After"))).toBe(true) | ||
| }) | ||
|
|
||
| it("should handle mixed tags in the same content", () => { | ||
| const matcher = new MultiTagXmlMatcher(["think", "thinking"]) | ||
| const input = "Start <think>First thought</think> Middle <thinking>Second thought</thinking> End" | ||
|
|
||
| const results = matcher.update(input) | ||
| const finalResults = matcher.final() | ||
|
|
||
| const allResults = [...results, ...finalResults] | ||
|
|
||
| // The important thing is that both thinking blocks are captured | ||
| const thinkingBlocks = allResults.filter((r) => r.matched) | ||
| const textBlocks = allResults.filter((r) => !r.matched) | ||
|
|
||
| expect(thinkingBlocks).toContainEqual({ matched: true, data: "First thought" }) | ||
| expect(thinkingBlocks).toContainEqual({ matched: true, data: "Second thought" }) | ||
| expect(textBlocks.some((b) => b.data.includes("Start"))).toBe(true) | ||
| expect(textBlocks.some((b) => b.data.includes("Middle"))).toBe(true) | ||
| expect(textBlocks.some((b) => b.data.includes("End"))).toBe(true) | ||
| }) | ||
|
|
||
| it("should work with custom transform function", () => { | ||
| const transform = (chunk: any) => ({ | ||
| type: chunk.matched ? "reasoning" : "text", | ||
| text: chunk.data, | ||
| }) | ||
|
|
||
| const matcher = new MultiTagXmlMatcher(["think", "thinking"], transform) | ||
| const input = "Before <thinking>Reasoning here</thinking> After" | ||
|
|
||
| const results = matcher.update(input) | ||
| const finalResults = matcher.final() | ||
|
|
||
| const allResults = [...results, ...finalResults] | ||
|
|
||
| // Check that transform is applied | ||
| const reasoningBlocks = allResults.filter((r) => r.type === "reasoning") | ||
| const textBlocks = allResults.filter((r) => r.type === "text") | ||
|
|
||
| expect(reasoningBlocks).toContainEqual({ type: "reasoning", text: "Reasoning here" }) | ||
| expect(textBlocks.length).toBeGreaterThan(0) | ||
| }) | ||
|
|
||
| it("should handle empty tags", () => { | ||
| const matcher = new MultiTagXmlMatcher(["think", "thinking"]) | ||
| const input = "Before <think></think> Middle <thinking></thinking> After" | ||
|
|
||
| const results = matcher.update(input) | ||
| const finalResults = matcher.final() | ||
|
|
||
| const allResults = [...results, ...finalResults] | ||
|
|
||
| // Empty tags should still be matched but with empty content | ||
| const emptyBlocks = allResults.filter((r) => r.matched && r.data === "") | ||
| expect(emptyBlocks.length).toBeGreaterThan(0) | ||
| }) | ||
| }) | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,141 @@ | ||||||||||||||||||||||
| import { XmlMatcherResult } from "./xml-matcher" | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we add JSDoc with usage examples? It would help developers understand how to use this with different tag combinations:
Suggested change
|
||||||||||||||||||||||
| * A multi-tag XML matcher that can match multiple tag names. | ||||||||||||||||||||||
| * This is useful for handling different thinking tag formats from various models. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| export class MultiTagXmlMatcher<Result = XmlMatcherResult> { | ||||||||||||||||||||||
| private buffer = "" | ||||||||||||||||||||||
| private chunks: Result[] = [] | ||||||||||||||||||||||
| private state: "TEXT" | "TAG_OPEN" | "TAG_CLOSE" = "TEXT" | ||||||||||||||||||||||
| private currentTag = "" | ||||||||||||||||||||||
| private depth = 0 | ||||||||||||||||||||||
| private matchedTag = "" | ||||||||||||||||||||||
| private matchedContent = "" | ||||||||||||||||||||||
| private lastEmittedIndex = 0 | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| constructor( | ||||||||||||||||||||||
| private tagNames: string[], | ||||||||||||||||||||||
| private transform?: (chunks: XmlMatcherResult) => Result, | ||||||||||||||||||||||
| private position = 0, | ||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The constructor parameter 'position' is declared but never used. Consider removing it if not needed.
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the |
||||||||||||||||||||||
| ) {} | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| private emit(matched: boolean, data: string) { | ||||||||||||||||||||||
| // Allow empty strings for empty tags | ||||||||||||||||||||||
| const result: XmlMatcherResult = { matched, data } | ||||||||||||||||||||||
| if (this.transform) { | ||||||||||||||||||||||
| this.chunks.push(this.transform(result)) | ||||||||||||||||||||||
| } else { | ||||||||||||||||||||||
| this.chunks.push(result as Result) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| private processBuffer() { | ||||||||||||||||||||||
| let i = 0 | ||||||||||||||||||||||
| while (i < this.buffer.length) { | ||||||||||||||||||||||
| const char = this.buffer[i] | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if (this.state === "TEXT") { | ||||||||||||||||||||||
| if (char === "<") { | ||||||||||||||||||||||
| // Emit any text before the tag | ||||||||||||||||||||||
| if (i > this.lastEmittedIndex) { | ||||||||||||||||||||||
| this.emit(false, this.buffer.substring(this.lastEmittedIndex, i)) | ||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When a '<' is encountered in TEXT state, the code unconditionally emits the preceding text. This can duplicate text that’s being collected as part of a matched tag. Consider emitting plain text only when depth is 0 (i.e. not inside a matching tag). |
||||||||||||||||||||||
| } | ||||||||||||||||||||||
| this.state = "TAG_OPEN" | ||||||||||||||||||||||
| this.currentTag = "" | ||||||||||||||||||||||
| this.lastEmittedIndex = i | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } else if (this.state === "TAG_OPEN") { | ||||||||||||||||||||||
| if (char === ">") { | ||||||||||||||||||||||
| // Check if this is a closing tag | ||||||||||||||||||||||
| const isClosing = this.currentTag.startsWith("/") | ||||||||||||||||||||||
| const tagName = isClosing ? this.currentTag.substring(1) : this.currentTag | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if (this.tagNames.includes(tagName)) { | ||||||||||||||||||||||
| if (isClosing && this.matchedTag === tagName) { | ||||||||||||||||||||||
| this.depth-- | ||||||||||||||||||||||
| if (this.depth === 0) { | ||||||||||||||||||||||
| // Emit the matched content | ||||||||||||||||||||||
| this.emit(true, this.matchedContent) | ||||||||||||||||||||||
| this.matchedContent = "" | ||||||||||||||||||||||
| this.matchedTag = "" | ||||||||||||||||||||||
| this.lastEmittedIndex = i + 1 | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } else if (!isClosing) { | ||||||||||||||||||||||
| if (this.depth === 0) { | ||||||||||||||||||||||
| this.matchedTag = tagName | ||||||||||||||||||||||
| this.lastEmittedIndex = i + 1 | ||||||||||||||||||||||
| this.matchedContent = "" // Reset matched content | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| this.depth++ | ||||||||||||||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we handle nested tags of the same type? The current implementation might not correctly parse cases like |
||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| this.state = "TEXT" | ||||||||||||||||||||||
| } else if (char !== "/" || this.currentTag.length > 0) { | ||||||||||||||||||||||
| this.currentTag += char | ||||||||||||||||||||||
| } else { | ||||||||||||||||||||||
| this.currentTag += char | ||||||||||||||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code appears to be duplicated. Could we simplify it to:
Suggested change
|
||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // If we're inside a matched tag, collect the content | ||||||||||||||||||||||
| if (this.depth > 0 && this.state === "TEXT" && i >= this.lastEmittedIndex) { | ||||||||||||||||||||||
| this.matchedContent += char | ||||||||||||||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this intentional? The character-by-character content collection in the matched tag could be inefficient for large responses. Consider buffering larger chunks for better performance:
Suggested change
|
||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| i++ | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // Emit any remaining text | ||||||||||||||||||||||
| if (this.state === "TEXT" && this.depth === 0 && this.lastEmittedIndex < this.buffer.length) { | ||||||||||||||||||||||
| this.emit(false, this.buffer.substring(this.lastEmittedIndex)) | ||||||||||||||||||||||
| this.lastEmittedIndex = this.buffer.length | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| update(chunk: string): Result[] { | ||||||||||||||||||||||
| this.chunks = [] | ||||||||||||||||||||||
| this.buffer += chunk | ||||||||||||||||||||||
| this.processBuffer() | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // Keep unprocessed content in buffer | ||||||||||||||||||||||
| if (this.lastEmittedIndex > 0 && this.depth === 0) { | ||||||||||||||||||||||
| this.buffer = this.buffer.substring(this.lastEmittedIndex) | ||||||||||||||||||||||
| this.lastEmittedIndex = 0 | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| const result = this.chunks | ||||||||||||||||||||||
| this.chunks = [] | ||||||||||||||||||||||
| return result | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| final(chunk?: string): Result[] { | ||||||||||||||||||||||
| this.chunks = [] | ||||||||||||||||||||||
| if (chunk) { | ||||||||||||||||||||||
| this.buffer += chunk | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // Process any remaining buffer | ||||||||||||||||||||||
| this.processBuffer() | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // Emit any remaining content | ||||||||||||||||||||||
| if (this.buffer.length > this.lastEmittedIndex) { | ||||||||||||||||||||||
| if (this.depth > 0 && this.matchedContent) { | ||||||||||||||||||||||
| // Incomplete tag, emit as text | ||||||||||||||||||||||
| this.emit(false, this.buffer.substring(this.lastEmittedIndex)) | ||||||||||||||||||||||
| } else { | ||||||||||||||||||||||
| this.emit(false, this.buffer.substring(this.lastEmittedIndex)) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // Reset state | ||||||||||||||||||||||
| this.buffer = "" | ||||||||||||||||||||||
| this.lastEmittedIndex = 0 | ||||||||||||||||||||||
| this.depth = 0 | ||||||||||||||||||||||
| this.matchedTag = "" | ||||||||||||||||||||||
| this.matchedContent = "" | ||||||||||||||||||||||
| this.state = "TEXT" | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| return this.chunks | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we add tests for streaming behavior with partial chunks? For example, when a tag is split across chunks like receiving
<thiin one chunk andnking>content</thinking>in the next. This would ensure the matcher handles real-world streaming scenarios correctly.