Skip to content

Commit 924a793

Browse files
committed
fix: support both <think> and <thinking> tags for LM Studio GPT-OSS models
- Created MultiTagXmlMatcher utility to handle multiple XML tag names
- Updated LM Studio handler to parse both <think> and <thinking> tags
- Added comprehensive tests for the new functionality
- Fixes #6750
1 parent 2b647ed commit 924a793

File tree

4 files changed

+328
-3
lines changed

4 files changed

+328
-3
lines changed

src/api/providers/__tests__/lmstudio.spec.ts

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,94 @@ describe("LmStudioHandler", () => {
114114
expect(textChunks[0].text).toBe("Test response")
115115
})
116116

117+
it("should handle <think> tags in responses", async () => {
	// Two streamed events: one carrying the tagged content, then one with an
	// empty delta that only reports token usage.
	const events = [
		{
			choices: [{ delta: { content: "Before <think>This is a thought</think> After" }, index: 0 }],
			usage: null,
		},
		{
			choices: [{ delta: {}, index: 0 }],
			usage: { prompt_tokens: 10, completion_tokens: 15, total_tokens: 25 },
		},
	]

	mockCreate.mockImplementationOnce(async (_options) => ({
		[Symbol.asyncIterator]: async function* () {
			yield* events
		},
	}))

	// Drain everything the handler produces for this request.
	const received: any[] = []
	for await (const part of handler.createMessage(systemPrompt, messages)) {
		received.push(part)
	}

	const ofType = (kind: string) => received.filter((part) => part.type === kind)
	const plain = ofType("text")
	const reasoning = ofType("reasoning")

	// Text around the tag stays text; the tag body is reported as reasoning.
	expect(plain).toContainEqual({ type: "text", text: "Before " })
	expect(plain).toContainEqual({ type: "text", text: " After" })
	expect(reasoning).toContainEqual({ type: "reasoning", text: "This is a thought" })
})
160+
161+
it("should handle <thinking> tags in responses (GPT-OSS compatibility)", async () => {
	// Builds one streamed event in the shape the mocked client yields.
	const makeEvent = (delta: Record<string, unknown>, usage: Record<string, number> | null) => ({
		choices: [{ delta, index: 0 }],
		usage,
	})

	mockCreate.mockImplementationOnce(async (_options) => {
		async function* fakeStream() {
			// Content wrapped in the longer <thinking> tag variant.
			yield makeEvent({ content: "Before <thinking>This is thinking content</thinking> After" }, null)
			// Trailing event: empty delta, usage totals only.
			yield makeEvent({}, { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 })
		}
		return { [Symbol.asyncIterator]: fakeStream }
	})

	const collected: any[] = []
	const responseStream = handler.createMessage(systemPrompt, messages)
	for await (const item of responseStream) {
		collected.push(item)
	}

	const textOnly = collected.filter(({ type }) => type === "text")
	const reasoningOnly = collected.filter(({ type }) => type === "reasoning")

	expect(textOnly).toContainEqual({ type: "text", text: "Before " })
	expect(textOnly).toContainEqual({ type: "text", text: " After" })
	expect(reasoningOnly).toContainEqual({ type: "reasoning", text: "This is thinking content" })
})
204+
117205
it("should handle API errors", async () => {
118206
mockCreate.mockRejectedValueOnce(new Error("API Error"))
119207

src/api/providers/lm-studio.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { type ModelInfo, openAiModelInfoSaneDefaults, LMSTUDIO_DEFAULT_TEMPERATU
66

77
import type { ApiHandlerOptions } from "../../shared/api"
88

9-
import { XmlMatcher } from "../../utils/xml-matcher"
9+
import { MultiTagXmlMatcher } from "../../utils/multi-tag-xml-matcher"
1010

1111
import { convertToOpenAiMessages } from "../transform/openai-format"
1212
import { ApiStream } from "../transform/stream"
@@ -87,8 +87,9 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
8787

8888
const results = await this.client.chat.completions.create(params)
8989

90-
const matcher = new XmlMatcher(
91-
"think",
90+
// Support both <think> and <thinking> tags for different GPT-OSS models
91+
const matcher = new MultiTagXmlMatcher(
92+
["think", "thinking"],
9293
(chunk) =>
9394
({
9495
type: chunk.matched ? "reasoning" : "text",
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import { MultiTagXmlMatcher } from "../multi-tag-xml-matcher"
2+
3+
describe("MultiTagXmlMatcher", () => {
	/**
	 * Pushes `input` through a single `update()` call followed by `final()`
	 * and returns everything the matcher produced, in order.
	 */
	const runOnce = <T>(matcher: MultiTagXmlMatcher<T>, input: string): T[] => {
		const fromUpdate = matcher.update(input)
		const fromFinal = matcher.final()
		return fromUpdate.concat(fromFinal)
	}

	it("should match content with <think> tags", () => {
		const output = runOnce(
			new MultiTagXmlMatcher(["think", "thinking"]),
			"Before <think>This is thinking content</think> After",
		)

		// Split the output into tag bodies and surrounding text.
		const inside = output.filter((piece) => piece.matched)
		const outside = output.filter((piece) => !piece.matched)

		expect(inside).toContainEqual({ matched: true, data: "This is thinking content" })
		expect(outside.some((piece) => piece.data.includes("Before"))).toBe(true)
		expect(outside.some((piece) => piece.data.includes("After"))).toBe(true)
	})

	it("should match content with <thinking> tags", () => {
		const output = runOnce(
			new MultiTagXmlMatcher(["think", "thinking"]),
			"Before <thinking>This is thinking content</thinking> After",
		)

		// Split the output into tag bodies and surrounding text.
		const inside = output.filter((piece) => piece.matched)
		const outside = output.filter((piece) => !piece.matched)

		expect(inside).toContainEqual({ matched: true, data: "This is thinking content" })
		expect(outside.some((piece) => piece.data.includes("Before"))).toBe(true)
		expect(outside.some((piece) => piece.data.includes("After"))).toBe(true)
	})

	it("should handle mixed tags in the same content", () => {
		const output = runOnce(
			new MultiTagXmlMatcher(["think", "thinking"]),
			"Start <think>First thought</think> Middle <thinking>Second thought</thinking> End",
		)

		// Both tag variants must be captured, whatever else is emitted.
		const inside = output.filter((piece) => piece.matched)
		const outside = output.filter((piece) => !piece.matched)

		for (const thought of ["First thought", "Second thought"]) {
			expect(inside).toContainEqual({ matched: true, data: thought })
		}
		for (const word of ["Start", "Middle", "End"]) {
			expect(outside.some((piece) => piece.data.includes(word))).toBe(true)
		}
	})

	it("should work with custom transform function", () => {
		const toStreamChunk = (chunk: any) => ({
			type: chunk.matched ? "reasoning" : "text",
			text: chunk.data,
		})

		const output = runOnce(
			new MultiTagXmlMatcher(["think", "thinking"], toStreamChunk),
			"Before <thinking>Reasoning here</thinking> After",
		)

		// Every emitted item should have gone through the transform.
		const reasoning = output.filter((item) => item.type === "reasoning")
		const text = output.filter((item) => item.type === "text")

		expect(reasoning).toContainEqual({ type: "reasoning", text: "Reasoning here" })
		expect(text.length).toBeGreaterThan(0)
	})

	it("should handle empty tags", () => {
		const output = runOnce(
			new MultiTagXmlMatcher(["think", "thinking"]),
			"Before <think></think> Middle <thinking></thinking> After",
		)

		// An empty tag pair is still reported as a match, with empty content.
		const emptyMatches = output.filter((piece) => piece.matched && piece.data === "")
		expect(emptyMatches.length).toBeGreaterThan(0)
	})
})

src/utils/multi-tag-xml-matcher.ts

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import { XmlMatcherResult } from "./xml-matcher"
2+
3+
/**
 * Streaming matcher that separates the content of any of several XML-style
 * tags (for example `<think>` and `<thinking>`) from the surrounding text.
 *
 * Input may arrive in arbitrary pieces: a tag can be split across `update()`
 * calls. Only an undecided tag prefix (such as `"<thi"`) is held back; all
 * other content is returned by the call that received it, so matched content
 * is emitted incrementally rather than when the closing tag arrives.
 *
 * Rules:
 * - Outside a block, `<name>` with `name` in `tagNames` opens a block. The
 *   opening tag and its final closing tag are not part of any output.
 * - Inside a block, only tags with the SAME name as the opening tag are
 *   structural: `<name>` nests one level deeper, `</name>` closes one level.
 *   Nested same-name tags and every other tag are kept verbatim as matched
 *   content.
 * - A block that closes without any content yields one
 *   `{ matched: true, data: "" }` chunk.
 * - Within one call, adjacent output belonging to the same region is merged
 *   into a single chunk; two separate blocks are never merged together.
 */
export class MultiTagXmlMatcher<Result = XmlMatcherResult> {
	/** Raw chunks produced by the call in progress; handed out by `drain()`. */
	private chunks: XmlMatcherResult[] = []
	/** Possible tag (always starting with "<") whose fate is not yet decided. */
	private pending = ""
	/** Name of the tag that opened the current block, or "" outside a block. */
	private matchedTag = ""
	/** Nesting depth of `matchedTag`; 0 means outside any block. */
	private depth = 0
	/** Whether the current block has produced any matched output so far. */
	private blockHasOutput = false
	/** Whether the next output may be appended to the last chunk. */
	private mergeable = false

	/**
	 * @param tagNames Tag names (without angle brackets) whose content is reported as matched.
	 * @param transform Optional mapping applied to every raw chunk before it is returned.
	 * @param position Accepted for signature compatibility; not consulted by this matcher.
	 */
	constructor(
		private tagNames: string[],
		private transform?: (chunks: XmlMatcherResult) => Result,
		private position = 0,
	) {}

	/**
	 * Feeds the next piece of the stream.
	 *
	 * @param chunk Text to parse; may end in the middle of a tag.
	 * @returns The chunks that became decidable during this call, in order.
	 */
	update(chunk: string): Result[] {
		this.consume(chunk)
		return this.drain()
	}

	/**
	 * Ends the stream and resets the matcher so it can be reused.
	 *
	 * A held-back, incomplete tag is flushed verbatim (as matched content if a
	 * block is still open, otherwise as text). Content of an unterminated
	 * block has already been returned as matched by earlier calls.
	 *
	 * @param chunk Optional last piece of text to parse before finishing.
	 * @returns The remaining chunks.
	 */
	final(chunk?: string): Result[] {
		if (chunk) {
			this.consume(chunk)
		}
		this.append(this.pending)
		const result = this.drain()

		this.pending = ""
		this.matchedTag = ""
		this.depth = 0
		this.blockHasOutput = false
		this.mergeable = false

		return result
	}

	/** Parses `chunk`, appending decided output to `this.chunks`. */
	private consume(chunk: string): void {
		let i = 0
		while (i < chunk.length) {
			if (this.pending === "") {
				// Fast path: everything up to the next "<" is plain content.
				const lt = chunk.indexOf("<", i)
				this.append(chunk.slice(i, lt === -1 ? chunk.length : lt))
				if (lt === -1) {
					return
				}
				this.pending = "<"
				i = lt + 1
				continue
			}

			const char = chunk.charAt(i)
			i++
			if (char === ">") {
				const raw = this.pending + char
				this.pending = ""
				this.resolveTag(raw)
			} else if (char === "<") {
				// The earlier "<" was not a tag start; a new candidate begins here.
				this.append(this.pending)
				this.pending = "<"
			} else {
				this.pending += char
				// Stop holding text back as soon as it can no longer become a relevant tag.
				if (!this.couldBecomeTag(this.pending)) {
					this.append(this.pending)
					this.pending = ""
				}
			}
		}
	}

	/** Whether `candidate` ("<..." without ">") is still a prefix of a tag that matters in the current state. */
	private couldBecomeTag(candidate: string): boolean {
		const closing = candidate.startsWith("</")
		const name = candidate.slice(closing ? 2 : 1)
		if (this.depth === 0) {
			// Outside a block only an opening tag of a configured name is relevant.
			return !closing && this.tagNames.some((tag) => tag.startsWith(name))
		}
		return this.matchedTag.startsWith(name)
	}

	/** Handles a complete tag `raw` ("<...>"): opens/closes a block or treats it as literal content. */
	private resolveTag(raw: string): void {
		const closing = raw.startsWith("</")
		const name = raw.slice(closing ? 2 : 1, -1)

		if (this.depth === 0) {
			if (!closing && name !== "" && this.tagNames.includes(name)) {
				this.matchedTag = name
				this.depth = 1
				this.blockHasOutput = false
				this.mergeable = false
			} else {
				this.append(raw)
			}
			return
		}

		if (name !== this.matchedTag) {
			this.append(raw)
			return
		}
		if (!closing) {
			this.depth++
			this.append(raw)
			return
		}
		if (this.depth > 1) {
			this.depth--
			this.append(raw)
			return
		}

		// Outermost closing tag: report an empty block explicitly, then leave the block.
		if (!this.blockHasOutput) {
			this.chunks.push({ matched: true, data: "" })
		}
		this.depth = 0
		this.matchedTag = ""
		this.mergeable = false
	}

	/** Adds `text` to the output of the current region, merging with the previous chunk when allowed. */
	private append(text: string): void {
		if (text === "") {
			return
		}
		const matched = this.depth > 0
		const last = this.chunks[this.chunks.length - 1]
		if (this.mergeable && last !== undefined && last.matched === matched) {
			last.data += text
		} else {
			this.chunks.push({ matched, data: text })
		}
		this.mergeable = true
		if (matched) {
			this.blockHasOutput = true
		}
	}

	/** Returns the chunks collected so far (transformed if requested) and clears the list. */
	private drain(): Result[] {
		const raw = this.chunks
		this.chunks = []
		this.mergeable = false
		const transform = this.transform
		return transform ? raw.map((chunk) => transform(chunk)) : (raw as Result[])
	}
}

0 commit comments

Comments
 (0)