Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions src/api/providers/__tests__/lmstudio.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,94 @@ describe("LmStudioHandler", () => {
expect(textChunks[0].text).toBe("Test response")
})

it("should handle <think> tags in responses", async () => {
mockCreate.mockImplementationOnce(async (options) => {
return {
[Symbol.asyncIterator]: async function* () {
yield {
choices: [
{
delta: { content: "Before <think>This is a thought</think> After" },
index: 0,
},
],
usage: null,
}
yield {
choices: [
{
delta: {},
index: 0,
},
],
usage: {
prompt_tokens: 10,
completion_tokens: 15,
total_tokens: 25,
},
}
},
}
})

const stream = handler.createMessage(systemPrompt, messages)
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

const textChunks = chunks.filter((chunk) => chunk.type === "text")
const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")

expect(textChunks).toContainEqual({ type: "text", text: "Before " })
expect(textChunks).toContainEqual({ type: "text", text: " After" })
expect(reasoningChunks).toContainEqual({ type: "reasoning", text: "This is a thought" })
})

it("should handle <thinking> tags in responses (GPT-OSS compatibility)", async () => {
mockCreate.mockImplementationOnce(async (options) => {
return {
[Symbol.asyncIterator]: async function* () {
yield {
choices: [
{
delta: { content: "Before <thinking>This is thinking content</thinking> After" },
index: 0,
},
],
usage: null,
}
yield {
choices: [
{
delta: {},
index: 0,
},
],
usage: {
prompt_tokens: 10,
completion_tokens: 20,
total_tokens: 30,
},
}
},
}
})

const stream = handler.createMessage(systemPrompt, messages)
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

const textChunks = chunks.filter((chunk) => chunk.type === "text")
const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")

expect(textChunks).toContainEqual({ type: "text", text: "Before " })
expect(textChunks).toContainEqual({ type: "text", text: " After" })
expect(reasoningChunks).toContainEqual({ type: "reasoning", text: "This is thinking content" })
})

it("should handle API errors", async () => {
mockCreate.mockRejectedValueOnce(new Error("API Error"))

Expand Down
7 changes: 4 additions & 3 deletions src/api/providers/lm-studio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { type ModelInfo, openAiModelInfoSaneDefaults, LMSTUDIO_DEFAULT_TEMPERATU

import type { ApiHandlerOptions } from "../../shared/api"

import { XmlMatcher } from "../../utils/xml-matcher"
import { MultiTagXmlMatcher } from "../../utils/multi-tag-xml-matcher"

import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
Expand Down Expand Up @@ -87,8 +87,9 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan

const results = await this.client.chat.completions.create(params)

const matcher = new XmlMatcher(
"think",
// Support both <think> and <thinking> tags for different GPT-OSS models
const matcher = new MultiTagXmlMatcher(
["think", "thinking"],
(chunk) =>
({
type: chunk.matched ? "reasoning" : "text",
Expand Down
95 changes: 95 additions & 0 deletions src/utils/__tests__/multi-tag-xml-matcher.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { MultiTagXmlMatcher } from "../multi-tag-xml-matcher"

describe("MultiTagXmlMatcher", () => {
it("should match content with <think> tags", () => {
const matcher = new MultiTagXmlMatcher(["think", "thinking"])
const input = "Before <think>This is thinking content</think> After"

const results = matcher.update(input)
const finalResults = matcher.final()

const allResults = [...results, ...finalResults]

// Check that we have thinking content
const thinkingBlocks = allResults.filter((r) => r.matched)
const textBlocks = allResults.filter((r) => !r.matched)

expect(thinkingBlocks).toContainEqual({ matched: true, data: "This is thinking content" })
expect(textBlocks.some((b) => b.data.includes("Before"))).toBe(true)
expect(textBlocks.some((b) => b.data.includes("After"))).toBe(true)
})

it("should match content with <thinking> tags", () => {
const matcher = new MultiTagXmlMatcher(["think", "thinking"])
const input = "Before <thinking>This is thinking content</thinking> After"

const results = matcher.update(input)
const finalResults = matcher.final()

const allResults = [...results, ...finalResults]

// Check that we have thinking content
const thinkingBlocks = allResults.filter((r) => r.matched)
const textBlocks = allResults.filter((r) => !r.matched)

expect(thinkingBlocks).toContainEqual({ matched: true, data: "This is thinking content" })
expect(textBlocks.some((b) => b.data.includes("Before"))).toBe(true)
expect(textBlocks.some((b) => b.data.includes("After"))).toBe(true)
})

it("should handle mixed tags in the same content", () => {
const matcher = new MultiTagXmlMatcher(["think", "thinking"])
const input = "Start <think>First thought</think> Middle <thinking>Second thought</thinking> End"

const results = matcher.update(input)
const finalResults = matcher.final()

const allResults = [...results, ...finalResults]

// The important thing is that both thinking blocks are captured
const thinkingBlocks = allResults.filter((r) => r.matched)
const textBlocks = allResults.filter((r) => !r.matched)

expect(thinkingBlocks).toContainEqual({ matched: true, data: "First thought" })
expect(thinkingBlocks).toContainEqual({ matched: true, data: "Second thought" })
expect(textBlocks.some((b) => b.data.includes("Start"))).toBe(true)
expect(textBlocks.some((b) => b.data.includes("Middle"))).toBe(true)
expect(textBlocks.some((b) => b.data.includes("End"))).toBe(true)
})

it("should work with custom transform function", () => {
const transform = (chunk: any) => ({
type: chunk.matched ? "reasoning" : "text",
text: chunk.data,
})

const matcher = new MultiTagXmlMatcher(["think", "thinking"], transform)
const input = "Before <thinking>Reasoning here</thinking> After"

const results = matcher.update(input)
const finalResults = matcher.final()

const allResults = [...results, ...finalResults]

// Check that transform is applied
const reasoningBlocks = allResults.filter((r) => r.type === "reasoning")
const textBlocks = allResults.filter((r) => r.type === "text")

expect(reasoningBlocks).toContainEqual({ type: "reasoning", text: "Reasoning here" })
expect(textBlocks.length).toBeGreaterThan(0)
})

it("should handle empty tags", () => {
const matcher = new MultiTagXmlMatcher(["think", "thinking"])
const input = "Before <think></think> Middle <thinking></thinking> After"

const results = matcher.update(input)
const finalResults = matcher.final()

const allResults = [...results, ...finalResults]

// Empty tags should still be matched but with empty content
const emptyBlocks = allResults.filter((r) => r.matched && r.data === "")
expect(emptyBlocks.length).toBeGreaterThan(0)
})
})
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add tests for streaming behavior with partial chunks? For example, when a tag is split across chunks like receiving <thi in one chunk and nking>content</thinking> in the next. This would ensure the matcher handles real-world streaming scenarios correctly.

141 changes: 141 additions & 0 deletions src/utils/multi-tag-xml-matcher.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import { XmlMatcherResult } from "./xml-matcher"

/**
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add JSDoc with usage examples? It would help developers understand how to use this with different tag combinations:

Suggested change
/**
/**
* A multi-tag XML matcher that can match multiple tag names.
* This is useful for handling different thinking tag formats from various models.
*
* @example
* // Match both <think> and <thinking> tags
* const matcher = new MultiTagXmlMatcher(['think', 'thinking'])
* const results = matcher.update('<think>Hello</think> world <thinking>Hi</thinking>')
*/

* A multi-tag XML matcher that can match multiple tag names.
* This is useful for handling different thinking tag formats from various models.
*/
export class MultiTagXmlMatcher<Result = XmlMatcherResult> {
private buffer = ""
private chunks: Result[] = []
private state: "TEXT" | "TAG_OPEN" | "TAG_CLOSE" = "TEXT"
private currentTag = ""
private depth = 0
private matchedTag = ""
private matchedContent = ""
private lastEmittedIndex = 0

constructor(
private tagNames: string[],
private transform?: (chunks: XmlMatcherResult) => Result,
private position = 0,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The constructor parameter 'position' is declared but never used. Consider removing it if not needed.

Suggested change
private position = 0,

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the position parameter needed? It's accepted in the constructor but never used in the implementation, unlike the original XmlMatcher. If it's not needed, we could remove it to avoid confusion.

) {}

private emit(matched: boolean, data: string) {
// Allow empty strings for empty tags
const result: XmlMatcherResult = { matched, data }
if (this.transform) {
this.chunks.push(this.transform(result))
} else {
this.chunks.push(result as Result)
}
}

private processBuffer() {
let i = 0
while (i < this.buffer.length) {
const char = this.buffer[i]

if (this.state === "TEXT") {
if (char === "<") {
// Emit any text before the tag
if (i > this.lastEmittedIndex) {
this.emit(false, this.buffer.substring(this.lastEmittedIndex, i))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a '<' is encountered in TEXT state, the code unconditionally emits the preceding text. This can duplicate text that’s being collected as part of a matched tag. Consider emitting plain text only when depth is 0 (i.e. not inside a matching tag).

}
this.state = "TAG_OPEN"
this.currentTag = ""
this.lastEmittedIndex = i
}
} else if (this.state === "TAG_OPEN") {
if (char === ">") {
// Check if this is a closing tag
const isClosing = this.currentTag.startsWith("/")
const tagName = isClosing ? this.currentTag.substring(1) : this.currentTag

if (this.tagNames.includes(tagName)) {
if (isClosing && this.matchedTag === tagName) {
this.depth--
if (this.depth === 0) {
// Emit the matched content
this.emit(true, this.matchedContent)
this.matchedContent = ""
this.matchedTag = ""
this.lastEmittedIndex = i + 1
}
} else if (!isClosing) {
if (this.depth === 0) {
this.matchedTag = tagName
this.lastEmittedIndex = i + 1
this.matchedContent = "" // Reset matched content
}
this.depth++
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we handle nested tags of the same type? The current implementation might not correctly parse cases like <think>outer <think>inner</think> outer</think>. The depth tracking seems to assume all matched tags are the same, but with multiple tag names, nested different tags would work while nested same tags might not.

}
}
this.state = "TEXT"
} else if (char !== "/" || this.currentTag.length > 0) {
this.currentTag += char
} else {
this.currentTag += char
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code appears to be duplicated. Could we simplify it to:

Suggested change
this.currentTag += char
} else {
this.currentTag += char
}

}
}

// If we're inside a matched tag, collect the content
if (this.depth > 0 && this.state === "TEXT" && i >= this.lastEmittedIndex) {
this.matchedContent += char
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this intentional? The character-by-character content collection in the matched tag could be inefficient for large responses. Consider buffering larger chunks for better performance:

Suggested change
this.matchedContent += char
// If we're inside a matched tag, collect the content
if (this.depth > 0 && this.state === "TEXT") {
const remainingContent = this.buffer.substring(i)
this.matchedContent += remainingContent
i = this.buffer.length - 1
}

}

i++
}

// Emit any remaining text
if (this.state === "TEXT" && this.depth === 0 && this.lastEmittedIndex < this.buffer.length) {
this.emit(false, this.buffer.substring(this.lastEmittedIndex))
this.lastEmittedIndex = this.buffer.length
}
}

update(chunk: string): Result[] {
this.chunks = []
this.buffer += chunk
this.processBuffer()

// Keep unprocessed content in buffer
if (this.lastEmittedIndex > 0 && this.depth === 0) {
this.buffer = this.buffer.substring(this.lastEmittedIndex)
this.lastEmittedIndex = 0
}

const result = this.chunks
this.chunks = []
return result
}

final(chunk?: string): Result[] {
this.chunks = []
if (chunk) {
this.buffer += chunk
}

// Process any remaining buffer
this.processBuffer()

// Emit any remaining content
if (this.buffer.length > this.lastEmittedIndex) {
if (this.depth > 0 && this.matchedContent) {
// Incomplete tag, emit as text
this.emit(false, this.buffer.substring(this.lastEmittedIndex))
} else {
this.emit(false, this.buffer.substring(this.lastEmittedIndex))
}
}

// Reset state
this.buffer = ""
this.lastEmittedIndex = 0
this.depth = 0
this.matchedTag = ""
this.matchedContent = ""
this.state = "TEXT"

return this.chunks
}
}
Loading