Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions src/api/providers/__tests__/lmstudio.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,133 @@ describe("LmStudioHandler", () => {
expect(modelInfo.info.contextWindow).toBe(128_000)
})
})

describe("gpt-oss special token parsing", () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding test cases for edge scenarios:

  • Multiple <|message|> tokens in a single chunk
  • Malformed special tokens (e.g., unclosed tokens like <|start)
  • Very large JSON payloads after the message token
  • Mixed content with both special tokens and regular text

it("should parse gpt-oss format with special tokens", async () => {
	// Simulate a gpt-oss response whose single streamed chunk carries the
	// full special-token envelope around a JSON payload.
	const rawChunk =
		'<|start|>assistant<|channel|>commentary to=read_file <|constrain|>json<|message|>{"args":[{"file":{"path":"documentation/program_analysis.md"}}]}'
	mockCreate.mockImplementationOnce(async (options) => ({
		[Symbol.asyncIterator]: async function* () {
			yield {
				choices: [{ delta: { content: rawChunk }, index: 0 }],
				usage: null,
			}
		},
	}))

	// A handler whose model id contains "gpt-oss" takes the special-token parsing path.
	const gptOssHandler = new LmStudioHandler({
		apiModelId: "gpt-oss-20b",
		lmStudioModelId: "gpt-oss-20b",
		lmStudioBaseUrl: "http://localhost:1234",
	})

	const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Read the file" }]

	const collected: any[] = []
	for await (const chunk of gptOssHandler.createMessage("You are a helpful assistant.", messages)) {
		collected.push(chunk)
	}

	// Only the JSON payload after <|message|> should survive parsing.
	const textChunks = collected.filter((chunk) => chunk.type === "text")
	expect(textChunks).toHaveLength(1)
	expect(textChunks[0].text).toBe('{"args":[{"file":{"path":"documentation/program_analysis.md"}}]}')
})

it("should handle gpt-oss format without message token", async () => {
	// A chunk carrying only routing/special tokens and no <|message|> payload.
	const tokenOnlyChunk = "<|start|>assistant<|channel|>commentary to=analyze_code <|constrain|>text"
	mockCreate.mockImplementationOnce(async (options) => ({
		[Symbol.asyncIterator]: async function* () {
			yield {
				choices: [{ delta: { content: tokenOnlyChunk }, index: 0 }],
				usage: null,
			}
		},
	}))

	const gptOssHandler = new LmStudioHandler({
		apiModelId: "gpt-oss-20b",
		lmStudioModelId: "gpt-oss-20b",
		lmStudioBaseUrl: "http://localhost:1234",
	})

	const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Analyze the code" }]

	const collected: any[] = []
	for await (const chunk of gptOssHandler.createMessage("You are a helpful assistant.", messages)) {
		collected.push(chunk)
	}

	// The <|...|> tokens and the "to=" routing hint should be stripped away,
	// leaving only the plain words.
	const textChunks = collected.filter((chunk) => chunk.type === "text")
	expect(textChunks).toHaveLength(1)
	expect(textChunks[0].text).toBe("assistant commentary text")
})

it("should not parse special tokens for non-gpt-oss models", async () => {
	// The shared `handler` fixture is not a gpt-oss model, so special-looking
	// content must pass through the stream untouched.
	mockCreate.mockImplementationOnce(async (options) => ({
		[Symbol.asyncIterator]: async function* () {
			yield {
				choices: [
					{
						delta: { content: "Here is some content with <|special|> tokens that should not be parsed" },
						index: 0,
					},
				],
				usage: null,
			}
		},
	}))

	const collected: any[] = []
	for await (const chunk of handler.createMessage("System prompt", [{ role: "user", content: "Test" }])) {
		collected.push(chunk)
	}

	const textChunks = collected.filter((chunk) => chunk.type === "text")
	expect(textChunks).toHaveLength(1)
	expect(textChunks[0].text).toBe("Here is some content with <|special|> tokens that should not be parsed")
})
})
})
46 changes: 43 additions & 3 deletions src/api/providers/lm-studio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,24 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
const delta = chunk.choices[0]?.delta

if (delta?.content) {
assistantText += delta.content
for (const processedChunk of matcher.update(delta.content)) {
yield processedChunk
// Check if this is a gpt-oss model with special token format
const isGptOss = this.getModel().id?.toLowerCase().includes("gpt-oss")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance consideration: Since the model doesn't change during streaming, could we move this gpt-oss check outside the loop to avoid repeated string operations on every chunk?

Suggested change
const isGptOss = this.getModel().id?.toLowerCase().includes("gpt-oss")
// Check if this is a gpt-oss model with special token format
const isGptOss = this.getModel().id?.toLowerCase().includes("gpt-oss")
for await (const chunk of results) {
const delta = chunk.choices[0]?.delta
if (delta?.content) {
if (isGptOss && delta.content.includes("<|") && delta.content.includes("|>")) {


if (isGptOss && delta.content.includes("<|") && delta.content.includes("|>")) {
// Parse gpt-oss special token format
// Format: <|start|>assistant<|channel|>commentary to=read_file <|constrain|>json<|message|>{"args":[...]}
const cleanedContent = this.parseGptOssFormat(delta.content)
if (cleanedContent) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When cleanedContent is empty or falsy after parsing, we silently skip it. Should we consider logging a warning to help with debugging unexpected formats?

assistantText += cleanedContent
for (const processedChunk of matcher.update(cleanedContent)) {
yield processedChunk
}
}
} else {
assistantText += delta.content
for (const processedChunk of matcher.update(delta.content)) {
yield processedChunk
}
}
}
}
Expand Down Expand Up @@ -169,6 +184,31 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
)
}
}

/**
 * Parse the gpt-oss special token format and return the user-visible text.
 *
 * Format example:
 *   <|start|>assistant<|channel|>commentary to=read_file <|constrain|>json<|message|>{"args":[...]}
 *
 * If a <|message|> token is present, everything after it is returned verbatim
 * (typically a JSON payload). Otherwise every <|...|> token and any
 * "to=<function>" routing hint is stripped out and surrounding whitespace is
 * normalized.
 *
 * NOTE: the token pattern cannot match a token name that itself contains a
 * pipe character; no known gpt-oss token does.
 *
 * @param content - A raw streamed delta containing gpt-oss special tokens.
 * @returns The cleaned message text (may be empty if the chunk held only tokens).
 */
private parseGptOssFormat(content: string): string {
	// Fast path: everything after the first <|message|> token is the payload.
	// The `s` flag lets `.` span newlines inside a multi-line payload.
	const messageMatch = content.match(/<\|message\|>(.+)$/s)
	if (messageMatch) {
		return messageMatch[1].trim()
	}

	// No payload marker: strip the special tokens themselves.
	// Pattern: <|token|> where token is any run of non-pipe characters.
	const cleaned = content.replace(/<\|[^|]+\|>/g, " ")

	// Drop "to=function_name" routing hints. [\w-] also covers kebab-case
	// tool names (e.g. to=read-file), which \w alone would not match.
	return cleaned
		.replace(/\s*to=[\w-]+\s*/g, " ")
		// Token stripping can leave adjacent separators; collapse them so the
		// result reads as normal prose.
		.replace(/\s{2,}/g, " ")
		.trim()
}
}

export async function getLmStudioModels(baseUrl = "http://localhost:1234") {
Expand Down
Loading