Skip to content

Commit e3d47ad

Browse files
logancyang and claude authored
feat(lm-studio): use Responses API for LM Studio models (#2306)
Switch LM Studio from /v1/chat/completions to /v1/responses via a thin ChatLMStudio wrapper that patches LangChain compatibility issues (text.format requirement, strict:null in tool definitions). - New ChatLMStudio class with fetch wrapper for tool sanitization - Opt-out toggle in model settings (useResponsesApi) - Ping uses ChatLMStudio to test the correct endpoint - ThinkBlockStreamer: strip special tokens from text content Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8b43355 commit e3d47ad

File tree

6 files changed

+136
-7
lines changed

6 files changed

+136
-7
lines changed

src/LLMProviders/ChatLMStudio.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import { ChatOpenAI } from "@langchain/openai";
2+
3+
/**
4+
* ChatLMStudio extends ChatOpenAI with the Responses API (/v1/responses)
5+
* for LM Studio local inference.
6+
*
7+
* Patches LangChain/OpenAI SDK compatibility issues with LM Studio:
8+
* - Ensures text.format is always set (LM Studio requires it)
9+
* - Removes strict:null from tool definitions (LM Studio rejects it)
10+
*/
11+
export interface ChatLMStudioInput {
12+
modelName?: string;
13+
apiKey?: string;
14+
configuration?: any;
15+
temperature?: number;
16+
maxTokens?: number;
17+
topP?: number;
18+
frequencyPenalty?: number;
19+
streaming?: boolean;
20+
streamUsage?: boolean;
21+
[key: string]: any;
22+
}
23+
24+
/**
25+
* Create a fetch wrapper that sanitizes request bodies for LM Studio
26+
* compatibility. This intercepts at the HTTP level, which is the last
27+
* stop before the request is sent, guaranteeing all null values in
28+
* tools are stripped regardless of which LangChain code path produced them.
29+
*/
30+
function createLMStudioFetch(baseFetch?: typeof globalThis.fetch): typeof globalThis.fetch {
31+
const underlyingFetch = baseFetch || globalThis.fetch;
32+
33+
return async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
34+
if (init?.body && typeof init.body === "string") {
35+
try {
36+
const body = JSON.parse(init.body);
37+
let modified = false;
38+
39+
// Strip null/undefined values from tool definitions
40+
if (Array.isArray(body.tools)) {
41+
body.tools = body.tools.map((tool: Record<string, unknown>) => {
42+
const cleaned: Record<string, unknown> = {};
43+
for (const [key, value] of Object.entries(tool)) {
44+
if (value !== null && value !== undefined) {
45+
cleaned[key] = value;
46+
}
47+
}
48+
return cleaned;
49+
});
50+
modified = true;
51+
}
52+
53+
if (modified) {
54+
init = { ...init, body: JSON.stringify(body) };
55+
}
56+
} catch {
57+
// Not JSON, pass through unchanged
58+
}
59+
}
60+
return underlyingFetch(input, init);
61+
};
62+
}
63+
64+
export class ChatLMStudio extends ChatOpenAI {
65+
constructor(fields: ChatLMStudioInput) {
66+
const originalFetch = fields.configuration?.fetch;
67+
68+
super({
69+
...fields,
70+
useResponsesApi: true,
71+
configuration: {
72+
...fields.configuration,
73+
// Wrap fetch to sanitize request bodies for LM Studio compatibility
74+
fetch: createLMStudioFetch(originalFetch),
75+
},
76+
// modelKwargs is spread LAST in ChatOpenAIResponses.invocationParams(),
77+
// overriding the computed `text` field. Without this, LangChain emits
78+
// `text: { format: undefined }` (serializes to `text: {}`) which LM Studio
79+
// rejects with "Required: text.format".
80+
modelKwargs: {
81+
...fields.modelKwargs,
82+
text: { format: { type: "text" } },
83+
},
84+
});
85+
}
86+
}

src/LLMProviders/chainRunner/utils/ThinkBlockStreamer.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ export class ThinkBlockStreamer {
152152
this.hasOpenThinkBlock = false;
153153
}
154154
if (textContent) {
155-
this.fullResponse += textContent;
155+
this.fullResponse += stripSpecialTokens(textContent);
156156
}
157157
return hasThinkingContent;
158158
}

src/LLMProviders/chatModelManager.ts

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import { ChatXAI } from "@langchain/xai";
3939
import { MissingApiKeyError, MissingPlusLicenseError } from "@/error";
4040
import { Notice } from "obsidian";
4141
import { ChatOpenRouter } from "./ChatOpenRouter";
42+
import { ChatLMStudio } from "./ChatLMStudio";
4243
import { BedrockChatModel, type BedrockChatModelFields } from "./BedrockChatModel";
4344
import { GitHubCopilotChatModel } from "@/LLMProviders/githubCopilot/GitHubCopilotChatModel";
4445

@@ -818,6 +819,14 @@ export default class ChatModelManager {
818819
logInfo(`Enabling Responses API for GPT-5 model: ${model.name} (${selectedModel.vendor})`);
819820
}
820821

822+
// For LM Studio, use ChatLMStudio by default for Responses API compatibility.
823+
// Opt out by setting useResponsesApi to false.
824+
if (model.provider === ChatModelProviders.LM_STUDIO && model.useResponsesApi !== false) {
825+
const lmStudioInstance = new ChatLMStudio(constructorConfig);
826+
logInfo(`[ChatModelManager] Using Responses API for LM Studio model: ${model.name}`);
827+
return lmStudioInstance;
828+
}
829+
821830
const newModelInstance = new selectedModel.AIConstructor(constructorConfig);
822831

823832
return newModelInstance;
@@ -887,7 +896,12 @@ export default class ChatModelManager {
887896
constructorConfig.useResponsesApi = true;
888897
}
889898

890-
const testModel = new (this.getProviderConstructor(modelToTest))(constructorConfig);
899+
// For LM Studio with Responses API, ping via ChatLMStudio so the
900+
// connectivity check hits the same /v1/responses endpoint used in chats.
901+
const testModel =
902+
model.provider === ChatModelProviders.LM_STUDIO && model.useResponsesApi !== false
903+
? new ChatLMStudio(constructorConfig)
904+
: new (this.getProviderConstructor(modelToTest))(constructorConfig);
891905
await testModel.invoke([{ role: "user", content: "hello" }], {
892906
timeout: 8000,
893907
});

src/aiParams.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ export interface CustomModel {
153153
// Ollama specific fields
154154
numCtx?: number;
155155

156+
// LM Studio specific fields
157+
useResponsesApi?: boolean;
158+
156159
projectEnabled?: boolean;
157160
plusExclusive?: boolean;
158161
believerExclusive?: boolean;

src/core/ChatManager.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ export class ChatManager {
592592
onTruncate();
593593
}
594594

595-
// Update chain memory
595+
// Update chain memory after truncation
596596
await this.updateChainMemory();
597597

598598
// Get the LLM version of the user message for regeneration
@@ -661,7 +661,7 @@ export class ChatManager {
661661
return false;
662662
}
663663

664-
// Update chain memory
664+
// Update chain memory after deletion
665665
await this.updateChainMemory();
666666

667667
logInfo(`[ChatManager] Successfully deleted message ${messageId}`);
@@ -699,7 +699,7 @@ export class ChatManager {
699699
const currentRepo = this.getCurrentMessageRepo();
700700
currentRepo.truncateAfterMessageId(messageId);
701701

702-
// Update chain memory with the truncated messages
702+
// Update chain memory after truncation
703703
await this.updateChainMemory();
704704

705705
logInfo(`[ChatManager] Truncated messages after ${messageId}`);
@@ -738,7 +738,7 @@ export class ChatManager {
738738
}
739739

740740
/**
741-
* Update chain memory with current LLM messages
741+
* Sync chain memory with the current message repository.
742742
*/
743743
private async updateChainMemory(): Promise<void> {
744744
try {
@@ -798,7 +798,7 @@ export class ChatManager {
798798
this.lastKnownProjectId = null; // Reset to force change detection
799799
const currentRepo = this.getCurrentMessageRepo();
800800

801-
// Sync chain memory with the current project's messages
801+
// Sync chain memory with the new project's messages
802802
await this.updateChainMemory();
803803

804804
logInfo(

src/settings/v2/components/ModelEditDialog.tsx

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,32 @@ export const ModelEditModalContent: React.FC<ModelEditModalContentProps> = ({
274274
</FormField>
275275
)}
276276

277+
{/* Responses API Toggle for LM Studio */}
278+
{localModel.provider === ChatModelProviders.LM_STUDIO && (
279+
<FormField label="Responses API">
280+
<div className="tw-flex tw-items-center tw-gap-2">
281+
<Checkbox
282+
id="use-responses-api"
283+
checked={localModel.useResponsesApi !== false}
284+
onCheckedChange={(checked) => handleLocalUpdate("useResponsesApi", checked)}
285+
/>
286+
<HelpTooltip
287+
content={
288+
<div className="tw-text-sm tw-text-muted">
289+
Use /v1/responses instead of /v1/chat/completions. Patches compatibility
290+
issues with LM Studio (text.format, tool definitions). Requires LM Studio
291+
0.3.6+.
292+
</div>
293+
}
294+
>
295+
<Label htmlFor="use-responses-api" className="tw-cursor-pointer tw-text-sm">
296+
Use Responses API (faster inference)
297+
</Label>
298+
</HelpTooltip>
299+
</div>
300+
</FormField>
301+
)}
302+
277303
{/* Model Parameters Editor */}
278304
<ModelParametersEditor
279305
model={localModel}

0 commit comments

Comments
 (0)