Skip to content

Commit e3d47ad

Browse files
logancyang and claude authored
feat(lm-studio): use Responses API for LM Studio models (#2306)
Switch LM Studio from /v1/chat/completions to /v1/responses via a thin ChatLMStudio wrapper that patches LangChain compatibility issues (text.format requirement, strict:null in tool definitions). - New ChatLMStudio class with fetch wrapper for tool sanitization - Opt-out toggle in model settings (useResponsesApi) - Ping uses ChatLMStudio to test the correct endpoint - ThinkBlockStreamer: strip special tokens from text content Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8b43355 commit e3d47ad

File tree

6 files changed

+136
-7
lines changed

6 files changed

+136
-7
lines changed

src/LLMProviders/ChatLMStudio.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import { ChatOpenAI } from "@langchain/openai";
2+
3+
/**
4+
* ChatLMStudio extends ChatOpenAI with the Responses API (/v1/responses)
5+
* for LM Studio local inference.
6+
*
7+
* Patches LangChain/OpenAI SDK compatibility issues with LM Studio:
8+
* - Ensures text.format is always set (LM Studio requires it)
9+
* - Removes strict:null from tool definitions (LM Studio rejects it)
10+
*/
11+
export interface ChatLMStudioInput {
12+
modelName?: string;
13+
apiKey?: string;
14+
configuration?: any;
15+
temperature?: number;
16+
maxTokens?: number;
17+
topP?: number;
18+
frequencyPenalty?: number;
19+
streaming?: boolean;
20+
streamUsage?: boolean;
21+
[key: string]: any;
22+
}
23+
24+
/**
25+
* Create a fetch wrapper that sanitizes request bodies for LM Studio
26+
* compatibility. This intercepts at the HTTP level, which is the last
27+
* stop before the request is sent, guaranteeing all null values in
28+
* tools are stripped regardless of which LangChain code path produced them.
29+
*/
30+
function createLMStudioFetch(baseFetch?: typeof globalThis.fetch): typeof globalThis.fetch {
31+
const underlyingFetch = baseFetch || globalThis.fetch;
32+
33+
return async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
34+
if (init?.body && typeof init.body === "string") {
35+
try {
36+
const body = JSON.parse(init.body);
37+
let modified = false;
38+
39+
// Strip null/undefined values from tool definitions
40+
if (Array.isArray(body.tools)) {
41+
body.tools = body.tools.map((tool: Record<string, unknown>) => {
42+
const cleaned: Record<string, unknown> = {};
43+
for (const [key, value] of Object.entries(tool)) {
44+
if (value !== null && value !== undefined) {
45+
cleaned[key] = value;
46+
}
47+
}
48+
return cleaned;
49+
});
50+
modified = true;
51+
}
52+
53+
if (modified) {
54+
init = { ...init, body: JSON.stringify(body) };
55+
}
56+
} catch {
57+
// Not JSON, pass through unchanged
58+
}
59+
}
60+
return underlyingFetch(input, init);
61+
};
62+
}
63+
64+
export class ChatLMStudio extends ChatOpenAI {
65+
constructor(fields: ChatLMStudioInput) {
66+
const originalFetch = fields.configuration?.fetch;
67+
68+
super({
69+
...fields,
70+
useResponsesApi: true,
71+
configuration: {
72+
...fields.configuration,
73+
// Wrap fetch to sanitize request bodies for LM Studio compatibility
74+
fetch: createLMStudioFetch(originalFetch),
75+
},
76+
// modelKwargs is spread LAST in ChatOpenAIResponses.invocationParams(),
77+
// overriding the computed `text` field. Without this, LangChain emits
78+
// `text: { format: undefined }` (serializes to `text: {}`) which LM Studio
79+
// rejects with "Required: text.format".
80+
modelKwargs: {
81+
...fields.modelKwargs,
82+
text: { format: { type: "text" } },
83+
},
84+
});
85+
}
86+
}

src/LLMProviders/chainRunner/utils/ThinkBlockStreamer.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ export class ThinkBlockStreamer {
152152
this.hasOpenThinkBlock = false;
153153
}
154154
if (textContent) {
155-
this.fullResponse += textContent;
155+
this.fullResponse += stripSpecialTokens(textContent);
156156
}
157157
return hasThinkingContent;
158158
}

src/LLMProviders/chatModelManager.ts

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import { ChatXAI } from "@langchain/xai";
3939
import { MissingApiKeyError, MissingPlusLicenseError } from "@/error";
4040
import { Notice } from "obsidian";
4141
import { ChatOpenRouter } from "./ChatOpenRouter";
42+
import { ChatLMStudio } from "./ChatLMStudio";
4243
import { BedrockChatModel, type BedrockChatModelFields } from "./BedrockChatModel";
4344
import { GitHubCopilotChatModel } from "@/LLMProviders/githubCopilot/GitHubCopilotChatModel";
4445

@@ -818,6 +819,14 @@ export default class ChatModelManager {
818819
logInfo(`Enabling Responses API for GPT-5 model: ${model.name} (${selectedModel.vendor})`);
819820
}
820821

822+
// For LM Studio, use ChatLMStudio by default for Responses API compatibility.
823+
// Opt out by setting useResponsesApi to false.
824+
if (model.provider === ChatModelProviders.LM_STUDIO && model.useResponsesApi !== false) {
825+
const lmStudioInstance = new ChatLMStudio(constructorConfig);
826+
logInfo(`[ChatModelManager] Using Responses API for LM Studio model: ${model.name}`);
827+
return lmStudioInstance;
828+
}
829+
821830
const newModelInstance = new selectedModel.AIConstructor(constructorConfig);
822831

823832
return newModelInstance;
@@ -887,7 +896,12 @@ export default class ChatModelManager {
887896
constructorConfig.useResponsesApi = true;
888897
}
889898

890-
const testModel = new (this.getProviderConstructor(modelToTest))(constructorConfig);
899+
// For LM Studio with Responses API, ping via ChatLMStudio so the
900+
// connectivity check hits the same /v1/responses endpoint used in chats.
901+
const testModel =
902+
model.provider === ChatModelProviders.LM_STUDIO && model.useResponsesApi !== false
903+
? new ChatLMStudio(constructorConfig)
904+
: new (this.getProviderConstructor(modelToTest))(constructorConfig);
891905
await testModel.invoke([{ role: "user", content: "hello" }], {
892906
timeout: 8000,
893907
});

src/aiParams.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ export interface CustomModel {
153153
// Ollama specific fields
154154
numCtx?: number;
155155

156+
// LM Studio specific fields
157+
useResponsesApi?: boolean;
158+
156159
projectEnabled?: boolean;
157160
plusExclusive?: boolean;
158161
believerExclusive?: boolean;

src/core/ChatManager.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ export class ChatManager {
592592
onTruncate();
593593
}
594594

595-
// Update chain memory
595+
// Update chain memory after truncation
596596
await this.updateChainMemory();
597597

598598
// Get the LLM version of the user message for regeneration
@@ -661,7 +661,7 @@ export class ChatManager {
661661
return false;
662662
}
663663

664-
// Update chain memory
664+
// Update chain memory after deletion
665665
await this.updateChainMemory();
666666

667667
logInfo(`[ChatManager] Successfully deleted message ${messageId}`);
@@ -699,7 +699,7 @@ export class ChatManager {
699699
const currentRepo = this.getCurrentMessageRepo();
700700
currentRepo.truncateAfterMessageId(messageId);
701701

702-
// Update chain memory with the truncated messages
702+
// Update chain memory after truncation
703703
await this.updateChainMemory();
704704

705705
logInfo(`[ChatManager] Truncated messages after ${messageId}`);
@@ -738,7 +738,7 @@ export class ChatManager {
738738
}
739739

740740
/**
741-
* Update chain memory with current LLM messages
741+
* Sync chain memory with the current message repository.
742742
*/
743743
private async updateChainMemory(): Promise<void> {
744744
try {
@@ -798,7 +798,7 @@ export class ChatManager {
798798
this.lastKnownProjectId = null; // Reset to force change detection
799799
const currentRepo = this.getCurrentMessageRepo();
800800

801-
// Sync chain memory with the current project's messages
801+
// Sync chain memory with the new project's messages
802802
await this.updateChainMemory();
803803

804804
logInfo(

src/settings/v2/components/ModelEditDialog.tsx

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,32 @@ export const ModelEditModalContent: React.FC<ModelEditModalContentProps> = ({
274274
</FormField>
275275
)}
276276

277+
{/* Responses API Toggle for LM Studio */}
278+
{localModel.provider === ChatModelProviders.LM_STUDIO && (
279+
<FormField label="Responses API">
280+
<div className="tw-flex tw-items-center tw-gap-2">
281+
<Checkbox
282+
id="use-responses-api"
283+
checked={localModel.useResponsesApi !== false}
284+
onCheckedChange={(checked) => handleLocalUpdate("useResponsesApi", checked)}
285+
/>
286+
<HelpTooltip
287+
content={
288+
<div className="tw-text-sm tw-text-muted">
289+
Use /v1/responses instead of /v1/chat/completions. Patches compatibility
290+
issues with LM Studio (text.format, tool definitions). Requires LM Studio
291+
0.3.6+.
292+
</div>
293+
}
294+
>
295+
<Label htmlFor="use-responses-api" className="tw-cursor-pointer tw-text-sm">
296+
Use Responses API (faster inference)
297+
</Label>
298+
</HelpTooltip>
299+
</div>
300+
</FormField>
301+
)}
302+
277303
{/* Model Parameters Editor */}
278304
<ModelParametersEditor
279305
model={localModel}

0 commit comments

Comments
 (0)