Skip to content

Commit 1b0efca

Browse files
authored
fix: LM Studio compatibility for response_format parameter (#63)
fix: LM Studio compatibility for response_format parameter (#63)

* fix: LM Studio compatibility for response_format parameter (#46)

  LM Studio rejects requests with response_format set to json_object, only supporting json_schema. This fix adds:
  - Automatic detection for LM Studio via port 1234 heuristic
  - Graceful degradation: retry without response_format on specific error
  - Flag to remember incompatible endpoints for subsequent calls
  - Updated LangChain integration to respect these settings

  Bumps version to 2.14.1

* Revert "fix: LM Studio compatibility for response_format parameter (#46)"

  This reverts commit 0fc4a59.

* fix: LM Studio compatibility for response_format parameter (#46)

  LM Studio rejects requests with response_format set to json_object, only supporting json_schema. This fix adds:
  - Automatic detection for LM Studio via port 1234 heuristic
  - Graceful degradation: retry without response_format on specific error
  - Flag to remember incompatible endpoints for subsequent calls
  - Updated LangChain integration to respect these settings

  Bumps version to 2.14.1

* fix: port detection heuristic now matches only port 1234

  Previously the ':1234' substring check would incorrectly match ports like 12345, 12346, etc. Now it checks for ':1234/' or a URL ending with ':1234'.
1 parent 8165cbb commit 1b0efca

File tree

5 files changed

+247
-18
lines changed

5 files changed

+247
-18
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [2.14.1] - 2026-01-16
9+
10+
### Fixed
11+
12+
- **LM Studio Compatibility** - Fixed `response_format` parameter rejection by LM Studio (#46)
13+
- LM Studio only supports `json_schema` response format, not `json_object`
14+
- Added automatic detection for LM Studio (port 1234 heuristic)
15+
- Added graceful degradation: retries without `response_format` if endpoint rejects it
16+
- Affects both direct API calls and LangChain integration
17+
- See also: [lmstudio-ai/lmstudio-bug-tracker#189](https://github.com/lmstudio-ai/lmstudio-bug-tracker/issues/189)
18+
819
## [2.14.0] - 2026-01-16
920

1021
### Added

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "esperanto"
3-
version = "2.14.0"
3+
version = "2.14.1"
44
description = "A light-weight, production-ready, unified interface for various AI model providers"
55
authors = [
66
{ name = "LUIS NOVO", email = "lfnovo@gmail.com" }

src/esperanto/providers/llm/openai_compatible.py

Lines changed: 121 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,27 @@
22

33
import os
44
from dataclasses import dataclass
5-
from typing import TYPE_CHECKING, Any, Dict, List, Optional
5+
from typing import (
6+
TYPE_CHECKING,
7+
Any,
8+
AsyncGenerator,
9+
Dict,
10+
Generator,
11+
List,
12+
Optional,
13+
Union,
14+
)
615

7-
from esperanto.common_types import Model
16+
from esperanto.common_types import ChatCompletion, ChatCompletionChunk, Model
817
from esperanto.providers.llm.openai import OpenAILanguageModel
918
from esperanto.utils.logging import logger
1019

1120
if TYPE_CHECKING:
1221
from langchain_openai import ChatOpenAI
1322

23+
# Error message indicating the endpoint doesn't support json_object response format
24+
_RESPONSE_FORMAT_ERROR = "'response_format.type' must be 'json_schema'"
25+
1426

1527
@dataclass
1628
class OpenAICompatibleLanguageModel(OpenAILanguageModel):
@@ -61,6 +73,24 @@ def __post_init__(self):
6173
# Call parent's post_init to set up HTTP clients and normalized response handling
6274
super().__post_init__()
6375

76+
# Track if we've detected that this endpoint doesn't support json_object
77+
self._response_format_unsupported = False
78+
79+
def _is_likely_lmstudio(self) -> bool:
80+
"""Check if this endpoint is likely LM Studio based on port.
81+
82+
LM Studio uses port 1234 by default. This is a heuristic to avoid
83+
sending unsupported response_format parameter.
84+
85+
Known issue: If you use another OpenAI-compatible provider on port 1234,
86+
structured output with json_object may not work. Use a different port.
87+
"""
88+
if not self.base_url:
89+
return False
90+
# Check for exact port 1234 (not 12345, 12346, etc.)
91+
# Port is followed by "/" or end of host portion
92+
return ":1234/" in self.base_url or self.base_url.rstrip("/").endswith(":1234")
93+
6494
def _handle_error(self, response) -> None:
6595
"""Handle HTTP error responses with graceful degradation."""
6696
if response.status_code >= 400:
@@ -169,24 +199,96 @@ def _normalize_chunk(self, chunk_data: Dict[str, Any]) -> "ChatCompletionChunk":
169199
model=model,
170200
)
171201

172-
def _get_api_kwargs(self, exclude_stream: bool = False) -> Dict[str, Any]:
202+
def _get_api_kwargs(
203+
self, exclude_stream: bool = False, exclude_response_format: bool = False
204+
) -> Dict[str, Any]:
173205
"""Get API kwargs with graceful feature fallback.
174-
206+
175207
Args:
176208
exclude_stream: If True, excludes streaming-related parameters.
177-
209+
exclude_response_format: If True, excludes response_format parameter.
210+
178211
Returns:
179212
Dict containing API parameters for the request.
180213
"""
181214
# Get base kwargs from parent
182215
kwargs = super()._get_api_kwargs(exclude_stream)
183-
184-
# For OpenAI-compatible endpoints, we attempt all features
185-
# and let the endpoint handle graceful degradation
186-
# This includes streaming, JSON mode, and other OpenAI features
187-
216+
217+
# Remove response_format if:
218+
# 1. Explicitly requested (for retry logic)
219+
# 2. Endpoint is likely LM Studio (port 1234 heuristic)
220+
# 3. We've previously detected this endpoint doesn't support it
221+
should_skip_response_format = (
222+
exclude_response_format
223+
or self._is_likely_lmstudio()
224+
or self._response_format_unsupported
225+
)
226+
227+
if should_skip_response_format and "response_format" in kwargs:
228+
logger.debug(
229+
"Removing response_format parameter for OpenAI-compatible endpoint"
230+
)
231+
kwargs.pop("response_format")
232+
188233
return kwargs
189234

235+
def _is_response_format_error(self, error: Exception) -> bool:
236+
"""Check if the error is due to unsupported response_format."""
237+
error_str = str(error)
238+
return _RESPONSE_FORMAT_ERROR in error_str
239+
240+
def chat_complete(
241+
self, messages: List[Dict[str, str]], stream: Optional[bool] = None
242+
) -> Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]]:
243+
"""Send a chat completion request with retry for unsupported response_format.
244+
245+
Args:
246+
messages: List of messages in the conversation.
247+
stream: Whether to stream the response. If None, uses the instance's streaming setting.
248+
249+
Returns:
250+
Either a ChatCompletion or a Generator yielding ChatCompletionChunks if streaming.
251+
"""
252+
try:
253+
return super().chat_complete(messages, stream)
254+
except RuntimeError as e:
255+
# Check if it's a response_format error and we haven't already disabled it
256+
if self._is_response_format_error(e) and not self._response_format_unsupported:
257+
logger.debug(
258+
"Endpoint doesn't support json_object response_format, retrying without it"
259+
)
260+
# Mark this endpoint as not supporting response_format
261+
self._response_format_unsupported = True
262+
# Retry without response_format
263+
return super().chat_complete(messages, stream)
264+
raise
265+
266+
async def achat_complete(
267+
self, messages: List[Dict[str, str]], stream: Optional[bool] = None
268+
) -> Union[ChatCompletion, AsyncGenerator[ChatCompletionChunk, None]]:
269+
"""Send an async chat completion request with retry for unsupported response_format.
270+
271+
Args:
272+
messages: List of messages in the conversation.
273+
stream: Whether to stream the response. If None, uses the instance's streaming setting.
274+
275+
Returns:
276+
Either a ChatCompletion or an AsyncGenerator yielding ChatCompletionChunks if streaming.
277+
"""
278+
try:
279+
return await super().achat_complete(messages, stream)
280+
except RuntimeError as e:
281+
# Check if it's a response_format error and we haven't already disabled it
282+
if self._is_response_format_error(e) and not self._response_format_unsupported:
283+
logger.debug(
284+
"Endpoint doesn't support json_object response_format, retrying without it"
285+
)
286+
# Mark this endpoint as not supporting response_format
287+
self._response_format_unsupported = True
288+
# Retry without response_format
289+
return await super().achat_complete(messages, stream)
290+
raise
291+
190292
def _get_models(self) -> List[Model]:
191293
"""List all available models for this provider.
192294
@@ -242,7 +344,15 @@ def to_langchain(self) -> "ChatOpenAI":
242344
) from e
243345

244346
model_kwargs = {}
245-
if self.structured and isinstance(self.structured, dict):
347+
# Only set response_format if endpoint is likely to support it
348+
should_skip_response_format = (
349+
self._is_likely_lmstudio() or self._response_format_unsupported
350+
)
351+
if (
352+
self.structured
353+
and isinstance(self.structured, dict)
354+
and not should_skip_response_format
355+
):
246356
structured_type = self.structured.get("type")
247357
if structured_type in ["json", "json_object"]:
248358
model_kwargs["response_format"] = {"type": "json_object"}

tests/providers/llm/test_openai_compatible_provider.py

Lines changed: 113 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -260,20 +260,38 @@ def test_langchain_integration(self):
260260
assert result == mock_instance
261261

262262
def test_langchain_integration_with_structured_output(self):
263-
"""Test LangChain integration with structured output."""
263+
"""Test LangChain integration with structured output (non-LM Studio port)."""
264+
# Use port 8080 (not 1234) to test that response_format IS set
264265
model = OpenAICompatibleLanguageModel(
265266
api_key="test-key",
266-
base_url="http://localhost:1234",
267+
base_url="http://localhost:8080",
267268
structured={"type": "json"}
268269
)
269-
270+
270271
with patch('langchain_openai.ChatOpenAI') as mock_chat_openai:
271272
model.to_langchain()
272-
273+
273274
call_args = mock_chat_openai.call_args[1]
274275
assert "model_kwargs" in call_args
275276
assert call_args["model_kwargs"]["response_format"] == {"type": "json_object"}
276277

278+
def test_langchain_integration_lmstudio_skips_response_format(self):
279+
"""Test LangChain integration skips response_format for LM Studio (port 1234)."""
280+
# Port 1234 is the default LM Studio port - response_format should be skipped
281+
model = OpenAICompatibleLanguageModel(
282+
api_key="test-key",
283+
base_url="http://localhost:1234",
284+
structured={"type": "json"}
285+
)
286+
287+
with patch('langchain_openai.ChatOpenAI') as mock_chat_openai:
288+
model.to_langchain()
289+
290+
call_args = mock_chat_openai.call_args[1]
291+
assert "model_kwargs" in call_args
292+
# response_format should NOT be set for LM Studio
293+
assert "response_format" not in call_args["model_kwargs"]
294+
277295
def test_langchain_integration_reasoning_model(self):
278296
"""Test LangChain integration with reasoning model (o1)."""
279297
model = OpenAICompatibleLanguageModel(
@@ -366,4 +384,94 @@ def test_error_message_mentions_both_env_vars(self):
366384
OpenAICompatibleLanguageModel(api_key="test-key")
367385
error_message = str(exc_info.value)
368386
assert "OPENAI_COMPATIBLE_BASE_URL_LLM" in error_message
369-
assert "OPENAI_COMPATIBLE_BASE_URL" in error_message
387+
assert "OPENAI_COMPATIBLE_BASE_URL" in error_message
388+
389+
def test_is_likely_lmstudio_port_1234(self):
390+
"""Test that port 1234 is detected as likely LM Studio."""
391+
model = OpenAICompatibleLanguageModel(
392+
api_key="test-key",
393+
base_url="http://localhost:1234/v1"
394+
)
395+
assert model._is_likely_lmstudio() is True
396+
397+
def test_is_likely_lmstudio_other_port(self):
398+
"""Test that other ports are not detected as LM Studio."""
399+
model = OpenAICompatibleLanguageModel(
400+
api_key="test-key",
401+
base_url="http://localhost:8080/v1"
402+
)
403+
assert model._is_likely_lmstudio() is False
404+
405+
def test_is_likely_lmstudio_port_12345_not_matched(self):
406+
"""Test that port 12345 is NOT detected as LM Studio (regression test)."""
407+
model = OpenAICompatibleLanguageModel(
408+
api_key="test-key",
409+
base_url="http://localhost:12345/v1"
410+
)
411+
assert model._is_likely_lmstudio() is False
412+
413+
def test_is_likely_lmstudio_port_12346_not_matched(self):
414+
"""Test that port 12346 is NOT detected as LM Studio (regression test)."""
415+
model = OpenAICompatibleLanguageModel(
416+
api_key="test-key",
417+
base_url="http://localhost:12346/v1"
418+
)
419+
assert model._is_likely_lmstudio() is False
420+
421+
def test_is_likely_lmstudio_127_0_0_1(self):
422+
"""Test that 127.0.0.1:1234 is detected as likely LM Studio."""
423+
model = OpenAICompatibleLanguageModel(
424+
api_key="test-key",
425+
base_url="http://127.0.0.1:1234/v1"
426+
)
427+
assert model._is_likely_lmstudio() is True
428+
429+
def test_response_format_skipped_for_lmstudio(self):
430+
"""Test that response_format is skipped for LM Studio (port 1234)."""
431+
model = OpenAICompatibleLanguageModel(
432+
api_key="test-key",
433+
base_url="http://localhost:1234/v1",
434+
structured={"type": "json_object"}
435+
)
436+
kwargs = model._get_api_kwargs()
437+
assert "response_format" not in kwargs
438+
439+
def test_response_format_included_for_other_ports(self):
440+
"""Test that response_format is included for non-LM Studio endpoints."""
441+
model = OpenAICompatibleLanguageModel(
442+
api_key="test-key",
443+
base_url="http://localhost:8080/v1",
444+
structured={"type": "json_object"}
445+
)
446+
kwargs = model._get_api_kwargs()
447+
assert "response_format" in kwargs
448+
assert kwargs["response_format"] == {"type": "json_object"}
449+
450+
def test_is_response_format_error(self):
451+
"""Test detection of response_format error message."""
452+
model = OpenAICompatibleLanguageModel(
453+
api_key="test-key",
454+
base_url="http://localhost:8080/v1"
455+
)
456+
# Test the specific error from LM Studio
457+
error = RuntimeError("'response_format.type' must be 'json_schema' or 'text'")
458+
assert model._is_response_format_error(error) is True
459+
460+
# Test other errors
461+
other_error = RuntimeError("Some other error")
462+
assert model._is_response_format_error(other_error) is False
463+
464+
def test_response_format_unsupported_flag(self):
465+
"""Test that _response_format_unsupported flag is properly set."""
466+
model = OpenAICompatibleLanguageModel(
467+
api_key="test-key",
468+
base_url="http://localhost:8080/v1",
469+
structured={"type": "json_object"}
470+
)
471+
# Initially should be False
472+
assert model._response_format_unsupported is False
473+
474+
# After setting the flag, response_format should be skipped
475+
model._response_format_unsupported = True
476+
kwargs = model._get_api_kwargs()
477+
assert "response_format" not in kwargs

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)