Skip to content

Commit 1b0efca

Browse files
authored
fix: LM Studio compatibility for response_format parameter (#63)
fix: LM Studio compatibility for response_format parameter (#63)

* fix: LM Studio compatibility for response_format parameter (#46)

  LM Studio rejects requests with response_format set to json_object, only supporting json_schema. This fix adds:
  - Automatic detection for LM Studio via port 1234 heuristic
  - Graceful degradation: retry without response_format on specific error
  - Flag to remember incompatible endpoints for subsequent calls
  - Updated LangChain integration to respect these settings

  Bumps version to 2.14.1

* Revert "fix: LM Studio compatibility for response_format parameter (#46)"

  This reverts commit 0fc4a59.

* fix: LM Studio compatibility for response_format parameter (#46)

  LM Studio rejects requests with response_format set to json_object, only supporting json_schema. This fix adds:
  - Automatic detection for LM Studio via port 1234 heuristic
  - Graceful degradation: retry without response_format on specific error
  - Flag to remember incompatible endpoints for subsequent calls
  - Updated LangChain integration to respect these settings

  Bumps version to 2.14.1

* fix: port detection heuristic now matches only port 1234

  Previously the ':1234' substring check would incorrectly match ports like 12345, 12346, etc. Now it checks for ':1234/' or a URL ending with ':1234'.
1 parent 8165cbb commit 1b0efca

File tree

5 files changed

+247
-18
lines changed

5 files changed

+247
-18
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [2.14.1] - 2026-01-16
9+
10+
### Fixed
11+
12+
- **LM Studio Compatibility** - Fixed `response_format` parameter rejection by LM Studio (#46)
13+
- LM Studio only supports `json_schema` response format, not `json_object`
14+
- Added automatic detection for LM Studio (port 1234 heuristic)
15+
- Added graceful degradation: retries without `response_format` if endpoint rejects it
16+
- Affects both direct API calls and LangChain integration
17+
- See also: [lmstudio-ai/lmstudio-bug-tracker#189](https://github.com/lmstudio-ai/lmstudio-bug-tracker/issues/189)
18+
819
## [2.14.0] - 2026-01-16
920

1021
### Added

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "esperanto"
3-
version = "2.14.0"
3+
version = "2.14.1"
44
description = "A light-weight, production-ready, unified interface for various AI model providers"
55
authors = [
66
{ name = "LUIS NOVO", email = "lfnovo@gmail.com" }

src/esperanto/providers/llm/openai_compatible.py

Lines changed: 121 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,27 @@
22

33
import os
44
from dataclasses import dataclass
5-
from typing import TYPE_CHECKING, Any, Dict, List, Optional
5+
from typing import (
6+
TYPE_CHECKING,
7+
Any,
8+
AsyncGenerator,
9+
Dict,
10+
Generator,
11+
List,
12+
Optional,
13+
Union,
14+
)
615

7-
from esperanto.common_types import Model
16+
from esperanto.common_types import ChatCompletion, ChatCompletionChunk, Model
817
from esperanto.providers.llm.openai import OpenAILanguageModel
918
from esperanto.utils.logging import logger
1019

1120
if TYPE_CHECKING:
1221
from langchain_openai import ChatOpenAI
1322

23+
# Error message indicating the endpoint doesn't support json_object response format
24+
_RESPONSE_FORMAT_ERROR = "'response_format.type' must be 'json_schema'"
25+
1426

1527
@dataclass
1628
class OpenAICompatibleLanguageModel(OpenAILanguageModel):
@@ -61,6 +73,24 @@ def __post_init__(self):
6173
# Call parent's post_init to set up HTTP clients and normalized response handling
6274
super().__post_init__()
6375

76+
# Track if we've detected that this endpoint doesn't support json_object
77+
self._response_format_unsupported = False
78+
79+
def _is_likely_lmstudio(self) -> bool:
80+
"""Check if this endpoint is likely LM Studio based on port.
81+
82+
LM Studio uses port 1234 by default. This is a heuristic to avoid
83+
sending unsupported response_format parameter.
84+
85+
Known issue: If you use another OpenAI-compatible provider on port 1234,
86+
structured output with json_object may not work. Use a different port.
87+
"""
88+
if not self.base_url:
89+
return False
90+
# Check for exact port 1234 (not 12345, 12346, etc.)
91+
# Port is followed by "/" or end of host portion
92+
return ":1234/" in self.base_url or self.base_url.rstrip("/").endswith(":1234")
93+
6494
def _handle_error(self, response) -> None:
6595
"""Handle HTTP error responses with graceful degradation."""
6696
if response.status_code >= 400:
@@ -169,24 +199,96 @@ def _normalize_chunk(self, chunk_data: Dict[str, Any]) -> "ChatCompletionChunk":
169199
model=model,
170200
)
171201

172-
def _get_api_kwargs(self, exclude_stream: bool = False) -> Dict[str, Any]:
202+
def _get_api_kwargs(
203+
self, exclude_stream: bool = False, exclude_response_format: bool = False
204+
) -> Dict[str, Any]:
173205
"""Get API kwargs with graceful feature fallback.
174-
206+
175207
Args:
176208
exclude_stream: If True, excludes streaming-related parameters.
177-
209+
exclude_response_format: If True, excludes response_format parameter.
210+
178211
Returns:
179212
Dict containing API parameters for the request.
180213
"""
181214
# Get base kwargs from parent
182215
kwargs = super()._get_api_kwargs(exclude_stream)
183-
184-
# For OpenAI-compatible endpoints, we attempt all features
185-
# and let the endpoint handle graceful degradation
186-
# This includes streaming, JSON mode, and other OpenAI features
187-
216+
217+
# Remove response_format if:
218+
# 1. Explicitly requested (for retry logic)
219+
# 2. Endpoint is likely LM Studio (port 1234 heuristic)
220+
# 3. We've previously detected this endpoint doesn't support it
221+
should_skip_response_format = (
222+
exclude_response_format
223+
or self._is_likely_lmstudio()
224+
or self._response_format_unsupported
225+
)
226+
227+
if should_skip_response_format and "response_format" in kwargs:
228+
logger.debug(
229+
"Removing response_format parameter for OpenAI-compatible endpoint"
230+
)
231+
kwargs.pop("response_format")
232+
188233
return kwargs
189234

235+
def _is_response_format_error(self, error: Exception) -> bool:
236+
"""Check if the error is due to unsupported response_format."""
237+
error_str = str(error)
238+
return _RESPONSE_FORMAT_ERROR in error_str
239+
240+
def chat_complete(
241+
self, messages: List[Dict[str, str]], stream: Optional[bool] = None
242+
) -> Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]]:
243+
"""Send a chat completion request with retry for unsupported response_format.
244+
245+
Args:
246+
messages: List of messages in the conversation.
247+
stream: Whether to stream the response. If None, uses the instance's streaming setting.
248+
249+
Returns:
250+
Either a ChatCompletion or a Generator yielding ChatCompletionChunks if streaming.
251+
"""
252+
try:
253+
return super().chat_complete(messages, stream)
254+
except RuntimeError as e:
255+
# Check if it's a response_format error and we haven't already disabled it
256+
if self._is_response_format_error(e) and not self._response_format_unsupported:
257+
logger.debug(
258+
"Endpoint doesn't support json_object response_format, retrying without it"
259+
)
260+
# Mark this endpoint as not supporting response_format
261+
self._response_format_unsupported = True
262+
# Retry without response_format
263+
return super().chat_complete(messages, stream)
264+
raise
265+
266+
async def achat_complete(
267+
self, messages: List[Dict[str, str]], stream: Optional[bool] = None
268+
) -> Union[ChatCompletion, AsyncGenerator[ChatCompletionChunk, None]]:
269+
"""Send an async chat completion request with retry for unsupported response_format.
270+
271+
Args:
272+
messages: List of messages in the conversation.
273+
stream: Whether to stream the response. If None, uses the instance's streaming setting.
274+
275+
Returns:
276+
Either a ChatCompletion or an AsyncGenerator yielding ChatCompletionChunks if streaming.
277+
"""
278+
try:
279+
return await super().achat_complete(messages, stream)
280+
except RuntimeError as e:
281+
# Check if it's a response_format error and we haven't already disabled it
282+
if self._is_response_format_error(e) and not self._response_format_unsupported:
283+
logger.debug(
284+
"Endpoint doesn't support json_object response_format, retrying without it"
285+
)
286+
# Mark this endpoint as not supporting response_format
287+
self._response_format_unsupported = True
288+
# Retry without response_format
289+
return await super().achat_complete(messages, stream)
290+
raise
291+
190292
def _get_models(self) -> List[Model]:
191293
"""List all available models for this provider.
192294
@@ -242,7 +344,15 @@ def to_langchain(self) -> "ChatOpenAI":
242344
) from e
243345

244346
model_kwargs = {}
245-
if self.structured and isinstance(self.structured, dict):
347+
# Only set response_format if endpoint is likely to support it
348+
should_skip_response_format = (
349+
self._is_likely_lmstudio() or self._response_format_unsupported
350+
)
351+
if (
352+
self.structured
353+
and isinstance(self.structured, dict)
354+
and not should_skip_response_format
355+
):
246356
structured_type = self.structured.get("type")
247357
if structured_type in ["json", "json_object"]:
248358
model_kwargs["response_format"] = {"type": "json_object"}

tests/providers/llm/test_openai_compatible_provider.py

Lines changed: 113 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -260,20 +260,38 @@ def test_langchain_integration(self):
260260
assert result == mock_instance
261261

262262
def test_langchain_integration_with_structured_output(self):
263-
"""Test LangChain integration with structured output."""
263+
"""Test LangChain integration with structured output (non-LM Studio port)."""
264+
# Use port 8080 (not 1234) to test that response_format IS set
264265
model = OpenAICompatibleLanguageModel(
265266
api_key="test-key",
266-
base_url="http://localhost:1234",
267+
base_url="http://localhost:8080",
267268
structured={"type": "json"}
268269
)
269-
270+
270271
with patch('langchain_openai.ChatOpenAI') as mock_chat_openai:
271272
model.to_langchain()
272-
273+
273274
call_args = mock_chat_openai.call_args[1]
274275
assert "model_kwargs" in call_args
275276
assert call_args["model_kwargs"]["response_format"] == {"type": "json_object"}
276277

278+
def test_langchain_integration_lmstudio_skips_response_format(self):
279+
"""Test LangChain integration skips response_format for LM Studio (port 1234)."""
280+
# Port 1234 is the default LM Studio port - response_format should be skipped
281+
model = OpenAICompatibleLanguageModel(
282+
api_key="test-key",
283+
base_url="http://localhost:1234",
284+
structured={"type": "json"}
285+
)
286+
287+
with patch('langchain_openai.ChatOpenAI') as mock_chat_openai:
288+
model.to_langchain()
289+
290+
call_args = mock_chat_openai.call_args[1]
291+
assert "model_kwargs" in call_args
292+
# response_format should NOT be set for LM Studio
293+
assert "response_format" not in call_args["model_kwargs"]
294+
277295
def test_langchain_integration_reasoning_model(self):
278296
"""Test LangChain integration with reasoning model (o1)."""
279297
model = OpenAICompatibleLanguageModel(
@@ -366,4 +384,94 @@ def test_error_message_mentions_both_env_vars(self):
366384
OpenAICompatibleLanguageModel(api_key="test-key")
367385
error_message = str(exc_info.value)
368386
assert "OPENAI_COMPATIBLE_BASE_URL_LLM" in error_message
369-
assert "OPENAI_COMPATIBLE_BASE_URL" in error_message
387+
assert "OPENAI_COMPATIBLE_BASE_URL" in error_message
388+
389+
def test_is_likely_lmstudio_port_1234(self):
390+
"""Test that port 1234 is detected as likely LM Studio."""
391+
model = OpenAICompatibleLanguageModel(
392+
api_key="test-key",
393+
base_url="http://localhost:1234/v1"
394+
)
395+
assert model._is_likely_lmstudio() is True
396+
397+
def test_is_likely_lmstudio_other_port(self):
398+
"""Test that other ports are not detected as LM Studio."""
399+
model = OpenAICompatibleLanguageModel(
400+
api_key="test-key",
401+
base_url="http://localhost:8080/v1"
402+
)
403+
assert model._is_likely_lmstudio() is False
404+
405+
def test_is_likely_lmstudio_port_12345_not_matched(self):
406+
"""Test that port 12345 is NOT detected as LM Studio (regression test)."""
407+
model = OpenAICompatibleLanguageModel(
408+
api_key="test-key",
409+
base_url="http://localhost:12345/v1"
410+
)
411+
assert model._is_likely_lmstudio() is False
412+
413+
def test_is_likely_lmstudio_port_12346_not_matched(self):
414+
"""Test that port 12346 is NOT detected as LM Studio (regression test)."""
415+
model = OpenAICompatibleLanguageModel(
416+
api_key="test-key",
417+
base_url="http://localhost:12346/v1"
418+
)
419+
assert model._is_likely_lmstudio() is False
420+
421+
def test_is_likely_lmstudio_127_0_0_1(self):
422+
"""Test that 127.0.0.1:1234 is detected as likely LM Studio."""
423+
model = OpenAICompatibleLanguageModel(
424+
api_key="test-key",
425+
base_url="http://127.0.0.1:1234/v1"
426+
)
427+
assert model._is_likely_lmstudio() is True
428+
429+
def test_response_format_skipped_for_lmstudio(self):
430+
"""Test that response_format is skipped for LM Studio (port 1234)."""
431+
model = OpenAICompatibleLanguageModel(
432+
api_key="test-key",
433+
base_url="http://localhost:1234/v1",
434+
structured={"type": "json_object"}
435+
)
436+
kwargs = model._get_api_kwargs()
437+
assert "response_format" not in kwargs
438+
439+
def test_response_format_included_for_other_ports(self):
440+
"""Test that response_format is included for non-LM Studio endpoints."""
441+
model = OpenAICompatibleLanguageModel(
442+
api_key="test-key",
443+
base_url="http://localhost:8080/v1",
444+
structured={"type": "json_object"}
445+
)
446+
kwargs = model._get_api_kwargs()
447+
assert "response_format" in kwargs
448+
assert kwargs["response_format"] == {"type": "json_object"}
449+
450+
def test_is_response_format_error(self):
451+
"""Test detection of response_format error message."""
452+
model = OpenAICompatibleLanguageModel(
453+
api_key="test-key",
454+
base_url="http://localhost:8080/v1"
455+
)
456+
# Test the specific error from LM Studio
457+
error = RuntimeError("'response_format.type' must be 'json_schema' or 'text'")
458+
assert model._is_response_format_error(error) is True
459+
460+
# Test other errors
461+
other_error = RuntimeError("Some other error")
462+
assert model._is_response_format_error(other_error) is False
463+
464+
def test_response_format_unsupported_flag(self):
465+
"""Test that _response_format_unsupported flag is properly set."""
466+
model = OpenAICompatibleLanguageModel(
467+
api_key="test-key",
468+
base_url="http://localhost:8080/v1",
469+
structured={"type": "json_object"}
470+
)
471+
# Initially should be False
472+
assert model._response_format_unsupported is False
473+
474+
# After setting the flag, response_format should be skipped
475+
model._response_format_unsupported = True
476+
kwargs = model._get_api_kwargs()
477+
assert "response_format" not in kwargs

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)