Skip to content

Commit de4e42e

Browse files
committed
update readme and add integration test for structured output
1 parent a221682 commit de4e42e

File tree

5 files changed

+151
-6
lines changed

5 files changed

+151
-6
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ A single interface to use and evaluate different llm providers.
2121

2222
## [Supported Providers](https://mozilla-ai.github.io/any-llm/providers)
2323

24-
## Why Does this exist?
24+
## Motivation
2525

2626
The landscape of LLM provider interfaces presents a fragmented ecosystem with several challenges that `any-llm` aims to address:
2727

@@ -34,13 +34,15 @@ While the OpenAI API has become the de facto standard for LLM provider interface
3434
- **[LiteLLM](https://github.com/BerriAI/litellm)**: While popular, it reimplements provider interfaces rather than leveraging official SDKs, which can lead to compatibility issues and unexpected behavior modifications
3535
- **[AISuite](https://github.com/andrewyng/aisuite/issues)**: Offers a clean, modular approach but lacks active maintenance, comprehensive testing, and modern Python typing standards.
3636
- **[Framework-specific solutions](https://github.com/agno-agi/agno/tree/main/libs/agno/agno/models)**: Some agent frameworks either depend on LiteLLM or implement their own provider integrations, creating fragmentation
37+
- **[Proxy Only Solutions](https://openrouter.ai/)**: solutions like OpenRouter require a hosted proxy to serve as the interface between your code and the LLM provider. `any-llm` allows you to communicate directly with the LLM provider without the need for a hosted proxy.
3738

3839
**Our Approach:**
3940

4041
`any-llm` fills the gap by providing a simple, well-maintained interface that:
4142
- **Leverages official provider SDKs** when available, reducing maintenance burden and ensuring compatibility
4243
- **Stays framework-agnostic** so it can be used across different projects and use cases
4344
- **Provides active maintenance**: we support this in our product ([any-llm](https://github.com/mozilla-ai/any-llm)) so we're motivated to maintain it.
45+
- **No Proxy or Gateway server required** so you don't need to deal with setting up any other service to talk to whichever LLM provider you need.
4446

4547

4648

src/any_llm/providers/google/google.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
msg = "google-genai is not installed. Please install it with `pip install any-llm-sdk[google]`"
1010
raise ImportError(msg)
1111

12+
from pydantic import BaseModel
13+
1214
from openai.types.chat.chat_completion import ChatCompletion
1315
from any_llm.provider import Provider, ApiConfig
1416
from any_llm.exceptions import MissingApiKeyError
@@ -97,6 +99,41 @@ def _convert_messages(messages: list[dict[str, Any]]) -> list[types.Content]:
9799
return formatted_messages
98100

99101

102+
def _convert_pydantic_to_google_json(
    pydantic_model: type[BaseModel], messages: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """
    Convert a Pydantic model into inline JSON-schema instructions for Google.

    Google GenAI does not accept an OpenAI-style ``response_format`` argument,
    so (following the same pattern as the DeepSeek provider) the model's JSON
    schema is prepended to the final user message as a plain-text instruction.

    Args:
        pydantic_model: The Pydantic model class describing the desired output.
        messages: Chat messages; the last one must have ``role == "user"``.

    Returns:
        A new list of messages with the last user message rewritten to carry
        the JSON-schema instruction. The input ``messages`` (and the dicts it
        contains) are left unmodified.

    Raises:
        ValueError: If the last message is not a user message.
    """
    # Get the JSON schema from the Pydantic model
    schema = pydantic_model.model_json_schema()

    # Shallow-copy the list AND the last message dict so the caller's
    # original message objects are never mutated.
    modified_messages = list(messages)
    if modified_messages and modified_messages[-1]["role"] == "user":
        last_message = dict(modified_messages[-1])
        original_content = last_message["content"]
        json_instruction = f"""
Please respond with a JSON object that matches the following schema:

{json.dumps(schema, indent=2)}

Return the JSON object only, no other text, do not wrap it in ```json or ```.

{original_content}
"""
        last_message["content"] = json_instruction
        modified_messages[-1] = last_message
    else:
        msg = "Last message is not a user message"
        raise ValueError(msg)

    return modified_messages
100137
class GoogleProvider(Provider):
101138
"""Google Provider using the new response conversion utilities."""
102139

@@ -133,8 +170,15 @@ def completion(
133170
**kwargs: Any,
134171
) -> ChatCompletion:
135172
"""Create a chat completion using Google GenAI."""
136-
# Remove unsupported parameters
137-
kwargs = remove_unsupported_params(kwargs, ["response_format", "parallel_tool_calls"])
173+
# Handle response_format for Pydantic models
174+
if "response_format" in kwargs:
175+
response_format = kwargs.pop("response_format")
176+
if isinstance(response_format, type) and issubclass(response_format, BaseModel):
177+
# Convert Pydantic model to Google JSON format
178+
messages = _convert_pydantic_to_google_json(response_format, messages)
179+
180+
# Remove other unsupported parameters
181+
kwargs = remove_unsupported_params(kwargs, ["parallel_tool_calls"])
138182

139183
# Convert tools if present
140184
tools = None

src/any_llm/providers/groq/groq.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import json
23
from typing import Any
34

45
try:
@@ -7,6 +8,8 @@
78
msg = "groq is not installed. Please install it with `pip install any-llm-sdk[groq]`"
89
raise ImportError(msg)
910

11+
from pydantic import BaseModel
12+
1013
from openai.types.chat.chat_completion import ChatCompletion
1114
from any_llm.provider import Provider, ApiConfig
1215
from any_llm.exceptions import MissingApiKeyError
@@ -32,6 +35,28 @@ def completion(
3235
**kwargs: Any,
3336
) -> ChatCompletion:
3437
"""Create a chat completion using Groq."""
38+
# Handle response_format for Pydantic models
39+
if "response_format" in kwargs:
40+
response_format = kwargs["response_format"]
41+
if isinstance(response_format, type) and issubclass(response_format, BaseModel):
42+
# Convert Pydantic model to JSON schema format for Groq
43+
schema = response_format.model_json_schema()
44+
kwargs["response_format"] = {"type": "json_object"}
45+
46+
# Add JSON instruction to the last user message (required by Groq)
47+
if messages and messages[-1]["role"] == "user":
48+
original_content = messages[-1]["content"]
49+
json_instruction = f"""
50+
Please respond with a JSON object that matches the following schema:
51+
52+
{json.dumps(schema, indent=2)}
53+
54+
Return the JSON object only, no other text.
55+
56+
{original_content}
57+
"""
58+
messages[-1]["content"] = json_instruction
59+
3560
# Clean messages (remove refusal field as per original implementation)
3661
cleaned_messages = []
3762
for message in messages:

src/any_llm/providers/huggingface/huggingface.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import json
23
from typing import Any
34

45
try:
@@ -7,6 +8,8 @@
78
msg = "huggingface-hub is not installed. Please install it with `pip install any-llm-sdk[huggingface]`"
89
raise ImportError(msg)
910

11+
from pydantic import BaseModel
12+
1013
from openai.types.chat.chat_completion import ChatCompletion
1114
from any_llm.provider import Provider, ApiConfig
1215
from any_llm.exceptions import MissingApiKeyError
@@ -16,6 +19,43 @@
1619
)
1720

1821

22+
def _convert_pydantic_to_huggingface_json(
    pydantic_model: type[BaseModel], messages: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """
    Convert a Pydantic model into inline JSON-schema instructions for HuggingFace.

    HuggingFace Inference does not accept an OpenAI-style ``response_format``
    argument, so (following the same pattern as the DeepSeek provider) the
    model's JSON schema is embedded into the final user message as a
    plain-text instruction.

    Args:
        pydantic_model: The Pydantic model class describing the desired output.
        messages: Chat messages; the last one must have ``role == "user"``.

    Returns:
        A new list of messages with the last user message rewritten to carry
        the JSON-schema instruction. The input ``messages`` (and the dicts it
        contains) are left unmodified.

    Raises:
        ValueError: If the last message is not a user message.
    """
    # Get the JSON schema from the Pydantic model
    schema = pydantic_model.model_json_schema()

    # Shallow-copy the list AND the last message dict so the caller's
    # original message objects are never mutated.
    modified_messages = list(messages)
    if modified_messages and modified_messages[-1]["role"] == "user":
        last_message = dict(modified_messages[-1])
        original_content = last_message["content"]
        json_instruction = f"""Answer the following question and format your response as a JSON object matching this schema:

Schema: {json.dumps(schema, indent=2)}

DO NOT return the schema itself. Instead, answer the question and put your answer in the correct JSON format.

For example, if the question asks for a name and you want to answer "Paris", return: {{"name": "Paris"}}

Question: {original_content}

Answer (as JSON):"""
        last_message["content"] = json_instruction
        modified_messages[-1] = last_message
    else:
        msg = "Last message is not a user message"
        raise ValueError(msg)

    return modified_messages
1959
class HuggingfaceProvider(Provider):
2060
"""HuggingFace Provider using the new response conversion utilities."""
2161

@@ -39,8 +79,15 @@ def completion(
3979
if "max_tokens" in kwargs:
4080
kwargs["max_new_tokens"] = kwargs.pop("max_tokens")
4181

42-
# Remove unsupported parameters
43-
kwargs = remove_unsupported_params(kwargs, ["response_format", "parallel_tool_calls"])
82+
# Handle response_format for Pydantic models
83+
if "response_format" in kwargs:
84+
response_format = kwargs.pop("response_format")
85+
if isinstance(response_format, type) and issubclass(response_format, BaseModel):
86+
# Convert Pydantic model to HuggingFace JSON format
87+
messages = _convert_pydantic_to_huggingface_json(response_format, messages)
88+
89+
# Remove other unsupported parameters
90+
kwargs = remove_unsupported_params(kwargs, ["parallel_tool_calls"])
4491

4592
# Ensure message content is always a string and handle tool calls
4693
cleaned_messages = []

tests/integration/test_completion.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import httpx
2+
from pydantic import BaseModel
23
import pytest
34
from any_llm import completion
45
from any_llm.exceptions import MissingApiKeyError
@@ -17,7 +18,7 @@
1718
"xai": "xai-3-70b-instruct",
1819
"inception": "inception-3-70b-instruct",
1920
"nebius": "nebius-3-70b-instruct",
20-
"ollama": "llama3.1:8b",
21+
"ollama": "llama3.2:3b",
2122
"azure": "gpt-4o",
2223
"cohere": "command-r-20240215",
2324
"cerebras": "llama3.1-8b",
@@ -41,3 +42,29 @@ def test_providers(provider: str) -> None:
4142
pytest.skip("Ollama is not set up, skipping")
4243
raise
4344
assert result.choices[0].message.content is not None
45+
46+
47+
def test_response_format(provider: str) -> None:
    """Test that providers produce structured output matching a Pydantic ``response_format``."""
    if provider == "anthropic":
        # pytest.skip raises, so no return is needed after it.
        pytest.skip("Anthropic does not support response_format")
    model_id = provider_model_map[provider]

    class ResponseFormat(BaseModel):
        name: str

    prompt = "What is the capital of France?"
    try:
        result = completion(
            f"{provider}/{model_id}", messages=[{"role": "user", "content": prompt}], response_format=ResponseFormat
        )
        assert result.choices[0].message.content is not None
        # The content must be valid JSON for the requested schema, not just any text.
        output = ResponseFormat.model_validate_json(result.choices[0].message.content)
        assert output.name == "Paris"
    except MissingApiKeyError:
        pytest.skip(f"{provider} API key not provided, skipping")
    except (httpx.HTTPStatusError, httpx.ConnectError):
        if provider == "ollama":
            pytest.skip("Ollama is not set up, skipping")
        raise

0 commit comments

Comments
 (0)