`llama-index-integrations/llms/llama-index-llms-openai-like/README.md` (112 additions & 1 deletion)
# LlamaIndex Llms Integration: OpenAI Like

`pip install llama-index-llms-openai-like`

This package is a thin wrapper around the OpenAI API client. It follows the OpenAI interface, so it can be pointed at OpenAI itself or at any OpenAI-compatible API.

## Classes

This integration provides two classes:

1. **OpenAILike** - For standard OpenAI-compatible APIs using chat/completions endpoints
2. **OpenAILikeResponses** - For OpenAI-compatible APIs that support the `/responses` endpoint

## Usage

### Basic OpenAI-compatible API (OpenAILike)

```python
from llama_index.llms.openai_like import OpenAILike

llm = OpenAILike(
    model="my model",
    api_base="https://hostname.com/v1",
    api_key="fake",
    # Explicitly set the context window to match the model's context window
    context_window=128000,
    # Controls whether the model uses chat or completion endpoint
    is_chat_model=True,
    # Controls whether the model supports function calling
    is_function_calling_model=False,
)

response = llm.complete("Hello World!")
print(response.text)
```
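The same instance also exposes the chat interface; a quick sketch using LlamaIndex's standard `ChatMessage` type:

```python
from llama_index.core.llms import ChatMessage

# Chat takes a list of messages and returns a ChatResponse
response = llm.chat([ChatMessage(role="user", content="Hello World!")])
print(response.message.content)
```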

### OpenAI-compatible API with Responses support (OpenAILikeResponses)

For OpenAI-compatible servers that expose a `/responses` endpoint (mirroring OpenAI's Responses API), use `OpenAILikeResponses`:

```python
from llama_index.llms.openai_like import OpenAILikeResponses

llm = OpenAILikeResponses(
    model="gpt-4o-mini",
    api_base="https://your-openai-compatible-api.com/v1",
    api_key="your-api-key",
    context_window=128000,
    is_chat_model=True,
    is_function_calling_model=True,
    # Responses-specific parameters
    max_output_tokens=1000,
    instructions="You are a helpful assistant.",
    track_previous_responses=True,
    built_in_tools=[{"type": "web_search"}],
    user="user_id",
)

response = llm.complete("Write a short story")
print(response.text)
```
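With `track_previous_responses=True`, follow-up calls can carry conversational context without resending the full history. A hedged sketch; the exact semantics depend on how your server implements `/responses`:

```python
from llama_index.core.llms import ChatMessage

first = llm.chat([ChatMessage(role="user", content="My name is Dana.")])
second = llm.chat([ChatMessage(role="user", content="What is my name?")])
# Should recall "Dana" if the server honors response tracking
print(second.message.content)
```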

### Key Features of OpenAILikeResponses

- **Built-in Tools**: Support for built-in tools like web search, code interpreter, etc.
- **Response Tracking**: Track previous responses for conversational context
- **Instructions**: Set global instructions for the model
- **Advanced Function Calling**: Enhanced function calling with parallel execution support
- **Response Storage**: Optional storage of responses in the provider's system
- **Streaming Support**: Full streaming support for both chat and completion (see the sketch below)
- **Structured Output**: Full support for structured output using Pydantic models
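As a quick illustration of the streaming support listed above, a minimal sketch reusing the `llm` instance from the previous example:

```python
# Stream a completion token-by-token; each chunk carries the incremental delta
for chunk in llm.stream_complete("Tell me a joke"):
    print(chunk.delta, end="", flush=True)
```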

### Structured Output with OpenAILikeResponses

The OpenAILikeResponses class supports structured output using Pydantic models:

```python
from pydantic import BaseModel, Field
from llama_index.llms.openai_like import OpenAILikeResponses

class PersonInfo(BaseModel):
    name: str = Field(description="Person's name")
    age: int = Field(description="Person's age")
    city: str = Field(description="City where they live")
    profession: str = Field(description="Their profession")

llm = OpenAILikeResponses(
    model="gpt-4o-mini",
    api_base="https://your-api.com/v1",
    api_key="your-key",
    is_chat_model=True,
)

# Create structured LLM
structured_llm = llm.as_structured_llm(PersonInfo)

# Get structured response
response = structured_llm.complete("Tell me about Alice, a 28-year-old engineer in SF")

# Access structured data
person = response.raw # PersonInfo object
print(f"Name: {person.name}, Age: {person.age}")
print(f"City: {person.city}, Job: {person.profession}")
```

The structured output implementation automatically:
- Uses JSON schema-based structured output when supported by the model
- Falls back to function calling for extraction when JSON schema is not supported
- Provides full async support for structured output (see the async sketch below)
- Supports streaming structured output
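
As an illustration of the async path, a minimal sketch reusing `structured_llm` from the example above (`acomplete` is the standard async counterpart of `complete` in LlamaIndex):

```python
import asyncio

async def extract_async():
    # Async structured completion; response.raw is a PersonInfo instance
    response = await structured_llm.acomplete(
        "Tell me about Bob, a 35-year-old teacher in Boston"
    )
    print(response.raw)

asyncio.run(extract_async())
```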

### Function Calling with OpenAILikeResponses

```python
from llama_index.core.tools import FunctionTool

def search_web(query: str) -> str:
    """Search the web for information."""
    return f"Search results for: {query}"

search_tool = FunctionTool.from_defaults(fn=search_web)

response = llm.chat_with_tools(
    tools=[search_tool],
    user_msg="Search for the latest AI developments",
    tool_required=True,
)
```
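To act on the tool calls the model returns, the standard LlamaIndex helper for function-calling LLMs can be used (a sketch, assuming the `response` and `search_tool` from the block above):

```python
tool_calls = llm.get_tool_calls_from_response(
    response, error_on_no_tool_call=False
)
for tool_call in tool_calls:
    # Execute the matching tool with the arguments the model requested
    output = search_tool(**tool_call.tool_kwargs)
    print(f"{tool_call.tool_name} -> {output}")
```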

## When to Use Which Class

- Use **OpenAILike** for standard OpenAI-compatible APIs that use `/chat/completions` or `/completions` endpoints
- Use **OpenAILikeResponses** for OpenAI-compatible APIs that support the `/responses` endpoint, when you want to leverage advanced features like built-in tools, response tracking, and enhanced function calling
#!/usr/bin/env python3
"""
Example usage of OpenAILikeResponses for OpenAI-compatible APIs with /responses support.

This example demonstrates how to use the OpenAILikeResponses class to interact with
OpenAI-compatible servers that support the /responses API endpoint.
"""

from llama_index.llms.openai_like import OpenAILikeResponses
from llama_index.core.base.llms.types import ChatMessage, MessageRole
from llama_index.core.tools import FunctionTool


def search_web(query: str) -> str:
    """Search the web for information about a query."""
    # This is a mock implementation - in reality, you'd call a real search API
    return f"Search results for '{query}': Found relevant information about {query}."


def main():
    """Demonstrate usage of OpenAILikeResponses."""
    print("OpenAILikeResponses Example")
    print("=" * 40)

    # Create an OpenAILikeResponses instance
    # Replace with your actual API endpoint and key
    llm = OpenAILikeResponses(
        model="gpt-4o-mini",  # Use whatever model your API supports
        api_base="https://your-openai-compatible-api.com/v1",  # Your API endpoint
        api_key="your-api-key-here",  # Your API key
        context_window=128000,
        is_chat_model=True,
        is_function_calling_model=True,
        # Responses-specific parameters
        max_output_tokens=1000,
        instructions="You are a helpful assistant that provides accurate and concise answers.",
        track_previous_responses=True,
        built_in_tools=[{"type": "web_search"}],  # Enable built-in web search
        user="example_user",
    )

    print(f"✓ Created {llm.class_name()}")
    print(f"✓ Model: {llm.model}")
    print(f"✓ API Base: {llm.api_base}")
    print(f"✓ Context Window: {llm.context_window}")
    print(f"✓ Max Output Tokens: {llm.max_output_tokens}")
    print(f"✓ Track Previous Responses: {llm.track_previous_responses}")
    print()

    # Example 1: Simple chat completion
    print("Example 1: Simple Chat Completion")
    print("-" * 30)

    messages = [
        ChatMessage(
            role=MessageRole.USER,
            content="Hello! Can you tell me about Python programming?",
        )
    ]

    print("Note: This would make an actual API call to your OpenAI-compatible server")
    print("Messages to send:", [{"role": msg.role, "content": msg.content} for msg in messages])
    print()
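    # To actually send the request to a live server, you could call llm.chat here
    # (left commented out since api_base/api_key above are placeholders):
    # response = llm.chat(messages)
    # print("Response:", response.message.content)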

    # Example 2: Function calling with tools
    print("Example 2: Function Calling with Tools")
    print("-" * 30)

    # Create a tool
    search_tool = FunctionTool.from_defaults(
        fn=search_web,
        name="search_web",
        description="Search the web for information",
    )

    # Prepare chat with tools
    tool_chat_params = llm._prepare_chat_with_tools(
        tools=[search_tool],
        user_msg="Search for the latest developments in artificial intelligence",
        tool_required=False,
    )

    print(f"✓ Prepared chat with {len(tool_chat_params['tools'])} tools")
    print("Tool specifications:")
    for i, tool_spec in enumerate(tool_chat_params['tools']):
        print(f" {i+1}. {tool_spec['name']}: {tool_spec.get('description', 'No description')}")

    print(f"✓ Messages prepared: {len(tool_chat_params['messages'])}")
    print(f"✓ Tool choice: {tool_chat_params.get('tool_choice', 'auto')}")
    print()

    # Example 3: Model kwargs for responses API
    print("Example 3: Model Kwargs for Responses API")
    print("-" * 30)

    model_kwargs = llm._get_model_kwargs(
        tools=[{"type": "function", "name": "custom_tool"}]
    )

    print("Generated model kwargs:")
    for key, value in model_kwargs.items():
        if key == 'tools':
            print(f" {key}: {len(value)} tools")
        elif isinstance(value, str) and len(value) > 50:
            print(f" {key}: {value[:47]}...")
        else:
            print(f" {key}: {value}")
    print()

    print("Example completed successfully!")
    print("To use this with a real API, update the api_base and api_key parameters.")


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
Example demonstrating structured output with OpenAILikeResponses.

This example shows how to use the OpenAILikeResponses class with structured output
to extract structured data from LLM responses using Pydantic models.
"""

from pydantic import BaseModel, Field
from llama_index.llms.openai_like import OpenAILikeResponses
from llama_index.core.base.llms.types import ChatMessage, MessageRole


class PersonInfo(BaseModel):
    """Pydantic model for structured person information."""
    name: str = Field(description="The person's name")
    age: int = Field(description="The person's age in years")
    city: str = Field(description="The city where the person lives")
    profession: str = Field(description="The person's profession or job")


class CityInfo(BaseModel):
    """Pydantic model for structured city information."""
    capital: str = Field(description="The capital city")
    country: str = Field(description="The country name")
    population: str = Field(description="The population of the city/country")
    area: str = Field(description="The area of the city/country")
    currency: str = Field(description="The official currency")
    language: str = Field(description="The primary language spoken")
    time_zone: str = Field(description="The time zone")
    government_type: str = Field(description="The type of government")
    independence_year: str = Field(description="The year of independence")
    religion: str = Field(description="The predominant religion")


def main():
    """Demonstrate structured output functionality."""
    print("=== OpenAILikeResponses Structured Output Example ===\n")

    # Initialize the LLM
    llm = OpenAILikeResponses(
        model="/models/gpt-oss-120b",
        api_base="http://your-host:8021/v1",
        api_key="your-api-key",
        context_window=128000,
        is_chat_model=True,
        is_function_calling_model=True,
        temperature=0.7,
    )

    print("1. Creating structured LLM for PersonInfo...")
    person_llm = llm.as_structured_llm(PersonInfo)

    print("2. Example: Extract person information")
    print(" Input: 'Tell me about Alice, a 28-year-old software engineer in San Francisco'")

    try:
        response = person_llm.complete(
            "Tell me about Alice, a 28-year-old software engineer in San Francisco"
        )

        # The response.raw contains the structured Pydantic object
        person_data = response.raw
        print(" Output:")
        print(f" - Name: {person_data.name}")
        print(f" - Age: {person_data.age}")
        print(f" - City: {person_data.city}")
        print(f" - Profession: {person_data.profession}")

    except Exception as e:
        print(f" Error with PersonInfo example: {e}")

    print("\n" + "=" * 50 + "\n")

    print("3. Creating structured LLM for CityInfo...")
    city_llm = llm.as_structured_llm(CityInfo)

    print("4. Example: Extract detailed city/country information")
    print(" Input: 'Write a short story about Paris'")

    try:
        response = city_llm.complete("Write a short story about Paris")

        # The response.raw contains the structured Pydantic object
        city_data = response.raw
        print(" Output:")
        print(f" - Capital: {city_data.capital}")
        print(f" - Country: {city_data.country}")
        print(f" - Population: {city_data.population}")
        print(f" - Area: {city_data.area}")
        print(f" - Currency: {city_data.currency}")
        print(f" - Language: {city_data.language}")
        print(f" - Time Zone: {city_data.time_zone}")
        print(f" - Government: {city_data.government_type}")
        print(f" - Independence: {city_data.independence_year}")
        print(f" - Religion: {city_data.religion}")

    except Exception as e:
        print(f" Error with CityInfo example: {e}")
        import traceback
        traceback.print_exc()

    print("\n" + "=" * 50)
    print("Examples completed!")
    print("\nNOTE: Make sure to:")
    print("- Update the api_base URL to point to your OpenAI-compatible server")
    print("- Set the correct api_key for your server")
    print("- Ensure your server supports the /responses API endpoint")
    print("- For servers that don't support structured output, the implementation")
    print("  will fall back to function calling for structured data extraction")


if __name__ == "__main__":
    main()
from llama_index.llms.openai_like.base import OpenAILike
from llama_index.llms.openai_like.responses import OpenAILikeResponses

__all__ = ["OpenAILike"]
__all__ = ["OpenAILike", "OpenAILikeResponses"]