Draft

RobinPicard · RobinPicard · commit f118c9f1747a · 2025-08-20T17:33:56.000+02:00
diff --git a/outlines_example.py b/outlines_example.py
@@ -0,0 +1,67 @@
+from pydantic_ai import Agent, NativeOutput
+from pydantic_ai.models.outlines import OutlinesModel
+from pydantic_ai.settings import ModelSettings
+from pydantic import BaseModel
+
+
+# Output schema shared by both examples below: each agent is created with
+# `output_type=NativeOutput([Box])`.
+# NOTE: documented with comments rather than a docstring on purpose — a class
+# docstring on a pydantic model is emitted as the schema `description`.
+class Box(BaseModel):
+    width: int
+    height: int
+    depth: int
+    units: str  # unit label for the three dimensions; not constrained here
+
+
+def transformers_example():
+
+    print("---- start transformers_example ----")
+
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    hf_model = AutoModelForCausalLM.from_pretrained("erwanf/gpt2-mini")
+    hf_tokenizer = AutoTokenizer.from_pretrained("erwanf/gpt2-mini")
+    chat_template = '{% for message in messages %}{{ message.role }}: {{ message.content }}{% endfor %}'
+    hf_tokenizer.chat_template = chat_template
+
+    model = OutlinesModel.transformers(hf_model, hf_tokenizer, settings=ModelSettings(max_new_tokens=100))
+    agent = Agent(model, output_type=NativeOutput([Box]))
+
+    response = agent.run_sync('Give me the dimensions of a box')
+    print("response.output: ", response.output)
+
+    response2 = agent.run_sync('Give me another box', message_history=response.all_messages())
+    print("response2.output: ", response2.output)
+
+    print("all_messages: ", response2.all_messages())
+
+    print("---- end transformers_example ----")
+
+
+def llama_cpp_example():
+    print("---- start llama_cpp_example ----")
+
+    from llama_cpp import Llama
+
+    llama_model = Llama.from_pretrained(
+        repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+        filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
+        n_ctx=2048,  # 2K context window
+    )
+
+    model = OutlinesModel.llama_cpp(llama_model)
+    agent = Agent(model, output_type=NativeOutput([Box]))
+
+    response = agent.run_sync('Give me the dimensions of a box')
+    print("response.output: ", response.output)
+
+    response2 = agent.run_sync('Give me another box', message_history=response.all_messages())
+    print("response2.output: ", response2.output)
+
+    print("all_messages: ", response2.all_messages())
+
+    print("---- end llama_cpp_example ----")
+
+
+if __name__ == "__main__":
+    # Only the llama.cpp example runs by default. Uncomment the next line to
+    # run the transformers example as well.
+    # transformers_example()
+    llama_cpp_example()
+    # NOTE(review): no `existing` function is defined in this file — presumably
+    # a leftover from local experimentation; confirm and remove.
+    # existing()
diff --git a/pydantic_ai_slim/pydantic_ai/models/outlines.py b/pydantic_ai_slim/pydantic_ai/models/outlines.py
@@ -0,0 +1,266 @@
+from collections.abc import AsyncIterable, AsyncIterator
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any, Literal
+
+from .. import UnexpectedModelBehavior, _utils
+from .._run_context import RunContext
+from ..messages import (
+    ModelMessage,
+    ModelResponse,
+    ModelResponseStreamEvent,
+    TextPart,
+)
+from ..profiles import ModelProfileSpec
+from ..providers import Provider, infer_provider
+from ..settings import ModelSettings
+from . import (
+    Model,
+    ModelRequestParameters,
+    StreamedResponse,
+)
+
+try:
+    from outlines.inputs import Chat
+    from outlines.models.base import AsyncModel as OutlinesAsyncBaseModel, Model as OutlinesBaseModel
+    from outlines.models.llamacpp import from_llamacpp  # pyright: ignore[reportUnknownVariableType]
+    from outlines.models.mlxlm import from_mlxlm  # pyright: ignore[reportUnknownVariableType]
+    from outlines.models.sglang import from_sglang
+    from outlines.models.tgi import from_tgi
+    from outlines.models.transformers import from_transformers  # pyright: ignore[reportUnknownVariableType]
+    from outlines.models.vllm import from_vllm
+    from outlines.types.dsl import JsonSchema
+except ImportError as _import_error:
+    raise ImportError(
+        'Please install `outlines` to use the Outlines model, '
+        'you can use the `outlines` optional group — `pip install "pydantic-ai-slim[outlines]"`'
+    ) from _import_error
+
+
+@dataclass
+class OutlinesStreamedResponse(StreamedResponse):
+    """Implementation of `StreamedResponse` for Outlines models."""
+
+    _model_name: str
+    _response: AsyncIterable[str]
+    _timestamp: datetime
+
+    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
+        async for event in self._response:
+            event = self._parts_manager.handle_text_delta(vendor_part_id='content', content=event)
+            if event is not None:  # pragma: no branch
+                yield event
+
+    @property
+    def model_name(self) -> str:
+        """Get the model name of the response."""
+        return self._model_name
+
+    @property
+    def timestamp(self) -> datetime:
+        """Get the timestamp of the response."""
+        return self._timestamp
+
+
+@dataclass(init=False)
+class OutlinesModel(Model):
+    """A model that relies on the Outlines library to run non API-based models."""
+
+    _system: str = field(default='outlines', repr=False)
+
+    def __init__(
+        self,
+        model: OutlinesBaseModel | OutlinesAsyncBaseModel,
+        model_name: str | None = None,
+        *,
+        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
+        profile: ModelProfileSpec | None = None,
+        settings: ModelSettings | None = None,
+    ):
+        """Initialize an Outlines model.
+
+        Args:
+            model: The Outlines model used for the model.
+            model_name: The name of the model run by the provider.
+            provider: The provider to use for OutlinesModel. Can be either the string 'outlines' or an
+                instance of `Provider[OutlinesBaseModel]`. If not provided, the other parameters will be used.
+            profile: The model profile to use. Defaults to a profile picked by the provider.
+            settings: Default model settings for this model instance.
+        """
+        self.model = model
+        self._model_name = model_name
+
+        if isinstance(provider, str):
+            provider = infer_provider(provider)
+
+        super().__init__(settings=settings, profile=profile or provider.model_profile)
+
+    @classmethod
+    def transformers(
+        cls,
+        hf_model: Any,
+        hf_tokenizer: Any,
+        *,
+        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
+        profile: ModelProfileSpec | None = None,
+        settings: ModelSettings | None = None,
+    ):
+        outlines_model: OutlinesBaseModel = from_transformers(hf_model, hf_tokenizer)
+        return cls(outlines_model, None, provider=provider, profile=profile, settings=settings)
+
+    @classmethod
+    def llama_cpp(
+        cls,
+        llama_model: Any,
+        *,
+        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
+        profile: ModelProfileSpec | None = None,
+        settings: ModelSettings | None = None,
+    ):
+        outlines_model: OutlinesBaseModel = from_llamacpp(llama_model)
+        return cls(outlines_model, None, provider=provider, profile=profile, settings=settings)
+
+    @classmethod
+    def mlxlm(
+        cls,
+        mlx_model: Any,
+        mlx_tokenizer: Any,
+        *,
+        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
+        profile: ModelProfileSpec | None = None,
+        settings: ModelSettings | None = None,
+    ):
+        outlines_model: OutlinesBaseModel = from_mlxlm(mlx_model, mlx_tokenizer)
+        return cls(outlines_model, None, provider=provider, profile=profile, settings=settings)
+
+    @classmethod
+    def tgi(
+        cls,
+        client: Any,
+        *,
+        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
+        profile: ModelProfileSpec | None = None,
+        settings: ModelSettings | None = None,
+    ):
+        outlines_model: OutlinesBaseModel | OutlinesAsyncBaseModel = from_tgi(client)
+        return cls(outlines_model, None, provider=provider, profile=profile, settings=settings)
+
+    @classmethod
+    def sglang(
+        cls,
+        client: Any,
+        model_name: str,
+        *,
+        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
+        profile: ModelProfileSpec | None = None,
+        settings: ModelSettings | None = None,
+    ):
+        outlines_model: OutlinesBaseModel | OutlinesAsyncBaseModel = from_sglang(client, model_name)
+        return cls(outlines_model, None, provider=provider, profile=profile, settings=settings)
+
+    @classmethod
+    def vllm(
+        cls,
+        client: Any,
+        model_name: str,
+        *,
+        provider: Literal['outlines'] | Provider[OutlinesBaseModel] = 'outlines',
+        profile: ModelProfileSpec | None = None,
+        settings: ModelSettings | None = None,
+    ):
+        outlines_model: OutlinesBaseModel | OutlinesAsyncBaseModel = from_vllm(client, model_name)
+        return cls(outlines_model, None, provider=provider, profile=profile, settings=settings)
+
+    @property
+    def model_name(self) -> str:
+        return self._model_name or ''
+
+    @property
+    def system(self) -> str:
+        return self._system
+
+    async def request(
+        self,
+        messages: list[ModelMessage],
+        model_settings: ModelSettings | None,
+        model_request_parameters: ModelRequestParameters,
+    ) -> ModelResponse:
+        """Make a request to the model."""
+        prompt = self._format_prompt(messages)
+        output_type = (
+            JsonSchema(model_request_parameters.output_object.json_schema)
+            if model_request_parameters.output_object
+            else None
+        )
+        model_settings_dict = dict(model_settings) if model_settings else {}
+        if isinstance(self.model, OutlinesAsyncBaseModel):
+            response: str = await self.model(prompt, output_type, None, **model_settings_dict)
+        else:
+            response: str = self.model(prompt, output_type, None, **model_settings_dict)
+        return self._process_response(response)
+
+    @asynccontextmanager
+    async def request_stream(
+        self,
+        messages: list[ModelMessage],
+        model_settings: ModelSettings | None,
+        model_request_parameters: ModelRequestParameters,
+        run_context: RunContext[Any] | None = None,
+    ) -> AsyncIterator[StreamedResponse]:
+        prompt = self._format_prompt(messages)
+        output_type = (
+            JsonSchema(model_request_parameters.output_object.json_schema)
+            if model_request_parameters.output_object
+            else None
+        )
+        model_settings_dict = dict(model_settings) if model_settings else {}
+        if isinstance(self.model, OutlinesAsyncBaseModel):
+            response = self.model.stream(prompt, output_type, None, **model_settings_dict)
+            async for chunk in response:
+                yield chunk
+        else:
+            response = self.model.stream(prompt, output_type, None, **model_settings_dict)
+
+            async def async_response():
+                for chunk in response:
+                    yield chunk
+
+            yield await self._process_streamed_response(async_response(), model_request_parameters)
+
+    def _format_prompt(self, messages: list[ModelMessage]) -> Chat:
+        """Turn the model messages into an Outlines Chat instance."""
+        chat = Chat()
+        for message in messages:
+            if message.kind == 'request':
+                for part in message.parts:
+                    if part.part_kind == 'system-prompt':
+                        chat.add_system_message(part.content)
+                    elif part.part_kind == 'user-prompt':
+                        chat.add_user_message(str(part.content))
+            elif message.kind == 'response':
+                for part in message.parts:
+                    if part.part_kind == 'text':
+                        chat.add_assistant_message(str(part.content))
+        return chat
+
+    def _process_response(self, response: str) -> ModelResponse:
+        """Turn the Outlines text response into a Pydantic AI model response instance."""
+        return ModelResponse(parts=[TextPart(content=response)])
+
+    async def _process_streamed_response(
+        self, response: AsyncIterable[str], model_request_parameters: ModelRequestParameters
+    ) -> StreamedResponse:
+        """Turn the Outlines text response into a Pydantic AI streamed response instance."""
+        peekable_response = _utils.PeekableAsyncStream(response)
+        first_chunk = await peekable_response.peek()
+        if isinstance(first_chunk, _utils.Unset):
+            raise UnexpectedModelBehavior('Streamed response ended without content or tool calls')  # pragma: no cover
+
+        timestamp = datetime.now(tz=timezone.utc)
+        return OutlinesStreamedResponse(
+            model_request_parameters=model_request_parameters,
+            _model_name=self.model_name,
+            _response=peekable_response,
+            _timestamp=timestamp,
+        )
diff --git a/pydantic_ai_slim/pydantic_ai/profiles/outlines.py b/pydantic_ai_slim/pydantic_ai/profiles/outlines.py
@@ -0,0 +1,11 @@
+from . import ModelProfile
+
+
+def outlines_model_profile(model_name: str | None = None) -> ModelProfile:
+    """Get the model profile for an Outlines model."""
+    return ModelProfile(
+        supports_tools=False,
+        supports_json_schema_output=True,
+        supports_json_object_output=True,
+        default_structured_output_mode='native',
+    )
diff --git a/pydantic_ai_slim/pydantic_ai/providers/__init__.py b/pydantic_ai_slim/pydantic_ai/providers/__init__.py
@@ -131,6 +131,10 @@ def infer_provider_class(provider: str) -> type[Provider[Any]]:  # noqa: C901
         from .github import GitHubProvider
 
         return GitHubProvider
+    elif provider == 'outlines':  # pragma: no cover
+        from .outlines import OutlinesProvider
+
+        return OutlinesProvider
     else:  # pragma: no cover
         raise ValueError(f'Unknown provider: {provider}')
 
diff --git a/pydantic_ai_slim/pydantic_ai/providers/outlines.py b/pydantic_ai_slim/pydantic_ai/providers/outlines.py
@@ -0,0 +1,30 @@
+from __future__ import annotations as _annotations
+
+from typing import Any
+
+from pydantic_ai.profiles import ModelProfile
+from pydantic_ai.profiles.outlines import outlines_model_profile
+from pydantic_ai.providers import Provider
+
+
+class OutlinesProvider(Provider[Any]):
+    """Provider for Outlines API."""
+
+    @property
+    def name(self) -> str:
+        """The provider name."""
+        return 'outlines'
+
+    @property
+    def base_url(self) -> str:
+        """The base URL for the provider API."""
+        raise NotImplementedError()
+
+    @property
+    def client(self) -> Any:
+        """The client for the provider."""
+        raise NotImplementedError()
+
+    def model_profile(self, model_name: str) -> ModelProfile | None:
+        """The model profile for the named model, if available."""
+        return outlines_model_profile(model_name)
diff --git a/pydantic_ai_slim/pyproject.toml b/pydantic_ai_slim/pyproject.toml
@@ -95,6 +95,7 @@ ag-ui = ["ag-ui-protocol>=0.1.8", "starlette>=0.45.3"]
 retries = ["tenacity>=8.2.3"]
 # Temporal
 temporal = ["temporalio==1.15.0"]
+outlines = ["outlines>=0.0.1"]
 
 [tool.hatch.metadata]
 allow-direct-references = true
diff --git a/pyproject.toml b/pyproject.toml
diff --git a/uv.lock b/uv.lock