Commit c33fe4b

Adding subclasses of ModelSettings to support specialized model requests (#766)
1 parent 8a93fed commit c33fe4b

File tree

10 files changed: +203 -40 lines changed

docs/agents.md

Lines changed: 22 additions & 0 deletions

````diff
@@ -204,6 +204,28 @@ print(result_sync.data)
 #> Rome
 ```
 
+### Model specific settings
+
+<!-- TODO: replace this with the gemini safety settings example once added via https://github.com/pydantic/pydantic-ai/issues/373 -->
+
+If you wish to further customize model behavior, you can use a subclass of [`ModelSettings`][pydantic_ai.settings.ModelSettings], like [`AnthropicModelSettings`][pydantic_ai.models.anthropic.AnthropicModelSettings], associated with your model of choice.
+
+For example:
+
+```py
+from pydantic_ai import Agent
+from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+agent = Agent('anthropic:claude-3-5-sonnet-latest')
+
+result_sync = agent.run_sync(
+    'What is the capital of Italy?',
+    model_settings=AnthropicModelSettings(anthropic_metadata={'user_id': 'my_user_id'}),
+)
+print(result_sync.data)
+#> Rome
+```
+
 ## Runs vs. Conversations
 
 An agent **run** might represent an entire conversation — there's no limit to how many messages can be exchanged in a single run. However, a **conversation** might also be composed of multiple runs, especially if you need to maintain state between separate interactions or API calls.
````
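Since `ModelSettings` and its subclasses appear to be `total=False` `TypedDict`s (the diffs below read them with `model_settings.get(...)` and construct them with keyword arguments), base keys and subclass-specific keys should be combinable in one settings object. A minimal sketch, assuming `temperature` remains a valid base-`ModelSettings` key:

```py
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent('anthropic:claude-3-5-sonnet-latest')

result_sync = agent.run_sync(
    'What is the capital of Italy?',
    # base keys (temperature, max_tokens, ...) and the subclass-specific
    # anthropic_metadata key presumably mix in a single settings object
    model_settings=AnthropicModelSettings(
        temperature=0.0,
        anthropic_metadata={'user_id': 'my_user_id'},
    ),
)
print(result_sync.data)
```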

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 16 additions & 7 deletions

```diff
@@ -41,6 +41,7 @@
 from anthropic.types import (
     Message as AnthropicMessage,
     MessageParam,
+    MetadataParam,
     RawContentBlockDeltaEvent,
     RawContentBlockStartEvent,
     RawContentBlockStopEvent,
@@ -79,6 +80,15 @@
 """
 
 
+class AnthropicModelSettings(ModelSettings):
+    """Settings used for an Anthropic model request."""
+
+    anthropic_metadata: MetadataParam
+    """An object describing metadata about the request.
+
+    Contains `user_id`, an external identifier for the user who is associated with the request."""
+
+
 @dataclass(init=False)
 class AnthropicModel(Model):
     """A model that uses the Anthropic API.
@@ -167,35 +177,33 @@ class AnthropicAgentModel(AgentModel):
     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> tuple[ModelResponse, usage.Usage]:
-        response = await self._messages_create(messages, False, model_settings)
+        response = await self._messages_create(messages, False, cast(AnthropicModelSettings, model_settings or {}))
         return self._process_response(response), _map_usage(response)
 
     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> AsyncIterator[StreamedResponse]:
-        response = await self._messages_create(messages, True, model_settings)
+        response = await self._messages_create(messages, True, cast(AnthropicModelSettings, model_settings or {}))
         async with response:
             yield await self._process_streamed_response(response)
 
     @overload
     async def _messages_create(
-        self, messages: list[ModelMessage], stream: Literal[True], model_settings: ModelSettings | None
+        self, messages: list[ModelMessage], stream: Literal[True], model_settings: AnthropicModelSettings
     ) -> AsyncStream[RawMessageStreamEvent]:
         pass
 
     @overload
     async def _messages_create(
-        self, messages: list[ModelMessage], stream: Literal[False], model_settings: ModelSettings | None
+        self, messages: list[ModelMessage], stream: Literal[False], model_settings: AnthropicModelSettings
    ) -> AnthropicMessage:
         pass
 
     async def _messages_create(
-        self, messages: list[ModelMessage], stream: bool, model_settings: ModelSettings | None
+        self, messages: list[ModelMessage], stream: bool, model_settings: AnthropicModelSettings
     ) -> AnthropicMessage | AsyncStream[RawMessageStreamEvent]:
         # standalone function to make it easier to override
-        model_settings = model_settings or {}
-
         tool_choice: ToolChoiceParam | None
 
         if not self.tools:
@@ -222,6 +230,7 @@ async def _messages_create(
             temperature=model_settings.get('temperature', NOT_GIVEN),
             top_p=model_settings.get('top_p', NOT_GIVEN),
             timeout=model_settings.get('timeout', NOT_GIVEN),
+            metadata=model_settings.get('anthropic_metadata', NOT_GIVEN),
         )
 
     def _process_response(self, response: AnthropicMessage) -> ModelResponse:
```
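The `cast(AnthropicModelSettings, model_settings or {})` pattern recurs in every provider below: `cast` is a typing-only no-op, and `or {}` replaces the `model_settings = model_settings or {}` lines this commit deletes. A self-contained sketch of why this is safe for a `total=False` `TypedDict` (the class definitions here are stand-ins, not the real pydantic-ai ones):

```py
from typing import TypedDict, cast

class ModelSettings(TypedDict, total=False):  # stand-in for pydantic_ai.settings.ModelSettings
    temperature: float

class AnthropicModelSettings(ModelSettings, total=False):  # stand-in
    anthropic_metadata: dict[str, str]

maybe_settings: ModelSettings | None = None

# cast() does nothing at runtime; `or {}` supplies the empty-dict default
# that the deleted `model_settings = model_settings or {}` lines provided
settings = cast(AnthropicModelSettings, maybe_settings or {})
assert settings.get('anthropic_metadata', 'NOT_GIVEN') == 'NOT_GIVEN'
```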

pydantic_ai_slim/pydantic_ai/models/cohere.py

Lines changed: 12 additions & 4 deletions

```diff
@@ -3,7 +3,7 @@
 from collections.abc import Iterable
 from dataclasses import dataclass, field
 from itertools import chain
-from typing import Literal, TypeAlias, Union
+from typing import Literal, TypeAlias, Union, cast
 
 from cohere import TextAssistantMessageContentItem
 from typing_extensions import assert_never
@@ -71,6 +71,12 @@
 ]
 
 
+class CohereModelSettings(ModelSettings):
+    """Settings used for a Cohere model request."""
+
+    # This class is a placeholder for any future cohere-specific settings
+
+
 @dataclass(init=False)
 class CohereModel(Model):
     """A model that uses the Cohere API.
@@ -153,23 +159,25 @@ class CohereAgentModel(AgentModel):
     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> tuple[ModelResponse, result.Usage]:
-        response = await self._chat(messages, model_settings)
+        response = await self._chat(messages, cast(CohereModelSettings, model_settings or {}))
         return self._process_response(response), _map_usage(response)
 
     async def _chat(
         self,
         messages: list[ModelMessage],
-        model_settings: ModelSettings | None,
+        model_settings: CohereModelSettings,
     ) -> ChatResponse:
         cohere_messages = list(chain(*(self._map_message(m) for m in messages)))
-        model_settings = model_settings or {}
         return await self.client.chat(
             model=self.model_name,
             messages=cohere_messages,
             tools=self.tools or OMIT,
             max_tokens=model_settings.get('max_tokens', OMIT),
             temperature=model_settings.get('temperature', OMIT),
             p=model_settings.get('top_p', OMIT),
+            seed=model_settings.get('seed', OMIT),
+            presence_penalty=model_settings.get('presence_penalty', OMIT),
+            frequency_penalty=model_settings.get('frequency_penalty', OMIT),
         )
 
     def _process_response(self, response: ChatResponse) -> ModelResponse:
```
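A hedged usage sketch for the newly forwarded options. `seed`, `presence_penalty`, and `frequency_penalty` are read from the base settings dict, so their definitions presumably live in the `pydantic_ai/settings.py` part of this commit (one of the ten changed files not shown here); the model name is illustrative:

```py
from pydantic_ai import Agent
from pydantic_ai.models.cohere import CohereModelSettings

agent = Agent('cohere:command-r-plus')  # illustrative model name

result = agent.run_sync(
    'What is the capital of Italy?',
    # each key is optional; _chat() substitutes OMIT for anything unset
    model_settings=CohereModelSettings(
        seed=42,
        presence_penalty=0.5,
        frequency_penalty=0.2,
    ),
)
print(result.data)
```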

pydantic_ai_slim/pydantic_ai/models/gemini.py

Lines changed: 19 additions & 5 deletions

```diff
@@ -7,7 +7,7 @@
 from copy import deepcopy
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Annotated, Any, Literal, Protocol, Union
+from typing import Annotated, Any, Literal, Protocol, Union, cast
 from uuid import uuid4
 
 import pydantic
@@ -48,6 +48,12 @@
 """
 
 
+class GeminiModelSettings(ModelSettings):
+    """Settings used for a Gemini model request."""
+
+    # This class is a placeholder for any future gemini-specific settings
+
+
 @dataclass(init=False)
 class GeminiModel(Model):
     """A model that uses Gemini via `generativelanguage.googleapis.com` API.
@@ -171,20 +177,22 @@ def __init__(
     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> tuple[ModelResponse, usage.Usage]:
-        async with self._make_request(messages, False, model_settings) as http_response:
+        async with self._make_request(
+            messages, False, cast(GeminiModelSettings, model_settings or {})
+        ) as http_response:
             response = _gemini_response_ta.validate_json(await http_response.aread())
         return self._process_response(response), _metadata_as_usage(response)
 
     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> AsyncIterator[StreamedResponse]:
-        async with self._make_request(messages, True, model_settings) as http_response:
+        async with self._make_request(messages, True, cast(GeminiModelSettings, model_settings or {})) as http_response:
             yield await self._process_streamed_response(http_response)
 
     @asynccontextmanager
     async def _make_request(
-        self, messages: list[ModelMessage], streamed: bool, model_settings: ModelSettings | None
+        self, messages: list[ModelMessage], streamed: bool, model_settings: GeminiModelSettings
     ) -> AsyncIterator[HTTPResponse]:
         sys_prompt_parts, contents = self._message_to_gemini_content(messages)
 
@@ -204,6 +212,10 @@ async def _make_request(
             generation_config['temperature'] = temperature
         if (top_p := model_settings.get('top_p')) is not None:
             generation_config['top_p'] = top_p
+        if (presence_penalty := model_settings.get('presence_penalty')) is not None:
+            generation_config['presence_penalty'] = presence_penalty
+        if (frequency_penalty := model_settings.get('frequency_penalty')) is not None:
+            generation_config['frequency_penalty'] = frequency_penalty
         if generation_config:
             request_data['generation_config'] = generation_config
 
@@ -222,7 +234,7 @@ async def _make_request(
             url,
             content=request_json,
             headers=headers,
-            timeout=(model_settings or {}).get('timeout', USE_CLIENT_DEFAULT),
+            timeout=model_settings.get('timeout', USE_CLIENT_DEFAULT),
         ) as r:
             if r.status_code != 200:
                 await r.aread()
@@ -398,6 +410,8 @@ class _GeminiGenerationConfig(TypedDict, total=False):
     max_output_tokens: int
     temperature: float
     top_p: float
+    presence_penalty: float
+    frequency_penalty: float
 
 
 class _GeminiContent(TypedDict):
```
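The walrus-operator guards above copy a setting into `generation_config` only when it was explicitly provided, keeping the request body minimal. A standalone sketch of the pattern (using a stand-in `TypedDict`, not the real `_GeminiGenerationConfig`):

```py
from typing import TypedDict

class _GenerationConfig(TypedDict, total=False):  # stand-in for _GeminiGenerationConfig
    presence_penalty: float
    frequency_penalty: float

def build_generation_config(model_settings: dict[str, float]) -> _GenerationConfig:
    config: _GenerationConfig = {}
    # a key is added only when the caller actually set it, so an empty
    # settings dict produces an empty (and therefore omitted) config
    if (presence_penalty := model_settings.get('presence_penalty')) is not None:
        config['presence_penalty'] = presence_penalty
    if (frequency_penalty := model_settings.get('frequency_penalty')) is not None:
        config['frequency_penalty'] = frequency_penalty
    return config

assert build_generation_config({}) == {}
assert build_generation_config({'presence_penalty': 0.5}) == {'presence_penalty': 0.5}
```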

pydantic_ai_slim/pydantic_ai/models/groq.py

Lines changed: 16 additions & 8 deletions

```diff
@@ -5,7 +5,7 @@
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from itertools import chain
-from typing import Literal, overload
+from typing import Literal, cast, overload
 
 from httpx import AsyncClient as AsyncHTTPClient
 from typing_extensions import assert_never
@@ -68,6 +68,12 @@
 """
 
 
+class GroqModelSettings(ModelSettings):
+    """Settings used for a Groq model request."""
+
+    # This class is a placeholder for any future groq-specific settings
+
+
 @dataclass(init=False)
 class GroqModel(Model):
     """A model that uses the Groq API.
@@ -155,31 +161,31 @@ class GroqAgentModel(AgentModel):
     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> tuple[ModelResponse, usage.Usage]:
-        response = await self._completions_create(messages, False, model_settings)
+        response = await self._completions_create(messages, False, cast(GroqModelSettings, model_settings or {}))
         return self._process_response(response), _map_usage(response)
 
     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> AsyncIterator[StreamedResponse]:
-        response = await self._completions_create(messages, True, model_settings)
+        response = await self._completions_create(messages, True, cast(GroqModelSettings, model_settings or {}))
         async with response:
             yield await self._process_streamed_response(response)
 
     @overload
     async def _completions_create(
-        self, messages: list[ModelMessage], stream: Literal[True], model_settings: ModelSettings | None
+        self, messages: list[ModelMessage], stream: Literal[True], model_settings: GroqModelSettings
     ) -> AsyncStream[ChatCompletionChunk]:
         pass
 
     @overload
     async def _completions_create(
-        self, messages: list[ModelMessage], stream: Literal[False], model_settings: ModelSettings | None
+        self, messages: list[ModelMessage], stream: Literal[False], model_settings: GroqModelSettings
     ) -> chat.ChatCompletion:
         pass
 
     async def _completions_create(
-        self, messages: list[ModelMessage], stream: bool, model_settings: ModelSettings | None
+        self, messages: list[ModelMessage], stream: bool, model_settings: GroqModelSettings
     ) -> chat.ChatCompletion | AsyncStream[ChatCompletionChunk]:
         # standalone function to make it easier to override
         if not self.tools:
@@ -191,8 +197,6 @@ async def _completions_create(
 
         groq_messages = list(chain(*(self._map_message(m) for m in messages)))
 
-        model_settings = model_settings or {}
-
         return await self.client.chat.completions.create(
             model=str(self.model_name),
             messages=groq_messages,
@@ -205,6 +209,10 @@ async def _completions_create(
             temperature=model_settings.get('temperature', NOT_GIVEN),
             top_p=model_settings.get('top_p', NOT_GIVEN),
             timeout=model_settings.get('timeout', NOT_GIVEN),
+            seed=model_settings.get('seed', NOT_GIVEN),
+            presence_penalty=model_settings.get('presence_penalty', NOT_GIVEN),
+            frequency_penalty=model_settings.get('frequency_penalty', NOT_GIVEN),
+            logit_bias=model_settings.get('logit_bias', NOT_GIVEN),
         )
 
     def _process_response(self, response: chat.ChatCompletion) -> ModelResponse:
```
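A hedged usage sketch for the Groq passthroughs; like the Cohere ones, `seed`, the penalties, and `logit_bias` come from the base settings dict (defined in the unshown `settings.py` portion of this commit), with `NOT_GIVEN` substituted for omitted keys. The model name and the `logit_bias` token id are illustrative:

```py
from pydantic_ai import Agent
from pydantic_ai.models.groq import GroqModelSettings

agent = Agent('groq:llama3-70b-8192')  # illustrative model name

result = agent.run_sync(
    'What is the capital of Italy?',
    model_settings=GroqModelSettings(
        seed=42,
        presence_penalty=0.5,
        logit_bias={'1234': -100},  # illustrative token id -> bias mapping
    ),
)
print(result.data)
```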

pydantic_ai_slim/pydantic_ai/models/mistral.py

Lines changed: 14 additions & 7 deletions

```diff
@@ -6,7 +6,7 @@
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from itertools import chain
-from typing import Any, Callable, Literal, Union
+from typing import Any, Callable, Literal, Union, cast
 
 import pydantic_core
 from httpx import AsyncClient as AsyncHTTPClient, Timeout
@@ -85,6 +85,12 @@
 """
 
 
+class MistralModelSettings(ModelSettings):
+    """Settings used for a Mistral model request."""
+
+    # This class is a placeholder for any future mistral-specific settings
+
+
 @dataclass(init=False)
 class MistralModel(Model):
     """A model that uses Mistral.
@@ -159,23 +165,22 @@ async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> tuple[ModelResponse, Usage]:
         """Make a non-streaming request to the model from Pydantic AI call."""
-        response = await self._completions_create(messages, model_settings)
+        response = await self._completions_create(messages, cast(MistralModelSettings, model_settings or {}))
         return self._process_response(response), _map_usage(response)
 
     @asynccontextmanager
     async def request_stream(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
     ) -> AsyncIterator[StreamedResponse]:
         """Make a streaming request to the model from Pydantic AI call."""
-        response = await self._stream_completions_create(messages, model_settings)
+        response = await self._stream_completions_create(messages, cast(MistralModelSettings, model_settings or {}))
         async with response:
             yield await self._process_streamed_response(self.result_tools, response)
 
     async def _completions_create(
-        self, messages: list[ModelMessage], model_settings: ModelSettings | None
+        self, messages: list[ModelMessage], model_settings: MistralModelSettings
     ) -> MistralChatCompletionResponse:
         """Make a non-streaming request to the model."""
-        model_settings = model_settings or {}
         response = await self.client.chat.complete_async(
             model=str(self.model_name),
             messages=list(chain(*(self._map_message(m) for m in messages))),
@@ -187,19 +192,19 @@ async def _completions_create(
             temperature=model_settings.get('temperature', UNSET),
             top_p=model_settings.get('top_p', 1),
             timeout_ms=self._get_timeout_ms(model_settings.get('timeout')),
+            random_seed=model_settings.get('seed', UNSET),
         )
         assert response, 'A unexpected empty response from Mistral.'
         return response
 
     async def _stream_completions_create(
         self,
         messages: list[ModelMessage],
-        model_settings: ModelSettings | None,
+        model_settings: MistralModelSettings,
     ) -> MistralEventStreamAsync[MistralCompletionEvent]:
         """Create a streaming completion request to the Mistral model."""
         response: MistralEventStreamAsync[MistralCompletionEvent] | None
         mistral_messages = list(chain(*(self._map_message(m) for m in messages)))
-        model_settings = model_settings or {}
 
         if self.result_tools and self.function_tools or self.function_tools:
             # Function Calling
@@ -213,6 +218,8 @@ async def _stream_completions_create(
                 top_p=model_settings.get('top_p', 1),
                 max_tokens=model_settings.get('max_tokens', UNSET),
                 timeout_ms=self._get_timeout_ms(model_settings.get('timeout')),
+                presence_penalty=model_settings.get('presence_penalty'),
+                frequency_penalty=model_settings.get('frequency_penalty'),
             )
 
         elif self.result_tools:
```
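Two details in this diff worth noting: the generic `seed` setting is renamed on the wire to Mistral's `random_seed` parameter, and `presence_penalty`/`frequency_penalty` are wired only into the streaming path here, not into `_completions_create`. A hedged usage sketch (illustrative model name):

```py
from pydantic_ai import Agent
from pydantic_ai.models.mistral import MistralModelSettings

agent = Agent('mistral:mistral-large-latest')  # illustrative model name

result = agent.run_sync(
    'What is the capital of Italy?',
    # forwarded to the Mistral SDK as random_seed=42
    model_settings=MistralModelSettings(seed=42),
)
print(result.data)
```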
