Skip to content

Commit 7d27c42

Browse files
sydney-runkle, hyperlint-ai[bot], dmontagu, samuelcolvin
authored
Basic ModelSettings logic (#227)
Co-authored-by: hyperlint-ai[bot] <154288675+hyperlint-ai[bot]@users.noreply.github.com> Co-authored-by: David Montague <[email protected]> Co-authored-by: Samuel Colvin <[email protected]>
1 parent 70105cb commit 7d27c42

File tree

15 files changed

+296
-54
lines changed

15 files changed

+296
-54
lines changed

docs/agents.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,31 @@ You can also pass messages from previous runs to continue a conversation or prov
101101
nest_asyncio.apply()
102102
```
103103

104+
### Additional Configuration
105+
106+
PydanticAI offers a [`settings.ModelSettings`][pydantic_ai.settings.ModelSettings] structure to help you fine-tune your requests.
107+
This structure allows you to configure common parameters that influence the model's behavior, such as `temperature`, `max_tokens`,
108+
`timeout`, and more.
109+
110+
There are two ways to apply these settings:
111+
1. Passing to `run{_sync,_stream}` functions via the `model_settings` argument. This allows for fine-tuning on a per-request basis.
112+
2. Setting during [`Agent`][pydantic_ai.agent.Agent] initialization via the `model_settings` argument. These settings will be applied by default to all subsequent run calls using said agent. However, `model_settings` provided during a specific run call will override the agent's default settings.
113+
114+
For example, if you'd like to set the `temperature` setting to `0.0` to ensure less random behavior,
115+
you can do the following:
116+
117+
```py
118+
from pydantic_ai import Agent
119+
120+
agent = Agent('openai:gpt-4o')
121+
122+
result_sync = agent.run_sync(
123+
'What is the capital of Italy?', model_settings={'temperature': 0.0}
124+
)
125+
print(result_sync.data)
126+
#> Rome
127+
```
128+
104129
## Runs vs. Conversations
105130

106131
An agent **run** might represent an entire conversation — there's no limit to how many messages can be exchanged in a single run. However, a **conversation** might also be composed of multiple runs, especially if you need to maintain state between separate interactions or API calls.

docs/api/settings.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# `pydantic_ai.settings`
2+
3+
::: pydantic_ai.settings
4+
options:
5+
inherited_members: true
6+
members:
7+
- ModelSettings

mkdocs.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ nav:
3838
- api/result.md
3939
- api/messages.md
4040
- api/exceptions.md
41-
- api/models/anthropic.md
41+
- api/settings.md
4242
- api/models/base.md
43+
- api/models/anthropic.md
4344
- api/models/openai.md
4445
- api/models/ollama.md
4546
- api/models/gemini.md

pydantic_ai_slim/pydantic_ai/agent.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
result,
2323
)
2424
from .result import ResultData
25+
from .settings import ModelSettings, merge_model_settings
2526
from .tools import (
2627
AgentDeps,
2728
RunContext,
@@ -81,6 +82,13 @@ class Agent(Generic[AgentDeps, ResultData]):
8182
end_strategy: EndStrategy
8283
"""Strategy for handling tool calls when a final result is found."""
8384

85+
model_settings: ModelSettings | None = None
86+
"""Optional model request settings to use for this agents's runs, by default.
87+
88+
Note, if `model_settings` is provided by `run`, `run_sync`, or `run_stream`, those settings will
89+
be merged with this value, with the runtime argument taking priority.
90+
"""
91+
8492
last_run_messages: list[_messages.Message] | None = None
8593
"""The messages from the last run, useful when a run raised an exception.
8694
@@ -108,6 +116,7 @@ def __init__(
108116
system_prompt: str | Sequence[str] = (),
109117
deps_type: type[AgentDeps] = NoneType,
110118
name: str | None = None,
119+
model_settings: ModelSettings | None = None,
111120
retries: int = 1,
112121
result_tool_name: str = 'final_result',
113122
result_tool_description: str | None = None,
@@ -130,6 +139,7 @@ def __init__(
130139
or add a type hint `: Agent[None, <return type>]`.
131140
name: The name of the agent, used for logging. If `None`, we try to infer the agent name from the call frame
132141
when the agent is first run.
142+
model_settings: Optional model request settings to use for this agent's runs, by default.
133143
retries: The default number of retries to allow before raising an error.
134144
result_tool_name: The name of the tool to use for the final result.
135145
result_tool_description: The description of the final result tool.
@@ -151,6 +161,7 @@ def __init__(
151161

152162
self.end_strategy = end_strategy
153163
self.name = name
164+
self.model_settings = model_settings
154165
self._result_schema = _result.ResultSchema[result_type].build(
155166
result_type, result_tool_name, result_tool_description
156167
)
@@ -178,6 +189,7 @@ async def run(
178189
message_history: list[_messages.Message] | None = None,
179190
model: models.Model | models.KnownModelName | None = None,
180191
deps: AgentDeps = None,
192+
model_settings: ModelSettings | None = None,
181193
infer_name: bool = True,
182194
) -> result.RunResult[ResultData]:
183195
"""Run the agent with a user prompt in async mode.
@@ -199,6 +211,7 @@ async def run(
199211
model: Optional model to use for this run, required if `model` was not set when creating the agent.
200212
deps: Optional dependencies to use for this run.
201213
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
214+
model_settings: Optional settings to use for this model's request.
202215
203216
Returns:
204217
The result of the run.
@@ -225,14 +238,16 @@ async def run(
225238

226239
cost = result.Cost()
227240

241+
model_settings = merge_model_settings(self.model_settings, model_settings)
242+
228243
run_step = 0
229244
while True:
230245
run_step += 1
231246
with _logfire.span('preparing model and tools {run_step=}', run_step=run_step):
232247
agent_model = await self._prepare_model(model_used, deps)
233248

234249
with _logfire.span('model request', run_step=run_step) as model_req_span:
235-
model_response, request_cost = await agent_model.request(messages)
250+
model_response, request_cost = await agent_model.request(messages, model_settings)
236251
model_req_span.set_attribute('response', model_response)
237252
model_req_span.set_attribute('cost', request_cost)
238253
model_req_span.message = f'model request -> {model_response.role}'
@@ -267,6 +282,7 @@ def run_sync(
267282
message_history: list[_messages.Message] | None = None,
268283
model: models.Model | models.KnownModelName | None = None,
269284
deps: AgentDeps = None,
285+
model_settings: ModelSettings | None = None,
270286
infer_name: bool = True,
271287
) -> result.RunResult[ResultData]:
272288
"""Run the agent with a user prompt synchronously.
@@ -291,6 +307,7 @@ async def main():
291307
model: Optional model to use for this run, required if `model` was not set when creating the agent.
292308
deps: Optional dependencies to use for this run.
293309
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
310+
model_settings: Optional settings to use for this model's request.
294311
295312
Returns:
296313
The result of the run.
@@ -299,7 +316,14 @@ async def main():
299316
self._infer_name(inspect.currentframe())
300317
loop = asyncio.get_event_loop()
301318
return loop.run_until_complete(
302-
self.run(user_prompt, message_history=message_history, model=model, deps=deps, infer_name=False)
319+
self.run(
320+
user_prompt,
321+
message_history=message_history,
322+
model=model,
323+
deps=deps,
324+
infer_name=False,
325+
model_settings=model_settings,
326+
)
303327
)
304328

305329
@asynccontextmanager
@@ -310,6 +334,7 @@ async def run_stream(
310334
message_history: list[_messages.Message] | None = None,
311335
model: models.Model | models.KnownModelName | None = None,
312336
deps: AgentDeps = None,
337+
model_settings: ModelSettings | None = None,
313338
infer_name: bool = True,
314339
) -> AsyncIterator[result.StreamedRunResult[AgentDeps, ResultData]]:
315340
"""Run the agent with a user prompt in async mode, returning a streamed response.
@@ -332,6 +357,7 @@ async def main():
332357
model: Optional model to use for this run, required if `model` was not set when creating the agent.
333358
deps: Optional dependencies to use for this run.
334359
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
360+
model_settings: Optional settings to use for this model's request.
335361
336362
Returns:
337363
The result of the run.
@@ -359,6 +385,7 @@ async def main():
359385
tool.current_retry = 0
360386

361387
cost = result.Cost()
388+
model_settings = merge_model_settings(self.model_settings, model_settings)
362389

363390
run_step = 0
364391
while True:
@@ -368,7 +395,7 @@ async def main():
368395
agent_model = await self._prepare_model(model_used, deps)
369396

370397
with _logfire.span('model request {run_step=}', run_step=run_step) as model_req_span:
371-
async with agent_model.request_stream(messages) as model_response:
398+
async with agent_model.request_stream(messages, model_settings) as model_response:
372399
model_req_span.set_attribute('response_type', model_response.__class__.__name__)
373400
# We want to end the "model request" span here, but we can't exit the context manager
374401
# in the traditional way

pydantic_ai_slim/pydantic_ai/models/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from ..exceptions import UserError
1919
from ..messages import Message, ModelAnyResponse, ModelStructuredResponse
20+
from ..settings import ModelSettings
2021

2122
if TYPE_CHECKING:
2223
from ..result import Cost
@@ -113,12 +114,16 @@ class AgentModel(ABC):
113114
"""Model configured for each step of an Agent run."""
114115

115116
@abstractmethod
116-
async def request(self, messages: list[Message]) -> tuple[ModelAnyResponse, Cost]:
117+
async def request(
118+
self, messages: list[Message], model_settings: ModelSettings | None
119+
) -> tuple[ModelAnyResponse, Cost]:
117120
"""Make a request to the model."""
118121
raise NotImplementedError()
119122

120123
@asynccontextmanager
121-
async def request_stream(self, messages: list[Message]) -> AsyncIterator[EitherStreamedResponse]:
124+
async def request_stream(
125+
self, messages: list[Message], model_settings: ModelSettings | None
126+
) -> AsyncIterator[EitherStreamedResponse]:
122127
"""Make a request to the model and return a streaming response."""
123128
raise NotImplementedError(f'Streamed requests not supported by this {self.__class__.__name__}')
124129
# yield is required to make this a generator for type checking

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
ModelTextResponse,
2020
ToolCall,
2121
)
22+
from ..settings import ModelSettings
2223
from ..tools import ToolDefinition
2324
from . import (
2425
AgentModel,
@@ -151,28 +152,34 @@ class AnthropicAgentModel(AgentModel):
151152
allow_text_result: bool
152153
tools: list[ToolParam]
153154

154-
async def request(self, messages: list[Message]) -> tuple[ModelAnyResponse, result.Cost]:
155-
response = await self._messages_create(messages, False)
155+
async def request(
156+
self, messages: list[Message], model_settings: ModelSettings | None
157+
) -> tuple[ModelAnyResponse, result.Cost]:
158+
response = await self._messages_create(messages, False, model_settings)
156159
return self._process_response(response), _map_cost(response)
157160

158161
@asynccontextmanager
159-
async def request_stream(self, messages: list[Message]) -> AsyncIterator[EitherStreamedResponse]:
160-
response = await self._messages_create(messages, True)
162+
async def request_stream(
163+
self, messages: list[Message], model_settings: ModelSettings | None
164+
) -> AsyncIterator[EitherStreamedResponse]:
165+
response = await self._messages_create(messages, True, model_settings)
161166
async with response:
162167
yield await self._process_streamed_response(response)
163168

164169
@overload
165170
async def _messages_create(
166-
self, messages: list[Message], stream: Literal[True]
171+
self, messages: list[Message], stream: Literal[True], model_settings: ModelSettings | None
167172
) -> AsyncStream[RawMessageStreamEvent]:
168173
pass
169174

170175
@overload
171-
async def _messages_create(self, messages: list[Message], stream: Literal[False]) -> AnthropicMessage:
176+
async def _messages_create(
177+
self, messages: list[Message], stream: Literal[False], model_settings: ModelSettings | None
178+
) -> AnthropicMessage:
172179
pass
173180

174181
async def _messages_create(
175-
self, messages: list[Message], stream: bool
182+
self, messages: list[Message], stream: bool, model_settings: ModelSettings | None
176183
) -> AnthropicMessage | AsyncStream[RawMessageStreamEvent]:
177184
# standalone function to make it easier to override
178185
if not self.tools:
@@ -191,15 +198,19 @@ async def _messages_create(
191198
else:
192199
anthropic_messages.append(self._map_message(m))
193200

201+
model_settings = model_settings or {}
202+
194203
return await self.client.messages.create(
195-
max_tokens=1024,
204+
max_tokens=model_settings.get('max_tokens', 1024),
196205
system=system_prompt or NOT_GIVEN,
197206
messages=anthropic_messages,
198207
model=self.model_name,
199-
temperature=0.0,
200208
tools=self.tools or NOT_GIVEN,
201209
tool_choice=tool_choice or NOT_GIVEN,
202210
stream=stream,
211+
temperature=model_settings.get('temperature', NOT_GIVEN),
212+
top_p=model_settings.get('top_p', NOT_GIVEN),
213+
timeout=model_settings.get('timeout', NOT_GIVEN),
203214
)
204215

205216
@staticmethod

pydantic_ai_slim/pydantic_ai/models/function.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import re
55
from collections.abc import AsyncIterator, Awaitable, Iterable
66
from contextlib import asynccontextmanager
7-
from dataclasses import dataclass, field
7+
from dataclasses import dataclass, field, replace
88
from datetime import datetime
99
from itertools import chain
1010
from typing import Callable, Union, cast
@@ -14,6 +14,7 @@
1414

1515
from .. import _utils, result
1616
from ..messages import ArgsJson, Message, ModelAnyResponse, ModelStructuredResponse, ToolCall
17+
from ..settings import ModelSettings
1718
from ..tools import ToolDefinition
1819
from . import AgentModel, EitherStreamedResponse, Model, StreamStructuredResponse, StreamTextResponse
1920

@@ -59,7 +60,7 @@ async def agent_model(
5960
result_tools: list[ToolDefinition],
6061
) -> AgentModel:
6162
return FunctionAgentModel(
62-
self.function, self.stream_function, AgentInfo(function_tools, allow_text_result, result_tools)
63+
self.function, self.stream_function, AgentInfo(function_tools, allow_text_result, result_tools, None)
6364
)
6465

6566
def name(self) -> str:
@@ -88,6 +89,8 @@ class AgentInfo:
8889
"""Whether a plain text result is allowed."""
8990
result_tools: list[ToolDefinition]
9091
"""The tools that can called as the final result of the run."""
92+
model_settings: ModelSettings | None
93+
"""The model settings passed to the run call."""
9194

9295

9396
@dataclass
@@ -127,18 +130,24 @@ class FunctionAgentModel(AgentModel):
127130
stream_function: StreamFunctionDef | None
128131
agent_info: AgentInfo
129132

130-
async def request(self, messages: list[Message]) -> tuple[ModelAnyResponse, result.Cost]:
133+
async def request(
134+
self, messages: list[Message], model_settings: ModelSettings | None
135+
) -> tuple[ModelAnyResponse, result.Cost]:
136+
agent_info = replace(self.agent_info, model_settings=model_settings)
137+
131138
assert self.function is not None, 'FunctionModel must receive a `function` to support non-streamed requests'
132139
if inspect.iscoroutinefunction(self.function):
133-
response = await self.function(messages, self.agent_info)
140+
response = await self.function(messages, agent_info)
134141
else:
135-
response_ = await _utils.run_in_executor(self.function, messages, self.agent_info)
142+
response_ = await _utils.run_in_executor(self.function, messages, agent_info)
136143
response = cast(ModelAnyResponse, response_)
137144
# TODO is `messages` right here? Should it just be new messages?
138145
return response, _estimate_cost(chain(messages, [response]))
139146

140147
@asynccontextmanager
141-
async def request_stream(self, messages: list[Message]) -> AsyncIterator[EitherStreamedResponse]:
148+
async def request_stream(
149+
self, messages: list[Message], model_settings: ModelSettings | None
150+
) -> AsyncIterator[EitherStreamedResponse]:
142151
assert (
143152
self.stream_function is not None
144153
), 'FunctionModel must receive a `stream_function` to support streamed requests'

0 commit comments

Comments
 (0)