diff --git a/pydantic_ai_slim/pydantic_ai/direct.py b/pydantic_ai_slim/pydantic_ai/direct.py
index 6735c928e..15af2b14f 100644
--- a/pydantic_ai_slim/pydantic_ai/direct.py
+++ b/pydantic_ai_slim/pydantic_ai/direct.py
@@ -21,6 +21,7 @@
 from . import agent, messages, models, settings
 from .models import StreamedResponse, instrumented as instrumented_models
+from functools import lru_cache
 
 __all__ = (
     'model_request',
@@ -188,11 +189,24 @@ async def main():
     Returns:
         A [stream response][pydantic_ai.models.StreamedResponse] async context manager.
     """
-    model_instance = _prepare_model(model, instrument)
+    # Use the memoized _prepare_model when the arguments are hashable;
+    # fall back to the original function when they are not.
+    try:
+        model_instance = _cached_prepare_model(model, instrument)
+    except TypeError:
+        # Unhashable argument (e.g. a Model instance): call the uncached function.
+        model_instance = _prepare_model(model, instrument)
+
+    # Only instantiate a default ModelRequestParameters when none was passed.
+    mrp = model_request_parameters if model_request_parameters is not None else models.ModelRequestParameters()
+    # Look up and apply customize_request_parameters once, before opening the stream.
+    customize_fn = model_instance.customize_request_parameters
+    customized_params = customize_fn(mrp)
+
     return model_instance.request_stream(
         messages,
         model_settings,
-        model_instance.customize_request_parameters(model_request_parameters or models.ModelRequestParameters()),
+        customized_params,
     )
@@ -264,6 +278,11 @@
     return instrumented_models.instrument_model(model_instance, instrument)
 
 
+# Simple cache for _prepare_model to avoid repeating expensive model preparation.
+@lru_cache(maxsize=64)
+def _cached_prepare_model(model, instrument):
+    return _prepare_model(model, instrument)
+
 @dataclass
 class StreamedResponseSync:
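
Note on the pattern (not part of the patch): functools.lru_cache hashes its
arguments up front, so calling the cached wrapper with an unhashable argument
raises TypeError before the wrapped function ever runs; the try/except in the
hunk above exploits that to fall back to the uncached path. A minimal
standalone sketch of the idiom, with expensive_prepare and prepare as
hypothetical stand-ins for _prepare_model and its call site:

    from functools import lru_cache

    def expensive_prepare(arg):
        # Stand-in for _prepare_model: pretend this is costly to run.
        return f'prepared:{arg!r}'

    @lru_cache(maxsize=64)
    def _cached_expensive_prepare(arg):
        return expensive_prepare(arg)

    def prepare(arg):
        try:
            # TypeError is raised while hashing an unhashable argument,
            # before expensive_prepare runs.
            return _cached_expensive_prepare(arg)
        except TypeError:
            # Unhashable argument: skip the cache and compute directly.
            return expensive_prepare(arg)

    prepare('gpt-4o')        # hashable: memoized on repeat calls
    prepare(['unhashable'])  # a list is unhashable: uncached fallback

Two caveats of the idiom, assuming _prepare_model behaves as sketched: a
TypeError raised inside the wrapped function itself is indistinguishable from
a hashing failure, so such a call runs twice before the error propagates; and
the cache returns the same prepared (and instrumented) model object to every
caller with equal arguments, which is safe only if _prepare_model's result
does not depend on mutable global state (worth double-checking for the
instrument=None path).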