diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index c755885223..5a5b7f45d5 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -1014,14 +1014,22 @@ class ModelResponse: provider_name: str | None = None """The name of the LLM provider that generated the response.""" - provider_details: dict[str, Any] | None = field(default=None) + provider_details: Annotated[ + dict[str, Any] | None, + # `vendor_details` is deprecated, but we still want to support deserializing model responses stored in a DB before the name was changed + pydantic.Field(validation_alias=pydantic.AliasChoices('provider_details', 'vendor_details')), + ] = None """Additional provider-specific details in a serializable format. This allows storing selected vendor-specific data that isn't mapped to standard ModelResponse fields. For OpenAI models, this may include 'logprobs', 'finish_reason', etc. """ - provider_response_id: str | None = None + provider_response_id: Annotated[ + str | None, + # `vendor_id` is deprecated, but we still want to support deserializing model responses stored in a DB before the name was changed + pydantic.Field(validation_alias=pydantic.AliasChoices('provider_response_id', 'vendor_id')), + ] = None """request ID as specified by the model provider. This can be used to track the specific request to the model.""" def price(self) -> genai_types.PriceCalculation: diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py index d9bd774e5f..d52e239781 100644 --- a/pydantic_ai_slim/pydantic_ai/usage.py +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -3,7 +3,9 @@ import dataclasses from copy import copy from dataclasses import dataclass, fields +from typing import Annotated +from pydantic import AliasChoices, BeforeValidator, Field from typing_extensions import deprecated, overload from . 
import _utils @@ -14,7 +16,11 @@ @dataclass(repr=False, kw_only=True) class UsageBase: - input_tokens: int = 0 + input_tokens: Annotated[ + int, + # `request_tokens` is deprecated, but we still want to support deserializing model responses stored in a DB before the name was changed + Field(validation_alias=AliasChoices('input_tokens', 'request_tokens')), + ] = 0 """Number of input/prompt tokens.""" cache_write_tokens: int = 0 @@ -22,7 +28,11 @@ class UsageBase: cache_read_tokens: int = 0 """Number of tokens read from the cache.""" - output_tokens: int = 0 + output_tokens: Annotated[ + int, + # `response_tokens` is deprecated, but we still want to support deserializing model responses stored in a DB before the name was changed + Field(validation_alias=AliasChoices('output_tokens', 'response_tokens')), + ] = 0 """Number of output/completion tokens.""" input_audio_tokens: int = 0 @@ -32,7 +42,11 @@ class UsageBase: output_audio_tokens: int = 0 """Number of audio output tokens.""" - details: dict[str, int] = dataclasses.field(default_factory=dict) + details: Annotated[ + dict[str, int], + # `details` cannot be `None` any longer, but we still want to support deserializing model responses stored in a DB before this was changed + BeforeValidator(lambda d: d or {}), + ] = dataclasses.field(default_factory=dict) """Any extra details returned by the model.""" @property diff --git a/tests/test_messages.py b/tests/test_messages.py index ec4d1231cf..e46897a42b 100644 --- a/tests/test_messages.py +++ b/tests/test_messages.py @@ -1,8 +1,25 @@ import sys +from datetime import datetime, timezone import pytest +from inline_snapshot import snapshot + +from pydantic_ai.messages import ( + AudioUrl, + BinaryContent, + DocumentUrl, + ImageUrl, + ModelMessagesTypeAdapter, + ModelRequest, + ModelResponse, + RequestUsage, + TextPart, + ThinkingPartDelta, + UserPromptPart, + VideoUrl, +) -from pydantic_ai.messages import AudioUrl, BinaryContent, DocumentUrl, ImageUrl, ThinkingPartDelta, 
VideoUrl +from .conftest import IsNow def test_image_url(): @@ -325,3 +342,63 @@ def test_thinking_part_delta_apply_to_thinking_part_delta(): result = content_delta.apply(original_delta) assert isinstance(result, ThinkingPartDelta) assert result.content_delta == 'new_content' + + +def test_pre_usage_refactor_messages_deserializable(): + # https://github.com/pydantic/pydantic-ai/pull/2378 changed the `ModelResponse` fields, + # but as we tell people to store those in the DB, we want to be very careful not to break deserialization. + data = [ + { + 'parts': [ + { + 'content': 'What is the capital of Mexico?', + 'timestamp': datetime.now(tz=timezone.utc), + 'part_kind': 'user-prompt', + } + ], + 'instructions': None, + 'kind': 'request', + }, + { + 'parts': [{'content': 'Mexico City.', 'part_kind': 'text'}], + 'usage': { + 'requests': 1, + 'request_tokens': 13, + 'response_tokens': 76, + 'total_tokens': 89, + 'details': None, + }, + 'model_name': 'gpt-5-2025-08-07', + 'timestamp': datetime.now(tz=timezone.utc), + 'kind': 'response', + 'vendor_details': { + 'finish_reason': 'STOP', + }, + 'vendor_id': 'chatcmpl-CBpEXeCfDAW4HRcKQwbqsRDn7u7C5', + }, + ] + messages = ModelMessagesTypeAdapter.validate_python(data) + assert messages == snapshot( + [ + ModelRequest( + parts=[ + UserPromptPart( + content='What is the capital of Mexico?', + timestamp=IsNow(tz=timezone.utc), + ) + ] + ), + ModelResponse( + parts=[TextPart(content='Mexico City.')], + usage=RequestUsage( + input_tokens=13, + output_tokens=76, + details={}, + ), + model_name='gpt-5-2025-08-07', + timestamp=IsNow(tz=timezone.utc), + provider_details={'finish_reason': 'STOP'}, + provider_response_id='chatcmpl-CBpEXeCfDAW4HRcKQwbqsRDn7u7C5', + ), + ] + )