Skip to content

Commit 8d20b12

Browse files
committed
Ensure that old ModelResponses stored in a DB can still be deserialized
1 parent 8149de4 commit 8d20b12

File tree

3 files changed

+90
-6
lines changed

3 files changed

+90
-6
lines changed

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,14 +1014,19 @@ class ModelResponse:
10141014
provider_name: str | None = None
10151015
"""The name of the LLM provider that generated the response."""
10161016

1017-
provider_details: dict[str, Any] | None = field(default=None)
1017+
provider_details: Annotated[
1018+
dict[str, Any] | None,
1019+
pydantic.Field(validation_alias=pydantic.AliasChoices('provider_details', 'vendor_details')),
1020+
] = None
10181021
"""Additional provider-specific details in a serializable format.
10191022
10201023
This allows storing selected vendor-specific data that isn't mapped to standard ModelResponse fields.
10211024
For OpenAI models, this may include 'logprobs', 'finish_reason', etc.
10221025
"""
10231026

1024-
provider_response_id: str | None = None
1027+
provider_response_id: Annotated[
1028+
str | None, pydantic.Field(validation_alias=pydantic.AliasChoices('provider_response_id', 'vendor_id'))
1029+
] = None
10251030
"""request ID as specified by the model provider. This can be used to track the specific request to the model."""
10261031

10271032
def price(self) -> genai_types.PriceCalculation:

pydantic_ai_slim/pydantic_ai/usage.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import dataclasses
44
from copy import copy
55
from dataclasses import dataclass, fields
6+
from typing import Annotated
67

8+
from pydantic import AliasChoices, BeforeValidator, Field
79
from typing_extensions import deprecated, overload
810

911
from . import _utils
@@ -14,15 +16,15 @@
1416

1517
@dataclass(repr=False, kw_only=True)
1618
class UsageBase:
17-
input_tokens: int = 0
19+
input_tokens: Annotated[int, Field(validation_alias=AliasChoices('input_tokens', 'request_tokens'))] = 0
1820
"""Number of input/prompt tokens."""
1921

2022
cache_write_tokens: int = 0
2123
"""Number of tokens written to the cache."""
2224
cache_read_tokens: int = 0
2325
"""Number of tokens read from the cache."""
2426

25-
output_tokens: int = 0
27+
output_tokens: Annotated[int, Field(validation_alias=AliasChoices('output_tokens', 'response_tokens'))] = 0
2628
"""Number of output/completion tokens."""
2729

2830
input_audio_tokens: int = 0
@@ -32,7 +34,7 @@ class UsageBase:
3234
output_audio_tokens: int = 0
3335
"""Number of audio output tokens."""
3436

35-
details: dict[str, int] = dataclasses.field(default_factory=dict)
37+
details: Annotated[dict[str, int], BeforeValidator(lambda d: d or {})] = dataclasses.field(default_factory=dict)
3638
"""Any extra details returned by the model."""
3739

3840
@property

tests/test_messages.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,25 @@
11
import sys
2+
from datetime import datetime, timezone
23

34
import pytest
5+
from inline_snapshot import snapshot
6+
7+
from pydantic_ai.messages import (
8+
AudioUrl,
9+
BinaryContent,
10+
DocumentUrl,
11+
ImageUrl,
12+
ModelMessagesTypeAdapter,
13+
ModelRequest,
14+
ModelResponse,
15+
RequestUsage,
16+
TextPart,
17+
ThinkingPartDelta,
18+
UserPromptPart,
19+
VideoUrl,
20+
)
421

5-
from pydantic_ai.messages import AudioUrl, BinaryContent, DocumentUrl, ImageUrl, ThinkingPartDelta, VideoUrl
22+
from .conftest import IsNow
623

724

825
def test_image_url():
@@ -325,3 +342,63 @@ def test_thinking_part_delta_apply_to_thinking_part_delta():
325342
result = content_delta.apply(original_delta)
326343
assert isinstance(result, ThinkingPartDelta)
327344
assert result.content_delta == 'new_content'
345+
346+
347+
def test_pre_usage_refactor_messages_deserializable():
348+
# https://github.com/pydantic/pydantic-ai/pull/2378 changed the `ModelResponse` fields,
349+
# but as we tell people to store those in the DB, we want to be very careful not to break deserialization.
350+
data = [
351+
{
352+
'parts': [
353+
{
354+
'content': 'What is the capital of Mexico?',
355+
'timestamp': datetime.now(tz=timezone.utc),
356+
'part_kind': 'user-prompt',
357+
}
358+
],
359+
'instructions': None,
360+
'kind': 'request',
361+
},
362+
{
363+
'parts': [{'content': 'Mexico City.', 'part_kind': 'text'}],
364+
'usage': {
365+
'requests': 1,
366+
'request_tokens': 13,
367+
'response_tokens': 76,
368+
'total_tokens': 89,
369+
'details': None,
370+
},
371+
'model_name': 'gpt-5-2025-08-07',
372+
'timestamp': datetime.now(tz=timezone.utc),
373+
'kind': 'response',
374+
'vendor_details': {
375+
'finish_reason': 'STOP',
376+
},
377+
'vendor_id': 'chatcmpl-CBpEXeCfDAW4HRcKQwbqsRDn7u7C5',
378+
},
379+
]
380+
messages = ModelMessagesTypeAdapter.validate_python(data)
381+
assert messages == snapshot(
382+
[
383+
ModelRequest(
384+
parts=[
385+
UserPromptPart(
386+
content='What is the capital of Mexico?',
387+
timestamp=IsNow(tz=timezone.utc),
388+
)
389+
]
390+
),
391+
ModelResponse(
392+
parts=[TextPart(content='Mexico City.')],
393+
usage=RequestUsage(
394+
input_tokens=13,
395+
output_tokens=76,
396+
details={},
397+
),
398+
model_name='gpt-5-2025-08-07',
399+
timestamp=IsNow(tz=timezone.utc),
400+
provider_details={'finish_reason': 'STOP'},
401+
provider_response_id='chatcmpl-CBpEXeCfDAW4HRcKQwbqsRDn7u7C5',
402+
),
403+
]
404+
)

0 commit comments

Comments
 (0)