Skip to content

Commit d3653fa

Browse files
committed
Merge branch 'main' of github.com:pydantic/pydantic-ai into alex/extract-usage
2 parents e797f11 + cf0fa2a commit d3653fa

File tree

3 files changed

+145
-53
lines changed

3 files changed

+145
-53
lines changed

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -114,20 +114,6 @@ class FileUrl(ABC):
114114

115115
_: KW_ONLY
116116

117-
identifier: str
118-
"""The identifier of the file, such as a unique ID. generating one from the url if not explicitly set.
119-
120-
This identifier can be provided to the model in a message to allow it to refer to this file in a tool call argument,
121-
and the tool can look up the file in question by iterating over the message history and finding the matching `FileUrl`.
122-
123-
This identifier is only automatically passed to the model when the `FileUrl` is returned by a tool.
124-
If you're passing the `FileUrl` as a user message, it's up to you to include a separate text part with the identifier,
125-
e.g. "This is file <identifier>:" preceding the `FileUrl`.
126-
127-
It's also included in inline-text delimiters for providers that require inlining text documents, so the model can
128-
distinguish multiple files.
129-
"""
130-
131117
force_download: bool = False
132118
"""For OpenAI and Google APIs it:
133119
@@ -147,27 +133,48 @@ class FileUrl(ABC):
147133
compare=False, default=None
148134
)
149135

136+
_identifier: Annotated[str | None, pydantic.Field(alias='identifier', default=None, exclude=True)] = field(
137+
compare=False, default=None
138+
)
139+
150140
def __init__(
151141
self,
152142
url: str,
153143
*,
154-
force_download: bool = False,
155-
vendor_metadata: dict[str, Any] | None = None,
156144
media_type: str | None = None,
157145
identifier: str | None = None,
146+
force_download: bool = False,
147+
vendor_metadata: dict[str, Any] | None = None,
158148
) -> None:
159149
self.url = url
150+
self._media_type = media_type
151+
self._identifier = identifier
160152
self.force_download = force_download
161153
self.vendor_metadata = vendor_metadata
162-
self._media_type = media_type
163-
self.identifier = identifier or _multi_modal_content_identifier(url)
164154

165155
@pydantic.computed_field
166156
@property
167157
def media_type(self) -> str:
168158
"""Return the media type of the file, based on the URL or the provided `media_type`."""
169159
return self._media_type or self._infer_media_type()
170160

161+
@pydantic.computed_field
162+
@property
163+
def identifier(self) -> str:
164+
"""The identifier of the file, such as a unique ID.
165+
166+
This identifier can be provided to the model in a message to allow it to refer to this file in a tool call argument,
167+
and the tool can look up the file in question by iterating over the message history and finding the matching `FileUrl`.
168+
169+
This identifier is only automatically passed to the model when the `FileUrl` is returned by a tool.
170+
If you're passing the `FileUrl` as a user message, it's up to you to include a separate text part with the identifier,
171+
e.g. "This is file <identifier>:" preceding the `FileUrl`.
172+
173+
It's also included in inline-text delimiters for providers that require inlining text documents, so the model can
174+
distinguish multiple files.
175+
"""
176+
return self._identifier or _multi_modal_content_identifier(self.url)
177+
171178
@abstractmethod
172179
def _infer_media_type(self) -> str:
173180
"""Infer the media type of the file based on the URL."""
@@ -198,20 +205,21 @@ def __init__(
198205
self,
199206
url: str,
200207
*,
208+
media_type: str | None = None,
209+
identifier: str | None = None,
201210
force_download: bool = False,
202211
vendor_metadata: dict[str, Any] | None = None,
203-
media_type: str | None = None,
204212
kind: Literal['video-url'] = 'video-url',
205-
identifier: str | None = None,
206213
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
207214
_media_type: str | None = None,
215+
_identifier: str | None = None,
208216
) -> None:
209217
super().__init__(
210218
url=url,
211219
force_download=force_download,
212220
vendor_metadata=vendor_metadata,
213221
media_type=media_type or _media_type,
214-
identifier=identifier,
222+
identifier=identifier or _identifier,
215223
)
216224
self.kind = kind
217225

@@ -273,20 +281,21 @@ def __init__(
273281
self,
274282
url: str,
275283
*,
284+
media_type: str | None = None,
285+
identifier: str | None = None,
276286
force_download: bool = False,
277287
vendor_metadata: dict[str, Any] | None = None,
278-
media_type: str | None = None,
279288
kind: Literal['audio-url'] = 'audio-url',
280-
identifier: str | None = None,
281289
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
282290
_media_type: str | None = None,
291+
_identifier: str | None = None,
283292
) -> None:
284293
super().__init__(
285294
url=url,
286295
force_download=force_download,
287296
vendor_metadata=vendor_metadata,
288297
media_type=media_type or _media_type,
289-
identifier=identifier,
298+
identifier=identifier or _identifier,
290299
)
291300
self.kind = kind
292301

@@ -335,20 +344,21 @@ def __init__(
335344
self,
336345
url: str,
337346
*,
347+
media_type: str | None = None,
348+
identifier: str | None = None,
338349
force_download: bool = False,
339350
vendor_metadata: dict[str, Any] | None = None,
340-
media_type: str | None = None,
341351
kind: Literal['image-url'] = 'image-url',
342-
identifier: str | None = None,
343352
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
344353
_media_type: str | None = None,
354+
_identifier: str | None = None,
345355
) -> None:
346356
super().__init__(
347357
url=url,
348358
force_download=force_download,
349359
vendor_metadata=vendor_metadata,
350360
media_type=media_type or _media_type,
351-
identifier=identifier,
361+
identifier=identifier or _identifier,
352362
)
353363
self.kind = kind
354364

@@ -392,20 +402,21 @@ def __init__(
392402
self,
393403
url: str,
394404
*,
405+
media_type: str | None = None,
406+
identifier: str | None = None,
395407
force_download: bool = False,
396408
vendor_metadata: dict[str, Any] | None = None,
397-
media_type: str | None = None,
398409
kind: Literal['document-url'] = 'document-url',
399-
identifier: str | None = None,
400410
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
401411
_media_type: str | None = None,
412+
_identifier: str | None = None,
402413
) -> None:
403414
super().__init__(
404415
url=url,
405416
force_download=force_download,
406417
vendor_metadata=vendor_metadata,
407418
media_type=media_type or _media_type,
408-
identifier=identifier,
419+
identifier=identifier or _identifier,
409420
)
410421
self.kind = kind
411422

@@ -460,16 +471,6 @@ class BinaryContent:
460471
media_type: AudioMediaType | ImageMediaType | DocumentMediaType | str
461472
"""The media type of the binary data."""
462473

463-
identifier: str
464-
"""Identifier for the binary content, such as a unique ID.
465-
This identifier can be provided to the model in a message to allow it to refer to this file in a tool call argument,
466-
and the tool can look up the file in question by iterating over the message history and finding the matching `BinaryContent`.
467-
468-
This identifier is only automatically passed to the model when the `BinaryContent` is returned by a tool.
469-
If you're passing the `BinaryContent` as a user message, it's up to you to include a separate text part with the identifier,
470-
e.g. "This is file <identifier>:" preceding the `BinaryContent`.
471-
"""
472-
473474
vendor_metadata: dict[str, Any] | None = None
474475
"""Vendor-specific metadata for the file.
475476
@@ -478,6 +479,10 @@ class BinaryContent:
478479
- `OpenAIChatModel`, `OpenAIResponsesModel`: `BinaryContent.vendor_metadata['detail']` is used as `detail` setting for images
479480
"""
480481

482+
_identifier: Annotated[str | None, pydantic.Field(alias='identifier', default=None, exclude=True)] = field(
483+
compare=False, default=None
484+
)
485+
481486
kind: Literal['binary'] = 'binary'
482487
"""Type identifier, this is available on all parts as a discriminator."""
483488

@@ -489,10 +494,12 @@ def __init__(
489494
identifier: str | None = None,
490495
vendor_metadata: dict[str, Any] | None = None,
491496
kind: Literal['binary'] = 'binary',
497+
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
498+
_identifier: str | None = None,
492499
) -> None:
493500
self.data = data
494501
self.media_type = media_type
495-
self.identifier = identifier or _multi_modal_content_identifier(data)
502+
self._identifier = identifier or _identifier
496503
self.vendor_metadata = vendor_metadata
497504
self.kind = kind
498505

@@ -518,6 +525,23 @@ def from_data_uri(cls, data_uri: str) -> Self:
518525
media_type, data = data_uri[len(prefix) :].split(';base64,', 1)
519526
return cls(data=base64.b64decode(data), media_type=media_type)
520527

528+
@pydantic.computed_field
529+
@property
530+
def identifier(self) -> str:
531+
"""Identifier for the binary content, such as a unique ID.
532+
533+
This identifier can be provided to the model in a message to allow it to refer to this file in a tool call argument,
534+
and the tool can look up the file in question by iterating over the message history and finding the matching `BinaryContent`.
535+
536+
This identifier is only automatically passed to the model when the `BinaryContent` is returned by a tool.
537+
If you're passing the `BinaryContent` as a user message, it's up to you to include a separate text part with the identifier,
538+
e.g. "This is file <identifier>:" preceding the `BinaryContent`.
539+
540+
It's also included in inline-text delimiters for providers that require inlining text documents, so the model can
541+
distinguish multiple files.
542+
"""
543+
return self._identifier or _multi_modal_content_identifier(self.data)
544+
521545
@property
522546
def data_uri(self) -> str:
523547
"""Convert the `BinaryContent` to a data URI."""

tests/test_agent.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3545,18 +3545,17 @@ def test_tool_return_part_binary_content_serialization():
35453545

35463546
tool_return = ToolReturnPart(tool_name='test_tool', content=binary_content, tool_call_id='test_call_123')
35473547

3548-
response_str = tool_return.model_response_str()
3549-
3550-
assert '"kind":"binary"' in response_str
3551-
assert '"media_type":"image/png"' in response_str
3552-
assert '"data":"' in response_str
3553-
assert '"identifier":"14a01a"' in response_str
3554-
3555-
response_obj = tool_return.model_response_object()
3556-
assert response_obj['return_value']['kind'] == 'binary'
3557-
assert response_obj['return_value']['media_type'] == 'image/png'
3558-
assert response_obj['return_value']['identifier'] == '14a01a'
3559-
assert 'data' in response_obj['return_value']
3548+
assert tool_return.model_response_object() == snapshot(
3549+
{
3550+
'return_value': {
3551+
'data': 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGNgYGAAAAAEAAH2FzgAAAAASUVORK5CYII=',
3552+
'media_type': 'image/png',
3553+
'vendor_metadata': None,
3554+
'_identifier': None,
3555+
'kind': 'binary',
3556+
}
3557+
}
3558+
)
35603559

35613560

35623561
def test_tool_returning_binary_content_directly():

tests/test_messages.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import pytest
55
from inline_snapshot import snapshot
6+
from pydantic import TypeAdapter
67

78
from pydantic_ai import (
89
AudioUrl,
@@ -515,3 +516,71 @@ def test_model_response_convenience_methods():
515516
)
516517
]
517518
)
519+
520+
521+
def test_image_url_validation_with_optional_identifier():
522+
image_url_ta = TypeAdapter(ImageUrl)
523+
image = image_url_ta.validate_python({'url': 'https://example.com/image.jpg'})
524+
assert image.url == snapshot('https://example.com/image.jpg')
525+
assert image.identifier == snapshot('39cfc4')
526+
assert image.media_type == snapshot('image/jpeg')
527+
assert image_url_ta.dump_python(image) == snapshot(
528+
{
529+
'url': 'https://example.com/image.jpg',
530+
'force_download': False,
531+
'vendor_metadata': None,
532+
'kind': 'image-url',
533+
'media_type': 'image/jpeg',
534+
'identifier': '39cfc4',
535+
}
536+
)
537+
538+
image = image_url_ta.validate_python(
539+
{'url': 'https://example.com/image.jpg', 'identifier': 'foo', 'media_type': 'image/png'}
540+
)
541+
assert image.url == snapshot('https://example.com/image.jpg')
542+
assert image.identifier == snapshot('foo')
543+
assert image.media_type == snapshot('image/png')
544+
assert image_url_ta.dump_python(image) == snapshot(
545+
{
546+
'url': 'https://example.com/image.jpg',
547+
'force_download': False,
548+
'vendor_metadata': None,
549+
'kind': 'image-url',
550+
'media_type': 'image/png',
551+
'identifier': 'foo',
552+
}
553+
)
554+
555+
556+
def test_binary_content_validation_with_optional_identifier():
557+
binary_content_ta = TypeAdapter(BinaryContent)
558+
binary_content = binary_content_ta.validate_python({'data': b'fake', 'media_type': 'image/jpeg'})
559+
assert binary_content.data == b'fake'
560+
assert binary_content.identifier == snapshot('c053ec')
561+
assert binary_content.media_type == snapshot('image/jpeg')
562+
assert binary_content_ta.dump_python(binary_content) == snapshot(
563+
{
564+
'data': b'fake',
565+
'vendor_metadata': None,
566+
'kind': 'binary',
567+
'media_type': 'image/jpeg',
568+
'identifier': 'c053ec',
569+
}
570+
)
571+
572+
binary_content = binary_content_ta.validate_python(
573+
{'data': b'fake', 'identifier': 'foo', 'media_type': 'image/png'}
574+
)
575+
assert binary_content.data == b'fake'
576+
assert binary_content.identifier == snapshot('foo')
577+
assert binary_content.media_type == snapshot('image/png')
578+
assert binary_content_ta.dump_python(binary_content) == snapshot(
579+
{
580+
'data': b'fake',
581+
'vendor_metadata': None,
582+
'kind': 'binary',
583+
'media_type': 'image/png',
584+
'identifier': 'foo',
585+
}
586+
)

0 commit comments

Comments
 (0)