Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion pydantic_ai_slim/pydantic_ai/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,25 @@ def format(self) -> ImageFormat:
"""
return _image_format_lookup[self.media_type]

def _is_text_like_media_type(media_type: str) -> bool:
return (
media_type.startswith('text/')
or media_type == 'application/json'
or media_type.endswith('+json')
or media_type == 'application/xml'
or media_type.endswith('+xml')
or media_type in ('application/x-yaml', 'application/yaml')
)

def _inline_text_file(text: str, *, media_type: str, identifier: str):
text = '\n'.join(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can return this directly, no need for the text variable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DouweM The thing I'm actually returning is here return {"type": "text", "text":text} and not returning the direct thing. This is because google and openai models expect different formats for this function to return things. Having it return this way made things work for both openai and google models.

[
f'-----BEGIN FILE id="{identifier}" type="{media_type}"-----',
text,
f'-----END FILE id="{identifier}"-----',
]
)
return {"type": "text", "text":text}

@dataclass(init=False, repr=False)
class DocumentUrl(FileUrl):
Expand Down Expand Up @@ -464,6 +483,14 @@ def format(self) -> DocumentFormat:
return _document_format_lookup[media_type]
except KeyError as e:
raise ValueError(f'Unknown document media type: {media_type}') from e

@staticmethod
def is_text_like_media_type(media_type: str) -> bool:
    """Return whether `media_type` is text-like (text/*, JSON, XML, or YAML).

    Thin wrapper around the module-level `_is_text_like_media_type` so provider
    mappings can query this from the content class without importing a private
    helper.
    """
    return _is_text_like_media_type(media_type)

@staticmethod
def inline_text_file_part(text: str, *, media_type: str, identifier: str) -> dict[str, str]:
    """Return `text` wrapped in BEGIN/END FILE sentinels as a `{'type': 'text', 'text': ...}` part.

    Thin wrapper around the module-level `_inline_text_file`; see that helper
    for the sentinel format and the rationale for the dict return shape.
    """
    return _inline_text_file(text, media_type=media_type, identifier=identifier)

@dataclass(init=False, repr=False)
Expand Down Expand Up @@ -520,9 +547,17 @@ def narrow_type(bc: BinaryContent) -> BinaryContent | BinaryImage:
identifier=bc.identifier,
vendor_metadata=bc.vendor_metadata,
)
else:
else:
return bc

@staticmethod
def is_text_like_media_type(media_type: str) -> bool:
    """Return whether `media_type` is text-like (text/*, JSON, XML, or YAML) and can be inlined as text."""
    return _is_text_like_media_type(media_type)

@staticmethod
def inline_text_file_part(text: str, *, media_type: str, identifier: str) -> dict[str, str]:
    """Return `text` wrapped in BEGIN/END FILE sentinel lines as a `{'type': 'text', 'text': ...}` part."""
    return _inline_text_file(text, media_type=media_type, identifier=identifier)

@classmethod
def from_data_uri(cls, data_uri: str) -> BinaryContent:
"""Create a `BinaryContent` from a data URI."""
Expand Down
42 changes: 35 additions & 7 deletions pydantic_ai_slim/pydantic_ai/models/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
BinaryContent,
BuiltinToolCallPart,
BuiltinToolReturnPart,
CachePoint,
DocumentUrl,
FilePart,
FileUrl,
FinishReason,
Expand Down Expand Up @@ -602,17 +602,44 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
if isinstance(item, str):
content.append({'text': item})
elif isinstance(item, BinaryContent):
inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
part_dict: PartDict = {'inline_data': inline_data_dict}
if item.vendor_metadata:
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)
if BinaryContent.is_text_like_media_type(item.media_type):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also update the OpenAI implementation to use the new methods

part_dict = BinaryContent.inline_text_file_part(
item.data.decode('utf-8'),
media_type=item.media_type,
identifier=item.identifier,
)
content.append({'text': part_dict['text']})
else:
inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
part_dict: PartDict = {'inline_data': inline_data_dict}
if item.vendor_metadata:
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)

elif isinstance(item, DocumentUrl):
if DocumentUrl.is_text_like_media_type(item.media_type):
downloaded_text = await download_item(item, data_format='text')
part_dict = DocumentUrl.inline_text_file_part(
downloaded_text['data'],
media_type=item.media_type,
identifier=item.identifier,
)
content.append({'text': part_dict['text']})
else:
downloaded_item = await download_item(item, data_format='bytes')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should keep the original behavior of the isinstance(item, FileUrl) stuff below if is_text_like_media_type is False, but now we always download instead. I think we can add and DocumentUrl.is_text_like_media_type(item.media_type) to elif isinstance(item, DocumentUrl), so that this branch is only used in that combination, and non-text DocumentUrls keep the old behavior.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DouweM For the CI that are failing, I'm running my tests locally specifically testing test_google.py using the below command;
uv run pytest tests/models/test_google.py --record-mode=rewrite

But I see the tests fails, I get this error;

if 400 <= status_code < 500:
>       raise ClientError(status_code, response_json, response)
E       google.genai.errors.ClientError: 400 INVALID_ARGUMENT. {'error': {'code': 400, 'message': 'API key not valid. Please pass a valid API key.', 'status': 'INVALID_ARGUMENT', 'details': [{'@type': 'type.googleapis.com/google.rpc.ErrorInfo', 'reason': 'API_KEY_INVALID', 'domain': 'googleapis.com', 'metadata': {'service': 'generativelanguage.googleapis.com'}}, {'@type': 'type.googleapis.com/google.rpc.LocalizedMessage', 'locale': 'en-US', 'message': 'API key not valid. Please pass a valid API key.'}]}}

Leading to these results

Results (866.31s):
        51 failed
         9 passed
        14 skipped

Yet I have the environment variable GOOGLE_API_KEY in my .env file, but getting the above errors.

Am I doing something wrong here?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Kamal-Moha Did you source your .env?

inline_data_dict: BlobDict = {
'data': downloaded_item['data'],
'mime_type': downloaded_item['data_type'],
}
content.append({'inline_data': inline_data_dict})

elif isinstance(item, VideoUrl) and item.is_youtube:
file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
part_dict: PartDict = {'file_data': file_data_dict}
if item.vendor_metadata: # pragma: no branch
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)

elif isinstance(item, FileUrl):
if item.force_download or (
# google-gla does not support passing file urls directly, except for youtube videos
Expand All @@ -634,7 +661,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
pass
else:
assert_never(item)
return content

return content

def _map_response_schema(self, o: OutputObjectDefinition) -> dict[str, Any]:
response_schema = o.json_schema.copy()
Expand Down
7 changes: 3 additions & 4 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -923,10 +923,9 @@ async def _map_user_prompt(self, part: UserPromptPart) -> chat.ChatCompletionUse
image_url['url'] = image_content['data']
content.append(ChatCompletionContentPartImageParam(image_url=image_url, type='image_url'))
elif isinstance(item, BinaryContent):
if self._is_text_like_media_type(item.media_type):
# Inline text-like binary content as a text block
if BinaryContent.is_text_like_media_type(item.media_type):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we use the new methods, the existing ones on OpenAIChatModel are not needed anymore, please remove them

content.append(
self._inline_text_file_part(
BinaryContent.inline_text_file_part(
item.data.decode('utf-8'),
media_type=item.media_type,
identifier=item.identifier,
Expand Down Expand Up @@ -965,7 +964,7 @@ async def _map_user_prompt(self, part: UserPromptPart) -> chat.ChatCompletionUse
if self._is_text_like_media_type(item.media_type):
downloaded_text = await download_item(item, data_format='text')
content.append(
self._inline_text_file_part(
DocumentUrl.inline_text_file_part(
downloaded_text['data'],
media_type=item.media_type,
identifier=item.identifier,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
interactions:
- request:
body: ''
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
host:
- kamalscraping-collab.github.io
method: GET
uri: https://kamalscraping-collab.github.io/sample-data/sample_transcript.json
response:
body:
string: |-
{
"items": [
{
"id": "GR_ad8d2a461fc5",
"type": "message",
"role": "assistant",
"content": [
"Hello, how can I help you today?"
],
"interrupted": false
},
{
"id": "item_13ecd51e0dcc",
"type": "function_call",
"call_id": "function-call-18124021183837676163",
"arguments": "{\"location\": \"Kampala, Uganda\"}",
"name": "lookup_weather"
},
{
"id": "GI_14a70e7c2d20",
"type": "message",
"role": "user",
"content": [
"Haide, can you please tell me the weather in compiler Uganda"
],
"interrupted": false
},
{
"id": "item_000f739d4414",
"type": "function_call_output",
"name": "lookup_weather",
"call_id": "function-call-18124021183837676163",
"output": "{'weather': 'sunny', 'temperature_f': 70}",
"is_error": false
},
{
"id": "GR_95c91db6b975",
"type": "message",
"role": "assistant",
"content": [
"The weather in Kampala, Uganda is sunny with a temperature of 70 degrees Fahrenheit."
],
"interrupted": false
},
{
"id": "GI_c8cc9177073f",
"type": "message",
"role": "user",
"content": [
"what can you please tell me what are the best things to do in compiler you're"
],
"interrupted": false
},
{
"id": "GR_792c5f6fbc89",
"type": "message",
"role": "assistant",
"content": [
"While I can tell you the weather, I'm not able to provide information on the best things to do in a specific location. Is there anything else I can help you with?"
],
"interrupted": false
}
]
}
headers:
cache-control:
- max-age=604800
- public
connection:
- keep-alive
content-length:
- '2574'
content-type:
- text/plain; charset=UTF-8
etag:
- W/"61efea10-a0e"
expires:
- Fri, 26 Dec 2025 16:42:28 GMT
last-modified:
- Tue, 25 Jan 2022 12:16:16 GMT
strict-transport-security:
- max-age=15552000; includeSubDomains
transfer-encoding:
- chunked
vary:
- Accept-Encoding
status:
code: 200
message: OK
- request:
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '3701'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
parsed_body:
contents:
- parts:
- text: What is the main content on this document?
- inlineData:
data: VFhUIHRlc3QgZmlsZQpQdXJwb3NlOiBQcm92aWRlIGV4YW1wbGUgb2YgdGhpcyBmaWxlIHR5cGUKRG9jdW1lbnQgZmlsZSB0eXBlOiBUWFQKVmVyc2lvbjogMS4wClJlbWFyazoKCkV4YW1wbGUgY29udGVudDoKVGhlIG5hbWVzICJKb2huIERvZSIgZm9yIG1hbGVzLCAiSmFuZSBEb2UiIG9yICJKYW5lIFJvZSIgZm9yIGZlbWFsZXMsIG9yICJKb25uaWUgRG9lIiBhbmQgIkphbmllIERvZSIgZm9yIGNoaWxkcmVuLCBvciBqdXN0ICJEb2UiIG5vbi1nZW5kZXItc3BlY2lmaWNhbGx5IGFyZSB1c2VkIGFzIHBsYWNlaG9sZGVyIG5hbWVzIGZvciBhIHBhcnR5IHdob3NlIHRydWUgaWRlbnRpdHkgaXMgdW5rbm93biBvciBtdXN0IGJlIHdpdGhoZWxkIGluIGEgbGVnYWwgYWN0aW9uLCBjYXNlLCBvciBkaXNjdXNzaW9uLiBUaGUgbmFtZXMgYXJlIGFsc28gdXNlZCB0byByZWZlciB0byBhY29ycHNlIG9yIGhvc3BpdGFsIHBhdGllbnQgd2hvc2UgaWRlbnRpdHkgaXMgdW5rbm93bi4gVGhpcyBwcmFjdGljZSBpcyB3aWRlbHkgdXNlZCBpbiB0aGUgVW5pdGVkIFN0YXRlcyBhbmQgQ2FuYWRhLCBidXQgaXMgcmFyZWx5IHVzZWQgaW4gb3RoZXIgRW5nbGlzaC1zcGVha2luZyBjb3VudHJpZXMgaW5jbHVkaW5nIHRoZSBVbml0ZWQgS2luZ2RvbSBpdHNlbGYsIGZyb20gd2hlcmUgdGhlIHVzZSBvZiAiSm9obiBEb2UiIGluIGEgbGVnYWwgY29udGV4dCBvcmlnaW5hdGVzLiBUaGUgbmFtZXMgSm9lIEJsb2dncyBvciBKb2huIFNtaXRoIGFyZSB1c2VkIGluIHRoZSBVSyBpbnN0ZWFkLCBhcyB3ZWxsIGFzIGluIEF1c3RyYWxpYSBhbmQgTmV3IFplYWxhbmQuCgpKb2huIERvZSBpcyBzb21ldGltZXMgdXNlZCB0byByZWZlciB0byBhIHR5cGljYWwgbWFsZSBpbiBvdGhlciBjb250ZXh0cyBhcyB3ZWxsLCBpbiBhIHNpbWlsYXIgbWFubmVyIHRvIEpvaG4gUS4gUHVibGljLCBrbm93biBpbiBHcmVhdCBCcml0YWluIGFzIEpvZSBQdWJsaWMsIEpvaG4gU21pdGggb3IgSm9lIEJsb2dncy4gRm9yIGV4YW1wbGUsIHRoZSBmaXJzdCBuYW1lIGxpc3RlZCBvbiBhIGZvcm0gaXMgb2Z0ZW4gSm9obiBEb2UsIGFsb25nIHdpdGggYSBmaWN0aW9uYWwgYWRkcmVzcyBvciBvdGhlciBmaWN0aW9uYWwgaW5mb3JtYXRpb24gdG8gcHJvdmlkZSBhbiBleGFtcGxlIG9mIGhvdyB0byBmaWxsIGluIHRoZSBmb3JtLiBUaGUgbmFtZSBpcyBhbHNvIHVzZWQgZnJlcXVlbnRseSBpbiBwb3B1bGFyIGN1bHR1cmUsIGZvciBleGFtcGxlIGluIHRoZSBGcmFuayBDYXByYSBmaWxtIE1lZXQgSm9obiBEb2UuIEpvaG4gRG9lIHdhcyBhbHNvIHRoZSBuYW1lIG9mIGEgMjAwMiBBbWVyaWNhbiB0ZWxldmlzaW9uIHNlcmllcy4KClNpbWlsYXJseSwgYSBjaGlsZCBvciBiYWJ5IHdob3NlIGlkZW50aXR5IGlzIHVua25vd24gbWF5IGJlIHJlZmVycmVkIHRvIGFzIEJhYnkgRG9lLiBBIG5vdG9yaW91cyBtdXJkZXIgY2FzZSBpbiBLYW5zYXMgQ2l0eSwgTWlzc291cmksIHJlZmVycmVkIH
RvIHRoZSBiYWJ5IHZpY3RpbSBhcyBQcmVjaW91cyBEb2UuIE90aGVyIHVuaWRlbnRpZmllZCBmZW1hbGUgbXVyZGVyIHZpY3RpbXMgYXJlIENhbGkgRG9lIGFuZCBQcmluY2VzcyBEb2UuIEFkZGl0aW9uYWwgcGVyc29ucyBtYXkgYmUgY2FsbGVkIEphbWVzIERvZSwgSnVkeSBEb2UsIGV0Yy4gSG93ZXZlciwgdG8gYXZvaWQgcG9zc2libGUgY29uZnVzaW9uLCBpZiB0d28gYW5vbnltb3VzIG9yIHVua25vd24gcGFydGllcyBhcmUgY2l0ZWQgaW4gYSBzcGVjaWZpYyBjYXNlIG9yIGFjdGlvbiwgdGhlIHN1cm5hbWVzIERvZSBhbmQgUm9lIG1heSBiZSB1c2VkIHNpbXVsdGFuZW91c2x5OyBmb3IgZXhhbXBsZSwgIkpvaG4gRG9lIHYuIEphbmUgUm9lIi4gSWYgc2V2ZXJhbCBhbm9ueW1vdXMgcGFydGllcyBhcmUgcmVmZXJlbmNlZCwgdGhleSBtYXkgc2ltcGx5IGJlIGxhYmVsbGVkIEpvaG4gRG9lICMxLCBKb2huIERvZSAjMiwgZXRjLiAodGhlIFUuUy4gT3BlcmF0aW9uIERlbGVnbyBjaXRlZCAyMSAobnVtYmVyZWQpICJKb2huIERvZSJzKSBvciBsYWJlbGxlZCB3aXRoIG90aGVyIHZhcmlhbnRzIG9mIERvZSAvIFJvZSAvIFBvZSAvIGV0Yy4gT3RoZXIgZWFybHkgYWx0ZXJuYXRpdmVzIHN1Y2ggYXMgSm9obiBTdGlsZXMgYW5kIFJpY2hhcmQgTWlsZXMgYXJlIG5vdyByYXJlbHkgdXNlZCwgYW5kIE1hcnkgTWFqb3IgaGFzIGJlZW4gdXNlZCBpbiBzb21lIEFtZXJpY2FuIGZlZGVyYWwgY2FzZXMuCgoKCkZpbGUgY3JlYXRlZCBieSBodHRwczovL3d3dy5vbmxpbmUtY29udmVydC5jb20KTW9yZSBleGFtcGxlIGZpbGVzOiBodHRwczovL3d3dy5vbmxpbmUtY29udmVydC5jb20vZmlsZS10eXBlClRleHQgb2YgRXhhbXBsZSBjb250ZW50OiBXaWtpcGVkaWEgKGh0dHBzOi8vZW4ud2lraXBlZGlhLm9yZy93aWtpL0pvaG5fRG9lKQpMaWNlbnNlOiBBdHRyaWJ1dGlvbi1TaGFyZUFsaWtlIDQuMCAoaHR0cHM6Ly9jcmVhdGl2ZWNvbW1vbnMub3JnL2xpY2Vuc2VzL2J5LXNhLzQuMC8pCgpGZWVsIGZyZWUgdG8gdXNlIGFuZCBzaGFyZSB0aGUgZmlsZSBhY2NvcmRpbmcgdG8gdGhlIGxpY2Vuc2UgYWJvdmUu
mimeType: application/json
role: user
generationConfig: {}
systemInstruction:
parts:
- text: You are a helpful chatbot.
role: user
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent
response:
headers:
alt-svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
content-length:
- '985'
content-type:
- application/json; charset=UTF-8
server-timing:
- gfet4t7; dur=888
transfer-encoding:
- chunked
vary:
- Origin
- X-Origin
- Referer
parsed_body:
candidates:
- avgLogprobs: -0.5004191543116714
content:
parts:
- text: |
Based on the JSON data provided, the document contains the log of a conversation between a user and an AI assistant.
role: model
finishReason: STOP
modelVersion: gemini-2.5-pro
responseId: 9YfNaLGGDuOmqtsPoLXu4AQ
usageMetadata:
candidatesTokenCount: 66
candidatesTokensDetails:
- modality: TEXT
tokenCount: 66
promptTokenCount: 614
promptTokensDetails:
- modality: TEXT
tokenCount: 614
totalTokenCount: 680
status:
code: 200
message: OK
version: 1
12 changes: 12 additions & 0 deletions tests/models/test_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,18 @@ async def test_google_model_text_document_url_input(allow_model_requests: None,
)


async def test_google_model_json_document_url_input(allow_model_requests: None, google_provider: GoogleProvider):
    """A JSON `DocumentUrl` is downloaded and inlined as text for Gemini, which can then summarize it.

    Exercises the text-like-document path (JSON is text-like), replayed from a
    recorded VCR cassette.
    """
    m = GoogleModel('gemini-2.5-pro', provider=google_provider)
    agent = Agent(m, system_prompt='You are a helpful chatbot.')

    # NOTE(review): this URL points at a contributor-owned repo; prefer a
    # project-controlled or well-known public JSON file so the cassette can be
    # re-recorded independently of that account.
    json_document_url = DocumentUrl(url='https://kamalscraping-collab.github.io/sample-data/sample_transcript.json')

    result = await agent.run(['What is the main content of this document?', json_document_url])
    assert result.output == snapshot(
        'Based on the JSON data provided, the document contains the log of a conversation between a user and an AI assistant.\n'
    )


async def test_google_model_text_as_binary_content_input(allow_model_requests: None, google_provider: GoogleProvider):
m = GoogleModel('gemini-2.0-flash', provider=google_provider)
agent = Agent(m, system_prompt='You are a helpful chatbot.')
Expand Down
Loading