Skip to content

Commit 961a666

Browse files
authored
Stop redundantly encoding binary data as base64 when sending to Google genai SDK (#2962)
1 parent 2b6703b commit 961a666

9 files changed

+133
-121
lines changed

pydantic_ai_slim/pydantic_ai/models/google.py

Lines changed: 20 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -51,13 +51,15 @@
5151
try:
5252
from google.genai import Client
5353
from google.genai.types import (
54+
BlobDict,
5455
CodeExecutionResult,
5556
CodeExecutionResultDict,
5657
ContentDict,
5758
ContentUnionDict,
5859
CountTokensConfigDict,
5960
ExecutableCode,
6061
ExecutableCodeDict,
62+
FileDataDict,
6163
FinishReason as GoogleFinishReason,
6264
FunctionCallDict,
6365
FunctionCallingConfigDict,
@@ -79,6 +81,7 @@
7981
ToolDict,
8082
ToolListUnionDict,
8183
UrlContextDict,
84+
VideoMetadataDict,
8285
)
8386

8487
from ..providers.google import GoogleProvider
@@ -525,31 +528,33 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
525528
if isinstance(item, str):
526529
content.append({'text': item})
527530
elif isinstance(item, BinaryContent):
528-
# NOTE: The type from Google GenAI is incorrect, it should be `str`, not `bytes`.
529-
base64_encoded = base64.b64encode(item.data).decode('utf-8')
530-
inline_data_dict = {'inline_data': {'data': base64_encoded, 'mime_type': item.media_type}}
531+
inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
532+
part_dict: PartDict = {'inline_data': inline_data_dict}
531533
if item.vendor_metadata:
532-
inline_data_dict['video_metadata'] = item.vendor_metadata
533-
content.append(inline_data_dict) # type: ignore
534+
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
535+
content.append(part_dict)
534536
elif isinstance(item, VideoUrl) and item.is_youtube:
535-
file_data_dict = {'file_data': {'file_uri': item.url, 'mime_type': item.media_type}}
537+
file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
538+
part_dict: PartDict = {'file_data': file_data_dict}
536539
if item.vendor_metadata: # pragma: no branch
537-
file_data_dict['video_metadata'] = item.vendor_metadata
538-
content.append(file_data_dict) # type: ignore
540+
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
541+
content.append(part_dict)
539542
elif isinstance(item, FileUrl):
540543
if item.force_download or (
541544
# google-gla does not support passing file urls directly, except for youtube videos
542545
# (see above) and files uploaded to the file API (which cannot be downloaded anyway)
543546
self.system == 'google-gla'
544547
and not item.url.startswith(r'https://generativelanguage.googleapis.com/v1beta/files')
545548
):
546-
downloaded_item = await download_item(item, data_format='base64')
547-
inline_data = {'data': downloaded_item['data'], 'mime_type': downloaded_item['data_type']}
548-
content.append({'inline_data': inline_data}) # type: ignore
549+
downloaded_item = await download_item(item, data_format='bytes')
550+
inline_data: BlobDict = {
551+
'data': downloaded_item['data'],
552+
'mime_type': downloaded_item['data_type'],
553+
}
554+
content.append({'inline_data': inline_data})
549555
else:
550-
content.append(
551-
{'file_data': {'file_uri': item.url, 'mime_type': item.media_type}}
552-
) # pragma: lax no cover
556+
file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
557+
content.append({'file_data': file_data_dict}) # pragma: lax no cover
553558
else:
554559
assert_never(item)
555560
return content
@@ -827,7 +832,7 @@ def _metadata_as_usage(response: GenerateContentResponse) -> usage.RequestUsage:
827832
if not metadata_details:
828833
continue
829834
for detail in metadata_details:
830-
if not detail.modality or not detail.token_count: # pragma: no cover
835+
if not detail.modality or not detail.token_count:
831836
continue
832837
details[f'{detail.modality.lower()}_{prefix}_tokens'] = detail.token_count
833838
if detail.modality != 'AUDIO':

tests/models/cassettes/test_google/test_google_model_document_url_input.yaml

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -251,12 +251,10 @@ interactions:
251251
headers:
252252
accept-ranges:
253253
- bytes
254-
age:
255-
- '264068'
256254
alt-svc:
257255
- h3=":443"; ma=86400
258256
cache-control:
259-
- public, max-age=604800, s-maxage=604800
257+
- max-age=21600
260258
connection:
261259
- keep-alive
262260
content-length:
@@ -268,7 +266,7 @@ interactions:
268266
etag:
269267
- '"33d0-438b181451e00"'
270268
expires:
271-
- Tue, 24 Jun 2025 13:27:15 GMT
269+
- Fri, 19 Sep 2025 22:42:26 GMT
272270
last-modified:
273271
- Mon, 27 Aug 2007 17:15:36 GMT
274272
strict-transport-security:
@@ -312,11 +310,11 @@ interactions:
312310
alt-svc:
313311
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
314312
content-length:
315-
- '776'
313+
- '772'
316314
content-type:
317315
- application/json; charset=UTF-8
318316
server-timing:
319-
- gfet4t7; dur=1228
317+
- gfet4t7; dur=1306
320318
transfer-encoding:
321319
- chunked
322320
vary:
@@ -325,27 +323,27 @@ interactions:
325323
- Referer
326324
parsed_body:
327325
candidates:
328-
- avgLogprobs: -0.28572704394658405
326+
- avgLogprobs: -0.2349079738963734
329327
content:
330328
parts:
331329
- text: |
332-
The document appears to be a "Dummy PDF file".
330+
The document appears to be a dummy PDF file.
333331
role: model
334332
finishReason: STOP
335333
modelVersion: gemini-2.0-flash
336-
responseId: 4FpeaJWYOLq3nvgP0vasuQk
334+
responseId: 8ofNaO_lJsO1qtsP0OzFsQQ
337335
usageMetadata:
338-
candidatesTokenCount: 12
336+
candidatesTokenCount: 11
339337
candidatesTokensDetails:
340338
- modality: TEXT
341-
tokenCount: 12
339+
tokenCount: 11
342340
promptTokenCount: 1305
343341
promptTokensDetails:
344-
- modality: TEXT
345-
tokenCount: 15
346342
- modality: DOCUMENT
347343
tokenCount: 1290
348-
totalTokenCount: 1317
344+
- modality: TEXT
345+
tokenCount: 15
346+
totalTokenCount: 1316
349347
status:
350348
code: 200
351349
message: OK

tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ interactions:
3333
alt-svc:
3434
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
3535
content-length:
36-
- '756'
36+
- '754'
3737
content-type:
3838
- application/json; charset=UTF-8
3939
server-timing:
40-
- gfet4t7; dur=2659
40+
- gfet4t7; dur=1900
4141
transfer-encoding:
4242
- chunked
4343
vary:
@@ -46,25 +46,25 @@ interactions:
4646
- Referer
4747
parsed_body:
4848
candidates:
49-
- avgLogprobs: -0.005608726706769731
49+
- avgLogprobs: -0.00857612325085534
5050
content:
5151
parts:
5252
- text: The fruit in the image is a kiwi.
5353
role: model
5454
finishReason: STOP
5555
modelVersion: gemini-2.0-flash
56-
responseId: 2VpeaPm3DaHp1PIPwK-EmAM
56+
responseId: 4YfNaLXqOsKVmtkPqqehuAQ
5757
usageMetadata:
5858
candidatesTokenCount: 9
5959
candidatesTokensDetails:
6060
- modality: TEXT
6161
tokenCount: 9
6262
promptTokenCount: 3367
6363
promptTokensDetails:
64-
- modality: TEXT
65-
tokenCount: 13
6664
- modality: IMAGE
6765
tokenCount: 3354
66+
- modality: TEXT
67+
tokenCount: 13
6868
totalTokenCount: 3376
6969
status:
7070
code: 200

tests/models/cassettes/test_google/test_google_model_image_url_input.yaml

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ interactions:
580580
access-control-allow-origin:
581581
- '*'
582582
age:
583-
- '1386476'
583+
- '1500997'
584584
cache-control:
585585
- public, max-age=31536000
586586
connection:
@@ -632,11 +632,11 @@ interactions:
632632
alt-svc:
633633
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
634634
content-length:
635-
- '740'
635+
- '738'
636636
content-type:
637637
- application/json; charset=UTF-8
638638
server-timing:
639-
- gfet4t7; dur=1424
639+
- gfet4t7; dur=867
640640
transfer-encoding:
641641
- chunked
642642
vary:
@@ -645,27 +645,26 @@ interactions:
645645
- Referer
646646
parsed_body:
647647
candidates:
648-
- avgLogprobs: -0.1821905771891276
648+
- avgLogprobs: -0.18855420351028443
649649
content:
650650
parts:
651-
- text: |
652-
That is a potato.
651+
- text: That is a potato.
653652
role: model
654653
finishReason: STOP
655654
modelVersion: gemini-2.0-flash
656-
responseId: 3VpeaPexBLq3nvgP0vasuQk
655+
responseId: 64fNaIPHJNulqtsPx8OZsQQ
657656
usageMetadata:
658-
candidatesTokenCount: 6
657+
candidatesTokenCount: 5
659658
candidatesTokensDetails:
660659
- modality: TEXT
661-
tokenCount: 6
660+
tokenCount: 5
662661
promptTokenCount: 1817
663662
promptTokensDetails:
664663
- modality: TEXT
665664
tokenCount: 11
666665
- modality: IMAGE
667666
tokenCount: 1806
668-
totalTokenCount: 1823
667+
totalTokenCount: 1822
669668
status:
670669
code: 200
671670
message: OK

tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ interactions:
3333
alt-svc:
3434
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
3535
content-length:
36-
- '712'
36+
- '710'
3737
content-type:
3838
- application/json; charset=UTF-8
3939
server-timing:
40-
- gfet4t7; dur=459
40+
- gfet4t7; dur=571
4141
transfer-encoding:
4242
- chunked
4343
vary:
@@ -46,15 +46,15 @@ interactions:
4646
- Referer
4747
parsed_body:
4848
candidates:
49-
- avgLogprobs: -0.014047189553578695
49+
- avgLogprobs: -0.2041482448577881
5050
content:
5151
parts:
5252
- text: |
5353
The main content of the document is that it is a test document.
5454
role: model
5555
finishReason: STOP
5656
modelVersion: gemini-2.0-flash
57-
responseId: 41peaPz5EtOvnvgPgYfPiQY
57+
responseId: 9ofNaNqNKNWDmtkPs-nsqAU
5858
usageMetadata:
5959
candidatesTokenCount: 15
6060
candidatesTokensDetails:

tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,11 @@ interactions:
4949
etag:
5050
- W/"61efea10-a0e"
5151
expires:
52-
- Fri, 04 Jul 2025 08:48:34 GMT
52+
- Fri, 26 Sep 2025 16:42:28 GMT
5353
last-modified:
5454
- Tue, 25 Jan 2022 12:16:16 GMT
55+
strict-transport-security:
56+
- max-age=15552000; includeSubDomains
5557
transfer-encoding:
5658
- chunked
5759
vary:
@@ -93,11 +95,11 @@ interactions:
9395
alt-svc:
9496
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
9597
content-length:
96-
- '1189'
98+
- '985'
9799
content-type:
98100
- application/json; charset=UTF-8
99101
server-timing:
100-
- gfet4t7; dur=802
102+
- gfet4t7; dur=888
101103
transfer-encoding:
102104
- chunked
103105
vary:
@@ -106,25 +108,25 @@ interactions:
106108
- Referer
107109
parsed_body:
108110
candidates:
109-
- avgLogprobs: -0.6026712397939151
111+
- avgLogprobs: -0.5004191543116714
110112
content:
111113
parts:
112114
- text: |
113-
The main content of the document is an example of a TXT file, specifically providing information about the placeholder names "John Doe" (and related variations) used for unidentified or anonymous individuals, particularly in legal contexts in the United States and Canada. It also explains alternative names used in other countries and some additional context and examples of when "John Doe" might be used. The document also includes attribution to Wikipedia for the example content and a link to the license under which it is shared.
115+
The main content of the TXT file is an explanation of the placeholder name "John Doe" (and related variations) and its usage in legal contexts, popular culture, and other situations where the identity of a person is unknown or needs to be withheld. The document also includes the purpose of the file and other file type information.
114116
role: model
115117
finishReason: STOP
116118
modelVersion: gemini-2.0-flash
117-
responseId: 4lpeaLX9EYzj1PIP0MPrsAg
119+
responseId: 9YfNaLGGDuOmqtsPoLXu4AQ
118120
usageMetadata:
119-
candidatesTokenCount: 97
121+
candidatesTokenCount: 66
120122
candidatesTokensDetails:
121123
- modality: TEXT
122-
tokenCount: 97
124+
tokenCount: 66
123125
promptTokenCount: 614
124126
promptTokensDetails:
125127
- modality: TEXT
126128
tokenCount: 614
127-
totalTokenCount: 711
129+
totalTokenCount: 680
128130
status:
129131
code: 200
130132
message: OK

0 commit comments

Comments
 (0)