From 2cb4086a37b3de0af6a087fcb25a815ef1a33908 Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Tue, 19 Aug 2025 09:02:32 -0300 Subject: [PATCH 01/11] Add UploadedFile UserContent This wraps an opaque reference to a provider-specific representation of an uploaded file. --- pydantic_ai_slim/pydantic_ai/messages.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 28447187ef..5d09c63c0d 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -423,7 +423,18 @@ def format(self) -> str: __repr__ = _utils.dataclasses_no_defaults_repr -UserContent: TypeAlias = 'str | ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent' +@dataclass(repr=False) +class UploadedFile: + """File uploaded to the LLM provider.""" + + file: Any + """A provider-specific file object, e.g. a file ID or a file URL.""" + + kind: Literal['uploaded-file'] = 'uploaded-file' + """Type identifier, this is available on all parts as a discriminator.""" + + +UserContent: TypeAlias = 'str | ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent | UploadedFile' @dataclass(repr=False) From 6abc260d8f63ef03c791d1f3ad8e67dc47f24ff3 Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Tue, 19 Aug 2025 09:15:34 -0300 Subject: [PATCH 02/11] Implement support for UploadedFile in OpenAIModel --- pydantic_ai_slim/pydantic_ai/models/openai.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 7f5d2b5956..3602813faa 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -8,6 +8,7 @@ from datetime import datetime from typing import Any, Literal, Union, cast, overload +from openai.types import FileObject from pydantic import ValidationError from typing_extensions import assert_never, deprecated @@ -36,6 +37,7 @@ ThinkingPart, ToolCallPart, ToolReturnPart, + UploadedFile, UserPromptPart, VideoUrl, ) @@ -697,6 +699,16 @@ async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessa content.append(file) elif isinstance(item, VideoUrl): # pragma: no cover raise NotImplementedError('VideoUrl is not supported for OpenAI') + elif isinstance(item, UploadedFile): + if not isinstance(item.file, FileObject): + raise UserError('UploadedFile.file_object must be an OpenAI FileObject') + file = File( + file=FileFile( + file_id=item.file.id, + ), + type='file', + ) + content.append(file) else: assert_never(item) return chat.ChatCompletionUserMessageParam(role='user', content=content) From 2c3c6a008f2ff5d99fee13d630b87e5bbf82e25f Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Tue, 19 Aug 2025 09:45:13 -0300 Subject: [PATCH 03/11] Support UploadedFile for OpenAI models --- pydantic_ai_slim/pydantic_ai/models/openai.py | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 3602813faa..ab09e31176 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -8,6 +8,7 @@ from datetime import datetime from typing import Any, Literal, Union, cast, overload +from httpx import URL from openai.types import FileObject from pydantic import ValidationError from typing_extensions import assert_never, deprecated @@ -625,7 +626,7 @@ async def _map_user_message(self, message: ModelRequest) -> AsyncIterable[chat.C else: yield chat.ChatCompletionSystemMessageParam(role='system', content=part.content) elif isinstance(part, UserPromptPart): - yield await self._map_user_prompt(part) + yield await self._map_user_prompt(part, self._provider) elif isinstance(part, ToolReturnPart): yield chat.ChatCompletionToolMessageParam( role='tool', @@ -647,7 +648,7 @@ async def _map_user_message(self, message: ModelRequest) -> AsyncIterable[chat.C assert_never(part) @staticmethod - async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessageParam: + async def _map_user_prompt(part: UserPromptPart, provider: Provider[Any]) -> chat.ChatCompletionUserMessageParam: content: str | list[ChatCompletionContentPartParam] if isinstance(part.content, str): content = part.content @@ -700,15 +701,8 @@ async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessa elif isinstance(item, VideoUrl): # pragma: no cover raise NotImplementedError('VideoUrl is not supported for OpenAI') elif isinstance(item, UploadedFile): - if not isinstance(item.file, FileObject): - raise UserError('UploadedFile.file_object must be an OpenAI FileObject') - file = File( - file=FileFile( - file_id=item.file.id, - ), - type='file', - ) - content.append(file) + file = _map_uploaded_file(item, provider) + content.append(File(file=FileFile(file_id=file.id), type='file')) else: assert_never(item) return chat.ChatCompletionUserMessageParam(role='user', content=content) @@ -996,7 +990,7 @@ async def _map_messages( if isinstance(part, SystemPromptPart): openai_messages.append(responses.EasyInputMessageParam(role='system', content=part.content)) elif isinstance(part, UserPromptPart): - openai_messages.append(await self._map_user_prompt(part)) + openai_messages.append(await self._map_user_prompt(part, self._provider)) elif isinstance(part, ToolReturnPart): openai_messages.append( FunctionCallOutput( @@ -1078,7 +1072,7 @@ def _map_json_schema(self, o: OutputObjectDefinition) -> responses.ResponseForma return response_format_param @staticmethod - async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessageParam: + async def _map_user_prompt(part: UserPromptPart, provider: Provider[Any]) -> responses.EasyInputMessageParam: content: str | list[responses.ResponseInputContentParam] if isinstance(part.content, str): content = part.content @@ -1136,6 +1130,9 @@ async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessagePa ) elif isinstance(item, VideoUrl): # pragma: no cover raise NotImplementedError('VideoUrl is not supported for OpenAI.') + elif isinstance(item, UploadedFile): + file = _map_uploaded_file(item, provider) + content.append(responses.ResponseInputFileParam(file_id=file.id, type='input_file')) else: assert_never(item) return responses.EasyInputMessageParam(role='user', content=content) @@ -1370,3 +1367,19 @@ def _map_usage(response: chat.ChatCompletion | ChatCompletionChunk | responses.R u.input_audio_tokens = response_usage.prompt_tokens_details.audio_tokens or 0 u.cache_read_tokens = response_usage.prompt_tokens_details.cached_tokens or 0 return u + + +def _map_openai_uploaded_file(item: UploadedFile) -> FileObject: + if not isinstance(item.file, FileObject): + raise UserError('UploadedFile.file must be an openai.types.FileObject') + return item.file + + +def _map_uploaded_file(uploaded_file: UploadedFile, provider: Provider[Any]) -> FileObject: + """Map an UploadedFile to a File object.""" + url = URL(provider.base_url) + + if url.host == 'api.openai.com': + return _map_openai_uploaded_file(uploaded_file) + else: + raise UserError(f'UploadedFile is not supported for `{provider.name}` with base_url {provider.base_url}.') From e05ea0c5be08c38ff8984fa192e22db97f92fc68 Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Tue, 19 Aug 2025 11:12:57 -0300 Subject: [PATCH 04/11] Add test for OpenAI UploadedFile --- tests/assets/smiley.pdf | Bin 0 -> 5930 bytes .../test_openai/test_uploaded_file_input.yaml | 85 ++++++++++++++++++ tests/models/test_openai.py | 35 +++++++- 3 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 tests/assets/smiley.pdf create mode 100644 tests/models/cassettes/test_openai/test_uploaded_file_input.yaml diff --git a/tests/assets/smiley.pdf b/tests/assets/smiley.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ab8cd0df8177fecdcc908754e2d972ad2cfcef3a GIT binary patch literal 5930 zcmZ`-2{@GN-|i&Q*eY8IjgW(w8DlVnLbfc~Bil3vV;jcUcOxzKEZIfK*u!K=gk)b1 zV-1mg8_JfYd^7z|=R5y%zVErNx#oTE=l=bk-|v3j>s@ZaJ1VMT5OE1000IO9G4~w+ z*RKOWhG?7<3i!LbGujE|BMbm3V^Ihk8snmZz)@foSqZQN3@ihILm*%&2{47Ajj{QE z=k8!JHXcY67Wlgw7U61-M!IVw?9fO@AVeH0E-3+|B!h~9;bIa{pdLgDXph6W%7Q>% zUS8t=%@@aD?Lfi+1qA@g#fDm`#NR9SaYX?^$_O0731bHU-9gx)+<{OELH}<7)bqbu z{2B0*2LRnh+qeVo0VSz?parF0jE4&j2>Dwmb?V=t)P=Ap3iUrLf%H-C7!NFxvNTo2 zySn!sP)Ho*7zomE1_Nb&rl-^oqDX=?K;eMWe<9i^8#F=*;|;tAreG4_pFe6PSd8oc z1!G)rly%*KFlvUs)k!`7Ri_rp#SUi=l=?++$5FP?8Q=z>rvJ4J^&jaB_*tL|2B1e3 z_fO)VGE?KJDWQ;G!c)tpBKLqG%H;zD8KS&#)D|hCD^ukAGmhHsuTl(us{N-R0FZ&l zecVsvXAA(OgK(zo9z`PnNEPjbqg*SL)T&ODt4#%k#Mq#ydt!pNp)^A&9RN~Cq3!H( z|2bC1IAO2`t_UOw2%+|(4*)5laqf3eSY?c}E5?PoJ2C*!pIX3wGALW{XN%tB>lPMH zY$NPah3ez^kH@RBH4+x+5+pSsulL7B)1K)Yi5QB^hdg`AsZ^v{B63STQJ+oNAx?y- zJ;ynoe==7}Q_tHzHR&On(Sy)9F&l8isSA z`SX5i_Hv^&r!$GT_HIT;oq3<2-Hv-0X5d-6YesX4zy9&T#|)^b1--T^{?4g#?WvO= zQxa=TP6abLg^Bk+8ZXe+zQ0=teGR>q1nA0$OHW?Xq*wn9oVL_F$)~K2&gQIoA>}$4 z_j^=$>e!CBd${pqH?C_&t)t$YxuH%7+qnLO@loxY_kG(HUcrCob#V`O*J}|t8Eny^ ztfPJ@xRf;w&XUCa_)?28!rDZ$#j^t%mzERh|uB?3NU?=v7i$l6NV}Mk`sq4}hc$<#5qdTNFgVJhSl=xNf2;h3xQHjb3eXukBUc{@RDl6$KUK>zYEj zgj!X3vEtzmEor1+Y{z=u^R;|FX-;=R0gq@dp+~$( z#!B@ml2L0eZi$w%t>10%i58!^lDo^zZYvMfX_Irk3KBmBFQfLHmz;%89-n74)@*+A zrXe?DQU^v_zD9^E4oI(jZfZ?v@tZG>0>KIa(h9yQw;clX;PXK(YxKvFj;3&STE1Wgk z6eX+;XcUsUE1dQWUePIXsABrqIWCDgZti>Vm@T@IuRg2YdOiVeH)OaL-=%3CROJ`` zbrL`H$`jj~5^uZQFKZ3-G}rJjuICzMyjFIB*(q{gQ0893LzRJn2Mlc{YfF|DEif7h(>$+b z6*!kcB#d&aoty3mR@L9}Grwipn>U#m=X$r=!CU7=-SlhzZf@d0-nr}K-`?d-zMAe_ zxwjs&+E!;3I%CDrP{x8~%d_D3o;Yka<7v5F?%(6yV>$VofYNZC#T~xE=5LY?ryBh} zBz+~ST7|aKWI|lky+mWi-SfaI?lxd**(A4eBHGyXXt|YAtqDlkpG#hay+%s zHq>&{A#ovcsTXX>0pk)ff5@$e9-*Jw3(gL?>ZI>c@I%t?ns~ukc+Nm+T!MGmn^2NP zL$;-(&cI4uW(oxrkLgUWYsy9NtG`;z` zO6y@cbBpQu1$7c1xil6_RZDo-M|6E%clh4o_4SxCeWHR-v6Pl(MTUtILSDn)&@z2` z`_w0D&QSTi=Y+oY8??n(&xG>`BbUa5yLyh3>v1luPffb>U!^y5Ek}oo5}t;ny@Z&! z&O6s~5DS~cy;J3}EkO%D<~{~5-f0M!Yb>6TC`}GeI-9~FEsMU$BZQ!^wEHI#3)$Q8%1Fa=cCj}a`A_WJwQ&{HTV;32h zKJ_{=w+%H0^agKTP41bw(3b{Oy!>#CJNk=1_t&lZWTqMKzOB?33PG`BXQTIyn&33O zw^33dNJUWMuK(iav59E1VA;{Pb4Twt<`(i$N{;UX8@9M7eYOvee=rdS7g(&!Na?@v zR)v$F!mKZAd+APJKY-^APhK4Uk~{6Q?fkiJC$X*lZE;pMu?)5yX}0Oj4_h~~dN4Jv zS5&5r7l-w}J!skBh?g@HQvT$S-|t??d3{Pf%HGKQ41$MDO7~V{(|fVA{jo79bz;)@ zLh5iiO>sii3H?+a^#Z^B4~s@J4QlSr%G2X*SWY<%Ga{S`n}I zR51sgSmTH?a=yG=C8L-uX4Jg3K znumW*%uIH$EU%nuT2ZO8jqzYMoeFp`o>L46lAR)w<(hoN?x?K>%nJ3CaNwO{!I4+` z_Y9)u6BOc%EDlet0*lH!@QZ_H%3R%Pfpu{2DZ}i>@B5}$dIBRssgp$;*5^K>%~A?T z+YavJsc>Oj?L+xw5=8f<1NUvzM~e4{IfZmmBwTNvZ)erVE)~E*i}96pkYXkicg@K+ z0qM_g4*Lb2IV9nye32R{xMO}Y^TlQc#`SH6Gm0PXwekA2KYdd1hzrlgT=ig&6iO?0 z>=R>K_~=laUPoYpZz<18-3JD4&@ZxA9^PN)g)slIc2bmiUg`8Zc3`73aI`|eIwkXW;8|G?L!mOcMX^45Gz zyb@<=dgiI)bi zlH9!789O>|Jdl1~9m=B+^=h<6$KJYXH3t13XZyq&9cW7yp*?z5wB1l6~!pn?)0GhSPGYrr?~HBT$_lj%K3O?PyUd zW?zen{h094&oaArXNl{2dE@#kwHLQ~ed+aYN?{KRR=YNlJSCN^VM$MWD;QL{1*f$Z zj`w@rNitv0&OvOnxfLY%5JqXaSDna@e0vjy;=ihxB(IBUu-ZKD+4sw=aWAq7g7u4t z=~d8Gi)eV`0Wu{qTgT&DM+SVFy0d~4=k|!_yAp?*HY1J#FHUGOWfcmnU0k(JV-!PX z_WQ(Jc4RHDejAJ7MRS8g!rn|<=C3c`;e0j&`xecm0LwP219nXyRTwuI%9Z_|HB`-H zkIsZSe(ua~LR&h%Uy(2QT4_~xgDKq_o6p>dbP9TtqT0}df6MbRs8S}YNRjoy#%Dkc zbFDb>vLJ!(%_U=Pp4@OPd;6&Mr`aC&YiIiFdQAG}502lim0{$!#*^Bl{5QIIy2yts z$_yK(OG*6GLV;~on_fqpSxrJ`4Cdo*Qlc0OUs;f>AKy@-M=wUd{vO^vba$NP?ifnB z!IJf&na-GO-OWJRZxcpWl6v7D`%B?ryC5NEF~{+hj*kA%TWQ;PO+rYB)r#EYhqLRSc36!it`eN0)a5(G0fixZQ;3UAj=uV`wa7Vc|j^rzM?(Ufd=pB-#(s8r-MgmMyQ zVy{;neC*Lm$b$E*%$c23xv4mwzN738FDSC}10WWCgM*W^1{MS16sDoZ$ttua~zKM2+o9!Tpe zcnRG%6{-rmGjf)R5T(mg1Y4;d`JrE}#mYdSyx@Cny8^DT<&@41O-d4q^wmRMh8&6e zck|1l`uZ7940%7R-`Mbsv*cPKoPU@k|AGVj$x>@Bq$?;{c2-fV>1nu0pu<&_xZpn z0zv73t7*TU_Q~O5s)YQI%$YX=UbKd*H4>a&wQeK=JF!ZkF{6Ch)Rz7nfppd6_0}uT z?xzC^{x<{d`FMKrudCgYQHXgqu`o5&@Z|@njqUVqZ0)XS-+c4c9 zJ{PY-`HaVrY{f3KC&}@{3xSvSTL1P1ZxHX=Ngf*<+Okl%u!`~GhlCwGo^NtRnP2w$ zQUWcDiwPCiHiDoO9*^au6)8?!Y>Htgqlt8~=}n2f4{K!?5)*y68zMxfWgCd|zxme; z_N4T2P>cq{2r#Rc`1dzr*-!h53W8Kno@gXWUrp(+slS=C|4s(^WrWH|O8$!Q#-eNiU?2nv0RJ_B zQZN`42DAnKg;AHJ4xr0l7(|BR+x-KRfKnX4e`0WoFZd5kQi@`e{sV(a{yP>XMX@RW z!IJ`0Jk|AA3l Date: Tue, 19 Aug 2025 14:27:19 -0300 Subject: [PATCH 05/11] Support UploadedFile for google genai models --- pydantic_ai_slim/pydantic_ai/models/google.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py index 6d21a88678..b1bd80d931 100644 --- a/pydantic_ai_slim/pydantic_ai/models/google.py +++ b/pydantic_ai_slim/pydantic_ai/models/google.py @@ -31,6 +31,7 @@ ThinkingPart, ToolCallPart, ToolReturnPart, + UploadedFile, UserPromptPart, VideoUrl, ) @@ -54,6 +55,7 @@ ContentUnionDict, CountTokensConfigDict, ExecutableCodeDict, + File, FunctionCallDict, FunctionCallingConfigDict, FunctionCallingConfigMode, @@ -425,7 +427,7 @@ async def _map_messages(self, messages: list[ModelMessage]) -> tuple[ContentDict if isinstance(part, SystemPromptPart): system_parts.append({'text': part.content}) elif isinstance(part, UserPromptPart): - message_parts.extend(await self._map_user_prompt(part)) + message_parts.extend(await self._map_user_prompt(part, contents)) elif isinstance(part, ToolReturnPart): message_parts.append( { @@ -465,7 +467,7 @@ async def _map_messages(self, messages: list[ModelMessage]) -> tuple[ContentDict system_instruction = ContentDict(role='user', parts=system_parts) if system_parts else None return system_instruction, contents - async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]: + async def _map_user_prompt(self, part: UserPromptPart, contents: list[ContentUnionDict]) -> list[PartDict]: if isinstance(part.content, str): return [{'text': part.content}] else: @@ -499,6 +501,12 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]: content.append( {'file_data': {'file_uri': item.url, 'mime_type': item.media_type}} ) # pragma: lax no cover + elif isinstance(item, UploadedFile): + if not isinstance(item.file, File): + raise UserError('UploadedFile.file must be a genai.types.File object') + # genai.types.File is its own ContentUnionDict and not a + # PartDict, so append to the contents directly. + contents.append(item.file) else: assert_never(item) return content From ffa6a57ddc28f202ba3f6151670b1613290f9316 Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Tue, 19 Aug 2025 14:28:13 -0300 Subject: [PATCH 06/11] Add test for Google UploadedFile --- .../test_google/test_uploaded_file_input.yaml | 70 +++++++++++++++++++ tests/models/test_google.py | 27 +++++++ 2 files changed, 97 insertions(+) create mode 100644 tests/models/cassettes/test_google/test_uploaded_file_input.yaml diff --git a/tests/models/cassettes/test_google/test_uploaded_file_input.yaml b/tests/models/cassettes/test_google/test_uploaded_file_input.yaml new file mode 100644 index 0000000000..c8b2896380 --- /dev/null +++ b/tests/models/cassettes/test_google/test_uploaded_file_input.yaml @@ -0,0 +1,70 @@ +interactions: +- request: + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '280' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + method: POST + parsed_body: + contents: + - parts: + - fileData: + fileUri: https://generativelanguage.googleapis.com/v1beta/files/6myu0b1v3mxl + mimeType: application/pdf + role: user + - parts: + - text: Give me a short description of this image + role: user + generationConfig: {} + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + headers: + alt-svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + content-length: + - '881' + content-type: + - application/json; charset=UTF-8 + server-timing: + - gfet4t7; dur=5652 + transfer-encoding: + - chunked + vary: + - Origin + - X-Origin + - Referer + parsed_body: + candidates: + - content: + parts: + - text: The image displays a classic smiley face. It features a bright yellow circular face with two simple black + dot eyes and an upward-curved black line forming a smile. The yellow circle has a subtle darker yellow outline + and is set against a plain white background. + role: model + finishReason: STOP + index: 0 + modelVersion: gemini-2.5-flash + responseId: T7OkaOv-JOemmtkP5IXU2QI + usageMetadata: + candidatesTokenCount: 51 + promptTokenCount: 268 + promptTokensDetails: + - modality: TEXT + tokenCount: 10 + - modality: DOCUMENT + tokenCount: 258 + thoughtsTokenCount: 678 + totalTokenCount: 997 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/test_google.py b/tests/models/test_google.py index d94703e699..5d999016e1 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -36,6 +36,7 @@ ThinkingPartDelta, ToolCallPart, ToolReturnPart, + UploadedFile, UserPromptPart, VideoUrl, ) @@ -49,6 +50,7 @@ with try_import() as imports_successful: from google.genai.types import ( CodeExecutionResult, + File, GenerateContentResponse, GenerateContentResponseUsageMetadata, HarmBlockThreshold, @@ -1706,3 +1708,28 @@ def test_map_usage(): }, ) ) + + +async def test_uploaded_file_input(allow_model_requests: None, google_provider: GoogleProvider): + m = GoogleModel('gemini-2.5-flash', provider=google_provider) + agent = Agent(m, system_prompt='You are a helpful chatbot.') + # client = google_provider.client + # with open('tests/assets/smiley.pdf', 'rb') as f: + # google_file = client.files.upload( + # file=f, + # config={ + # 'mime_type': 'application/pdf', + # }, + # ) + # print(google_file) + google_file = File( + name='files/6myu0b1v3mxl', + mime_type='application/pdf', + uri='https://generativelanguage.googleapis.com/v1beta/files/6myu0b1v3mxl', + ) + agent = Agent(m) + + result = await agent.run(['Give me a short description of this image', UploadedFile(file=google_file)]) + assert result.output == snapshot( + 'The image displays a classic smiley face. It features a bright yellow circular face with two simple black dot eyes and an upward-curved black line forming a smile. The yellow circle has a subtle darker yellow outline and is set against a plain white background.' + ) From 0d6e4864bff286ce7cbf86b74ed401bf28b4c6f7 Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Tue, 19 Aug 2025 16:11:24 -0300 Subject: [PATCH 07/11] Add placeholder for handling UploadedFile in bedrock/gemini/huggingface --- pydantic_ai_slim/pydantic_ai/models/bedrock.py | 3 +++ pydantic_ai_slim/pydantic_ai/models/gemini.py | 3 +++ pydantic_ai_slim/pydantic_ai/models/huggingface.py | 3 +++ 3 files changed, 9 insertions(+) diff --git a/pydantic_ai_slim/pydantic_ai/models/bedrock.py b/pydantic_ai_slim/pydantic_ai/models/bedrock.py index 85766ae216..6928ff1ff0 100644 --- a/pydantic_ai_slim/pydantic_ai/models/bedrock.py +++ b/pydantic_ai_slim/pydantic_ai/models/bedrock.py @@ -35,6 +35,7 @@ ThinkingPart, ToolCallPart, ToolReturnPart, + UploadedFile, UserPromptPart, VideoUrl, ) @@ -577,6 +578,8 @@ async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int]) content.append({'video': video}) elif isinstance(item, AudioUrl): # pragma: no cover raise NotImplementedError('Audio is not supported yet.') + elif isinstance(item, UploadedFile): + raise NotImplementedError('Uploaded files are not supported yet.') else: assert_never(item) return [{'role': 'user', 'content': content}] diff --git a/pydantic_ai_slim/pydantic_ai/models/gemini.py b/pydantic_ai_slim/pydantic_ai/models/gemini.py index 517acbc614..7fb7748389 100644 --- a/pydantic_ai_slim/pydantic_ai/models/gemini.py +++ b/pydantic_ai_slim/pydantic_ai/models/gemini.py @@ -33,6 +33,7 @@ ThinkingPart, ToolCallPart, ToolReturnPart, + UploadedFile, UserPromptPart, VideoUrl, ) @@ -368,6 +369,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[_GeminiPartUnion] else: # pragma: lax no cover file_data = _GeminiFileDataPart(file_data={'file_uri': item.url, 'mime_type': item.media_type}) content.append(file_data) + elif isinstance(item, UploadedFile): + raise NotImplementedError('Uploaded files are not supported for GeminiModel.') else: assert_never(item) # pragma: lax no cover return content diff --git a/pydantic_ai_slim/pydantic_ai/models/huggingface.py b/pydantic_ai_slim/pydantic_ai/models/huggingface.py index ff854b1244..1518b6d617 100644 --- a/pydantic_ai_slim/pydantic_ai/models/huggingface.py +++ b/pydantic_ai_slim/pydantic_ai/models/huggingface.py @@ -32,6 +32,7 @@ ThinkingPart, ToolCallPart, ToolReturnPart, + UploadedFile, UserPromptPart, VideoUrl, ) @@ -424,6 +425,8 @@ async def _map_user_prompt(part: UserPromptPart) -> ChatCompletionInputMessage: raise NotImplementedError('DocumentUrl is not supported for Hugging Face') elif isinstance(item, VideoUrl): raise NotImplementedError('VideoUrl is not supported for Hugging Face') + elif isinstance(item, UploadedFile): + raise NotImplementedError('Uploaded files are not supported for Hugging Face') else: assert_never(item) return ChatCompletionInputMessage(role='user', content=content) # type: ignore From 89bfecebcb5e4a2bcd51b794f48981cd5aee47dd Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Wed, 26 Nov 2025 11:40:18 -0300 Subject: [PATCH 08/11] Address review comments - map UploadedFile to provider-friendly structures: Google uses file_data parts; OpenAI accepts file IDs or objects with ids - document provider expectations for UploadedFile in code and input docs - add tests and cassette adjustments to cover file ID/URI handling for OpenAI and Google --- docs/input.md | 22 ++++++++++++ pydantic_ai_slim/pydantic_ai/messages.py | 12 ++++++- pydantic_ai_slim/pydantic_ai/models/google.py | 35 +++++++++++++++---- pydantic_ai_slim/pydantic_ai/models/openai.py | 32 ++++++++--------- .../test_google/test_uploaded_file_input.yaml | 4 +-- tests/models/test_google.py | 8 ++++- tests/models/test_openai.py | 20 +++++++++-- 7 files changed, 101 insertions(+), 32 deletions(-) diff --git a/docs/input.md b/docs/input.md index e662c35832..2a13c6dd55 100644 --- a/docs/input.md +++ b/docs/input.md @@ -102,6 +102,28 @@ print(result.output) #> The document discusses... ``` +## Uploaded files + +Use [`UploadedFile`][pydantic_ai.UploadedFile] when you've already uploaded content to the model provider. + +- [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel] and [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel] accept an `openai.types.FileObject` or a file ID string returned by the OpenAI Files API. +- [`GoogleModel`][pydantic_ai.models.google.GoogleModel] accepts a `google.genai.types.File` or a file URI string from the Gemini Files API. +- Other models currently raise `NotImplementedError` when they receive an `UploadedFile`. + +```py {title="uploaded_file_input.py" test="skip" lint="skip"} +from pydantic_ai import Agent, UploadedFile + +agent = Agent(model='openai:gpt-5') +result = agent.run_sync( + [ + 'Give me a short description of this image', + UploadedFile(file='file-abc123'), # file-abc123 is a file ID returned by the provider + ] +) +print(result.output) +#> The image is a simple design of a classic yellow smiley face... +``` + ## User-side download vs. direct file URL As a general rule, when you provide a URL using any of `ImageUrl`, `AudioUrl`, `VideoUrl` or `DocumentUrl`, Pydantic AI downloads the file content and then sends it as part of the API request. diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index acf6e342c2..9acc8da5ba 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -635,7 +635,17 @@ def __init__( @dataclass(repr=False) class UploadedFile: - """File uploaded to the LLM provider.""" + """File uploaded to the LLM provider. + + Supported by [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel], + [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel], and + [`GoogleModel`][pydantic_ai.models.google.GoogleModel]. + + - For OpenAI-compatible models, provide an `openai.types.FileObject` or a file ID string returned by the Files API. + - For Gemini, provide a `google.genai.types.File` or the file URI string returned by the Files API. + + Other models raise `NotImplementedError` when they receive this part. + """ file: Any """A provider-specific file object, e.g. a file ID or a file URL.""" diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py index e57394a8d3..bc8e9706f8 100644 --- a/pydantic_ai_slim/pydantic_ai/models/google.py +++ b/pydantic_ai_slim/pydantic_ai/models/google.py @@ -546,7 +546,7 @@ async def _map_messages( if isinstance(part, SystemPromptPart): system_parts.append({'text': part.content}) elif isinstance(part, UserPromptPart): - message_parts.extend(await self._map_user_prompt(part, contents)) + message_parts.extend(await self._map_user_prompt(part)) elif isinstance(part, ToolReturnPart): message_parts.append( { @@ -592,7 +592,7 @@ async def _map_messages( return system_instruction, contents - async def _map_user_prompt(self, part: UserPromptPart, contents: list[ContentUnionDict]) -> list[PartDict]: + async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]: if isinstance(part.content, str): return [{'text': part.content}] else: @@ -629,11 +629,7 @@ async def _map_user_prompt(self, part: UserPromptPart, contents: list[ContentUni file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type} content.append({'file_data': file_data_dict}) # pragma: lax no cover elif isinstance(item, UploadedFile): - if not isinstance(item.file, File): - raise UserError('UploadedFile.file must be a genai.types.File object') - # genai.types.File is its own ContentUnionDict and not a - # PartDict, so append to the contents directly. - contents.append(item.file) + content.append({'file_data': self._map_uploaded_file(item)}) elif isinstance(item, CachePoint): # Google Gemini doesn't support prompt caching via CachePoint pass @@ -641,6 +637,31 @@ async def _map_user_prompt(self, part: UserPromptPart, contents: list[ContentUni assert_never(item) return content + @staticmethod + def _map_uploaded_file(item: UploadedFile) -> FileDataDict: + """Convert an UploadedFile into the structure expected by Gemini.""" + file = item.file + if isinstance(file, File): + file_uri = file.uri + mime_type = file.mime_type + display_name = getattr(file, 'display_name', None) + elif isinstance(file, str): + file_uri = file + mime_type = None + display_name = None + else: + raise UserError('UploadedFile.file must be a genai.types.File or file URI string') + + if not file_uri: + raise UserError('UploadedFile.file must include a file URI') + + file_data: FileDataDict = {'file_uri': file_uri} + if mime_type: + file_data['mime_type'] = mime_type + if display_name: + file_data['display_name'] = display_name + return file_data + def _map_response_schema(self, o: OutputObjectDefinition) -> dict[str, Any]: response_schema = o.json_schema.copy() if o.name: diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 7e20173d43..9178cb726e 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -10,7 +10,6 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Literal, cast, overload -from httpx import URL from pydantic import ValidationError from pydantic_core import to_json from typing_extensions import assert_never, deprecated @@ -980,8 +979,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> chat.ChatCompletionUse ) ) elif isinstance(item, UploadedFile): - file = _map_uploaded_file(item, self._provider) - content.append(File(file=FileFile(file_id=file.id), type='file')) + file_id = _map_uploaded_file(item, self._provider) + content.append(File(file=FileFile(file_id=file_id), type='file')) elif isinstance(item, VideoUrl): # pragma: no cover raise NotImplementedError('VideoUrl is not supported for OpenAI') elif isinstance(item, CachePoint): @@ -1800,8 +1799,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> responses.EasyInputMes ) ) elif isinstance(item, UploadedFile): - file = _map_uploaded_file(item, self._provider) - content.append(responses.ResponseInputFileParam(file_id=file.id, type='input_file')) + file_id = _map_uploaded_file(item, self._provider) + content.append(responses.ResponseInputFileParam(file_id=file_id, type='input_file')) elif isinstance(item, VideoUrl): # pragma: no cover raise NotImplementedError('VideoUrl is not supported for OpenAI.') elif isinstance(item, CachePoint): @@ -2319,20 +2318,19 @@ def _map_usage( ) -def _map_openai_uploaded_file(item: UploadedFile) -> FileObject: - if not isinstance(item.file, FileObject): - raise UserError('UploadedFile.file must be an openai.types.FileObject') - return item.file +def _map_uploaded_file(uploaded_file: UploadedFile, _provider: Provider[Any]) -> str: + """Map an UploadedFile to a file ID understood by OpenAI-compatible APIs.""" + file = uploaded_file.file + if isinstance(file, str): + return file + if isinstance(file, FileObject): + return file.id + file_id = getattr(file, 'id', None) + if isinstance(file_id, str): + return file_id -def _map_uploaded_file(uploaded_file: UploadedFile, provider: Provider[Any]) -> FileObject: - """Map an UploadedFile to a File object.""" - url = URL(provider.base_url) - - if url.host == 'api.openai.com': - return _map_openai_uploaded_file(uploaded_file) - else: - raise UserError(f'UploadedFile is not supported for `{provider.name}` with base_url {provider.base_url}.') + raise UserError('UploadedFile.file must be a file ID string or an object with an `id` attribute') def _map_provider_details( diff --git a/tests/models/cassettes/test_google/test_uploaded_file_input.yaml b/tests/models/cassettes/test_google/test_uploaded_file_input.yaml index c8b2896380..f2db00aa26 100644 --- a/tests/models/cassettes/test_google/test_uploaded_file_input.yaml +++ b/tests/models/cassettes/test_google/test_uploaded_file_input.yaml @@ -17,13 +17,11 @@ interactions: parsed_body: contents: - parts: + - text: Give me a short description of this image - fileData: fileUri: https://generativelanguage.googleapis.com/v1beta/files/6myu0b1v3mxl mimeType: application/pdf role: user - - parts: - - text: Give me a short description of this image - role: user generationConfig: {} uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent response: diff --git a/tests/models/test_google.py b/tests/models/test_google.py index 86d967c935..3c074b1c39 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -2774,6 +2774,12 @@ def test_map_usage(): ) +def test_google_uploaded_file_accepts_uri_string(): + file_uri = 'https://generativelanguage.googleapis.com/v1beta/files/123' + + assert GoogleModel._map_uploaded_file(UploadedFile(file=file_uri)) == {'file_uri': file_uri} + + async def test_uploaded_file_input(allow_model_requests: None, google_provider: GoogleProvider): m = GoogleModel('gemini-2.5-flash', provider=google_provider) google_file = File( @@ -3663,7 +3669,7 @@ async def test_cache_point_filtering(): model = GoogleModel('gemini-1.5-flash', provider=GoogleProvider(api_key='test-key')) # Test that CachePoint in a list is handled (triggers line 606) - content = await model._map_user_prompt(UserPromptPart(content=['text before', CachePoint(), 'text after']), []) # pyright: ignore[reportPrivateUsage] + content = await model._map_user_prompt(UserPromptPart(content=['text before', CachePoint(), 'text after'])) # pyright: ignore[reportPrivateUsage] # CachePoint should be filtered out, only text content should remain assert len(content) == 2 diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py index 3b13323b42..93aea2b770 100644 --- a/tests/models/test_openai.py +++ b/tests/models/test_openai.py @@ -80,6 +80,7 @@ OpenAIResponsesModel, OpenAIResponsesModelSettings, OpenAISystemPromptRole, + _map_uploaded_file, # pyright: ignore[reportPrivateUsage] ) from pydantic_ai.profiles.openai import OpenAIJsonSchemaTransformer from pydantic_ai.providers.cerebras import CerebrasProvider @@ -3104,12 +3105,25 @@ async def test_openai_model_settings_temperature_ignored_on_gpt_5(allow_model_re assert result.output == snapshot('Paris.') +def test_openai_uploaded_file_accepts_id_string(openai_api_key: str): + file_id = 'file-abc123' + provider = OpenAIProvider(api_key=openai_api_key) + + assert _map_uploaded_file(UploadedFile(file=file_id), provider) == file_id + + +def test_openai_uploaded_file_accepts_object_with_id(openai_api_key: str): + class FileStub: + id = 'file-stub' + + provider = OpenAIProvider(api_key=openai_api_key) + + assert _map_uploaded_file(UploadedFile(file=FileStub()), provider) == 'file-stub' + + async def test_uploaded_file_input(allow_model_requests: None, openai_api_key: str): provider = OpenAIProvider(api_key=openai_api_key) m = OpenAIChatModel('gpt-4o', provider=provider) - # VCR recording breaks when dealing with openai file upload request due to - # binary contents. For that reason, we have manually run once the upload - # and rebuild the FileObject manually (from the print command output). openai_file = FileObject( id='file-7yEHnJNSSBeUYfkLq6G8KG', bytes=5930, From 13a1e696631af99e04ff073cbefaec1e2713d242 Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Wed, 26 Nov 2025 12:35:25 -0300 Subject: [PATCH 09/11] Fix type errors - add an `UploadedFilePart` schema and emit uploaded-file metadata in OTEL user prompt parts, including file references when allowed - derive stable identifiers for `UploadedFile` objects with optional overrides for clearer telemetry - silence the pyright private-usage warning in the Google uploaded file test --- .../pydantic_ai/_otel_messages.py | 10 +++- pydantic_ai_slim/pydantic_ai/messages.py | 54 +++++++++++++++++-- tests/models/test_google.py | 2 +- 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/_otel_messages.py b/pydantic_ai_slim/pydantic_ai/_otel_messages.py index 18c780098b..0753e29750 100644 --- a/pydantic_ai_slim/pydantic_ai/_otel_messages.py +++ b/pydantic_ai_slim/pydantic_ai/_otel_messages.py @@ -43,12 +43,20 @@ class BinaryDataPart(TypedDict): content: NotRequired[str] +class UploadedFilePart(TypedDict): + type: Literal['uploaded-file'] + identifier: NotRequired[str] + file: NotRequired[str] + + class ThinkingPart(TypedDict): type: Literal['thinking'] content: NotRequired[str] -MessagePart: TypeAlias = 'TextPart | ToolCallPart | ToolCallResponsePart | MediaUrlPart | BinaryDataPart | ThinkingPart' +MessagePart: TypeAlias = ( + 'TextPart | ToolCallPart | ToolCallResponsePart | MediaUrlPart | BinaryDataPart | UploadedFilePart | ThinkingPart' +) Role = Literal['system', 'user', 'assistant'] diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 9acc8da5ba..2d37b9cc31 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -108,6 +108,16 @@ def _multi_modal_content_identifier(identifier: str | bytes) -> str: return hashlib.sha1(identifier).hexdigest()[:6] +def _uploaded_file_identifier_source(file: Any) -> str: + if isinstance(file, str): + return file + for attr in ('id', 'uri', 'name'): + value = getattr(file, attr, None) + if isinstance(value, str): + return value + return repr(file) + + @dataclass(init=False, repr=False) class FileUrl(ABC): """Abstract base class for any URL-based file.""" @@ -633,7 +643,7 @@ def __init__( raise ValueError('`BinaryImage` must be have a media type that starts with "image/"') # pragma: no cover -@dataclass(repr=False) +@dataclass(init=False, repr=False) class UploadedFile: """File uploaded to the LLM provider. @@ -650,9 +660,41 @@ class UploadedFile: file: Any """A provider-specific file object, e.g. a file ID or a file URL.""" + _: KW_ONLY + + _identifier: Annotated[str | None, pydantic.Field(alias='identifier', default=None, exclude=True)] = field( + compare=False, default=None + ) + """Optional identifier for the uploaded file.""" + kind: Literal['uploaded-file'] = 'uploaded-file' """Type identifier, this is available on all parts as a discriminator.""" + def __init__( + self, + file: Any, + *, + identifier: str | None = None, + kind: Literal['uploaded-file'] = 'uploaded-file', + # Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs. + _identifier: str | None = None, + ): + self.file = file + self._identifier = identifier or _identifier + self.kind = kind + + @pydantic.computed_field + @property + def identifier(self) -> str: + """Identifier for the uploaded file, usually derived from the provider's reference.""" + identifier = self._identifier + if identifier is not None: + return identifier + + return _multi_modal_content_identifier(_uploaded_file_identifier_source(self.file)) + + __repr__ = _utils.dataclasses_no_defaults_repr + @dataclass class CachePoint: @@ -795,11 +837,17 @@ def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_me if settings.include_content and settings.include_binary_content: converted_part['content'] = base64.b64encode(part.data).decode() parts.append(converted_part) + elif isinstance(part, UploadedFile): + uploaded_part: _otel_messages.UploadedFilePart = { + 'type': 'uploaded-file', + 'identifier': part.identifier, + } + if settings.include_content: + uploaded_part['file'] = _uploaded_file_identifier_source(part.file) + parts.append(uploaded_part) elif isinstance(part, CachePoint): # CachePoint is a marker, not actual content - skip it for otel pass - else: - parts.append({'type': part.kind}) # pragma: no cover return parts __repr__ = _utils.dataclasses_no_defaults_repr diff --git a/tests/models/test_google.py b/tests/models/test_google.py index 3c074b1c39..64e82dd84a 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -2777,7 +2777,7 @@ def test_map_usage(): def test_google_uploaded_file_accepts_uri_string(): file_uri = 'https://generativelanguage.googleapis.com/v1beta/files/123' - assert GoogleModel._map_uploaded_file(UploadedFile(file=file_uri)) == {'file_uri': file_uri} + assert GoogleModel._map_uploaded_file(UploadedFile(file=file_uri)) == {'file_uri': file_uri} # pyright: ignore[reportPrivateUsage] async def test_uploaded_file_input(allow_model_requests: None, google_provider: GoogleProvider): From 6e8dd1d7c203dab1ea6b4e82ff8983e62dc4fde5 Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Wed, 26 Nov 2025 14:15:47 -0300 Subject: [PATCH 10/11] Regenerate UploadedFile tests with dummy.pdf --- tests/assets/smiley.pdf | Bin 5930 -> 0 bytes .../test_google/test_uploaded_file_input.yaml | 29 +++++++++--------- .../test_openai/test_uploaded_file_input.yaml | 23 +++++++------- tests/models/test_google.py | 18 +++++++++-- tests/models/test_openai.py | 18 ++++++++--- 5 files changed, 56 insertions(+), 32 deletions(-) delete mode 100644 tests/assets/smiley.pdf diff --git a/tests/assets/smiley.pdf b/tests/assets/smiley.pdf deleted file mode 100644 index ab8cd0df8177fecdcc908754e2d972ad2cfcef3a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5930 zcmZ`-2{@GN-|i&Q*eY8IjgW(w8DlVnLbfc~Bil3vV;jcUcOxzKEZIfK*u!K=gk)b1 zV-1mg8_JfYd^7z|=R5y%zVErNx#oTE=l=bk-|v3j>s@ZaJ1VMT5OE1000IO9G4~w+ z*RKOWhG?7<3i!LbGujE|BMbm3V^Ihk8snmZz)@foSqZQN3@ihILm*%&2{47Ajj{QE z=k8!JHXcY67Wlgw7U61-M!IVw?9fO@AVeH0E-3+|B!h~9;bIa{pdLgDXph6W%7Q>% zUS8t=%@@aD?Lfi+1qA@g#fDm`#NR9SaYX?^$_O0731bHU-9gx)+<{OELH}<7)bqbu z{2B0*2LRnh+qeVo0VSz?parF0jE4&j2>Dwmb?V=t)P=Ap3iUrLf%H-C7!NFxvNTo2 zySn!sP)Ho*7zomE1_Nb&rl-^oqDX=?K;eMWe<9i^8#F=*;|;tAreG4_pFe6PSd8oc z1!G)rly%*KFlvUs)k!`7Ri_rp#SUi=l=?++$5FP?8Q=z>rvJ4J^&jaB_*tL|2B1e3 z_fO)VGE?KJDWQ;G!c)tpBKLqG%H;zD8KS&#)D|hCD^ukAGmhHsuTl(us{N-R0FZ&l zecVsvXAA(OgK(zo9z`PnNEPjbqg*SL)T&ODt4#%k#Mq#ydt!pNp)^A&9RN~Cq3!H( z|2bC1IAO2`t_UOw2%+|(4*)5laqf3eSY?c}E5?PoJ2C*!pIX3wGALW{XN%tB>lPMH zY$NPah3ez^kH@RBH4+x+5+pSsulL7B)1K)Yi5QB^hdg`AsZ^v{B63STQJ+oNAx?y- zJ;ynoe==7}Q_tHzHR&On(Sy)9F&l8isSA z`SX5i_Hv^&r!$GT_HIT;oq3<2-Hv-0X5d-6YesX4zy9&T#|)^b1--T^{?4g#?WvO= zQxa=TP6abLg^Bk+8ZXe+zQ0=teGR>q1nA0$OHW?Xq*wn9oVL_F$)~K2&gQIoA>}$4 z_j^=$>e!CBd${pqH?C_&t)t$YxuH%7+qnLO@loxY_kG(HUcrCob#V`O*J}|t8Eny^ ztfPJ@xRf;w&XUCa_)?28!rDZ$#j^t%mzERh|uB?3NU?=v7i$l6NV}Mk`sq4}hc$<#5qdTNFgVJhSl=xNf2;h3xQHjb3eXukBUc{@RDl6$KUK>zYEj zgj!X3vEtzmEor1+Y{z=u^R;|FX-;=R0gq@dp+~$( z#!B@ml2L0eZi$w%t>10%i58!^lDo^zZYvMfX_Irk3KBmBFQfLHmz;%89-n74)@*+A zrXe?DQU^v_zD9^E4oI(jZfZ?v@tZG>0>KIa(h9yQw;clX;PXK(YxKvFj;3&STE1Wgk z6eX+;XcUsUE1dQWUePIXsABrqIWCDgZti>Vm@T@IuRg2YdOiVeH)OaL-=%3CROJ`` zbrL`H$`jj~5^uZQFKZ3-G}rJjuICzMyjFIB*(q{gQ0893LzRJn2Mlc{YfF|DEif7h(>$+b z6*!kcB#d&aoty3mR@L9}Grwipn>U#m=X$r=!CU7=-SlhzZf@d0-nr}K-`?d-zMAe_ zxwjs&+E!;3I%CDrP{x8~%d_D3o;Yka<7v5F?%(6yV>$VofYNZC#T~xE=5LY?ryBh} zBz+~ST7|aKWI|lky+mWi-SfaI?lxd**(A4eBHGyXXt|YAtqDlkpG#hay+%s zHq>&{A#ovcsTXX>0pk)ff5@$e9-*Jw3(gL?>ZI>c@I%t?ns~ukc+Nm+T!MGmn^2NP zL$;-(&cI4uW(oxrkLgUWYsy9NtG`;z` zO6y@cbBpQu1$7c1xil6_RZDo-M|6E%clh4o_4SxCeWHR-v6Pl(MTUtILSDn)&@z2` z`_w0D&QSTi=Y+oY8??n(&xG>`BbUa5yLyh3>v1luPffb>U!^y5Ek}oo5}t;ny@Z&! z&O6s~5DS~cy;J3}EkO%D<~{~5-f0M!Yb>6TC`}GeI-9~FEsMU$BZQ!^wEHI#3)$Q8%1Fa=cCj}a`A_WJwQ&{HTV;32h zKJ_{=w+%H0^agKTP41bw(3b{Oy!>#CJNk=1_t&lZWTqMKzOB?33PG`BXQTIyn&33O zw^33dNJUWMuK(iav59E1VA;{Pb4Twt<`(i$N{;UX8@9M7eYOvee=rdS7g(&!Na?@v zR)v$F!mKZAd+APJKY-^APhK4Uk~{6Q?fkiJC$X*lZE;pMu?)5yX}0Oj4_h~~dN4Jv zS5&5r7l-w}J!skBh?g@HQvT$S-|t??d3{Pf%HGKQ41$MDO7~V{(|fVA{jo79bz;)@ zLh5iiO>sii3H?+a^#Z^B4~s@J4QlSr%G2X*SWY<%Ga{S`n}I zR51sgSmTH?a=yG=C8L-uX4Jg3K znumW*%uIH$EU%nuT2ZO8jqzYMoeFp`o>L46lAR)w<(hoN?x?K>%nJ3CaNwO{!I4+` z_Y9)u6BOc%EDlet0*lH!@QZ_H%3R%Pfpu{2DZ}i>@B5}$dIBRssgp$;*5^K>%~A?T z+YavJsc>Oj?L+xw5=8f<1NUvzM~e4{IfZmmBwTNvZ)erVE)~E*i}96pkYXkicg@K+ z0qM_g4*Lb2IV9nye32R{xMO}Y^TlQc#`SH6Gm0PXwekA2KYdd1hzrlgT=ig&6iO?0 z>=R>K_~=laUPoYpZz<18-3JD4&@ZxA9^PN)g)slIc2bmiUg`8Zc3`73aI`|eIwkXW;8|G?L!mOcMX^45Gz zyb@<=dgiI)bi zlH9!789O>|Jdl1~9m=B+^=h<6$KJYXH3t13XZyq&9cW7yp*?z5wB1l6~!pn?)0GhSPGYrr?~HBT$_lj%K3O?PyUd zW?zen{h094&oaArXNl{2dE@#kwHLQ~ed+aYN?{KRR=YNlJSCN^VM$MWD;QL{1*f$Z zj`w@rNitv0&OvOnxfLY%5JqXaSDna@e0vjy;=ihxB(IBUu-ZKD+4sw=aWAq7g7u4t z=~d8Gi)eV`0Wu{qTgT&DM+SVFy0d~4=k|!_yAp?*HY1J#FHUGOWfcmnU0k(JV-!PX z_WQ(Jc4RHDejAJ7MRS8g!rn|<=C3c`;e0j&`xecm0LwP219nXyRTwuI%9Z_|HB`-H zkIsZSe(ua~LR&h%Uy(2QT4_~xgDKq_o6p>dbP9TtqT0}df6MbRs8S}YNRjoy#%Dkc zbFDb>vLJ!(%_U=Pp4@OPd;6&Mr`aC&YiIiFdQAG}502lim0{$!#*^Bl{5QIIy2yts z$_yK(OG*6GLV;~on_fqpSxrJ`4Cdo*Qlc0OUs;f>AKy@-M=wUd{vO^vba$NP?ifnB z!IJf&na-GO-OWJRZxcpWl6v7D`%B?ryC5NEF~{+hj*kA%TWQ;PO+rYB)r#EYhqLRSc36!it`eN0)a5(G0fixZQ;3UAj=uV`wa7Vc|j^rzM?(Ufd=pB-#(s8r-MgmMyQ zVy{;neC*Lm$b$E*%$c23xv4mwzN738FDSC}10WWCgM*W^1{MS16sDoZ$ttua~zKM2+o9!Tpe zcnRG%6{-rmGjf)R5T(mg1Y4;d`JrE}#mYdSyx@Cny8^DT<&@41O-d4q^wmRMh8&6e zck|1l`uZ7940%7R-`Mbsv*cPKoPU@k|AGVj$x>@Bq$?;{c2-fV>1nu0pu<&_xZpn z0zv73t7*TU_Q~O5s)YQI%$YX=UbKd*H4>a&wQeK=JF!ZkF{6Ch)Rz7nfppd6_0}uT z?xzC^{x<{d`FMKrudCgYQHXgqu`o5&@Z|@njqUVqZ0)XS-+c4c9 zJ{PY-`HaVrY{f3KC&}@{3xSvSTL1P1ZxHX=Ngf*<+Okl%u!`~GhlCwGo^NtRnP2w$ zQUWcDiwPCiHiDoO9*^au6)8?!Y>Htgqlt8~=}n2f4{K!?5)*y68zMxfWgCd|zxme; z_N4T2P>cq{2r#Rc`1dzr*-!h53W8Kno@gXWUrp(+slS=C|4s(^WrWH|O8$!Q#-eNiU?2nv0RJ_B zQZN`42DAnKg;AHJ4xr0l7(|BR+x-KRfKnX4e`0WoFZd5kQi@`e{sV(a{yP>XMX@RW z!IJ`0Jk|AA3l Date: Wed, 26 Nov 2025 15:51:25 -0300 Subject: [PATCH 11/11] Increase coverage for uploading files --- tests/models/test_google.py | 25 +++++++++++++++++ tests/models/test_openai.py | 26 ++++++++++++++++++ tests/test_uploaded_file.py | 55 +++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 tests/test_uploaded_file.py diff --git a/tests/models/test_google.py b/tests/models/test_google.py index 3fb3ee1788..ea389e6775 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -2998,6 +2998,31 @@ def test_google_uploaded_file_accepts_uri_string(): assert GoogleModel._map_uploaded_file(UploadedFile(file=file_uri)) == {'file_uri': file_uri} # pyright: ignore[reportPrivateUsage] +def test_google_uploaded_file_requires_valid_type(): + with pytest.raises(UserError, match='genai\\.types\\.File or file URI string'): + GoogleModel._map_uploaded_file(UploadedFile(file=object())) # pyright: ignore[reportPrivateUsage] + + +def test_google_uploaded_file_requires_uri(): + with pytest.raises(UserError, match='include a file URI'): + GoogleModel._map_uploaded_file(UploadedFile(file='')) # pyright: ignore[reportPrivateUsage] + + +def test_google_uploaded_file_includes_display_name(): + google_file = File( + name='files/123', + uri='https://generativelanguage.googleapis.com/v1beta/files/123', + mime_type='application/pdf', + display_name='resume.pdf', + ) + + assert GoogleModel._map_uploaded_file(UploadedFile(file=google_file)) == { # pyright: ignore[reportPrivateUsage] + 'file_uri': google_file.uri, + 'mime_type': 'application/pdf', + 'display_name': 'resume.pdf', + } + + async def test_uploaded_file_input(allow_model_requests: None, google_provider: GoogleProvider): m = GoogleModel('gemini-2.5-flash', provider=google_provider) # VCR recording breaks when dealing with openai file upload request due to diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py index 89d30c6f82..882be9226f 100644 --- a/tests/models/test_openai.py +++ b/tests/models/test_openai.py @@ -3121,6 +3121,32 @@ class FileStub: assert _map_uploaded_file(UploadedFile(file=FileStub()), provider) == 'file-stub' +def test_openai_uploaded_file_requires_id(openai_api_key: str): + provider = OpenAIProvider(api_key=openai_api_key) + + class FileStub: + pass + + with pytest.raises( + UserError, match='UploadedFile\\.file must be a file ID string or an object with an `id` attribute' + ): + _map_uploaded_file(UploadedFile(file=FileStub()), provider) + + +async def test_openai_responses_uploaded_file_mapping(openai_api_key: str): + provider = OpenAIProvider(api_key=openai_api_key) + responses_model = OpenAIResponsesModel('gpt-4o-mini', provider=provider) + + msg = await responses_model._map_user_prompt( # pyright: ignore[reportPrivateUsage] + UserPromptPart(content=[UploadedFile(file='file-xyz')]) + ) + + assert msg == { + 'role': 'user', + 'content': [{'file_id': 'file-xyz', 'type': 'input_file'}], + } + + async def test_uploaded_file_input(allow_model_requests: None, openai_api_key: str): provider = OpenAIProvider(api_key=openai_api_key) m = OpenAIChatModel('gpt-4o', provider=provider) diff --git a/tests/test_uploaded_file.py b/tests/test_uploaded_file.py new file mode 100644 index 0000000000..aea3f9405a --- /dev/null +++ b/tests/test_uploaded_file.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import hashlib + +from pydantic_ai.messages import ( + UploadedFile, + UserPromptPart, + _uploaded_file_identifier_source, # pyright: ignore[reportPrivateUsage] +) +from pydantic_ai.models.instrumented import InstrumentationSettings + + +def test_uploaded_file_identifier_source_prefers_known_fields(): + class WithId: + id = 'file-id' + + class WithUri: + uri = 'gs://bucket/file' + + class WithName: + name = 'named-file' + + class WithRepr: + def __repr__(self) -> str: + return 'repr-value' + + assert _uploaded_file_identifier_source('direct-id') == 'direct-id' + assert _uploaded_file_identifier_source(WithId()) == 'file-id' + assert _uploaded_file_identifier_source(WithUri()) == 'gs://bucket/file' + assert _uploaded_file_identifier_source(WithName()) == 'named-file' + assert _uploaded_file_identifier_source(WithRepr()) == 'repr-value' + + +def test_uploaded_file_identifier_defaults_to_hash_and_respects_override(): + file_id = 'file-abc' + uploaded_file = UploadedFile(file=file_id) + + expected = hashlib.sha1(file_id.encode('utf-8')).hexdigest()[:6] + assert uploaded_file.identifier == expected + + overridden = UploadedFile(file=file_id, identifier='explicit-id') + assert overridden.identifier == 'explicit-id' + + +def test_uploaded_file_instrumentation_parts_include_identifier_and_optional_file(): + uploaded_file = UploadedFile(file='file-123') + part = UserPromptPart(content=[uploaded_file]) + + without_content = part.otel_message_parts(InstrumentationSettings(include_content=False)) + assert without_content == [{'type': 'uploaded-file', 'identifier': uploaded_file.identifier}] + + with_content = part.otel_message_parts(InstrumentationSettings(include_content=True)) + assert with_content == [ + {'type': 'uploaded-file', 'identifier': uploaded_file.identifier, 'file': 'file-123'}, + ]