diff --git a/docs/input.md b/docs/input.md
index e662c35832..2a13c6dd55 100644
--- a/docs/input.md
+++ b/docs/input.md
@@ -102,6 +102,28 @@ print(result.output)
 #> The document discusses...
 ```
 
+## Uploaded files
+
+Use [`UploadedFile`][pydantic_ai.UploadedFile] when you've already uploaded content to the model provider.
+
+- [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel] and [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel] accept an `openai.types.FileObject` or a file ID string returned by the OpenAI Files API.
+- [`GoogleModel`][pydantic_ai.models.google.GoogleModel] accepts a `google.genai.types.File` or a file URI string from the Gemini Files API.
+- Other models currently raise `NotImplementedError` when they receive an `UploadedFile`.
+
+```py {title="uploaded_file_input.py" test="skip" lint="skip"}
+from pydantic_ai import Agent, UploadedFile
+
+agent = Agent(model='openai:gpt-5')
+result = agent.run_sync(
+    [
+        'Give me a short description of this image',
+        UploadedFile(file='file-abc123'),  # file-abc123 is a file ID returned by the provider
+    ]
+)
+print(result.output)
+#> The image is a simple design of a classic yellow smiley face...
+```
+
 ## User-side download vs. direct file URL
 
 As a general rule, when you provide a URL using any of `ImageUrl`, `AudioUrl`, `VideoUrl` or `DocumentUrl`, Pydantic AI downloads the file content and then sends it as part of the API request.
diff --git a/pydantic_ai_slim/pydantic_ai/__init__.py b/pydantic_ai_slim/pydantic_ai/__init__.py
index c860d20dd8..2ea92c3536 100644
--- a/pydantic_ai_slim/pydantic_ai/__init__.py
+++ b/pydantic_ai_slim/pydantic_ai/__init__.py
@@ -80,6 +80,7 @@
     ToolCallPartDelta,
     ToolReturn,
     ToolReturnPart,
+    UploadedFile,
     UserContent,
     UserPromptPart,
     VideoFormat,
@@ -182,6 +183,7 @@
     'ToolCallPartDelta',
     'ToolReturn',
     'ToolReturnPart',
+    'UploadedFile',
     'UserContent',
     'UserPromptPart',
     'VideoFormat',
diff --git a/pydantic_ai_slim/pydantic_ai/_otel_messages.py b/pydantic_ai_slim/pydantic_ai/_otel_messages.py
index 18c780098b..0753e29750 100644
--- a/pydantic_ai_slim/pydantic_ai/_otel_messages.py
+++ b/pydantic_ai_slim/pydantic_ai/_otel_messages.py
@@ -43,12 +43,20 @@ class BinaryDataPart(TypedDict):
     content: NotRequired[str]
 
 
+class UploadedFilePart(TypedDict):
+    type: Literal['uploaded-file']
+    identifier: NotRequired[str]
+    file: NotRequired[str]
+
+
 class ThinkingPart(TypedDict):
     type: Literal['thinking']
     content: NotRequired[str]
 
 
-MessagePart: TypeAlias = 'TextPart | ToolCallPart | ToolCallResponsePart | MediaUrlPart | BinaryDataPart | ThinkingPart'
+MessagePart: TypeAlias = (
+    'TextPart | ToolCallPart | ToolCallResponsePart | MediaUrlPart | BinaryDataPart | UploadedFilePart | ThinkingPart'
+)
 
 
 Role = Literal['system', 'user', 'assistant']
diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py
index ac0fb0da6d..2d37b9cc31 100644
--- a/pydantic_ai_slim/pydantic_ai/messages.py
+++ b/pydantic_ai_slim/pydantic_ai/messages.py
@@ -108,6 +108,16 @@ def _multi_modal_content_identifier(identifier: str | bytes) -> str:
     return hashlib.sha1(identifier).hexdigest()[:6]
 
 
+def _uploaded_file_identifier_source(file: Any) -> str:
+    if isinstance(file, str):
+        return file
+    for attr in ('id', 'uri', 'name'):
+        value = getattr(file, attr, None)
+        if isinstance(value, str):
+            return value
+    return repr(file)
+
+
 @dataclass(init=False, repr=False)
 class FileUrl(ABC):
     """Abstract base class for any URL-based file."""
@@ -633,6 +643,59 @@ def __init__(
             raise ValueError('`BinaryImage` must be have a media type that starts with "image/"')  # pragma: no cover
 
 
+@dataclass(init=False, repr=False)
+class UploadedFile:
+    """File uploaded to the LLM provider.
+
+    Supported by [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel],
+    [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel], and
+    [`GoogleModel`][pydantic_ai.models.google.GoogleModel].
+
+    - For OpenAI-compatible models, provide an `openai.types.FileObject` or a file ID string returned by the Files API.
+    - For Gemini, provide a `google.genai.types.File` or the file URI string returned by the Files API.
+
+    Other models raise `NotImplementedError` when they receive this part.
+    """
+
+    file: Any
+    """A provider-specific file reference, e.g. an uploaded file object, a file ID string, or a file URI."""
+
+    _: KW_ONLY
+
+    _identifier: Annotated[str | None, pydantic.Field(alias='identifier', default=None, exclude=True)] = field(
+        compare=False, default=None
+    )
+    """Optional identifier for the uploaded file."""
+
+    kind: Literal['uploaded-file'] = 'uploaded-file'
+    """Type identifier, this is available on all parts as a discriminator."""
+
+    def __init__(
+        self,
+        file: Any,
+        *,
+        identifier: str | None = None,
+        kind: Literal['uploaded-file'] = 'uploaded-file',
+        # Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
+        _identifier: str | None = None,
+    ):
+        self.file = file
+        self._identifier = identifier or _identifier
+        self.kind = kind
+
+    @pydantic.computed_field
+    @property
+    def identifier(self) -> str:
+        """Identifier for the uploaded file, usually derived from the provider's reference."""
+        identifier = self._identifier
+        if identifier is not None:
+            return identifier
+
+        return _multi_modal_content_identifier(_uploaded_file_identifier_source(self.file))
+
+    __repr__ = _utils.dataclasses_no_defaults_repr
+
+
 @dataclass
 class CachePoint:
     """A cache point marker for prompt caching.
@@ -656,7 +719,7 @@ class CachePoint:
     * Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""
 
 
-MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
+MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent | UploadedFile
 
 
 UserContent: TypeAlias = str | MultiModalContent | CachePoint
@@ -774,11 +837,17 @@ def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_me
                 if settings.include_content and settings.include_binary_content:
                     converted_part['content'] = base64.b64encode(part.data).decode()
                 parts.append(converted_part)
+            elif isinstance(part, UploadedFile):
+                uploaded_part: _otel_messages.UploadedFilePart = {
+                    'type': 'uploaded-file',
+                    'identifier': part.identifier,
+                }
+                if settings.include_content:
+                    uploaded_part['file'] = _uploaded_file_identifier_source(part.file)
+                parts.append(uploaded_part)
             elif isinstance(part, CachePoint):
                 # CachePoint is a marker, not actual content - skip it for otel
                 pass
-            else:
-                parts.append({'type': part.kind})  # pragma: no cover
         return parts
 
     __repr__ = _utils.dataclasses_no_defaults_repr
diff --git a/pydantic_ai_slim/pydantic_ai/models/bedrock.py b/pydantic_ai_slim/pydantic_ai/models/bedrock.py
index 34cba80a2d..b723a13458 100644
--- a/pydantic_ai_slim/pydantic_ai/models/bedrock.py
+++ b/pydantic_ai_slim/pydantic_ai/models/bedrock.py
@@ -34,6 +34,7 @@
     ThinkingPart,
     ToolCallPart,
     ToolReturnPart,
+    UploadedFile,
     UserPromptPart,
     VideoUrl,
     _utils,
@@ -676,6 +677,8 @@ async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int])
                 content.append({'video': video})
             elif isinstance(item, AudioUrl):  # pragma: no cover
                 raise NotImplementedError('Audio is not supported yet.')
+            elif isinstance(item, UploadedFile):
+                raise NotImplementedError('Uploaded files are not supported yet.')
             elif isinstance(item, CachePoint):
                 # Bedrock support has not been implemented yet: https://github.com/pydantic/pydantic-ai/issues/3418
                 pass
diff --git a/pydantic_ai_slim/pydantic_ai/models/gemini.py b/pydantic_ai_slim/pydantic_ai/models/gemini.py
index 4da92018fd..b98b53c5d0 100644
--- a/pydantic_ai_slim/pydantic_ai/models/gemini.py
+++ b/pydantic_ai_slim/pydantic_ai/models/gemini.py
@@ -35,6 +35,7 @@
     ThinkingPart,
     ToolCallPart,
     ToolReturnPart,
+    UploadedFile,
     UserPromptPart,
     VideoUrl,
 )
@@ -392,6 +393,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[_GeminiPartUnion]
                 else:  # pragma: lax no cover
                     file_data = _GeminiFileDataPart(file_data={'file_uri': item.url, 'mime_type': item.media_type})
                     content.append(file_data)
+            elif isinstance(item, UploadedFile):
+                raise NotImplementedError('Uploaded files are not supported for GeminiModel.')
             elif isinstance(item, CachePoint):
                 # Gemini doesn't support prompt caching via CachePoint
                 pass
diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py
index e6b5301673..38e988aa16 100644
--- a/pydantic_ai_slim/pydantic_ai/models/google.py
+++ b/pydantic_ai_slim/pydantic_ai/models/google.py
@@ -34,6 +34,7 @@
     ThinkingPart,
     ToolCallPart,
     ToolReturnPart,
+    UploadedFile,
     UserPromptPart,
     VideoUrl,
 )
@@ -62,6 +63,7 @@
     CountTokensConfigDict,
     ExecutableCode,
     ExecutableCodeDict,
+    File,
     FileDataDict,
     FinishReason as GoogleFinishReason,
     FunctionCallDict,
@@ -628,6 +630,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
                 else:
                     file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
                    content.append({'file_data': file_data_dict})  # pragma: lax no cover
+            elif isinstance(item, UploadedFile):
+                content.append({'file_data': self._map_uploaded_file(item)})
             elif isinstance(item, CachePoint):
                 # Google Gemini doesn't support prompt caching via CachePoint
                 pass
@@ -635,6 +639,31 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
                 assert_never(item)
         return content
 
+    @staticmethod
+    def _map_uploaded_file(item: UploadedFile) -> FileDataDict:
+        """Convert an UploadedFile into the structure expected by Gemini."""
+        file = item.file
+        if isinstance(file, File):
+            file_uri = file.uri
+            mime_type = file.mime_type
+            display_name = getattr(file, 'display_name', None)
+        elif isinstance(file, str):
+            file_uri = file
+            mime_type = None
+            display_name = None
+        else:
+            raise UserError('UploadedFile.file must be a genai.types.File or file URI string')
+
+        if not file_uri:
+            raise UserError('UploadedFile.file must include a file URI')
+
+        file_data: FileDataDict = {'file_uri': file_uri}
+        if mime_type:
+            file_data['mime_type'] = mime_type
+        if display_name:
+            file_data['display_name'] = display_name
+        return file_data
+
     def _map_response_schema(self, o: OutputObjectDefinition) -> dict[str, Any]:
         response_schema = o.json_schema.copy()
         if o.name:
diff --git a/pydantic_ai_slim/pydantic_ai/models/huggingface.py b/pydantic_ai_slim/pydantic_ai/models/huggingface.py
index 790b30bec3..bec147111c 100644
--- a/pydantic_ai_slim/pydantic_ai/models/huggingface.py
+++ b/pydantic_ai_slim/pydantic_ai/models/huggingface.py
@@ -34,6 +34,7 @@
     ThinkingPart,
     ToolCallPart,
     ToolReturnPart,
+    UploadedFile,
     UserPromptPart,
     VideoUrl,
 )
@@ -448,6 +449,8 @@ async def _map_user_prompt(part: UserPromptPart) -> ChatCompletionInputMessage:
                 raise NotImplementedError('DocumentUrl is not supported for Hugging Face')
             elif isinstance(item, VideoUrl):
                 raise NotImplementedError('VideoUrl is not supported for Hugging Face')
+            elif isinstance(item, UploadedFile):
+                raise NotImplementedError('Uploaded files are not supported for Hugging Face')
             elif isinstance(item, CachePoint):
                 # Hugging Face doesn't support prompt caching via CachePoint
                 pass
diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py
index d97c6843a7..95f8a02f0e 100644
--- a/pydantic_ai_slim/pydantic_ai/models/openai.py
+++ b/pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -44,6 +44,7 @@
     ThinkingPart,
     ToolCallPart,
     ToolReturnPart,
+    UploadedFile,
     UserPromptPart,
     VideoUrl,
 )
@@ -56,7 +57,7 @@
 try:
     from openai import NOT_GIVEN, APIConnectionError, APIStatusError, AsyncOpenAI, AsyncStream
-    from openai.types import AllModels, chat, responses
+    from openai.types import AllModels, FileObject, chat, responses
     from openai.types.chat import (
         ChatCompletionChunk,
         ChatCompletionContentPartImageParam,
@@ -977,6 +978,9 @@ async def _map_user_prompt(self, part: UserPromptPart) -> chat.ChatCompletionUse
                         type='file',
                     )
                 )
+            elif isinstance(item, UploadedFile):
+                file_id = _map_uploaded_file(item, self._provider)
+                content.append(File(file=FileFile(file_id=file_id), type='file'))
             elif isinstance(item, VideoUrl):  # pragma: no cover
                 raise NotImplementedError('VideoUrl is not supported for OpenAI')
             elif isinstance(item, CachePoint):
@@ -1733,8 +1737,7 @@ def _map_json_schema(self, o: OutputObjectDefinition) -> responses.ResponseForma
             response_format_param['strict'] = o.strict
         return response_format_param
 
-    @staticmethod
-    async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessageParam:  # noqa: C901
+    async def _map_user_prompt(self, part: UserPromptPart) -> responses.EasyInputMessageParam:  # noqa: C901
         content: str | list[responses.ResponseInputContentParam]
         if isinstance(part.content, str):
             content = part.content
@@ -1807,6 +1810,9 @@ async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessagePa
                         filename=f'filename.{downloaded_item["data_type"]}',
                     )
                 )
+            elif isinstance(item, UploadedFile):
+                file_id = _map_uploaded_file(item, self._provider)
+                content.append(responses.ResponseInputFileParam(file_id=file_id, type='input_file'))
             elif isinstance(item, VideoUrl):  # pragma: no cover
                 raise NotImplementedError('VideoUrl is not supported for OpenAI.')
             elif isinstance(item, CachePoint):
@@ -2324,6 +2330,21 @@ def _map_usage(
     )
 
 
+def _map_uploaded_file(uploaded_file: UploadedFile, _provider: Provider[Any]) -> str:
+    """Map an UploadedFile to a file ID understood by OpenAI-compatible APIs."""
+    file = uploaded_file.file
+    if isinstance(file, str):
+        return file
+    if isinstance(file, FileObject):
+        return file.id
+
+    file_id = getattr(file, 'id', None)
+    if isinstance(file_id, str):
+        return file_id
+
+    raise UserError('UploadedFile.file must be a file ID string or an object with an `id` attribute')
+
+
 def _map_provider_details(
     choice: chat_completion_chunk.Choice | chat_completion.Choice,
 ) -> dict[str, Any]:
diff --git a/tests/models/cassettes/test_google/test_uploaded_file_input.yaml b/tests/models/cassettes/test_google/test_uploaded_file_input.yaml
new file mode 100644
index 0000000000..5ac10a4079
--- /dev/null
+++ b/tests/models/cassettes/test_google/test_uploaded_file_input.yaml
@@ -0,0 +1,69 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - '*/*'
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '281'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+    method: POST
+    parsed_body:
+      contents:
+      - parts:
+        - text: Give me a short description of this image
+        - fileData:
+            fileUri: https://generativelanguage.googleapis.com/v1beta/files/9b7dfki4eo1b
+            mimeType: application/pdf
+        role: user
+      generationConfig:
+        responseModalities:
+        - TEXT
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
+  response:
+    headers:
+      alt-svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      content-length:
+      - '757'
+      content-type:
+      - application/json; charset=UTF-8
+      server-timing:
+      - gfet4t7; dur=3602
+      transfer-encoding:
+      - chunked
+      vary:
+      - Origin
+      - X-Origin
+      - Referer
+    parsed_body:
+      candidates:
+      - content:
+          parts:
+          - text: The image displays a plain white page with the bold black text "Dummy PDF file" centered at the top-left. The rest of the page is blank.
+          role: model
+        finishReason: STOP
+        index: 0
+      modelVersion: gemini-2.5-flash
+      responseId: SzQnae6DDISI_uMPsrGLsQg
+      usageMetadata:
+        candidatesTokenCount: 32
+        promptTokenCount: 267
+        promptTokensDetails:
+        - modality: TEXT
+          tokenCount: 9
+        - modality: DOCUMENT
+          tokenCount: 258
+        thoughtsTokenCount: 459
+        totalTokenCount: 758
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_openai/test_uploaded_file_input.yaml b/tests/models/cassettes/test_openai/test_uploaded_file_input.yaml
new file mode 100644
index 0000000000..505224c3bb
--- /dev/null
+++ b/tests/models/cassettes/test_openai/test_uploaded_file_input.yaml
@@ -0,0 +1,86 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '206'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      messages:
+      - content:
+        - text: Give me a short description of this image
+          type: text
+        - file:
+            file_id: file-2bkCwDLR2p8cDXfT9he8tV
+          type: file
+        role: user
+      model: gpt-4o
+      stream: false
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '1080'
+      content-type:
+      - application/json
+      openai-organization:
+      - coplane
+      openai-processing-ms:
+      - '1905'
+      openai-project:
+      - proj_KGkpeAYM2vPXvZOVtXfnuZ9r
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: stop
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: The file you uploaded is a "Dummy PDF file." There are no further descriptions or details available within its content. The file likely contains filler or placeholder text for testing purposes. If you need a specific type of content or have another file, please let me know!
+          refusal: null
+          role: assistant
+      created: 1764177247
+      id: chatcmpl-CgDNf5yzHe6V6DQHxio2IdIAdMyoR
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_cbf1785567
+      usage:
+        completion_tokens: 53
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 234
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 287
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/test_google.py b/tests/models/test_google.py
index 1c7b234360..ea389e6775 100644
--- a/tests/models/test_google.py
+++ b/tests/models/test_google.py
@@ -39,6 +39,7 @@
     ThinkingPartDelta,
     ToolCallPart,
     ToolReturnPart,
+    UploadedFile,
     UsageLimitExceeded,
     UserPromptPart,
     VideoUrl,
@@ -67,6 +68,7 @@
 with try_import() as imports_successful:
     from google.genai import errors
     from google.genai.types import (
+        File,
         FinishReason as GoogleFinishReason,
         GenerateContentResponse,
         GenerateContentResponseUsageMetadata,
@@ -2990,6 +2992,64 @@ def test_map_usage():
     )
 
 
+def test_google_uploaded_file_accepts_uri_string():
+    file_uri = 'https://generativelanguage.googleapis.com/v1beta/files/123'
+
+    assert GoogleModel._map_uploaded_file(UploadedFile(file=file_uri)) == {'file_uri': file_uri}  # pyright: ignore[reportPrivateUsage]
+
+
+def test_google_uploaded_file_requires_valid_type():
+    with pytest.raises(UserError, match='genai\\.types\\.File or file URI string'):
+        GoogleModel._map_uploaded_file(UploadedFile(file=object()))  # pyright: ignore[reportPrivateUsage]
+
+
+def test_google_uploaded_file_requires_uri():
+    with pytest.raises(UserError, match='include a file URI'):
+        GoogleModel._map_uploaded_file(UploadedFile(file=''))  # pyright: ignore[reportPrivateUsage]
+
+
+def test_google_uploaded_file_includes_display_name():
+    google_file = File(
+        name='files/123',
+        uri='https://generativelanguage.googleapis.com/v1beta/files/123',
+        mime_type='application/pdf',
+        display_name='resume.pdf',
+    )
+
+    assert GoogleModel._map_uploaded_file(UploadedFile(file=google_file)) == {  # pyright: ignore[reportPrivateUsage]
+        'file_uri': google_file.uri,
+        'mime_type': 'application/pdf',
+        'display_name': 'resume.pdf',
+    }
+
+
+async def test_uploaded_file_input(allow_model_requests: None, google_provider: GoogleProvider):
+    m = GoogleModel('gemini-2.5-flash', provider=google_provider)
+    # VCR recording breaks on the file upload request because of its binary contents.
+    # For that reason, we ran the upload once manually and rebuilt the `File` object
+    # below from the printed output.
+    # client = google_provider.client
+    # with open('tests/assets/dummy.pdf', 'rb') as f:
+    #     google_file = client.files.upload(
+    #         file=f,
+    #         config={
+    #             'mime_type': 'application/pdf',
+    #         },
+    #     )
+    # print(google_file)
+    google_file = File(
+        name='files/9b7dfki4eo1b',
+        mime_type='application/pdf',
+        uri='https://generativelanguage.googleapis.com/v1beta/files/9b7dfki4eo1b',
+    )
+    agent = Agent(m)
+
+    result = await agent.run(['Give me a short description of this image', UploadedFile(file=google_file)])
+    assert result.output == snapshot(
+        'The image displays a plain white page with the bold black text "Dummy PDF file" centered at the top-left. The rest of the page is blank.'
+ ) + + async def test_google_builtin_tools_with_other_tools(allow_model_requests: None, google_provider: GoogleProvider): m = GoogleModel('gemini-2.5-flash', provider=google_provider) diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py index cd651d891d..882be9226f 100644 --- a/tests/models/test_openai.py +++ b/tests/models/test_openai.py @@ -33,6 +33,7 @@ ToolCallPart, ToolReturnPart, UnexpectedModelBehavior, + UploadedFile, UserError, UserPromptPart, ) @@ -58,7 +59,7 @@ with try_import() as imports_successful: from openai import APIConnectionError, APIStatusError, AsyncOpenAI - from openai.types import chat + from openai.types import FileObject, chat from openai.types.chat.chat_completion import ChoiceLogprobs from openai.types.chat.chat_completion_chunk import ( Choice as ChunkChoice, @@ -79,6 +80,7 @@ OpenAIResponsesModel, OpenAIResponsesModelSettings, OpenAISystemPromptRole, + _map_uploaded_file, # pyright: ignore[reportPrivateUsage] ) from pydantic_ai.profiles.openai import OpenAIJsonSchemaTransformer from pydantic_ai.providers.cerebras import CerebrasProvider @@ -3103,6 +3105,80 @@ async def test_openai_model_settings_temperature_ignored_on_gpt_5(allow_model_re assert result.output == snapshot('Paris.') +def test_openai_uploaded_file_accepts_id_string(openai_api_key: str): + file_id = 'file-abc123' + provider = OpenAIProvider(api_key=openai_api_key) + + assert _map_uploaded_file(UploadedFile(file=file_id), provider) == file_id + + +def test_openai_uploaded_file_accepts_object_with_id(openai_api_key: str): + class FileStub: + id = 'file-stub' + + provider = OpenAIProvider(api_key=openai_api_key) + + assert _map_uploaded_file(UploadedFile(file=FileStub()), provider) == 'file-stub' + + +def test_openai_uploaded_file_requires_id(openai_api_key: str): + provider = OpenAIProvider(api_key=openai_api_key) + + class FileStub: + pass + + with pytest.raises( + UserError, match='UploadedFile\\.file must be a file ID string or an object with an `id` attribute' + ): + _map_uploaded_file(UploadedFile(file=FileStub()), provider) + + +async def test_openai_responses_uploaded_file_mapping(openai_api_key: str): + provider = OpenAIProvider(api_key=openai_api_key) + responses_model = OpenAIResponsesModel('gpt-4o-mini', provider=provider) + + msg = await responses_model._map_user_prompt( # pyright: ignore[reportPrivateUsage] + UserPromptPart(content=[UploadedFile(file='file-xyz')]) + ) + + assert msg == { + 'role': 'user', + 'content': [{'file_id': 'file-xyz', 'type': 'input_file'}], + } + + +async def test_uploaded_file_input(allow_model_requests: None, openai_api_key: str): + provider = OpenAIProvider(api_key=openai_api_key) + m = OpenAIChatModel('gpt-4o', provider=provider) + # VCR recording breaks when dealing with openai file upload request due to + # binary contents. For that reason, we have manually run once the upload + # and rebuild the FileObject manually (from the print command output). 
+    # with open('tests/assets/dummy.pdf', 'rb') as f:
+    #     file_bytes = f.read()
+    # openai_file = await provider.client.files.create(
+    #     file=('image.pdf', file_bytes, 'application/pdf'),
+    #     purpose='user_data',
+    # )
+    # print(openai_file)
+    openai_file = FileObject(
+        id='file-2bkCwDLR2p8cDXfT9he8tV',
+        bytes=13264,
+        created_at=1764177089,
+        filename='image.pdf',  # OpenAI file upload API only accepts pdf
+        object='file',
+        purpose='user_data',
+        status='processed',
+        expires_at=None,
+        status_details=None,
+    )
+    agent = Agent(m)
+
+    result = await agent.run(['Give me a short description of this image', UploadedFile(file=openai_file)])
+    assert result.output == snapshot(
+        'The file you uploaded is a "Dummy PDF file." There are no further descriptions or details available within its content. The file likely contains filler or placeholder text for testing purposes. If you need a specific type of content or have another file, please let me know!'
+    )
+
+
 async def test_openai_model_cerebras_provider(allow_model_requests: None, cerebras_api_key: str):
     m = OpenAIChatModel('llama3.3-70b', provider=CerebrasProvider(api_key=cerebras_api_key))
     agent = Agent(m)
@@ -3159,7 +3235,8 @@ async def test_cache_point_filtering_responses_model():
     """Test that CachePoint is filtered out in OpenAI Responses API requests."""
     # Test the static method directly to trigger line 1680
     msg = await OpenAIResponsesModel._map_user_prompt(  # pyright: ignore[reportPrivateUsage]
-        UserPromptPart(content=['text before', CachePoint(), 'text after'])
+        None,  # type: ignore
+        UserPromptPart(content=['text before', CachePoint(), 'text after']),
     )
 
     # CachePoint should be filtered out, only text content should remain
diff --git a/tests/test_uploaded_file.py b/tests/test_uploaded_file.py
new file mode 100644
index 0000000000..aea3f9405a
--- /dev/null
+++ b/tests/test_uploaded_file.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+import hashlib
+
+from pydantic_ai.messages import (
+    UploadedFile,
+    UserPromptPart,
+    _uploaded_file_identifier_source,  # pyright: ignore[reportPrivateUsage]
+)
+from pydantic_ai.models.instrumented import InstrumentationSettings
+
+
+def test_uploaded_file_identifier_source_prefers_known_fields():
+    class WithId:
+        id = 'file-id'
+
+    class WithUri:
+        uri = 'gs://bucket/file'
+
+    class WithName:
+        name = 'named-file'
+
+    class WithRepr:
+        def __repr__(self) -> str:
+            return 'repr-value'
+
+    assert _uploaded_file_identifier_source('direct-id') == 'direct-id'
+    assert _uploaded_file_identifier_source(WithId()) == 'file-id'
+    assert _uploaded_file_identifier_source(WithUri()) == 'gs://bucket/file'
+    assert _uploaded_file_identifier_source(WithName()) == 'named-file'
+    assert _uploaded_file_identifier_source(WithRepr()) == 'repr-value'
+
+
+def test_uploaded_file_identifier_defaults_to_hash_and_respects_override():
+    file_id = 'file-abc'
+    uploaded_file = UploadedFile(file=file_id)
+
+    expected = hashlib.sha1(file_id.encode('utf-8')).hexdigest()[:6]
+    assert uploaded_file.identifier == expected
+
+    overridden = UploadedFile(file=file_id, identifier='explicit-id')
+    assert overridden.identifier == 'explicit-id'
+
+
+def test_uploaded_file_instrumentation_parts_include_identifier_and_optional_file():
+    uploaded_file = UploadedFile(file='file-123')
+    part = UserPromptPart(content=[uploaded_file])
+
+    without_content = part.otel_message_parts(InstrumentationSettings(include_content=False))
+    assert without_content == [{'type': 'uploaded-file', 'identifier': uploaded_file.identifier}]
+
+    with_content = part.otel_message_parts(InstrumentationSettings(include_content=True))
+    assert with_content == [
+        {'type': 'uploaded-file', 'identifier': uploaded_file.identifier, 'file': 'file-123'},
+    ]