File tree Expand file tree Collapse file tree 2 files changed +31
-0
lines changed
pydantic_ai_slim/pydantic_ai Expand file tree Collapse file tree 2 files changed +31
-0
lines changed Original file line number Diff line number Diff line change @@ -312,6 +312,19 @@ def __init__(
312
312
313
313
def _infer_media_type (self ) -> str :
314
314
"""Return the media type of the document, based on the url."""
315
+ # Common document types are hardcoded here as mime-type support for these
316
+ # extensions varies across operating systems.
317
+ if self .url .endswith (('.md' , '.mdx' , '.markdown' )):
318
+ return 'text/markdown'
319
+ elif self .url .endswith ('.asciidoc' ):
320
+ return 'text/x-asciidoc'
321
+ elif self .url .endswith ('.txt' ):
322
+ return 'text/plain'
323
+ elif self .url .endswith ('.pdf' ):
324
+ return 'application/pdf'
325
+ elif self .url .endswith ('.rtf' ):
326
+ return 'application/rtf'
327
+
315
328
type_ , _ = guess_type (self .url )
316
329
if type_ is None :
317
330
raise ValueError (f'Unknown document file extension: { self .url } ' )
Original file line number Diff line number Diff line change @@ -41,6 +41,24 @@ def test_youtube_video_url(url: str, is_youtube: bool):
41
41
assert video_url .format == 'mp4'
42
42
43
43
44
@pytest.mark.parametrize(
    'url, expected_data_type',
    [
        # Every extension that `_infer_media_type` hardcodes gets its own case, so the
        # result is checked independently of the operating system's mime-type database.
        ('https://raw.githubusercontent.com/pydantic/pydantic-ai/refs/heads/main/docs/help.md', 'text/markdown'),
        ('https://raw.githubusercontent.com/pydantic/pydantic-ai/refs/heads/main/docs/help.mdx', 'text/markdown'),
        ('https://raw.githubusercontent.com/pydantic/pydantic-ai/refs/heads/main/docs/help.markdown', 'text/markdown'),
        ('https://raw.githubusercontent.com/pydantic/pydantic-ai/refs/heads/main/docs/help.txt', 'text/plain'),
        ('https://raw.githubusercontent.com/pydantic/pydantic-ai/refs/heads/main/docs/help.pdf', 'application/pdf'),
        ('https://raw.githubusercontent.com/pydantic/pydantic-ai/refs/heads/main/docs/help.rtf', 'application/rtf'),
        (
            'https://raw.githubusercontent.com/pydantic/pydantic-ai/refs/heads/main/docs/help.asciidoc',
            'text/x-asciidoc',
        ),
    ],
)
def test_document_url_other_types(url: str, expected_data_type: str) -> None:
    """Check that `DocumentUrl.media_type` matches the expected type for each hardcoded extension.

    Args:
        url: Document URL whose file extension determines the media type.
        expected_data_type: Media type expected for that extension.
    """
    document_url = DocumentUrl(url=url)
    assert document_url.media_type == expected_data_type
60
+
61
+
44
62
def test_document_url ():
45
63
document_url = DocumentUrl (url = 'https://example.com/document.pdf' )
46
64
assert document_url .media_type == 'application/pdf'
You can’t perform that action at this time.
0 commit comments