Skip to content

Commit 0bd5c64

Browse files
fix client for pdf upload
1 parent f028ffc commit 0bd5c64

File tree

1 file changed

+33
-11
lines changed

1 file changed

+33
-11
lines changed

src/axiomatic/client.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import time
77
import json
88
from typing import Dict, List, Optional, Union
9+
from io import BytesIO
10+
from fastapi import UploadFile
911

1012
from .base_client import BaseClient, AsyncBaseClient
1113
from . import ParseResponse, EquationProcessingResponse
@@ -177,21 +179,41 @@ class DocumentHelper:
177179
def __init__(self, ax_client: Axiomatic):
    """Keep a reference to the client whose ``document.parse`` the PDF helpers call."""
    self._ax_client = ax_client
179181

180-
def pdf_from_url(self, url: str) -> ParseResponse:
182+
def pdf_from_file(self, path: str):
    """Open a PDF document from a file path and parse it into a Markdown response.

    Args:
        path: Location of the PDF file on disk.

    Returns:
        The value returned by ``self._ax_client.document.parse`` for the upload.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Function-scope import so the method does not depend on the file's import block.
    import os

    with open(path, "rb") as f:
        file_bytes = f.read()

    # os.path.basename honours the platform's path separators (e.g. Windows
    # backslashes) and accepts os.PathLike objects; splitting on "/" does neither.
    file_name = os.path.basename(path)

    # Hand .parse a (filename, content, content-type) tuple describing the upload;
    # per the original author's note, .parse expects a FastAPI UploadFile.
    file_tuple = (file_name, file_bytes, "application/pdf")
    return self._ax_client.document.parse(file=file_tuple)
194+
195+
def pdf_from_url(self, url: str):
    """Download a PDF document from a URL and parse it into a Markdown response.

    arXiv abstract links (``.../abs/<id>``) are rewritten to the matching
    ``.../pdf/<id>`` link before downloading.

    Args:
        url: Address of the PDF, or of an arXiv abstract page.

    Returns:
        The value returned by ``self._ax_client.document.parse`` for the upload.

    Raises:
        Exception: If the download does not answer with HTTP status 200.
    """
    # Function-scope import so the method does not depend on the file's import block.
    from urllib.parse import urlparse

    if "arxiv.org" in url and "/abs/" in url:
        # Rewrite only the first "/abs/" path segment; a bare replace("abs", "pdf")
        # would also corrupt any other "abs" substring in the URL.
        url = url.replace("/abs/", "/pdf/", 1)
        print("The URL is an arXiv abstract page. Replacing 'abs' with 'pdf' to download the PDF.")
    response = requests.get(url)

    if response.status_code != 200:
        raise Exception(f"Failed to download PDF. Status code: {response.status_code}")

    # Take the file name from the URL *path* so a query string or fragment
    # (e.g. "paper.pdf?download=1") does not hide the ".pdf" suffix; fall back
    # to a default name when the last path segment is not a PDF name.
    file_name = urlparse(url).path.rsplit("/", 1)[-1]
    if not file_name.lower().endswith(".pdf"):
        file_name = "document.pdf"

    # Hand .parse a (filename, content, content-type) tuple describing the upload;
    # per the original author's note, .parse expects a FastAPI UploadFile.
    file_tuple = (file_name, response.content, "application/pdf")
    return self._ax_client.document.parse(file=file_tuple)
188216

189-
def pdf_from_file(self, path: str) -> ParseResponse:
190-
"""Open a PDF document from a file path and parse it into a Markdown response."""
191-
with open(path, "rb") as f:
192-
file = f.read()
193-
response = self._ax_client.document.parse(file=file)
194-
return response
195217

196218
def plot_b64_images(self, images: Dict[str, str]):
197219
"""Plot a dictionary of base64 images."""

0 commit comments

Comments
 (0)