2323 FormData ,
2424)
2525from unstructured_client .models import shared
26- from unstructured_client .utils import BackoffStrategy , Retries , RetryConfig , retry_async , serialize_request_body
26+ from unstructured_client .utils import (
27+ BackoffStrategy ,
28+ Retries ,
29+ RetryConfig ,
30+ retry_async ,
31+ serialize_request_body ,
32+ )
2733
2834logger = logging .getLogger (UNSTRUCTURED_CLIENT_LOGGER_NAME )
2935
36+
3037def get_multipart_stream_fields (request : httpx .Request ) -> dict [str , Any ]:
3138 """Extracts the multipart fields from the request.
3239
@@ -65,9 +72,9 @@ def get_multipart_stream_fields(request: httpx.Request) -> dict[str, Any]:
6572 }
6673 return mapped_fields
6774
75+
6876def create_pdf_chunk_request_params (
69- form_data : FormData ,
70- page_number : int
77+ form_data : FormData , page_number : int
7178) -> dict [str , Any ]:
7279 """Creates the request body for the partition API.
7380
@@ -88,11 +95,14 @@ def create_pdf_chunk_request_params(
8895 PARTITION_FORM_SPLIT_CACHE_TMP_DATA_KEY ,
8996 PARTITION_FORM_SPLIT_CACHE_TMP_DATA_DIR_KEY ,
9097 ]
91- chunk_payload = {key : form_data [key ] for key in form_data if key not in fields_to_drop }
98+ chunk_payload = {
99+ key : form_data [key ] for key in form_data if key not in fields_to_drop
100+ }
92101 chunk_payload [PARTITION_FORM_SPLIT_PDF_PAGE_KEY ] = "false"
93102 chunk_payload [PARTITION_FORM_STARTING_PAGE_NUMBER_KEY ] = str (page_number )
94103 return chunk_payload
95104
105+
96106def create_pdf_chunk_request (
97107 form_data : FormData ,
98108 pdf_chunk : Tuple [BinaryIO , int ],
@@ -148,7 +158,6 @@ def create_pdf_chunk_request(
148158 )
149159
150160
151-
152161async def call_api_async (
153162 client : httpx .AsyncClient ,
154163 pdf_chunk_request : httpx .Request ,
@@ -161,12 +170,12 @@ async def call_api_async(
161170 retry_config = RetryConfig (
162171 "backoff" ,
163172 BackoffStrategy (
164- initial_interval = one_second * 3 ,
165- max_interval = one_minute * 12 ,
166- max_elapsed_time = one_minute * 30 ,
167- exponent = 1.88 ,
173+ initial_interval = one_second * 3 ,
174+ max_interval = one_minute * 12 ,
175+ max_elapsed_time = one_minute * 30 ,
176+ exponent = 1.88 ,
168177 ),
169- retry_connection_errors = True
178+ retry_connection_errors = True ,
170179 )
171180
172181 retryable_codes = ["5xx" ]
@@ -177,12 +186,11 @@ async def do_request():
177186 async with limiter :
178187 try :
179188 response = await retry_async (
180- do_request ,
181- Retries (retry_config , retryable_codes )
189+ do_request , Retries (retry_config , retryable_codes )
182190 )
183191 return response
184192 except Exception as e :
185- logger .error ("Request failed with error" , exc_info = e )
193+ logger .error (f "Request failed with error: { e } " , exc_info = e )
186194 raise e
187195 finally :
188196 if not isinstance (pdf_chunk_file , io .BytesIO ) and not pdf_chunk_file .closed :
@@ -205,6 +213,7 @@ def prepare_request_headers(
205213 new_headers .pop ("Content-Length" , None )
206214 return new_headers
207215
216+
208217def create_response (elements : list ) -> httpx .Response :
209218 """
210219 Creates a modified response object with updated content.
@@ -216,13 +225,16 @@ def create_response(elements: list) -> httpx.Response:
216225 Returns:
217226 The modified response object with updated content.
218227 """
219- response = httpx .Response (status_code = 200 , headers = {"Content-Type" : "application/json" })
228+ response = httpx .Response (
229+ status_code = 200 , headers = {"Content-Type" : "application/json" }
230+ )
220231 content = json .dumps (elements ).encode ()
221232 content_length = str (len (content ))
222233 response .headers .update ({"Content-Length" : content_length })
223234 setattr (response , "_content" , content )
224235 return response
225236
237+
226238def get_base_url (url : str | URL ) -> str :
227239 """Extracts the base URL from the given URL.
228240
0 commit comments