|
31 | 31 | PARTITION_FORM_SPLIT_PDF_PAGE_KEY, |
32 | 32 | PARTITION_FORM_STARTING_PAGE_NUMBER_KEY, |
33 | 33 | ) |
| 34 | +from unstructured_client._hooks.custom.request_utils import get_base_url |
34 | 35 | from unstructured_client._hooks.types import ( |
35 | 36 | AfterErrorContext, |
36 | 37 | AfterErrorHook, |
@@ -156,7 +157,8 @@ class SplitPdfHook(SDKInitHook, BeforeRequestHook, AfterSuccessHook, AfterErrorH |
156 | 157 |
|
157 | 158 | def __init__(self) -> None: |
158 | 159 | self.client: Optional[HttpClient] = None |
159 | | - self.base_url: Optional[str] = None |
| 160 | + self.partition_base_url: Optional[str] = None |
| 161 | + self.is_partition_request: bool = False |
160 | 162 | self.async_client: Optional[AsyncHttpClient] = None |
161 | 163 | self.coroutines_to_execute: dict[ |
162 | 164 | str, list[partial[Coroutine[Any, Any, httpx.Response]]] |
@@ -212,7 +214,9 @@ def handle_request(self, request: httpx.Request) -> httpx.Response: |
212 | 214 | # return await self.base_transport.handle_async_request(request) |
213 | 215 |
|
214 | 216 | # Instead, save the base url so we can use it for our dummy request |
215 | | - self.base_url = base_url |
| 217 | + # As this can be overwritten with the Platform API URL, we need to get it again in the |
| 218 | + # `before_request` hook from the request object, as the real URL is not available here. |
| 219 | + self.partition_base_url = base_url |
216 | 220 |
|
217 | 221 | # Explicit cast to httpx.Client to avoid a typing error |
218 | 222 | httpx_client = cast(httpx.Client, client) |
@@ -246,6 +250,16 @@ def before_request( |
246 | 250 | Union[httpx.PreparedRequest, Exception]: If `splitPdfPage` is set to `true`, |
247 | 251 | the last page request; otherwise, the original request. |
248 | 252 | """ |
| 253 | + |
| 254 | + # Actually the general.partition operation overwrites the default client's base url (as |
| 255 | + # the platform operations do). Here we need to get the base url from the request object. |
| 256 | + if hook_ctx.operation_id == "partition": |
| 257 | + self.partition_base_url = get_base_url(request.url) |
| 258 | + self.is_partition_request = True |
| 259 | + else: |
| 260 | + self.is_partition_request = False |
| 261 | + return request |
| 262 | + |
249 | 263 | if self.client is None: |
250 | 264 | logger.warning("HTTP client not accessible! Continuing without splitting.") |
251 | 265 | return request |
@@ -391,7 +405,7 @@ def before_request( |
391 | 405 | # dummy_request = httpx.Request("GET", "http://no-op") |
392 | 406 | return httpx.Request( |
393 | 407 | "GET", |
394 | | - f"{self.base_url}/general/docs", |
| 408 | + f"{self.partition_base_url}/general/docs", |
395 | 409 | headers={"operation_id": operation_id}, |
396 | 410 | ) |
397 | 411 |
|
@@ -644,6 +658,9 @@ def after_success( |
644 | 658 | combined response object; otherwise, the original response. Can return |
645 | 659 | exception if it occurred during the execution. |
646 | 660 | """ |
| 661 | + if not self.is_partition_request: |
| 662 | + return response |
| 663 | + |
647 | 664 | # Grab the correct id out of the dummy request |
648 | 665 | operation_id = response.request.headers.get("operation_id") |
649 | 666 |
|
|
0 commit comments