From 4c7155719f19370c344295015bf57821261922f7 Mon Sep 17 00:00:00 2001 From: Austin Walker Date: Fri, 11 Oct 2024 13:45:10 -0400 Subject: [PATCH 1/2] fix: Use configured server_url for split page dummy request Fixes #191. Due to a limitation in our templated codebase, we need our split page logic to return a request that's just going to give a 200 response. Then, we jump back into the AfterSuccessHook and await all of the pdf splits. Hopefully we can clean this up soon to avoid having an extra call at all, but for now let's make sure the GET /general/docs is done on the correct base url. --- CHANGELOG.md | 9 +++++++++ _test_unstructured_client/integration/test_decorators.py | 6 ++++-- src/unstructured_client/_hooks/custom/split_pdf_hook.py | 5 ++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d59fbf5d..8ac13563 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## 0.26.1 + +### Enhancements + +### Features + +### Fixes +* Use the configured server_url for our split page "dummy" request + ## 0.26.0 ### Enhancements diff --git a/_test_unstructured_client/integration/test_decorators.py b/_test_unstructured_client/integration/test_decorators.py index b80185f2..efef8591 100644 --- a/_test_unstructured_client/integration/test_decorators.py +++ b/_test_unstructured_client/integration/test_decorators.py @@ -304,12 +304,14 @@ async def mock_send(_, request: httpx.Request, **kwargs): We want to make sure both code paths are retried. """ - # Assert that the SDK issues our no-op request + # Assert that the SDK issues our dummy request # returned by the BeforeRequestHook nonlocal mock_endpoint_called - if request.url.host == "no-op" or "docs" in request.url.path: + if request.url.host == "localhost" and "docs" in request.url.path: mock_endpoint_called = True return Response(200, request=request) + elif "docs" in request.url.path: + assert False, "The server URL was not set in the dummy request" request_body = request.read() diff --git a/src/unstructured_client/_hooks/custom/split_pdf_hook.py b/src/unstructured_client/_hooks/custom/split_pdf_hook.py index a85d3d30..8522902e 100644 --- a/src/unstructured_client/_hooks/custom/split_pdf_hook.py +++ b/src/unstructured_client/_hooks/custom/split_pdf_hook.py @@ -156,6 +156,9 @@ def handle_request(self, request: httpx.Request) -> httpx.Response: # # Otherwise, pass the request to the default transport # return await self.base_transport.handle_async_request(request) + # Instead, save the base url so we can use it for our dummy request + self.base_url = base_url + # Explicit cast to httpx.Client to avoid a typing error httpx_client = cast(httpx.Client, client) # async_httpx_client = cast(httpx.AsyncClient, async_client) @@ -346,7 +349,7 @@ async def call_api_partial(page): # dummy_request = httpx.Request("GET", "http://no-op") return httpx.Request( "GET", - "https://api.unstructuredapp.io/general/docs", + f"{self.base_url}/general/docs", headers={"operation_id": operation_id}, ) From 75fe5adea670362f870b1f9fb53a1bdb9d409dbe Mon Sep 17 00:00:00 2001 From: Austin Walker Date: Fri, 11 Oct 2024 13:51:25 -0400 Subject: [PATCH 2/2] Fix lint error and bump version --- gen.yaml | 2 +- src/unstructured_client/_hooks/custom/split_pdf_hook.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/gen.yaml b/gen.yaml index 45b10d16..9f242398 100644 --- a/gen.yaml +++ b/gen.yaml @@ -10,7 +10,7 @@ generation: auth: oAuth2ClientCredentialsEnabled: false python: - version: 0.26.0 + version: 0.26.1 additionalDependencies: dev: deepdiff: '>=6.0' diff --git a/src/unstructured_client/_hooks/custom/split_pdf_hook.py b/src/unstructured_client/_hooks/custom/split_pdf_hook.py index 8522902e..fcf83ee3 100644 --- a/src/unstructured_client/_hooks/custom/split_pdf_hook.py +++ b/src/unstructured_client/_hooks/custom/split_pdf_hook.py @@ -105,6 +105,7 @@ class SplitPdfHook(SDKInitHook, BeforeRequestHook, AfterSuccessHook, AfterErrorH def __init__(self) -> None: self.client: Optional[HttpClient] = None + self.base_url: Optional[str] = None self.async_client: Optional[AsyncHttpClient] = None self.coroutines_to_execute: dict[ str, list[Coroutine[Any, Any, httpx.Response]]