Skip to content
3 changes: 3 additions & 0 deletions .genignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ _test_unstructured_client

# ignore Makefile
Makefile

# Ignore the general.partition code until we can fix the base_url issue
src/unstructured_client/general.py
2 changes: 1 addition & 1 deletion .speakeasy/workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ speakeasyVersion: latest
sources:
my-source:
inputs:
- location: https://api.unstructured.io/general/openapi.json
- location: https://platform.unstructuredapp.io/openapi.json
- location: https://api.unstructured.io/general/openapi.json
overlays:
- location: ./overlay_client.yaml
registry:
Expand Down
2 changes: 1 addition & 1 deletion _test_contract/platform_api/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def platform_api_url():
def client(platform_api_url) -> UnstructuredClient:
_client = UnstructuredClient(
api_key_auth=FAKE_API_KEY,
server_url="platform-api",
server_url=platform_api_url,
retry_config=RetryConfig(
strategy="backoff",
retry_connection_errors=False,
Expand Down
247 changes: 247 additions & 0 deletions _test_unstructured_client/unit/test_server_urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
import httpx
import pytest

from unstructured_client.models import operations
from unstructured_client import UnstructuredClient, basesdk, utils


# Raise one of these from our mock to return to the test code
class BaseUrlCorrect(Exception):
pass


class BaseUrlIncorrect(Exception):
pass


def get_client_method_with_mock(
sdk_endpoint_name,
client_instance,
mocked_server_url,
monkeypatch
):
"""
Given an endpoint name, e.g. "general.partition", return a reference
to that method off of the given client instance.

The client's _build_request will have the following mock:
Assert that the provided server_url is passed into _build_request.
Raise a custom exception to get back to the test.
"""
# Mock this to get past param validation
def mock_unmarshal(*args, **kwargs):
return {}

monkeypatch.setattr(utils, "unmarshal", mock_unmarshal)

# Assert that the correct base_url makes it to here
def mock_build_request(*args, base_url, **kwargs):
if base_url == mocked_server_url:
raise BaseUrlCorrect
else:
raise BaseUrlIncorrect(base_url)

# Find the method from the given string
class_name, method_name = sdk_endpoint_name.split(".")
endpoint_class = getattr(client_instance, class_name)
endpoint_method = getattr(endpoint_class, method_name)

if "async" in method_name:
monkeypatch.setattr(endpoint_class, "_build_request_async", mock_build_request)
else:
monkeypatch.setattr(endpoint_class, "_build_request", mock_build_request)

return endpoint_method


@pytest.mark.parametrize(
"sdk_endpoint_name",
[
("general.partition"),
],
)
def test_endpoint_uses_correct_url(monkeypatch, sdk_endpoint_name):
# Test 1
# Pass server_url to the client, no path
s = UnstructuredClient(server_url="http://localhost:8000")
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to client and ignored, got {e}")

# Test 2
# Pass server_url to the client, with path
s = UnstructuredClient(server_url="http://localhost:8000/my/endpoint")
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to client and was not stripped, got {e}")

# Test 3
# Pass server_url to the endpoint, no path
s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
client_method(request={}, server_url="http://localhost:8000")
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to endpoint and ignored, got {e}")

# Test 4
# Pass server_url to the endpoint, with path
s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
client_method(request={}, server_url="http://localhost:8000/my/endpoint")
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to endpoint and ignored, got {e}")


# Test 5
# No server_url, should take the default
server_url = "https://api.unstructuredapp.io" if "partition" in sdk_endpoint_name else "https://platform.unstructuredapp.io"

s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
server_url,
monkeypatch
)

try:
client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"Default url not used, got {e}")


@pytest.mark.asyncio
@pytest.mark.parametrize(
"sdk_endpoint_name",
[
("general.partition_async"),
],
)
async def test_async_endpoint_uses_correct_url(monkeypatch, sdk_endpoint_name):
# Test 1
# Pass server_url to the client, no path
s = UnstructuredClient(server_url="http://localhost:8000")
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
await client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to client and ignored, got {e}")

# Test 2
# Pass server_url to the client, with path
s = UnstructuredClient(server_url="http://localhost:8000/my/endpoint")
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
await client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to client and was not stripped, got {e}")

# Test 3
# Pass server_url to the endpoint, no path
s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
await client_method(request={}, server_url="http://localhost:8000")
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to endpoint and ignored, got {e}")

# Test 4
# Pass server_url to the endpoint, with path
s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
await client_method(request={}, server_url="http://localhost:8000/my/endpoint")
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to endpoint and ignored, got {e}")


# Test 5
# No server_url, should take the default
server_url = "https://api.unstructuredapp.io" if "partition" in sdk_endpoint_name else "https://platform.unstructuredapp.io"

s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
server_url,
monkeypatch
)

try:
await client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"Default url not used, got {e}")
6 changes: 3 additions & 3 deletions docs/models/errors/httpvalidationerror.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@

## Fields

| Field | Type | Required | Description |
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- |
| `detail` | List[[shared.ValidationError](../../models/shared/validationerror.md)] | :heavy_minus_sign: | N/A |
| Field | Type | Required | Description |
| -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- |
| `detail` | [Optional[errors.Detail]](../../models/errors/detail.md) | :heavy_minus_sign: | N/A |
55 changes: 39 additions & 16 deletions src/unstructured_client/_hooks/custom/clean_server_url_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,54 @@
from unstructured_client.httpclient import HttpClient


class CleanServerUrlSDKInitHook(SDKInitHook):
"""Hook fixing common mistakes by users in defining `server_url` in the unstructured-client"""
def clean_server_url(base_url: str) -> str:
"""Fix url scheme and remove the '/general/v0/general' path."""

if not base_url:
return ""

# add a url scheme if not present (urllib.parse does not work reliably without it)
if "http" not in base_url:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would parse the URL first and then check if a url has a scheme by using parsed_url.scheme. Then we can prepend http:// or https://

Copy link
Collaborator Author

@awalker4 awalker4 Feb 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That comment has been around for a while - I tried parsing without the scheme and it does look like we need to make sure http:// is there first or it gets confused.

In [2]: urlparse("localhost:8000/general/v0/general")
Out[2]: ParseResult(scheme='localhost', netloc='', path='8000/general/v0/general', params='', query='', fragment='')

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see, good to know!

base_url = "http://" + base_url

parsed_url: ParseResult = urlparse(base_url)

if "api.unstructuredapp.io" in parsed_url.netloc:
if parsed_url.scheme != "https":
parsed_url = parsed_url._replace(scheme="https")

# We only want the base url
return urlunparse(parsed_url._replace(path="", params="", query="", fragment=""))

def clean_server_url(self, base_url: str) -> str:
"""Fix url scheme and remove the '/general/v0/general' path."""

if not base_url:
return ""
# -- add a url scheme if not present (urllib.parse does not work reliably without it)
if "http" not in base_url:
base_url = "http://" + base_url
def choose_server_url(endpoint_url: str | None, client_url: str, default_endpoint_url: str) -> str:
"""
Helper function to fix a breaking change in the SDK past 0.30.0.
When we merged the partition and platform specs, the client server_url stopped working,
and users need to pass it in the endpoint function.
For now, call this helper in the generated code to set the correct url.

parsed_url: ParseResult = urlparse(base_url)
Order of choices:
Endpoint server_url -> s.general.partition(server_url=...)
(Passed in as None if not set)

if "api.unstructuredapp.io" in base_url:
if parsed_url.scheme != "https":
parsed_url = parsed_url._replace(scheme="https")
Base client server_url -> s = UnstructuredClient(server_url=...)
(Passed as empty string if not set)

# -- path should always be empty
return urlunparse(parsed_url._replace(path=""))
Default endpoint URL as defined in the spec
"""

url = endpoint_url if endpoint_url is not None else (client_url or default_endpoint_url)
return clean_server_url(url)


class CleanServerUrlSDKInitHook(SDKInitHook):
"""Hook fixing common mistakes by users in defining `server_url` in the unstructured-client"""

def sdk_init(
self, base_url: str, client: HttpClient
) -> Tuple[str, HttpClient]:
"""Concrete implementation for SDKInitHook."""
cleaned_url = self.clean_server_url(base_url)
cleaned_url = clean_server_url(base_url)

return cleaned_url, client
Loading
Loading