Skip to content

Commit 94e71aa

Browse files
authored
Merge pull request #32 from Unstructured-IO/jj/decorators
add decorator to change server_url and init jupyter notebook
2 parents 82b5cb4 + d3f618a commit 94e71aa

File tree

9 files changed

+257
-4
lines changed

9 files changed

+257
-4
lines changed

.genignore

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
# https://www.speakeasyapi.dev/docs/customize-sdks/monkey-patching
22

3-
# ignore human-written test files
4-
tests/test_utils_retries.py
3+
# ignore human-written files and directories
4+
src/unstructured_client/_unstructured
5+
_jupyter
6+
_sample_docs
7+
_test_unstructured_client
58

69
# ignore Makefile
710
Makefile

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@ __pycache__/
44
.pytest_cache/
55
.python-version
66
.DS_Store
7+
8+
# human-added ignore files
9+
.ipynb_checkpoints/

Makefile

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,31 @@ ARCH := $(shell uname -m)
66
# Install #
77
###########
88

9-
test-install:
9+
## install-test: installs the packages needed to run the test suite
.PHONY: install-test
install-test:
	pip install pytest
	pip install requests_mock
1112

13+
## install-dev: installs the packages needed for local development (jupyter)
.PHONY: install-dev
install-dev:
	pip install jupyter
15+
16+
## install: installs all test, dev, and experimental requirements
17+
.PHONY: install
18+
install: install-test install-dev
19+
1220
#################
1321
# Test and Lint #
1422
#################
1523

1624
.PHONY: test
1725
test:
1826
PYTHONPATH=. pytest \
19-
tests
27+
_test_unstructured_client
28+
29+
###########
30+
# Jupyter #
31+
###########
32+
33+
## run-jupyter: starts jupyter notebook
34+
.PHONY: run-jupyter
35+
run-jupyter:
36+
PYTHONPATH=$(realpath .) JUPYTER_PATH=$(realpath .) jupyter-notebook --NotebookApp.token='' --NotebookApp.password=''

_jupyter/README_example.ipynb

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "cd4f8056-2015-4c28-8974-d9862db07e84",
6+
"metadata": {},
7+
"source": [
8+
"Setup"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": null,
14+
"id": "8b8e5368-6268-4da9-9e8d-5c38637da8a5",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import os\n",
19+
"\n",
20+
"def get_api_key():\n",
21+
" api_key = os.getenv(\"UNS_API_KEY\")\n",
22+
" if api_key is None:\n",
23+
" raise ValueError(\"\"\"UNS_API_KEY environment variable not set. \n",
24+
"Set it in your current shell session with `export UNS_API_KEY=<api_key>`\"\"\")\n",
25+
" return api_key"
26+
]
27+
},
28+
{
29+
"cell_type": "markdown",
30+
"id": "11822c83-0791-432c-b1fb-05d8e2ae25bb",
31+
"metadata": {},
32+
"source": [
33+
"\"Usage\" instructions from README for `unstructured-python-client` (as of 01/29/2023)"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": null,
39+
"id": "5c28a39c-ad38-47a5-8247-a2fa1488313c",
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"from unstructured_client import UnstructuredClient\n",
44+
"from unstructured_client.models import shared\n",
45+
"from unstructured_client.models.errors import SDKError\n",
46+
"\n",
47+
"s = UnstructuredClient(api_key_auth=get_api_key())\n",
48+
"filename = \"../_sample_docs/layout-parser-paper-fast.pdf\"\n",
49+
"\n",
50+
"with open(filename, \"rb\") as f:\n",
51+
" # Note that this currently only supports a single file\n",
52+
" files=shared.Files(\n",
53+
" content=f.read(),\n",
54+
" file_name=filename,\n",
55+
"\t)\n",
56+
"\n",
57+
"req = shared.PartitionParameters(\n",
58+
" files=files,\n",
59+
" # Other partition params\n",
60+
" strategy='ocr_only',\n",
61+
" languages=[\"eng\"],\n",
62+
")\n",
63+
"\n",
64+
"try:\n",
65+
" resp = s.general.partition(req)\n",
66+
" print(resp.elements[0])\n",
67+
"except SDKError as e:\n",
68+
" print(e)\n",
69+
"\n",
70+
"# {\n",
71+
"# 'type': 'UncategorizedText', \n",
72+
"# 'element_id': 'fc550084fda1e008e07a0356894f5816', \n",
73+
"# 'metadata': {\n",
74+
"# 'filename': 'layout-parser-paper-fast.pdf', \n",
75+
"# 'filetype': 'application/pdf', \n",
76+
"# 'languages': ['eng'], \n",
77+
"# 'page_number': 1\n",
78+
"# }\n",
79+
"# }"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"id": "f5dfdb68-ba5d-4d21-98b2-4efe04126b7a",
86+
"metadata": {},
87+
"outputs": [],
88+
"source": []
89+
}
90+
],
91+
"metadata": {
92+
"kernelspec": {
93+
"display_name": "Python 3 (ipykernel)",
94+
"language": "python",
95+
"name": "python3"
96+
},
97+
"language_info": {
98+
"codemirror_mode": {
99+
"name": "ipython",
100+
"version": 3
101+
},
102+
"file_extension": ".py",
103+
"mimetype": "text/x-python",
104+
"name": "python",
105+
"nbconvert_exporter": "python",
106+
"pygments_lexer": "ipython3",
107+
"version": "3.10.13"
108+
}
109+
},
110+
"nbformat": 4,
111+
"nbformat_minor": 5
112+
}
168 KB
Binary file not shown.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import os
2+
import pytest
3+
4+
from unstructured_client import UnstructuredClient
5+
6+
7+
def get_api_key():
    """Return the API key held in the ``UNS_API_KEY`` environment variable.

    Raises:
        ValueError: if ``UNS_API_KEY`` is not set in the environment.
    """
    key = os.environ.get("UNS_API_KEY")
    if key is not None:
        return key
    raise ValueError("""UNS_API_KEY environment variable not set.
Set it in your current shell session with `export UNS_API_KEY=<api_key>`""")
13+
14+
15+
@pytest.mark.parametrize(
    "server_url",
    [
        # -- already-correct url
        "https://unstructured-000mock.api.unstructuredapp.io",
        # -- missing scheme
        "unstructured-000mock.api.unstructuredapp.io",
        # -- wrong scheme plus endpoint path
        "http://unstructured-000mock.api.unstructuredapp.io/general/v0/general",
        # -- correct scheme plus endpoint path
        "https://unstructured-000mock.api.unstructuredapp.io/general/v0/general",
        # -- missing scheme plus endpoint path
        "unstructured-000mock.api.unstructuredapp.io/general/v0/general",
    ],
)
def test_clean_server_url_on_paid_api_url(server_url: str):
    """Every spelling of the hosted-API url is reduced to its https base url."""
    expected_url = "https://unstructured-000mock.api.unstructuredapp.io"

    sdk = UnstructuredClient(
        server_url=server_url,
        api_key_auth=get_api_key(),
    )

    assert sdk.general.sdk_configuration.server_url == expected_url
31+
32+
33+
@pytest.mark.parametrize(
    "server_url",
    [
        # -- already-correct url
        "http://localhost:8000",
        # -- missing scheme
        "localhost:8000",
        # -- missing scheme plus endpoint path
        "localhost:8000/general/v0/general",
        # -- scheme plus endpoint path
        "http://localhost:8000/general/v0/general",
    ],
)
def test_clean_server_url_on_localhost(server_url: str):
    """Every spelling of a localhost url is reduced to its http base url."""
    expected_url = "http://localhost:8000"

    sdk = UnstructuredClient(
        server_url=server_url,
        api_key_auth=get_api_key(),
    )

    assert sdk.general.sdk_configuration.server_url == expected_url
48+
49+
50+
def test_clean_server_url_on_empty_string():
    """An empty ``server_url`` is passed through unchanged."""
    sdk = UnstructuredClient(
        server_url="",
        api_key_auth=get_api_key(),
    )

    configured_url = sdk.general.sdk_configuration.server_url
    assert configured_url == ""
56+
57+
@pytest.mark.parametrize(
    "server_url",
    [
        "https://unstructured-000mock.api.unstructuredapp.io",
        "unstructured-000mock.api.unstructuredapp.io/general/v0/general",
    ],
)
def test_clean_server_url_with_positional_arguments(server_url: str):
    """The url is cleaned when it is supplied as the third positional argument."""
    expected_url = "https://unstructured-000mock.api.unstructuredapp.io"

    # -- arguments are passed by position only: api key, "", then the url under test
    sdk = UnstructuredClient(
        get_api_key(),
        "",
        server_url,
    )

    assert sdk.general.sdk_configuration.server_url == expected_url
File renamed without changes.

src/unstructured_client/sdk.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
from typing import Callable, Dict, Union
77
from unstructured_client import utils
88
from unstructured_client.models import shared
9+
from unstructured_client.utils._decorators import clean_server_url
910

1011
class UnstructuredClient:
1112
r"""Unstructured Pipeline API: Partition documents with the Unstructured library"""
1213
general: General
1314

1415
sdk_configuration: SDKConfiguration
1516

17+
@clean_server_url
1618
def __init__(self,
1719
api_key_auth: Union[str, Callable[[], str]],
1820
server: str = None,
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from __future__ import annotations
2+
3+
import functools
4+
from typing import cast, Callable, Optional
5+
from typing_extensions import ParamSpec
6+
from urllib.parse import urlparse, urlunparse, ParseResult
7+
8+
9+
_P = ParamSpec("_P")
10+
11+
12+
def clean_server_url(func: Callable[_P, None]) -> Callable[_P, None]:
    """Decorate a callable so that its ``server_url`` argument is normalized.

    The wrapper looks for ``server_url`` first in the keyword arguments and then
    at positional index 3. A non-empty url is rewritten as follows:

    - ``http://`` is prepended when the url does not start with an http(s) scheme
    - the scheme is forced to ``https`` when the host contains
      ``api.unstructuredapp.io``
    - the path component is removed

    An empty or missing ``server_url`` is passed through untouched.

    Args:
        func: the callable to wrap; it is called with the (possibly rewritten)
            arguments and its result is returned.

    Returns:
        The wrapping callable, carrying ``func``'s metadata via ``functools.wraps``.
    """

    @functools.wraps(func)
    def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None:
        # -- positional slot of `server_url`; presumably (self, api_key_auth, server,
        # -- server_url) as in `UnstructuredClient.__init__` -- confirm if that
        # -- signature changes
        SERVER_URL_ARG_IDX = 3
        url_is_in_kwargs = True

        server_url: Optional[str] = cast(Optional[str], kwargs.get("server_url"))

        if server_url is None and len(args) > SERVER_URL_ARG_IDX:
            server_url = cast(str, args[SERVER_URL_ARG_IDX])
            url_is_in_kwargs = False

        if server_url:
            # -- add a url scheme if not present (urllib.parse does not work reliably
            # -- without it). Test the prefix rather than a substring so that a host
            # -- such as "my-http-proxy.local" is still recognized as schemeless, and
            # -- compare case-insensitively so "HTTP://..." is not prefixed twice.
            if not server_url.lower().startswith(("http://", "https://")):
                server_url = "http://" + server_url

            parsed_url: ParseResult = urlparse(server_url)

            # -- the hosted API is only served over https; match on the host so that
            # -- the same text appearing elsewhere in the url does not trigger it
            if "api.unstructuredapp.io" in parsed_url.netloc.lower():
                if parsed_url.scheme != "https":
                    parsed_url = parsed_url._replace(scheme="https")

            # -- path should always be empty
            cleaned_url = urlunparse(parsed_url._replace(path=""))

            if url_is_in_kwargs:
                kwargs["server_url"] = cleaned_url
            else:
                args = (
                    args[:SERVER_URL_ARG_IDX]
                    + (cleaned_url,)
                    + args[SERVER_URL_ARG_IDX + 1:]
                )  # type: ignore

        return func(*args, **kwargs)

    return wrapper

0 commit comments

Comments
 (0)