Skip to content

Commit cf6be27

Browse files
committed
Merge branch 'main' into NEXUS-703/remove-nestio-loop
2 parents d6ab606 + 6d680a2 commit cf6be27

35 files changed

+315
-111
lines changed

.speakeasy/gen.lock

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
lockVersion: 2.0.0
22
id: 8b5fa338-9106-4734-abf0-e30d67044a90
33
management:
4-
docChecksum: 83f88cfa4bd77e1d3065b11a97119361
4+
docChecksum: 8befe9665bd102e8cc2241c80080859a
55
docVersion: 1.0.78
6-
speakeasyVersion: 1.490.0
7-
generationVersion: 2.512.0
8-
releaseVersion: 0.30.6
9-
configChecksum: c946e26d3a3b2fe2756ff169416b448c
6+
speakeasyVersion: 1.509.0
7+
generationVersion: 2.539.0
8+
releaseVersion: 0.31.0
9+
configChecksum: faf5191e78f1597b78cd7600af4f1d96
1010
repoURL: https://github.com/Unstructured-IO/unstructured-python-client.git
1111
repoSubDirectory: .
1212
installationURL: https://github.com/Unstructured-IO/unstructured-python-client.git
@@ -16,22 +16,23 @@ features:
1616
acceptHeaders: 3.0.0
1717
additionalDependencies: 1.0.0
1818
constsAndDefaults: 1.0.5
19-
core: 5.11.0
19+
core: 5.12.2
2020
defaultEnabledRetries: 0.2.0
2121
enumUnions: 0.1.0
2222
envVarSecurityUsage: 0.3.2
2323
examples: 3.0.1
2424
globalSecurity: 3.0.3
2525
globalSecurityCallbacks: 1.0.0
2626
globalSecurityFlattening: 1.0.0
27+
globalServerURLs: 3.1.0
2728
methodServerURLs: 3.1.1
2829
multipartFileContentType: 1.0.0
2930
nameOverrides: 3.0.1
30-
nullables: 1.0.0
31+
nullables: 1.0.1
3132
openEnums: 1.0.0
3233
responseFormat: 1.0.1
3334
retries: 3.0.2
34-
sdkHooks: 1.0.0
35+
sdkHooks: 1.0.1
3536
serverIDs: 3.0.0
3637
unions: 3.0.4
3738
uploadStreams: 1.0.0
@@ -124,6 +125,7 @@ generatedFiles:
124125
- docs/models/shared/googledrivesourceconnectorconfig.md
125126
- docs/models/shared/googledrivesourceconnectorconfiginput.md
126127
- docs/models/shared/jobinformation.md
128+
- docs/models/shared/jobstatus.md
127129
- docs/models/shared/kafkaclouddestinationconnectorconfig.md
128130
- docs/models/shared/kafkaclouddestinationconnectorconfiginput.md
129131
- docs/models/shared/kafkacloudsourceconnectorconfig.md
@@ -269,6 +271,7 @@ generatedFiles:
269271
- src/unstructured_client/models/shared/googledrivesourceconnectorconfig.py
270272
- src/unstructured_client/models/shared/googledrivesourceconnectorconfiginput.py
271273
- src/unstructured_client/models/shared/jobinformation.py
274+
- src/unstructured_client/models/shared/jobstatus.py
272275
- src/unstructured_client/models/shared/kafkaclouddestinationconnectorconfig.py
273276
- src/unstructured_client/models/shared/kafkaclouddestinationconnectorconfiginput.py
274277
- src/unstructured_client/models/shared/kafkacloudsourceconnectorconfig.py
@@ -412,16 +415,16 @@ examples:
412415
header: {}
413416
responses:
414417
"200":
415-
application/json: {"created_at": "2025-01-14T00:09:32.358Z", "id": "d18f330b-506f-455c-9cb8-0cee2386ac29", "runtime": "<value>", "status": "<value>", "workflow_id": "136fd799-4c45-4417-a632-f460d8ebb2a2", "workflow_name": "<value>"}
418+
application/json: {"created_at": "2025-01-14T00:09:32.358Z", "id": "d18f330b-506f-455c-9cb8-0cee2386ac29", "runtime": "<value>", "status": "IN_PROGRESS", "workflow_id": "136fd799-4c45-4417-a632-f460d8ebb2a2", "workflow_name": "<value>"}
416419
"422":
417-
application/json: {"detail": [{"loc": ["<value>", 701982], "msg": "<value>", "type": "<value>"}, {"loc": ["<value>"], "msg": "<value>", "type": "<value>"}]}
420+
application/json: {"detail": "<value>"}
418421
list_jobs:
419422
speakeasy-default-list-jobs:
420423
responses:
421424
"200":
422-
application/json: [{"created_at": "2023-09-01T17:59:19.211Z", "id": "7098ff76-419f-4a5a-a313-d159507b629d", "runtime": "<value>", "status": "<value>", "workflow_id": "75cf761e-8c10-48e4-b3d7-6ae90ad4fd3e", "workflow_name": "<value>"}]
425+
application/json: [{"created_at": "2023-09-01T17:59:19.211Z", "id": "7098ff76-419f-4a5a-a313-d159507b629d", "runtime": "<value>", "status": "SCHEDULED", "workflow_id": "75cf761e-8c10-48e4-b3d7-6ae90ad4fd3e", "workflow_name": "<value>"}]
423426
"422":
424-
application/json: {"detail": []}
427+
application/json: {"detail": [{"loc": [], "msg": "<value>", "type": "<value>"}]}
425428
create_source:
426429
speakeasy-default-create-source:
427430
requestBody:

.speakeasy/workflow.lock

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
1-
speakeasyVersion: 1.505.0
1+
speakeasyVersion: 1.509.0
22
sources:
33
my-source:
44
sourceNamespace: my-source
5-
sourceRevisionDigest: sha256:8cba0cf748e247efccefbb5d1839dbeb6e2e3210bb2c90f7b75b9034c22ae24a
6-
sourceBlobDigest: sha256:69fef8ed681f1d68013e71f67927821d64fc1045d3956ace3775fe62ed76b8bb
5+
sourceRevisionDigest: sha256:b62d80c1b902f285b4eaeaf67a4b1d2edb21053eded12c247daecbfeea1d8a30
6+
sourceBlobDigest: sha256:8d5c6a31539e2e1047e529fb6561fd4172fa80506241bc25dcf51d7c76f8b6e6
77
tags:
88
- latest
9-
- speakeasy-sdk-regen-1740605410
10-
- 1.0.68
9+
- speakeasy-sdk-regen-1741046993
10+
- 1.0.78
1111
targets:
1212
unstructured-python:
1313
source: my-source
1414
sourceNamespace: my-source
15-
sourceRevisionDigest: sha256:8cba0cf748e247efccefbb5d1839dbeb6e2e3210bb2c90f7b75b9034c22ae24a
16-
sourceBlobDigest: sha256:69fef8ed681f1d68013e71f67927821d64fc1045d3956ace3775fe62ed76b8bb
15+
sourceRevisionDigest: sha256:b62d80c1b902f285b4eaeaf67a4b1d2edb21053eded12c247daecbfeea1d8a30
16+
sourceBlobDigest: sha256:8d5c6a31539e2e1047e529fb6561fd4172fa80506241bc25dcf51d7c76f8b6e6
1717
codeSamplesNamespace: my-source-code-samples
18-
codeSamplesRevisionDigest: sha256:d7a20fe1c8d687f377099cdb412c6d0dd733dd982ec4662052df0894da211da2
18+
codeSamplesRevisionDigest: sha256:77255b7d9241b11e147927beb6e60a27654bd6089e121356a3d5ca06e5f0f2f1
1919
workflow:
2020
workflowVersion: 1.0.0
2121
speakeasyVersion: latest
2222
sources:
2323
my-source:
2424
inputs:
2525
- location: https://platform.unstructuredapp.io/openapi.json
26-
- location: https://api.unstructured.io/general/openapi.json
26+
- location: https://api.unstructuredapp.io/general/openapi.json
2727
overlays:
2828
- location: ./overlay_client.yaml
2929
registry:

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ from unstructured_client import UnstructuredClient
113113
from unstructured_client.models import shared
114114
from unstructured_client.utils import BackoffStrategy, RetryConfig
115115

116+
116117
with UnstructuredClient() as uc_client:
117118

118119
res = uc_client.destinations.create_destination(request={
@@ -141,6 +142,7 @@ from unstructured_client import UnstructuredClient
141142
from unstructured_client.models import shared
142143
from unstructured_client.utils import BackoffStrategy, RetryConfig
143144

145+
144146
with UnstructuredClient(
145147
retry_config=RetryConfig("backoff", BackoffStrategy(1, 50, 1.1, 100), False),
146148
) as uc_client:
@@ -193,6 +195,7 @@ When custom error responses are specified for an operation, the SDK may also rai
193195
from unstructured_client import UnstructuredClient
194196
from unstructured_client.models import errors, shared
195197

198+
196199
with UnstructuredClient() as uc_client:
197200
res = None
198201
try:
@@ -325,6 +328,7 @@ Generally, the SDK will work well with most IDEs out of the box. However, when u
325328
from unstructured_client import UnstructuredClient
326329
from unstructured_client.models import shared
327330

331+
328332
with UnstructuredClient() as uc_client:
329333

330334
res = uc_client.destinations.create_destination(request={
@@ -355,6 +359,7 @@ from unstructured_client import UnstructuredClient
355359
from unstructured_client.models import shared
356360

357361
async def main():
362+
358363
async with UnstructuredClient() as uc_client:
359364

360365
res = await uc_client.destinations.create_destination_async(request={
@@ -451,6 +456,7 @@ Certain SDK methods accept file objects as part of a request body or multi-part
451456
from unstructured_client import UnstructuredClient
452457
from unstructured_client.models import shared
453458

459+
454460
with UnstructuredClient() as uc_client:
455461

456462
res = uc_client.general.partition(request={
@@ -486,12 +492,14 @@ The `UnstructuredClient` class implements the context manager protocol and regis
486492
```python
487493
from unstructured_client import UnstructuredClient
488494
def main():
495+
489496
with UnstructuredClient() as uc_client:
490497
# Rest of application here...
491498

492499

493500
# Or when using async:
494501
async def amain():
502+
495503
async with UnstructuredClient() as uc_client:
496504
# Rest of application here...
497505
```

RELEASES.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,3 +835,14 @@ Based on:
835835
- [python v0.30.6] .
836836
### Releases
837837
- [PyPI v0.30.6] https://pypi.org/project/unstructured-client/0.30.6 - .
838+
839+
840+
## 2025-03-04 00:09:38
841+
### Changes
842+
Based on:
843+
- OpenAPI Doc
844+
- Speakeasy CLI 1.509.0 (2.539.0) https://github.com/speakeasy-api/speakeasy
845+
### Generated
846+
- [python v0.31.0] .
847+
### Releases
848+
- [PyPI v0.31.0] https://pypi.org/project/unstructured-client/0.31.0 - .

USAGE.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from unstructured_client import UnstructuredClient
55
from unstructured_client.models import shared
66

7+
78
with UnstructuredClient() as uc_client:
89

910
res = uc_client.destinations.create_destination(request={
@@ -34,6 +35,7 @@ from unstructured_client import UnstructuredClient
3435
from unstructured_client.models import shared
3536

3637
async def main():
38+
3739
async with UnstructuredClient() as uc_client:
3840

3941
res = await uc_client.destinations.create_destination_async(request={
553 KB
Loading

_sample_docs/fake-power-point.ppt

594 KB
Binary file not shown.

_test_unstructured_client/integration/test_integration_freemium.py renamed to _test_unstructured_client/integration/test_integration.py

Lines changed: 121 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
from unstructured_client.utils.retries import BackoffStrategy, RetryConfig
1414

1515

16-
FREEMIUM_URL = "https://api.unstructured.io"
17-
18-
1916
@pytest.fixture(scope="module")
2017
def client() -> UnstructuredClient:
2118
_client = UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"))
@@ -47,7 +44,6 @@ def test_partition_strategies(split_pdf, strategy, client, doc_path):
4744
)
4845

4946
response = client.general.partition(
50-
server_url=FREEMIUM_URL,
5147
request=req
5248
)
5349
assert response.status_code == 200
@@ -109,7 +105,6 @@ def test_partition_handling_server_error(error, split_pdf, monkeypatch, doc_path
109105

110106
with pytest.raises(sdk_raises):
111107
response = client.general.partition(
112-
server_url=FREEMIUM_URL,
113108
request=req
114109
)
115110

@@ -220,7 +215,6 @@ async def call_api():
220215
)
221216

222217
resp = client.general.partition(
223-
server_url=FREEMIUM_URL,
224218
request=req
225219
)
226220

@@ -233,3 +227,124 @@ async def call_api():
233227
uvloop.install()
234228
elements = asyncio.run(call_api())
235229
assert len(elements) > 0
230+
231+
232+
@pytest.mark.parametrize("split_pdf", [True, False])
233+
@pytest.mark.parametrize("vlm_model", ["gpt-4o"])
234+
@pytest.mark.parametrize("vlm_model_provider", ["openai"])
235+
@pytest.mark.parametrize(
236+
"filename",
237+
[
238+
"layout-parser-paper-fast.pdf",
239+
"fake-power-point.ppt",
240+
"embedded-images-tables.jpg",
241+
]
242+
)
243+
def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
244+
with open(doc_path / filename, "rb") as f:
245+
files = shared.Files(
246+
content=f.read(),
247+
file_name=filename,
248+
)
249+
250+
req = operations.PartitionRequest(
251+
partition_parameters=shared.PartitionParameters(
252+
files=files,
253+
strategy="vlm",
254+
vlm_model=vlm_model,
255+
vlm_model_provider=vlm_model_provider,
256+
languages=["eng"],
257+
split_pdf_page=split_pdf,
258+
)
259+
)
260+
261+
response = client.general.partition(
262+
request=req
263+
)
264+
assert response.status_code == 200
265+
assert len(response.elements) > 0
266+
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
267+
268+
269+
@pytest.mark.parametrize("split_pdf", [True, False])
270+
@pytest.mark.parametrize("vlm_model",
271+
[
272+
"us.amazon.nova-pro-v1:0",
273+
"us.amazon.nova-lite-v1:0",
274+
"us.anthropic.claude-3-5-sonnet-20241022-v2:0",
275+
"us.anthropic.claude-3-opus-20240229-v1:0",
276+
"us.anthropic.claude-3-haiku-20240307-v1:0",
277+
"us.anthropic.claude-3-sonnet-20240229-v1:0",
278+
"us.meta.llama3-2-90b-instruct-v1:0",
279+
"us.meta.llama3-2-11b-instruct-v1:0",
280+
]
281+
)
282+
@pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
283+
@pytest.mark.parametrize(
284+
"filename",
285+
[
286+
"layout-parser-paper-fast.pdf",
287+
"fake-power-point.ppt",
288+
"embedded-images-tables.jpg",
289+
]
290+
)
291+
def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
292+
with open(doc_path / filename, "rb") as f:
293+
files = shared.Files(
294+
content=f.read(),
295+
file_name=filename,
296+
)
297+
298+
req = operations.PartitionRequest(
299+
partition_parameters=shared.PartitionParameters(
300+
files=files,
301+
strategy="vlm",
302+
vlm_model=vlm_model,
303+
vlm_model_provider=vlm_model_provider,
304+
languages=["eng"],
305+
split_pdf_page=split_pdf,
306+
)
307+
)
308+
309+
response = client.general.partition(
310+
request=req
311+
)
312+
assert response.status_code == 200
313+
assert len(response.elements) > 0
314+
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
315+
316+
@pytest.mark.parametrize("split_pdf", [True, False])
317+
@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022",])
318+
@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
319+
@pytest.mark.parametrize(
320+
"filename",
321+
[
322+
"layout-parser-paper-fast.pdf",
323+
"fake-power-point.ppt",
324+
"embedded-images-tables.jpg",
325+
]
326+
)
327+
def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
328+
with open(doc_path / filename, "rb") as f:
329+
files = shared.Files(
330+
content=f.read(),
331+
file_name=filename,
332+
)
333+
334+
req = operations.PartitionRequest(
335+
partition_parameters=shared.PartitionParameters(
336+
files=files,
337+
strategy="vlm",
338+
vlm_model=vlm_model,
339+
vlm_model_provider=vlm_model_provider,
340+
languages=["eng"],
341+
split_pdf_page=split_pdf,
342+
)
343+
)
344+
345+
response = client.general.partition(
346+
request=req
347+
)
348+
assert response.status_code == 200
349+
assert len(response.elements) > 0
350+
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"

_test_unstructured_client/unit/test_custom_hooks.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,12 +211,6 @@ def test_unit_clean_server_url_fixes_malformed_localhost_url(server_url: str):
211211
assert client.general.sdk_configuration.server_url == "http://localhost:8000"
212212

213213

214-
215-
def test_unit_clean_server_url_returns_None_given_no_server_url():
216-
client = UnstructuredClient(api_key_auth=FAKE_KEY)
217-
assert client.general.sdk_configuration.server_url == None
218-
219-
220214
@pytest.mark.parametrize(
221215
"server_url",
222216
[

0 commit comments

Comments
 (0)