|
12 | 12 | from unstructured_client import UnstructuredClient |
13 | 13 | from unstructured_client.models import shared, operations |
14 | 14 | from unstructured_client.models.errors import HTTPValidationError |
| 15 | +from unstructured_client.models.shared.partition_parameters import OutputFormat |
15 | 16 | from unstructured_client.utils.retries import BackoffStrategy, RetryConfig |
16 | 17 | from unstructured_client._hooks.custom import form_utils |
17 | 18 | from unstructured_client._hooks.custom import split_pdf_hook |
@@ -374,3 +375,37 @@ async def mock_send(_, request: httpx.Request, **kwargs): |
374 | 375 | assert mock_endpoint_called |
375 | 376 |
|
376 | 377 | assert res.status_code == 200 |
| 378 | + |
| 379 | + |
@pytest.mark.parametrize("split_pdf_page", [True, False])
def test_integration_split_csv_response(split_pdf_page, doc_path):
    """Check that a partition request asking for CSV output returns CSV.

    The test runs twice, once with ``split_pdf_page=True`` and once with
    ``split_pdf_page=False``, and expects the same response shape in both
    cases: HTTP 200, a ``text/csv`` content type, no JSON ``elements`` and a
    ``csv_elements`` string that begins with the expected header row.

    Args:
        split_pdf_page: Value forwarded as the ``split_pdf_page`` partition
            parameter.
        doc_path: Path-like directory that contains
            ``layout-parser-paper.pdf`` (joined with the filename via ``/``).
    """
    # Probe the local API first so an unreachable server is reported with a
    # clear message. Only the network call sits inside the ``try``.
    try:
        response = requests.get("http://127.0.0.1:8000/general/docs")
    except requests.exceptions.ConnectionError:
        # pytest.fail is used instead of ``assert False`` because assert
        # statements are removed when Python runs with -O.
        pytest.fail("The unstructured-api is not running on 127.0.0.1:8000")
    assert response.status_code == 200

    client = UnstructuredClient(api_key_auth="", server_url="127.0.0.1:8000")
    filename = "layout-parser-paper.pdf"
    with open(doc_path / filename, "rb") as f:
        files = shared.Files(
            content=f.read(),
            file_name=filename,
        )

    req = operations.PartitionRequest(
        partition_parameters=shared.PartitionParameters(
            files=files,
            output_format=OutputFormat.TEXT_CSV,
            split_pdf_page=split_pdf_page,
        )
    )

    resp = client.general.partition(request=req)

    assert resp.status_code == 200
    assert resp.content_type == "text/csv; charset=utf-8"
    # A CSV response populates ``csv_elements`` and leaves ``elements`` unset.
    assert resp.elements is None
    assert resp.csv_elements is not None
    assert resp.csv_elements.startswith(
        "type,element_id,text,filetype,languages,page_number,filename,parent_id"
    )
0 commit comments