Skip to content

Commit 07bd5a1

Browse files
committed
init conftest.py and move fixtures, move csv test to test_split_pdf_hook
1 parent c42a7ca commit 07bd5a1

File tree

5 files changed

+84
-49
lines changed

5 files changed

+84
-49
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from __future__ import annotations
2+
3+
import os
4+
from pathlib import Path
5+
from typing import Generator
6+
import pytest
7+
8+
from unstructured_client.sdk import UnstructuredClient
9+
10+
11+
@pytest.fixture(scope="module")
def client() -> Generator[UnstructuredClient, None, None]:
    """Provide one module-scoped ``UnstructuredClient`` aimed at the ``free-api`` server.

    The API key is read from the ``UNSTRUCTURED_API_KEY`` environment variable.
    """
    api_key = os.getenv("UNSTRUCTURED_API_KEY")
    yield UnstructuredClient(api_key_auth=api_key, server='free-api')
15+
16+
17+
@pytest.fixture(scope="module")
def doc_path() -> Path:
    """Return the ``_sample_docs`` directory that sits beside this file's parent directory."""
    # parents[0] is the directory holding this file; parents[1] is one level above it.
    base_dir = Path(__file__).resolve().parents[1]
    return base_dir / "_sample_docs"

_test_unstructured_client/integration/test_integration_freemium.py

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,16 @@
33
import asyncio
44
import json
55
import os
6-
from pathlib import Path
76

87
import pytest
98
from deepdiff import DeepDiff
109

1110
from unstructured_client import UnstructuredClient
1211
from unstructured_client.models import shared, operations
1312
from unstructured_client.models.errors import SDKError, ServerError, HTTPValidationError
14-
from unstructured_client.models.shared.partition_parameters import OutputFormat
1513
from unstructured_client.utils.retries import BackoffStrategy, RetryConfig
1614

1715

18-
@pytest.fixture(scope="module")
19-
def client() -> UnstructuredClient:
20-
_client = UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), server='free-api')
21-
yield _client
22-
23-
24-
@pytest.fixture(scope="module")
25-
def doc_path() -> Path:
26-
return Path(__file__).resolve().parents[2] / "_sample_docs"
27-
28-
2916
@pytest.mark.parametrize("split_pdf", [True, False])
3017
@pytest.mark.parametrize("strategy", ["fast", "ocr_only", "hi_res"])
3118
def test_partition_strategies(split_pdf, strategy, client, doc_path):
@@ -223,29 +210,3 @@ async def call_api():
223210
uvloop.install()
224211
elements = asyncio.run(call_api())
225212
assert len(elements) > 0
226-
227-
228-
def test_partition_csv_response(client, doc_path):
229-
filename = "layout-parser-paper-fast.pdf"
230-
with open(doc_path / filename, "rb") as f:
231-
files = shared.Files(
232-
content=f.read(),
233-
file_name=filename,
234-
)
235-
236-
req = operations.PartitionRequest(
237-
partition_parameters=shared.PartitionParameters(
238-
files=files,
239-
output_format=OutputFormat.TEXT_CSV,
240-
)
241-
)
242-
243-
response = client.general.partition(request=req)
244-
assert response.status_code == 200
245-
assert response.content_type == "text/csv; charset=utf-8"
246-
assert response.elements is None
247-
assert response.csv_elements is not None
248-
assert response.csv_elements.startswith(
249-
"type,element_id,text,filetype,languages,page_number,filename,parent_id"
250-
"\nTitle,6aa0ff22f91bbe7e26e8e25ca8052acd,Layout"
251-
)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from __future__ import annotations
2+
3+
from unstructured_client._hooks.custom.request_utils import create_response
4+
5+
6+
def test_create_response_for_json():
    """A list of element dicts comes back as a 200 response with a JSON body."""
    title = {"type": "Title", "text": "Hello, World!"}
    narrative = {"type": "NarrativeText", "text": "Goodbye!"}
    expected_elements = [title, narrative]

    result = create_response(expected_elements)

    assert result.status_code == 200
    assert result.json() == expected_elements
    assert result.headers["Content-Type"] == "application/json"
15+
16+
def test_create_response_for_csv():
    """CSV chunks (a list of ``bytes``) produce a 200 ``text/csv`` response.

    ``create_response`` only takes its CSV branch when it receives a list whose
    items are all ``bytes``; any other input is serialized as JSON. The payload
    is therefore passed as a list of byte chunks rather than a plain ``str``,
    and the expected header includes the ``charset=utf-8`` suffix that the CSV
    branch sets.
    """
    elements = [
        b'type,element_id,text,languages,page_number,filename,filetype,parent_id'
        b'\nTitle,f73329878fbbb0bb131a83e7b6daacbe,Module One - Introduction to Product'
        b' Development and Quality Assurance,[\'eng\'],1,list-item-example-1.pdf,application/pdf,'
    ]
    response = create_response(elements)
    assert response.status_code == 200
    assert response.headers["Content-Type"] == "text/csv; charset=utf-8"
    # A CSV body is not JSON, so compare raw bytes instead of calling .json().
    # NOTE(review): assumes create_response stores the joined chunks as the
    # response body -- confirm against the end of that function.
    assert response.content == b''.join(elements)

_test_unstructured_client/unit/test_split_pdf_hook.py

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,12 @@
11
from __future__ import annotations
22

33
import asyncio
4-
import io
5-
import logging
64
from asyncio import Task
75
from collections import Counter
8-
from typing import Coroutine
96

10-
import httpx
117
import pytest
128
import requests
13-
from requests_toolbelt import MultipartDecoder, MultipartEncoder
9+
from requests_toolbelt import MultipartDecoder
1410

1511
from unstructured_client._hooks.custom import form_utils, pdf_utils, request_utils
1612
from unstructured_client._hooks.custom.form_utils import (
@@ -27,7 +23,9 @@
2723
SplitPdfHook,
2824
get_optimal_split_size, run_tasks,
2925
)
30-
from unstructured_client.models import shared
26+
from unstructured_client.models import operations, shared
27+
from unstructured_client.models.shared.partition_parameters import OutputFormat
28+
from unstructured_client.sdk import UnstructuredClient
3129

3230

3331
def test_unit_clear_operation():
@@ -479,3 +477,37 @@ async def test_remaining_tasks_cancelled_when_fails_disallowed():
479477
await asyncio.sleep(1)
480478
print("Cancelled amount: ", cancelled_counter["cancelled"])
481479
assert len(tasks) > cancelled_counter["cancelled"] > 0
480+
481+
482+
@pytest.mark.parametrize("split_pdf_page", [True, False])
def test_integration_get_split_csv_response(split_pdf_page, doc_path):
    """Request CSV output from a local unstructured-api, with and without PDF splitting.

    The test first probes ``http://127.0.0.1:8000/general/docs`` and fails right
    away when nothing is listening there. It then partitions
    ``layout-parser-paper.pdf`` with ``output_format=TEXT_CSV`` and checks that
    the result is a CSV payload beginning with the expected header row.

    Args:
        split_pdf_page: Parametrized flag forwarded to ``PartitionParameters``.
        doc_path: Fixture giving the directory that holds the sample documents.
    """
    try:
        # The timeout stops the suite from hanging if the port accepts the
        # connection but never sends a reply.
        response = requests.get("http://127.0.0.1:8000/general/docs", timeout=10)
    except requests.exceptions.ConnectionError:
        # pytest.fail is used instead of `assert False`, which is removed when
        # Python runs with -O.
        pytest.fail("The unstructured-api is not running on localhost:8000")
    assert response.status_code == 200

    client = UnstructuredClient(api_key_auth="", server_url="127.0.0.1:8000")
    filename = "layout-parser-paper.pdf"
    with open(doc_path / filename, "rb") as f:
        files = shared.Files(
            content=f.read(),
            file_name=filename,
        )
    req = operations.PartitionRequest(
        partition_parameters=shared.PartitionParameters(
            files=files,
            output_format=OutputFormat.TEXT_CSV,
            split_pdf_page=split_pdf_page,
        )
    )

    resp = client.general.partition(request=req)

    assert resp.status_code == 200
    assert resp.content_type == "text/csv; charset=utf-8"
    assert resp.elements is None
    assert resp.csv_elements is not None
    assert resp.csv_elements.startswith(
        "type,element_id,text,filetype,languages,page_number,filename,parent_id"
    )

src/unstructured_client/_hooks/custom/request_utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def prepare_request_payload(form_data: FormData) -> FormData:
139139
return payload
140140

141141

142-
def create_response(elements: list) -> httpx.Response:
142+
def create_response(elements: list[dict[str, Any] | bytes]) -> httpx.Response:
143143
"""
144144
Creates a modified response object with updated content.
145145
@@ -150,9 +150,9 @@ def create_response(elements: list) -> httpx.Response:
150150
Returns:
151151
The modified response object with updated content.
152152
"""
153-
if not isinstance(elements[0], dict):
154-
response = httpx.Response(status_code=200, headers={"Content-Type": "text/csv"})
155-
content = b''.join(elements)
153+
if isinstance(elements, list) and all(isinstance(element, bytes) for element in elements):
154+
response = httpx.Response(status_code=200, headers={"Content-Type": "text/csv; charset=utf-8"})
155+
content = b''.join(elements) # type: ignore
156156
else:
157157
response = httpx.Response(status_code=200, headers={"Content-Type": "application/json"})
158158
content = json.dumps(elements).encode()

0 commit comments

Comments
 (0)