Skip to content

Commit c5dcabf

Browse files
committed
Run the one test to debug
1 parent e35b1c7 commit c5dcabf

File tree

3 files changed

+164
-164
lines changed

3 files changed

+164
-164
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ jobs:
4747
test_integration:
4848
strategy:
4949
matrix:
50-
python-version: [ "3.9","3.10","3.11", "3.12" ]
50+
python-version: [ "3.12" ]
5151
runs-on: ubuntu-latest
5252
steps:
5353
- uses: actions/checkout@v4

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ test-integration-docker:
4545
-docker stop unstructured-api && docker kill unstructured-api
4646
docker run --name unstructured-api -p 8000:8000 -d --rm ${DOCKER_IMAGE} --host 0.0.0.0 && \
4747
curl -s -o /dev/null --retry 10 --retry-delay 5 --retry-all-errors http://localhost:8000/general/docs && \
48-
PYTHONPATH=. poetry run pytest -n auto _test_unstructured_client -v -k "integration" && \
48+
PYTHONASYNCIODEBUG=1 PYTHONPATH=. poetry run pytest -vvv -n auto _test_unstructured_client/integration/test_integration.py && \
4949
docker kill unstructured-api
5050

5151
.PHONY: lint

_test_unstructured_client/integration/test_integration.py

Lines changed: 162 additions & 162 deletions
Original file line number | Diff line number | Diff line change
@@ -24,30 +24,30 @@ def doc_path() -> Path:
2424
return Path(__file__).resolve().parents[2] / "_sample_docs"
2525

2626

27-
@pytest.mark.parametrize("split_pdf", [True, False])
28-
@pytest.mark.parametrize("strategy", ["fast", "ocr_only", "hi_res"])
29-
def test_partition_strategies(split_pdf, strategy, client, doc_path):
30-
filename = "layout-parser-paper-fast.pdf"
31-
with open(doc_path / filename, "rb") as f:
32-
files = shared.Files(
33-
content=f.read(),
34-
file_name=filename,
35-
)
36-
37-
req = operations.PartitionRequest(
38-
partition_parameters=shared.PartitionParameters(
39-
files=files,
40-
strategy=strategy,
41-
languages=["eng"],
42-
split_pdf_page=split_pdf,
43-
)
44-
)
45-
46-
response = client.general.partition(
47-
request=req
48-
)
49-
assert response.status_code == 200
50-
assert len(response.elements)
27+
# @pytest.mark.parametrize("split_pdf", [True, False])
28+
# @pytest.mark.parametrize("strategy", ["fast", "ocr_only", "hi_res"])
29+
# def test_partition_strategies(split_pdf, strategy, client, doc_path):
30+
# filename = "layout-parser-paper-fast.pdf"
31+
# with open(doc_path / filename, "rb") as f:
32+
# files = shared.Files(
33+
# content=f.read(),
34+
# file_name=filename,
35+
# )
36+
37+
# req = operations.PartitionRequest(
38+
# partition_parameters=shared.PartitionParameters(
39+
# files=files,
40+
# strategy=strategy,
41+
# languages=["eng"],
42+
# split_pdf_page=split_pdf,
43+
# )
44+
# )
45+
46+
# response = client.general.partition(
47+
# request=req
48+
# )
49+
# assert response.status_code == 200
50+
# assert len(response.elements)
5151

5252

5353
@pytest.mark.parametrize("split_pdf", [True, False])
@@ -100,27 +100,27 @@ def test_partition_handling_server_error(error, split_pdf, monkeypatch, doc_path
100100
)
101101

102102

103-
@pytest.mark.asyncio
104-
async def test_partition_async_returns_elements(client, doc_path):
105-
filename = "layout-parser-paper.pdf"
106-
with open(doc_path / filename, "rb") as f:
107-
files = shared.Files(
108-
content=f.read(),
109-
file_name=filename,
110-
)
103+
# @pytest.mark.asyncio
104+
# async def test_partition_async_returns_elements(client, doc_path):
105+
# filename = "layout-parser-paper.pdf"
106+
# with open(doc_path / filename, "rb") as f:
107+
# files = shared.Files(
108+
# content=f.read(),
109+
# file_name=filename,
110+
# )
111111

112-
req = operations.PartitionRequest(
113-
partition_parameters=shared.PartitionParameters(
114-
files=files,
115-
strategy="fast",
116-
languages=["eng"],
117-
split_pdf_page=True,
118-
)
119-
)
112+
# req = operations.PartitionRequest(
113+
# partition_parameters=shared.PartitionParameters(
114+
# files=files,
115+
# strategy="fast",
116+
# languages=["eng"],
117+
# split_pdf_page=True,
118+
# )
119+
# )
120120

121-
response = await client.general.partition_async(request=req)
122-
assert response.status_code == 200
123-
assert len(response.elements)
121+
# response = await client.general.partition_async(request=req)
122+
# assert response.status_code == 200
123+
# assert len(response.elements)
124124

125125

126126
@pytest.mark.asyncio
@@ -220,125 +220,125 @@ async def call_api():
220220
assert len(elements) > 0
221221

222222

223-
@pytest.mark.parametrize("split_pdf", [True, False])
224-
@pytest.mark.parametrize("vlm_model", ["gpt-4o"])
225-
@pytest.mark.parametrize("vlm_model_provider", ["openai"])
226-
@pytest.mark.parametrize(
227-
"filename",
228-
[
229-
"layout-parser-paper-fast.pdf",
230-
"fake-power-point.ppt",
231-
"embedded-images-tables.jpg",
232-
]
233-
)
234-
def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
235-
with open(doc_path / filename, "rb") as f:
236-
files = shared.Files(
237-
content=f.read(),
238-
file_name=filename,
239-
)
240-
241-
req = operations.PartitionRequest(
242-
partition_parameters=shared.PartitionParameters(
243-
files=files,
244-
strategy="vlm",
245-
vlm_model=vlm_model,
246-
vlm_model_provider=vlm_model_provider,
247-
languages=["eng"],
248-
split_pdf_page=split_pdf,
249-
)
250-
)
251-
252-
response = client.general.partition(
253-
request=req
254-
)
255-
assert response.status_code == 200
256-
assert len(response.elements) > 0
257-
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
258-
259-
260-
@pytest.mark.parametrize("split_pdf", [True, False])
261-
@pytest.mark.parametrize("vlm_model",
262-
[
263-
"us.amazon.nova-pro-v1:0",
264-
"us.amazon.nova-lite-v1:0",
265-
"us.anthropic.claude-3-5-sonnet-20241022-v2:0",
266-
"us.anthropic.claude-3-opus-20240229-v1:0",
267-
"us.anthropic.claude-3-haiku-20240307-v1:0",
268-
"us.anthropic.claude-3-sonnet-20240229-v1:0",
269-
"us.meta.llama3-2-90b-instruct-v1:0",
270-
"us.meta.llama3-2-11b-instruct-v1:0",
271-
]
272-
)
273-
@pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
274-
@pytest.mark.parametrize(
275-
"filename",
276-
[
277-
"layout-parser-paper-fast.pdf",
278-
"fake-power-point.ppt",
279-
"embedded-images-tables.jpg",
280-
]
281-
)
282-
def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
283-
with open(doc_path / filename, "rb") as f:
284-
files = shared.Files(
285-
content=f.read(),
286-
file_name=filename,
287-
)
288-
289-
req = operations.PartitionRequest(
290-
partition_parameters=shared.PartitionParameters(
291-
files=files,
292-
strategy="vlm",
293-
vlm_model=vlm_model,
294-
vlm_model_provider=vlm_model_provider,
295-
languages=["eng"],
296-
split_pdf_page=split_pdf,
297-
)
298-
)
299-
300-
response = client.general.partition(
301-
request=req
302-
)
303-
assert response.status_code == 200
304-
assert len(response.elements) > 0
305-
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
306-
307-
@pytest.mark.parametrize("split_pdf", [True, False])
308-
@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022",])
309-
@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
310-
@pytest.mark.parametrize(
311-
"filename",
312-
[
313-
"layout-parser-paper-fast.pdf",
314-
"fake-power-point.ppt",
315-
"embedded-images-tables.jpg",
316-
]
317-
)
318-
def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
319-
with open(doc_path / filename, "rb") as f:
320-
files = shared.Files(
321-
content=f.read(),
322-
file_name=filename,
323-
)
324-
325-
req = operations.PartitionRequest(
326-
partition_parameters=shared.PartitionParameters(
327-
files=files,
328-
strategy="vlm",
329-
vlm_model=vlm_model,
330-
vlm_model_provider=vlm_model_provider,
331-
languages=["eng"],
332-
split_pdf_page=split_pdf,
333-
)
334-
)
335-
336-
response = client.general.partition(
337-
request=req
338-
)
339-
assert response.status_code == 200
340-
assert len(response.elements) > 0
341-
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
223+
# @pytest.mark.parametrize("split_pdf", [True, False])
224+
# @pytest.mark.parametrize("vlm_model", ["gpt-4o"])
225+
# @pytest.mark.parametrize("vlm_model_provider", ["openai"])
226+
# @pytest.mark.parametrize(
227+
# "filename",
228+
# [
229+
# "layout-parser-paper-fast.pdf",
230+
# "fake-power-point.ppt",
231+
# "embedded-images-tables.jpg",
232+
# ]
233+
# )
234+
# def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
235+
# with open(doc_path / filename, "rb") as f:
236+
# files = shared.Files(
237+
# content=f.read(),
238+
# file_name=filename,
239+
# )
240+
241+
# req = operations.PartitionRequest(
242+
# partition_parameters=shared.PartitionParameters(
243+
# files=files,
244+
# strategy="vlm",
245+
# vlm_model=vlm_model,
246+
# vlm_model_provider=vlm_model_provider,
247+
# languages=["eng"],
248+
# split_pdf_page=split_pdf,
249+
# )
250+
# )
251+
252+
# response = client.general.partition(
253+
# request=req
254+
# )
255+
# assert response.status_code == 200
256+
# assert len(response.elements) > 0
257+
# assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
258+
259+
260+
# @pytest.mark.parametrize("split_pdf", [True, False])
261+
# @pytest.mark.parametrize("vlm_model",
262+
# [
263+
# "us.amazon.nova-pro-v1:0",
264+
# "us.amazon.nova-lite-v1:0",
265+
# "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
266+
# "us.anthropic.claude-3-opus-20240229-v1:0",
267+
# "us.anthropic.claude-3-haiku-20240307-v1:0",
268+
# "us.anthropic.claude-3-sonnet-20240229-v1:0",
269+
# "us.meta.llama3-2-90b-instruct-v1:0",
270+
# "us.meta.llama3-2-11b-instruct-v1:0",
271+
# ]
272+
# )
273+
# @pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
274+
# @pytest.mark.parametrize(
275+
# "filename",
276+
# [
277+
# "layout-parser-paper-fast.pdf",
278+
# "fake-power-point.ppt",
279+
# "embedded-images-tables.jpg",
280+
# ]
281+
# )
282+
# def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
283+
# with open(doc_path / filename, "rb") as f:
284+
# files = shared.Files(
285+
# content=f.read(),
286+
# file_name=filename,
287+
# )
288+
289+
# req = operations.PartitionRequest(
290+
# partition_parameters=shared.PartitionParameters(
291+
# files=files,
292+
# strategy="vlm",
293+
# vlm_model=vlm_model,
294+
# vlm_model_provider=vlm_model_provider,
295+
# languages=["eng"],
296+
# split_pdf_page=split_pdf,
297+
# )
298+
# )
299+
300+
# response = client.general.partition(
301+
# request=req
302+
# )
303+
# assert response.status_code == 200
304+
# assert len(response.elements) > 0
305+
# assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
306+
307+
# @pytest.mark.parametrize("split_pdf", [True, False])
308+
# @pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022",])
309+
# @pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
310+
# @pytest.mark.parametrize(
311+
# "filename",
312+
# [
313+
# "layout-parser-paper-fast.pdf",
314+
# "fake-power-point.ppt",
315+
# "embedded-images-tables.jpg",
316+
# ]
317+
# )
318+
# def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
319+
# with open(doc_path / filename, "rb") as f:
320+
# files = shared.Files(
321+
# content=f.read(),
322+
# file_name=filename,
323+
# )
324+
325+
# req = operations.PartitionRequest(
326+
# partition_parameters=shared.PartitionParameters(
327+
# files=files,
328+
# strategy="vlm",
329+
# vlm_model=vlm_model,
330+
# vlm_model_provider=vlm_model_provider,
331+
# languages=["eng"],
332+
# split_pdf_page=split_pdf,
333+
# )
334+
# )
335+
336+
# response = client.general.partition(
337+
# request=req
338+
# )
339+
# assert response.status_code == 200
340+
# assert len(response.elements) > 0
341+
# assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
342342

343343

344344
def test_returns_422_for_invalid_pdf(

0 commit comments

Comments (0)