diff --git a/_test_unstructured_client/integration/test_integration.py b/_test_unstructured_client/integration/test_integration.py index 975b6356..11aef618 100644 --- a/_test_unstructured_client/integration/test_integration.py +++ b/_test_unstructured_client/integration/test_integration.py @@ -221,127 +221,6 @@ async def call_api(): assert len(elements) > 0 -@pytest.mark.parametrize("split_pdf", [True, False]) -@pytest.mark.parametrize("vlm_model", ["gpt-4o"]) -@pytest.mark.parametrize("vlm_model_provider", ["openai"]) -@pytest.mark.parametrize( - "filename", - [ - "layout-parser-paper-fast.pdf", - "fake-power-point.ppt", - "embedded-images-tables.jpg", - ] -) -def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename): - with open(doc_path / filename, "rb") as f: - files = shared.Files( - content=f.read(), - file_name=filename, - ) - - req = operations.PartitionRequest( - partition_parameters=shared.PartitionParameters( - files=files, - strategy="vlm", - vlm_model=vlm_model, - vlm_model_provider=vlm_model_provider, - languages=["eng"], - split_pdf_page=split_pdf, - ) - ) - - response = client.general.partition( - request=req - ) - assert response.status_code == 200 - assert len(response.elements) > 0 - assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition" - - -@pytest.mark.parametrize("split_pdf", [True, False]) -@pytest.mark.parametrize("vlm_model", - [ - "us.amazon.nova-pro-v1:0", - "us.amazon.nova-lite-v1:0", - "us.anthropic.claude-3-5-sonnet-20241022-v2:0", - "us.anthropic.claude-3-opus-20240229-v1:0", - "us.anthropic.claude-3-haiku-20240307-v1:0", - "us.anthropic.claude-3-sonnet-20240229-v1:0", - "us.meta.llama3-2-90b-instruct-v1:0", - "us.meta.llama3-2-11b-instruct-v1:0", - ] -) -@pytest.mark.parametrize("vlm_model_provider", ["bedrock"]) -@pytest.mark.parametrize( - "filename", - [ - "layout-parser-paper-fast.pdf", - "fake-power-point.ppt", - "embedded-images-tables.jpg", - ] -) -def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename): - with open(doc_path / filename, "rb") as f: - files = shared.Files( - content=f.read(), - file_name=filename, - ) - - req = operations.PartitionRequest( - partition_parameters=shared.PartitionParameters( - files=files, - strategy="vlm", - vlm_model=vlm_model, - vlm_model_provider=vlm_model_provider, - languages=["eng"], - split_pdf_page=split_pdf, - ) - ) - - response = client.general.partition( - request=req - ) - assert response.status_code == 200 - assert len(response.elements) > 0 - assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition" - -@pytest.mark.parametrize("split_pdf", [True, False]) -@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022",]) -@pytest.mark.parametrize("vlm_model_provider", ["anthropic"]) -@pytest.mark.parametrize( - "filename", - [ - "layout-parser-paper-fast.pdf", - "fake-power-point.ppt", - "embedded-images-tables.jpg", - ] -) -def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename): - with open(doc_path / filename, "rb") as f: - files = shared.Files( - content=f.read(), - file_name=filename, - ) - - req = operations.PartitionRequest( - partition_parameters=shared.PartitionParameters( - files=files, - strategy="vlm", - vlm_model=vlm_model, - vlm_model_provider=vlm_model_provider, - languages=["eng"], - split_pdf_page=split_pdf, - ) - ) - - response = client.general.partition( - request=req - ) - assert response.status_code == 200 - assert len(response.elements) > 0 - assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition" - - def test_returns_422_for_invalid_pdf( caplog: pytest.LogCaptureFixture, doc_path: Path,