|
| 1 | +import pytest |
| 2 | +import requests |
| 3 | +from unittest import mock |
| 4 | + |
| 5 | +import unstructured.partition.pdf as pdf |
| 6 | +import unstructured.partition.image as image |
| 7 | +import unstructured_inference.inference.layout as layout |
| 8 | + |
| 9 | + |
class MockResponse:
    """Minimal stand-in for the response object returned by ``requests`` calls.

    Only the two members the mocked ``requests.get`` / ``requests.post``
    helpers in this module populate are provided: a ``status_code``
    attribute and a ``json()`` method.
    """

    def __init__(self, status_code, response):
        # status_code: value exposed as the response's status code.
        # response: payload handed back verbatim by ``json()``.
        self.status_code = status_code
        self.response = response

    def json(self):
        """Return the payload given at construction time, unchanged."""
        return self.response
| 17 | + |
| 18 | + |
def mock_healthy_get(url, **kwargs):
    """Stand-in for ``requests.get`` that always answers with status 200.

    ``url`` and any keyword arguments are accepted and ignored; the returned
    ``MockResponse`` carries an empty JSON payload.
    """
    return MockResponse(status_code=200, response={})
| 21 | + |
| 22 | + |
def mock_unhealthy_get(url, **kwargs):
    """Stand-in for ``requests.get`` that always answers with status 500.

    ``url`` and any keyword arguments are accepted and ignored; the returned
    ``MockResponse`` carries an empty JSON payload.
    """
    return MockResponse(status_code=500, response={})
| 25 | + |
| 26 | + |
def mock_unsuccessful_post(url, **kwargs):
    """Stand-in for ``requests.post`` that always answers with status 500.

    ``url`` and any keyword arguments are accepted and ignored; the returned
    ``MockResponse`` carries an empty JSON payload.
    """
    return MockResponse(status_code=500, response={})
| 29 | + |
| 30 | + |
def mock_successful_post(url, **kwargs):
    """Stand-in for ``requests.post`` that answers 200 with a one-page layout.

    ``url`` and any keyword arguments are accepted and ignored. The JSON
    payload holds a single page (number 0) containing one ``Title`` element.
    """
    response = {
        "pages": [
            {
                "number": 0,
                "elements": [{"type": "Title", "text": "Charlie Brown and the Great Pumpkin"}],
            }
        ]
    }
    return MockResponse(status_code=200, response=response)
| 41 | + |
| 42 | + |
class MockPageLayout(layout.PageLayout):
    """``PageLayout`` test double that exposes one hard-coded Title element."""

    def __init__(self, number: int):
        # The parent initializer is not called and ``number`` is not stored;
        # only the ``elements`` property below is meant to be used.
        pass

    @property
    def elements(self):
        """Return a one-item list holding a fixed ``Title`` layout element."""
        return [
            layout.LayoutElement(
                type="Title",
                coordinates=[(0, 0), (2, 2)],
                text="Charlie Brown and the Great Pumpkin",
            )
        ]
| 56 | + |
| 57 | + |
class MockDocumentLayout(layout.DocumentLayout):
    """``DocumentLayout`` test double made of a single ``MockPageLayout``."""

    @property
    def pages(self):
        """Return a one-item list with a freshly built mock page (number 0)."""
        return [
            MockPageLayout(
                number=0,
            )
        ]
| 66 | + |
| 67 | + |
def test_partition_image_api(monkeypatch, filename="example-docs/example.jpg"):
    """The API path returns the elements from the mocked successful response."""
    # Both HTTP verbs are replaced so no network traffic happens.
    monkeypatch.setattr(requests, "post", mock_successful_post)
    monkeypatch.setattr(requests, "get", mock_healthy_get)

    partition_image_response = pdf._partition_via_api(filename)
    assert partition_image_response[0]["type"] == "Title"
    assert partition_image_response[0]["text"] == "Charlie Brown and the Great Pumpkin"
| 75 | + |
| 76 | + |
@pytest.mark.parametrize("filename, file", [("example-docs/example.jpg", None), (None, b"0000")])
def test_partition_image_local(monkeypatch, filename, file):
    """The local path yields the mocked Title element for a filename or a file."""
    # Both model entry points are replaced so either input kind is served by
    # the same mock document layout.
    monkeypatch.setattr(
        layout, "process_data_with_model", lambda *args, **kwargs: MockDocumentLayout()
    )
    monkeypatch.setattr(
        layout, "process_file_with_model", lambda *args, **kwargs: MockDocumentLayout()
    )

    partition_image_response = pdf._partition_pdf_or_image_local(filename, file, is_image=True)
    assert partition_image_response[0].type == "Title"
    assert partition_image_response[0].text == "Charlie Brown and the Great Pumpkin"
| 89 | + |
| 90 | + |
@pytest.mark.skip("Needs to be fixed upstream in unstructured-inference")
def test_partition_image_local_raises_with_no_filename():
    """An empty filename with no file object is expected to raise FileNotFoundError.

    Currently skipped; see the skip reason on the decorator.
    """
    with pytest.raises(FileNotFoundError):
        pdf._partition_pdf_or_image_local(filename="", file=None, is_image=True)
| 95 | + |
| 96 | + |
def test_partition_image_api_raises_with_failed_healthcheck(
    monkeypatch, filename="example-docs/example.jpg"
):
    """The API path raises ValueError when the mocked GET answers with 500."""
    # POST would succeed; only the GET request is made to fail.
    monkeypatch.setattr(requests, "post", mock_successful_post)
    monkeypatch.setattr(requests, "get", mock_unhealthy_get)

    with pytest.raises(ValueError):
        pdf._partition_via_api(filename=filename, url="http://ml.unstructured.io/layout/image")
| 105 | + |
| 106 | + |
def test_partition_image_api_raises_with_failed_api_call(
    monkeypatch, filename="example-docs/example.jpg"
):
    """The API path raises ValueError when the mocked POST answers with 500."""
    # GET succeeds; only the POST request is made to fail.
    monkeypatch.setattr(requests, "post", mock_unsuccessful_post)
    monkeypatch.setattr(requests, "get", mock_healthy_get)

    with pytest.raises(ValueError):
        pdf._partition_via_api(filename=filename, url="http://ml.unstructured.io/layout/image")
| 115 | + |
| 116 | + |
@pytest.mark.parametrize(
    "url, api_called, local_called", [("fakeurl", True, False), (None, False, True)]
)
def test_partition_image(url, api_called, local_called):
    """``partition_image`` uses the API helper when a url is given, else the local one."""
    with mock.patch.object(
        pdf, attribute="_partition_via_api", new=mock.MagicMock()
    ), mock.patch.object(pdf, "_partition_pdf_or_image_local", mock.MagicMock()):
        image.partition_image(filename="fake.pdf", url=url)
        # The checks must run while the patches are active: the MagicMock
        # replacements (and their ``called`` flags) are removed on exit.
        assert pdf._partition_via_api.called == api_called
        assert pdf._partition_pdf_or_image_local.called == local_called
0 commit comments