|
6 | 6 | import pytest
|
7 | 7 | from bson import ObjectId
|
8 | 8 |
|
9 |
| -from pymongo_voyageai import PyMongoVoyageAI |
| 9 | +from pymongo_voyageai import MemoryStorage, PyMongoVoyageAI |
10 | 10 |
|
# Skip every test in this module when VOYAGEAI_API_KEY is not set in the environment.
11 | 11 | if "VOYAGEAI_API_KEY" not in os.environ:
|
12 | 12 | pytest.skip("Requires VoyageAI API Key.", allow_module_level=True)
|
@@ -95,6 +95,25 @@ def test_pdf_pages_storage(client: PyMongoVoyageAI):
|
95 | 95 | storage.client.delete_object(Bucket=storage.root_location, Key=object_name)
|
96 | 96 |
|
97 | 97 |
|
def test_pdf_pages_custom_storage(client: PyMongoVoyageAI):
    """Index PDF pages from an ``s3://`` URL while the client uses ``MemoryStorage``.

    The PDF is downloaded over HTTP and uploaded, through the client's
    original storage object, to the bucket named by ``storage.root_location``.
    The client's storage is then replaced with a ``MemoryStorage`` instance
    before the pages are converted to images, added as documents, and
    searched.

    Cleanup always runs, even when a call or assertion fails: the indexed
    documents are deleted, the client's original storage is restored (the
    ``client`` fixture may be shared with other tests -- its scope is not
    visible here), and the uploaded object is removed from the bucket.
    """
    query = "The consequences of a dictator's peace"
    url = "https://www.fdrlibrary.org/documents/356632/390886/readingcopy.pdf"

    # Keep a handle on the original storage: it stages the PDF and must be
    # put back on the client once the test is done.
    storage = client._storage
    object_name = f"{ObjectId()}.pdf"
    with urllib.request.urlopen(url) as response:
        storage.client.upload_fileobj(response, storage.root_location, object_name)
    url = f"s3://{storage.root_location}/{object_name}"

    resp = []
    try:
        client._storage = MemoryStorage()
        images = client.url_to_images(url)
        resp = client.add_documents(images)
        client.wait_for_indexing()
        data = client.similarity_search(query, extract_images=True)
        assert len(data[0]["inputs"][0].image.tobytes()) > 0
        assert len(client.get_by_ids([d["_id"] for d in resp])) == len(resp)
    finally:
        try:
            # Delete while the MemoryStorage is still installed, matching the
            # order of operations of the original test.
            if resp:
                client.delete_by_ids([d["_id"] for d in resp])
        finally:
            client._storage = storage
            storage.client.delete_object(Bucket=storage.root_location, Key=object_name)
| 115 | + |
| 116 | + |
98 | 117 | @pytest.mark.asyncio
|
99 | 118 | async def test_image_set_async(client: PyMongoVoyageAI):
|
100 | 119 | url = "hf://datasets/princeton-nlp/CharXiv/val.parquet"
|
|
0 commit comments