|
1 | 1 | import os
|
| 2 | +import urllib.request |
2 | 3 | from collections.abc import Generator
|
3 | 4 |
|
4 | 5 | import numpy as np
|
5 | 6 | import pytest
|
| 7 | +from bson import ObjectId |
6 | 8 |
|
7 | 9 | from pymongo_voyageai import PyMongoVoyageAI
|
8 | 10 |
|
9 | 11 | if "VOYAGEAI_API_KEY" not in os.environ:
|
10 | 12 | pytest.skip("Requires VoyageAI API Key.", allow_module_level=True)
|
11 | 13 |
|
12 | 14 | if "S3_BUCKET_NAME" not in os.environ:
|
13 |
| - pytest.skip("Requires VoyageAI API Key.", allow_module_level=True) |
| 15 | + pytest.skip("Requires S3 Bucket name.", allow_module_level=True) |
14 | 16 |
|
15 | 17 | # mypy: disable_error_code="no-untyped-def"
|
16 | 18 |
|
@@ -75,6 +77,24 @@ def test_pdf_pages(client: PyMongoVoyageAI):
|
75 | 77 | client.delete_by_ids([d["_id"] for d in resp])
|
76 | 78 |
|
77 | 79 |
|
def test_pdf_pages_storage(client: PyMongoVoyageAI):
    """Index a PDF read from the client's object storage and search it.

    The PDF is downloaded over HTTP and uploaded to the client's storage
    under a fresh ``ObjectId``-based key. It is then loaded back through an
    ``s3://`` URL, indexed, and queried.

    Cleanup runs in ``finally`` blocks so that a failed assertion or a
    service error does not leave the inserted documents or the uploaded
    object behind.
    """
    query = "The consequences of a dictator's peace"
    source_url = "https://www.fdrlibrary.org/documents/356632/390886/readingcopy.pdf"
    storage = client._storage
    # A fresh ObjectId per run gives each run its own object key.
    object_name = f"{ObjectId()}.pdf"

    # NOTE(review): storage.client looks like a boto3 S3 client
    # (upload_fileobj / delete_object) -- confirm against the storage class.
    with urllib.request.urlopen(source_url) as response:
        storage.client.upload_fileobj(response, storage.root_location, object_name)

    try:
        stored_url = f"s3://{storage.root_location}/{object_name}"
        images = client.url_to_images(stored_url)
        resp = client.add_documents(images)
        doc_ids = [d["_id"] for d in resp]
        try:
            client.wait_for_indexing()
            data = client.similarity_search(query, extract_images=True)
            assert len(data[0]["inputs"][0].image.tobytes()) > 0
            assert len(client.get_by_ids(doc_ids)) == len(resp)
        finally:
            # Remove the inserted documents even if a check above failed.
            client.delete_by_ids(doc_ids)
    finally:
        # Remove the uploaded PDF regardless of how the test ended.
        storage.client.delete_object(Bucket=storage.root_location, Key=object_name)
| 96 | + |
| 97 | + |
78 | 98 | @pytest.mark.asyncio
|
79 | 99 | async def test_image_set_async(client: PyMongoVoyageAI):
|
80 | 100 | url = "hf://datasets/princeton-nlp/CharXiv/val.parquet"
|
|
0 commit comments