|
13 | 13 | # limitations under the License.
|
14 | 14 |
|
15 | 15 | import asyncio
|
| 16 | +import json |
16 | 17 | import os
|
17 | 18 | import uuid
|
18 | 19 | from threading import Thread
|
|
class FakeImageEmbedding(DeterministicFakeEmbedding):
    """Fake embedding service that additionally exposes ``embed_image``."""

    def embed_image(self, image_paths: list[str]) -> list[list[float]]:
        """Return one embedding per entry of ``image_paths``.

        Each path is wrapped as ``"Image Path: <path>"`` before being handed
        to ``embed_query``, so the text that gets embedded is never the bare
        path string itself.
        """
        vectors: list[list[float]] = []
        for path in image_paths:
            prefixed = f"Image Path: {path}"
            vectors.append(self.embed_query(prefixed))
        return vectors
56 | 57 |
|
57 | 58 |
|
58 | 59 | image_embedding_service = FakeImageEmbedding(size=VECTOR_SIZE)
|
@@ -357,7 +358,52 @@ async def test_aadd_images(self, engine_sync, image_uris):
|
357 | 358 | assert len(results) == len(image_uris)
|
358 | 359 | assert results[0]["image_id"] == "0"
|
359 | 360 | assert results[0]["source"] == "google.com"
|
360 |
| - await aexecute(engine_sync, f'TRUNCATE TABLE "{IMAGE_TABLE}"') |
| 361 | + await aexecute(engine_sync, f'DROP TABLE IF EXISTS "{IMAGE_TABLE}"') |
| 362 | + |
async def test_aadd_images_store_uri_only(self, engine_sync, image_uris):
    """Verify ``aadd_images(..., store_uri_only=True)`` on the sync-created store.

    For every image URI the test expects that:
      * the content column holds the URI itself,
      * the stored embedding equals ``image_embedding_service.embed_image``
        for that URI and differs from ``embeddings_service.embed_query`` of
        the same URI,
      * the ``image_id`` / ``source`` metadata columns hold the supplied
        values, and
      * the JSON metadata column carries the URI under ``"image_uri"``.

    The table is created by this test and is always dropped afterwards,
    including when an assertion fails.
    """
    # Derive the name from IMAGE_TABLE (the constant the async image test
    # cleans up) so it cannot collide with the sync variant's
    # IMAGE_TABLE_SYNC-based table.
    table_name = f"{IMAGE_TABLE}_store_uri_only"
    engine_sync.init_vectorstore_table(
        table_name,
        VECTOR_SIZE,
        metadata_columns=[
            Column("image_id", "TEXT"),
            Column("source", "TEXT"),
        ],
        metadata_json_column="mymeta",
    )
    try:
        vs = AlloyDBVectorStore.create_sync(
            engine_sync,
            embedding_service=image_embedding_service,
            table_name=table_name,
            metadata_columns=["image_id", "source"],
            metadata_json_column="mymeta",
        )
        ids = [str(uuid.uuid4()) for _ in image_uris]
        metadatas = [
            {"image_id": str(i), "source": "google.com"}
            for i in range(len(image_uris))
        ]
        # Test the async method on the sync class
        await vs.aadd_images(image_uris, metadatas, ids, store_uri_only=True)
        results = await afetch(engine_sync, f'SELECT * FROM "{table_name}"')
        assert len(results) == len(image_uris)

        # The wrapped store is reached through its name-mangled attribute.
        inner = vs._AlloyDBVectorStore__vs
        # SELECT without ORDER BY gives no ordering guarantee, so line the
        # rows up with the inputs via the image_id assigned above.
        rows = sorted(results, key=lambda row: int(row["image_id"]))
        for i, (uri, row) in enumerate(zip(image_uris, rows)):
            assert row[inner.content_column] == uri
            uri_embedding = embeddings_service.embed_query(uri)
            image_embedding = image_embedding_service.embed_image([uri])[0]
            actual_embedding = json.loads(row[inner.embedding_column])
            # The stored vector must be the image embedding, not a text
            # embedding of the URI string.
            assert actual_embedding != pytest.approx(uri_embedding)
            assert actual_embedding == pytest.approx(image_embedding)
            assert row["image_id"] == str(i)
            assert row["source"] == "google.com"
            assert row[inner.metadata_json_column]["image_uri"] == uri
    finally:
        # Always clean up so a failed assertion cannot leave the table
        # behind and break later runs.
        await aexecute(engine_sync, f'DROP TABLE IF EXISTS "{table_name}"')
361 | 407 |
|
362 | 408 | async def test_adelete_custom(self, engine, vs_custom):
|
363 | 409 | ids = [str(uuid.uuid4()) for i in range(len(texts))]
|
@@ -405,6 +451,49 @@ async def test_add_images(self, engine_sync, image_uris):
|
405 | 451 | await vs.adelete(ids)
|
406 | 452 | await aexecute(engine_sync, f'DROP TABLE IF EXISTS "{IMAGE_TABLE_SYNC}"')
|
407 | 453 |
|
async def test_add_images_store_uri_only(self, engine_sync, image_uris):
    """Verify the synchronous ``add_images(..., store_uri_only=True)`` path.

    For every image URI the test expects that:
      * the content column holds the URI itself,
      * the stored embedding equals ``image_embedding_service.embed_image``
        for that URI and differs from ``embeddings_service.embed_query`` of
        the same URI,
      * the ``image_id`` / ``source`` metadata columns hold the supplied
        values, and
      * the JSON metadata column carries the URI under ``"image_uri"``.

    The table is created by this test and is always dropped afterwards,
    including when an assertion fails.
    """
    table_name = IMAGE_TABLE_SYNC + "_store_uri_only"
    engine_sync.init_vectorstore_table(
        table_name,
        VECTOR_SIZE,
        metadata_columns=[Column("image_id", "TEXT"), Column("source", "TEXT")],
        metadata_json_column="mymeta",
    )
    try:
        vs = AlloyDBVectorStore.create_sync(
            engine_sync,
            embedding_service=image_embedding_service,
            table_name=table_name,
            metadata_columns=["image_id", "source"],
            metadata_json_column="mymeta",
        )

        ids = [str(uuid.uuid4()) for _ in image_uris]
        metadatas = [
            {"image_id": str(i), "source": "google.com"}
            for i in range(len(image_uris))
        ]
        vs.add_images(image_uris, metadatas, ids, store_uri_only=True)
        results = await afetch(engine_sync, f'SELECT * FROM "{table_name}"')
        assert len(results) == len(image_uris)

        # The wrapped store is reached through its name-mangled attribute.
        inner = vs._AlloyDBVectorStore__vs
        # SELECT without ORDER BY gives no ordering guarantee, so line the
        # rows up with the inputs via the image_id assigned above.
        rows = sorted(results, key=lambda row: int(row["image_id"]))
        for i, (uri, row) in enumerate(zip(image_uris, rows)):
            assert row[inner.content_column] == uri
            uri_embedding = embeddings_service.embed_query(uri)
            image_embedding = image_embedding_service.embed_image([uri])[0]
            actual_embedding = json.loads(row[inner.embedding_column])
            # The stored vector must be the image embedding, not a text
            # embedding of the URI string.
            assert actual_embedding != pytest.approx(uri_embedding)
            assert actual_embedding == pytest.approx(image_embedding)
            assert row["image_id"] == str(i)
            assert row["source"] == "google.com"
            assert row[inner.metadata_json_column]["image_uri"] == uri
        await vs.adelete(ids)
    finally:
        # Always clean up so a failed assertion cannot leave the table
        # behind and break later runs or the sibling test.
        await aexecute(engine_sync, f'DROP TABLE IF EXISTS "{table_name}"')
| 496 | + |
408 | 497 | async def test_cross_env(self, engine_sync, vs_sync):
|
409 | 498 | ids = [str(uuid.uuid4()) for i in range(len(texts))]
|
410 | 499 | await vs_sync.aadd_texts(texts, ids=ids)
|
|
0 commit comments