44import pytest
55from array_paths import *
66from common import *
7+ from common import load_metadata
78
89from tiledb .cloud .dag import Mode
910from tiledb .vector_search .flat_index import FlatIndex
@@ -513,16 +514,16 @@ def test_ingestion_with_updates(tmp_path):
513514 partitions = partitions ,
514515 )
515516
516- # TODO(paris): Fix Vamana to have same metadata as Python and re-enable.
517- # with tiledb.Group(index_uri, "r", ctx={}) as group:
518- # ingestion_timestamps = [int(x) for x in list(json.loads(group.meta.get("ingestion_timestamps", "[]")))]
519- # base_sizes = [ int(x) for x in list(json.loads(group.meta.get("base_sizes", "[]")))]
520- # assert len(ingestion_timestamps) == 1
521- # assert len(base_sizes) == 1
522- # assert base_sizes [0] == 1000
523- # timestamp_2030 = 1903946089000
524- # timestamp_5_minutes_ago = int((time.time() - 5 * 60) * 1000 )
525- # assert ingestion_timestamps[0] > timestamp_5_minutes_ago and ingestion_timestamps[0] < timestamp_2030
517+ ingestion_timestamps , base_sizes = load_metadata ( index_uri )
518+ assert base_sizes == [ 1000 ]
519+ assert len ( ingestion_timestamps ) == 1
520+ timestamp_5_minutes_from_now = int (( time . time () + 5 * 60 ) * 1000 )
521+ timestamp_5_minutes_ago = int (( time . time () - 5 * 60 ) * 1000 )
522+ assert (
523+ ingestion_timestamps [0 ] > timestamp_5_minutes_ago
524+ and ingestion_timestamps [ 0 ] < timestamp_5_minutes_from_now
525+ )
526+ ingestion_timestamp = ingestion_timestamps [0 ]
526527
527528 _ , result = index .query (queries , k = k , nprobe = nprobe )
528529 assert accuracy (result , gt_i ) == 1.0
@@ -548,6 +549,16 @@ def test_ingestion_with_updates(tmp_path):
548549 _ , result = index .query (queries , k = k , nprobe = 20 )
549550 assert accuracy (result , gt_i , updated_ids = updated_ids ) == 1.0
550551
552+ ingestion_timestamps , base_sizes = load_metadata (index_uri )
553+ assert base_sizes == [1000 , 1000 ]
554+ assert len (ingestion_timestamps ) == 2
555+ assert ingestion_timestamps [0 ] == ingestion_timestamp
556+ assert (
557+ ingestion_timestamps [1 ] != ingestion_timestamp
558+ and ingestion_timestamps [1 ] > timestamp_5_minutes_ago
559+ and ingestion_timestamps [1 ] < timestamp_5_minutes_from_now
560+ )
561+
551562 assert vfs .dir_size (index_uri ) > 0
552563 Index .delete_index (uri = index_uri , config = {})
553564 assert vfs .dir_size (index_uri ) == 0
@@ -643,6 +654,10 @@ def test_ingestion_with_updates_and_timetravel(tmp_path):
643654 index_timestamp = 1 ,
644655 )
645656
657+ ingestion_timestamps , base_sizes = load_metadata (index_uri )
658+ assert ingestion_timestamps == [1 ]
659+ assert base_sizes == [1000 ]
660+
646661 if index_type == "IVF_FLAT" :
647662 assert index .partitions == partitions
648663
@@ -651,7 +666,8 @@ def test_ingestion_with_updates_and_timetravel(tmp_path):
651666
652667 update_ids_offset = MAX_UINT64 - size
653668 updated_ids = {}
654- for i in range (2 , 102 ):
669+ timestamp_end = 102
670+ for i in range (2 , timestamp_end ):
655671 index .delete (external_id = i , timestamp = i )
656672 index .update (
657673 vector = data [i ].astype (dtype ),
@@ -660,6 +676,10 @@ def test_ingestion_with_updates_and_timetravel(tmp_path):
660676 )
661677 updated_ids [i ] = i + update_ids_offset
662678
679+ ingestion_timestamps , base_sizes = load_metadata (index_uri )
680+ assert ingestion_timestamps == [1 ]
681+ assert base_sizes == [1000 ]
682+
663683 index = index_class (uri = index_uri )
664684 _ , result = index .query (queries , k = k , nprobe = partitions )
665685 assert accuracy (result , gt_i , updated_ids = updated_ids ) == 1.0
@@ -717,6 +737,11 @@ def test_ingestion_with_updates_and_timetravel(tmp_path):
717737
718738 # Consolidate updates
719739 index = index .consolidate_updates ()
740+
741+ ingestion_timestamps , base_sizes = load_metadata (index_uri )
742+ assert ingestion_timestamps == [1 , timestamp_end ]
743+ assert base_sizes == [1000 , 1000 ]
744+
720745 index = index_class (uri = index_uri )
721746 _ , result = index .query (queries , k = k , nprobe = partitions )
722747 assert accuracy (result , gt_i , updated_ids = updated_ids ) == 1.0
0 commit comments