Skip to content

Commit db9eae1

Browse files
Fix consolidate_updates error for tiledb:// URIs. (#329)
Fix `consolidate_updates` error for `tiledb://` URIs. `consolidate_updates` performs `timestamp` consolidation of all updates between consecutive ingestions. This caused failures for file indexing when appending to an existing `tiledb://` index. Because this consolidation is only an optional performance enhancement, we skip it for remote arrays: consolidation of remote arrays currently supports only the modes `fragment_meta`, `commits`, and `metadata`.
1 parent 5a7c478 commit db9eae1

File tree

2 files changed

+17
-5
lines changed

2 files changed

+17
-5
lines changed

apis/python/src/tiledb/vector_search/index.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -403,11 +403,15 @@ def consolidate_updates(self, retrain_index: bool = False, **kwargs):
403403
if fragment_info.timestamp_range[1] > max_timestamp:
404404
max_timestamp = fragment_info.timestamp_range[1]
405405
max_timestamp += 1
406-
conf = tiledb.Config(self.config)
407-
conf["sm.consolidation.timestamp_start"] = self.latest_ingestion_timestamp
408-
conf["sm.consolidation.timestamp_end"] = max_timestamp
409-
tiledb.consolidate(self.updates_array_uri, config=conf)
410-
tiledb.vacuum(self.updates_array_uri, config=conf)
406+
# Consolidate all updates since the previous ingestion_timestamp.
407+
# This is a performance optimization. We skip this for remote arrays as consolidation
408+
# of remote arrays currently only supports modes `fragment_meta, commits, metadata`.
409+
if not self.updates_array_uri.startswith("tiledb://"):
410+
conf = tiledb.Config(self.config)
411+
conf["sm.consolidation.timestamp_start"] = self.latest_ingestion_timestamp
412+
conf["sm.consolidation.timestamp_end"] = max_timestamp
413+
tiledb.consolidate(self.updates_array_uri, config=conf)
414+
tiledb.vacuum(self.updates_array_uri, config=conf)
411415

412416
# We don't copy the centroids if self.partitions=0 because this means our index was previously empty.
413417
should_pass_copy_centroids_uri = (

apis/python/test/test_cloud.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,10 +187,18 @@ def test_cloud_ivf_flat(self):
187187
resources=resources,
188188
)
189189

190+
index = vs.ivf_flat_index.IVFFlatIndex(
191+
uri=index_uri,
192+
config=tiledb.cloud.Config().dict(),
193+
)
190194
index.delete(external_id=42)
191195
_, result_i = index.query(queries, k=k, nprobe=nprobe)
192196
assert accuracy(result_i, gt_i) > MINIMUM_ACCURACY
193197

198+
index = index.consolidate_updates()
199+
_, result_i = index.query(queries, k=k, nprobe=nprobe)
200+
assert accuracy(result_i, gt_i) > MINIMUM_ACCURACY
201+
194202
def test_cloud_ivf_flat_random_sampling(self):
195203
# NOTE(paris): This was also tested with the following (and also with mode=Mode.BATCH):
196204
# source_uri = "tiledb://TileDB-Inc/ann_sift1b_raw_vectors_col_major"

0 commit comments

Comments
 (0)