Skip to content

Commit 45cf7fe

Browse files
author
Nikos Papailiou
committed
Bump storage version
1 parent b80e2e9 commit 45cf7fe

File tree

4 files changed

+29
-6
lines changed

4 files changed

+29
-6
lines changed

apis/python/src/tiledb/vector_search/index.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -40,17 +40,21 @@ def __init__(
4040
self.ctx = Ctx(config)
4141
self.group = tiledb.Group(self.uri, "r", ctx=tiledb.Ctx(config))
4242
self.storage_version = self.group.meta.get("storage_version", "0.1")
43+
if not storage_formats[self.storage_version]["SUPPORT_TIMETRAVEL"] and timestamp is not None:
44+
raise ValueError(f"Time traveling is not supported for index storage_version={self.storage_version}")
45+
4346
updates_array_name = storage_formats[self.storage_version][
4447
"UPDATES_ARRAY_NAME"
4548
]
4649
self.updates_array_uri = f"{self.group.uri}/{updates_array_name}"
4750
self.index_version = self.group.meta.get("index_version", "")
48-
self.ingestion_timestamps = list(json.loads(self.group.meta.get("ingestion_timestamps", "[]")))
51+
self.ingestion_timestamps = [int(x) for x in
52+
list(json.loads(self.group.meta.get("ingestion_timestamps", "[]")))]
4953
if len(self.ingestion_timestamps) > 0:
5054
self.latest_ingestion_timestamp = self.ingestion_timestamps[len(self.ingestion_timestamps)-1]
5155
else:
5256
self.latest_ingestion_timestamp = MAX_UINT64
53-
self.base_sizes = list(json.loads(self.group.meta.get("base_sizes", "[]")))
57+
self.base_sizes = [int(x) for x in list(json.loads(self.group.meta.get("base_sizes", "[]")))]
5458
if len(self.base_sizes) > 0:
5559
self.base_size = self.base_sizes[len(self.ingestion_timestamps)-1]
5660
else:
@@ -245,9 +249,12 @@ def get_updates_uri(self):
245249
return self.updates_array_uri
246250

247251
def open_updates_array(self, timestamp: int = None):
248-
if timestamp is not None and timestamp <= self.latest_ingestion_timestamp:
249-
raise ValueError(f"Updates at a timestamp before the latest_ingestion_timestamp are not supported. "
250-
f"timestamp: {timestamp}, latest_ingestion_timestamp: {self.latest_ingestion_timestamp}")
252+
if timestamp is not None:
253+
if not storage_formats[self.storage_version]["SUPPORT_TIMETRAVEL"]:
254+
raise ValueError(f"Time traveling is not supported for index storage_version={self.storage_version}")
255+
if timestamp <= self.latest_ingestion_timestamp:
256+
raise ValueError(f"Updates at a timestamp before the latest_ingestion_timestamp are not supported. "
257+
f"timestamp: {timestamp}, latest_ingestion_timestamp: {self.latest_ingestion_timestamp}")
251258
if not tiledb.array_exists(self.updates_array_uri):
252259
updates_array_name = storage_formats[self.storage_version][
253260
"UPDATES_ARRAY_NAME"

apis/python/src/tiledb/vector_search/ingestion.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1886,6 +1886,7 @@ def consolidate_and_vacuum(
18861886
group.meta["dtype"] = np.dtype(vector_type).name
18871887
group.meta["partitions"] = partitions
18881888
group.meta["storage_version"] = STORAGE_VERSION
1889+
group.meta["index_type"] = index_type
18891890
group.meta["base_sizes"] = json.dumps(base_sizes)
18901891

18911892
if external_ids is not None:

apis/python/src/tiledb/vector_search/storage_formats.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
"PARTIAL_WRITE_ARRAY_DIR": "write_temp",
1212
"DEFAULT_ATTR_FILTERS": None,
1313
"UPDATES_ARRAY_NAME": "updates",
14+
"SUPPORT_TIMETRAVEL": False,
1415
},
1516
"0.2": {
1617
"CENTROIDS_ARRAY_NAME": "partition_centroids",
@@ -22,7 +23,20 @@
2223
"PARTIAL_WRITE_ARRAY_DIR": "temp_data",
2324
"DEFAULT_ATTR_FILTERS": tiledb.FilterList([tiledb.ZstdFilter()]),
2425
"UPDATES_ARRAY_NAME": "updates",
26+
"SUPPORT_TIMETRAVEL": False,
27+
},
28+
"0.3": {
29+
"CENTROIDS_ARRAY_NAME": "partition_centroids",
30+
"INDEX_ARRAY_NAME": "partition_indexes",
31+
"IDS_ARRAY_NAME": "shuffled_vector_ids",
32+
"PARTS_ARRAY_NAME": "shuffled_vectors",
33+
"INPUT_VECTORS_ARRAY_NAME": "input_vectors",
34+
"EXTERNAL_IDS_ARRAY_NAME": "external_ids",
35+
"PARTIAL_WRITE_ARRAY_DIR": "temp_data",
36+
"DEFAULT_ATTR_FILTERS": tiledb.FilterList([tiledb.ZstdFilter()]),
37+
"UPDATES_ARRAY_NAME": "updates",
38+
"SUPPORT_TIMETRAVEL": True,
2539
},
2640
}
2741

28-
STORAGE_VERSION = "0.2"
42+
STORAGE_VERSION = "0.3"

apis/python/test/test_ingestion.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
from tiledb.vector_search.utils import load_fvecs
66
from tiledb.vector_search.ingestion import ingest
7+
from tiledb.vector_search.index import Index
78
from tiledb.vector_search.flat_index import FlatIndex
89
from tiledb.vector_search.ivf_flat_index import IVFFlatIndex
910
from tiledb.cloud.dag import Mode

0 commit comments

Comments
 (0)