Skip to content

Commit 83552bc

Browse files
authored
Fix Index.delete_index() so that it uses recursive=True and deletes the full index (#348)
1 parent af0b03b commit 83552bc

File tree

4 files changed

+100
-1
lines changed

4 files changed

+100
-1
lines changed

apis/python/src/tiledb/vector_search/index.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -456,7 +456,7 @@ def delete_index(uri, config):
456456
return
457457
else:
458458
raise err
459-
group.delete()
459+
group.delete(recursive=True)
460460

461461
@staticmethod
462462
def clear_history(

apis/python/src/tiledb/vector_search/ingestion.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2505,15 +2505,20 @@ def consolidate_and_vacuum(
25052505
config: Optional[Mapping[str, Any]] = None,
25062506
):
25072507
with tiledb.Group(index_group_uri) as group:
2508+
write_group = tiledb.Group(index_group_uri, "w")
25082509
try:
25092510
if INPUT_VECTORS_ARRAY_NAME in group:
25102511
tiledb.Array.delete_array(group[INPUT_VECTORS_ARRAY_NAME].uri)
2512+
write_group.remove(INPUT_VECTORS_ARRAY_NAME)
25112513
if EXTERNAL_IDS_ARRAY_NAME in group:
25122514
tiledb.Array.delete_array(group[EXTERNAL_IDS_ARRAY_NAME].uri)
2515+
write_group.remove(EXTERNAL_IDS_ARRAY_NAME)
25132516
except tiledb.TileDBError as err:
25142517
message = str(err)
25152518
if "does not exist" not in message:
25162519
raise err
2520+
write_group.close()
2521+
25172522
modes = ["fragment_meta", "commits", "array_meta"]
25182523
for mode in modes:
25192524
conf = tiledb.Config(config)

apis/python/test/test_index.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,11 @@ def test_flat_index(tmp_path):
112112
index = index.consolidate_updates()
113113
query_and_check(index, np.array([[2, 2, 2]], dtype=np.float32), 3, {0, 2, 4})
114114

115+
vfs = tiledb.VFS()
116+
assert vfs.dir_size(uri) > 0
117+
Index.delete_index(uri=uri, config={})
118+
assert vfs.dir_size(uri) == 0
119+
115120

116121
def test_ivf_flat_index(tmp_path):
117122
partitions = 10
@@ -180,6 +185,11 @@ def test_ivf_flat_index(tmp_path):
180185
index, np.array([[2, 2, 2]], dtype=np.float32), 3, {0, 2, 4}, nprobe=partitions
181186
)
182187

188+
vfs = tiledb.VFS()
189+
assert vfs.dir_size(uri) > 0
190+
Index.delete_index(uri=uri, config={})
191+
assert vfs.dir_size(uri) == 0
192+
183193

184194
def test_vamana_index_simple(tmp_path):
185195
uri = os.path.join(tmp_path, "array")
@@ -196,6 +206,11 @@ def test_vamana_index_simple(tmp_path):
196206
assert index.get_dimensions() == dimensions
197207
query_and_check(index, np.array([[2, 2, 2]], dtype=np.float32), 3, {ind.MAX_UINT64})
198208

209+
vfs = tiledb.VFS()
210+
assert vfs.dir_size(uri) > 0
211+
Index.delete_index(uri=uri, config={})
212+
assert vfs.dir_size(uri) == 0
213+
199214

200215
def test_vamana_index(tmp_path):
201216
uri = os.path.join(tmp_path, "array")
@@ -264,26 +279,35 @@ def test_vamana_index(tmp_path):
264279
[[0, 1], [4, 3]],
265280
)
266281

282+
vfs = tiledb.VFS()
283+
assert vfs.dir_size(uri) > 0
284+
Index.delete_index(uri=uri, config={})
285+
assert vfs.dir_size(uri) == 0
286+
267287

268288
def test_delete_invalid_index(tmp_path):
269289
# We don't throw with an invalid uri.
270290
Index.delete_index(uri="invalid_uri", config={})
271291

272292

273293
def test_delete_index(tmp_path):
294+
vfs = tiledb.VFS()
295+
274296
indexes = ["FLAT", "IVF_FLAT", "VAMANA"]
275297
index_classes = [FlatIndex, IVFFlatIndex, VamanaIndex]
276298
data = np.array([[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3]], dtype=np.float32)
277299
for index_type, index_class in zip(indexes, index_classes):
278300
index_uri = os.path.join(tmp_path, f"array_{index_type}")
279301
ingest(index_type=index_type, index_uri=index_uri, input_vectors=data)
280302
Index.delete_index(uri=index_uri, config={})
303+
assert vfs.dir_size(index_uri) == 0
281304
with pytest.raises(tiledb.TileDBError) as error:
282305
index_class(uri=index_uri)
283306
assert "does not exist" in str(error.value)
284307

285308

286309
def test_index_with_incorrect_dimensions(tmp_path):
310+
vfs = tiledb.VFS()
287311
indexes = [flat_index, ivf_flat_index, vamana_index]
288312
for index_type in indexes:
289313
uri = os.path.join(tmp_path, f"array_{index_type.__name__}")
@@ -302,6 +326,10 @@ def test_index_with_incorrect_dimensions(tmp_path):
302326
# Okay otherwise.
303327
index.query(np.array([[1, 1, 1]], dtype=np.float32), k=3)
304328

329+
assert vfs.dir_size(uri) > 0
330+
Index.delete_index(uri=uri, config={})
331+
assert vfs.dir_size(uri) == 0
332+
305333

306334
def test_index_with_incorrect_num_of_query_columns_simple(tmp_path):
307335
siftsmall_uri = siftsmall_inputs_file
@@ -327,6 +355,8 @@ def test_index_with_incorrect_num_of_query_columns_simple(tmp_path):
327355

328356

329357
def test_index_with_incorrect_num_of_query_columns_complex(tmp_path):
358+
vfs = tiledb.VFS()
359+
330360
# Tests that we raise a TypeError if the number of columns in the query is not the same as the
331361
# number of columns in the indexed data.
332362
size = 1000
@@ -356,6 +386,10 @@ def test_index_with_incorrect_num_of_query_columns_complex(tmp_path):
356386
with pytest.raises(TypeError):
357387
index.query(query, k=1)
358388

389+
assert vfs.dir_size(index_uri) > 0
390+
Index.delete_index(uri=index_uri, config={})
391+
assert vfs.dir_size(index_uri) == 0
392+
359393

360394
def test_index_with_incorrect_num_of_query_columns_in_single_vector_query(tmp_path):
361395
# Tests that we raise a TypeError if the number of columns in the query is not the same as the

apis/python/test/test_ingestion.py

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@ def query_and_check_equals(index, queries, expected_result_d, expected_result_i)
4040

4141

4242
def test_vamana_ingestion_u8(tmp_path):
43+
vfs = tiledb.VFS()
44+
4345
dataset_dir = os.path.join(tmp_path, "dataset")
4446
index_uri = os.path.join(tmp_path, "array")
4547
if os.path.exists(index_uri):
@@ -64,6 +66,10 @@ def test_vamana_ingestion_u8(tmp_path):
6466
_, result = index_ram.query(queries, k=k)
6567
assert accuracy(result, gt_i) > MINIMUM_ACCURACY
6668

69+
assert vfs.dir_size(index_uri) > 0
70+
Index.delete_index(uri=index_uri, config={})
71+
assert vfs.dir_size(index_uri) == 0
72+
6773

6874
def test_flat_ingestion_u8(tmp_path):
6975
dataset_dir = os.path.join(tmp_path, "dataset")
@@ -245,6 +251,8 @@ def test_ivf_flat_ingestion_f32(tmp_path):
245251

246252

247253
def test_ingestion_fvec(tmp_path):
254+
vfs = tiledb.VFS()
255+
248256
source_uri = siftsmall_inputs_file
249257
queries_uri = siftsmall_query_file
250258
gt_uri = siftsmall_groundtruth_file
@@ -289,8 +297,14 @@ def test_ingestion_fvec(tmp_path):
289297
_, result = index_ram.query(queries, k=k, nprobe=nprobe, mode=Mode.LOCAL)
290298
assert accuracy(result, gt_i) > MINIMUM_ACCURACY
291299

300+
assert vfs.dir_size(index_uri) > 0
301+
Index.delete_index(uri=index_uri, config={})
302+
assert vfs.dir_size(index_uri) == 0
303+
292304

293305
def test_ingestion_numpy(tmp_path):
306+
vfs = tiledb.VFS()
307+
294308
source_uri = siftsmall_inputs_file
295309
queries_uri = siftsmall_query_file
296310
gt_uri = siftsmall_groundtruth_file
@@ -336,8 +350,14 @@ def test_ingestion_numpy(tmp_path):
336350
_, result = index_ram.query(queries, k=k, nprobe=nprobe, mode=Mode.LOCAL)
337351
assert accuracy(result, gt_i) > MINIMUM_ACCURACY
338352

353+
assert vfs.dir_size(index_uri) > 0
354+
Index.delete_index(uri=index_uri, config={})
355+
assert vfs.dir_size(index_uri) == 0
356+
339357

340358
def test_ingestion_numpy_i8(tmp_path):
359+
vfs = tiledb.VFS()
360+
341361
source_uri = siftsmall_inputs_file
342362
queries_uri = siftsmall_query_file
343363
gt_uri = siftsmall_groundtruth_file
@@ -384,8 +404,14 @@ def test_ingestion_numpy_i8(tmp_path):
384404
_, result = index_ram.query(queries, k=k, nprobe=nprobe, mode=Mode.LOCAL)
385405
assert accuracy(result, gt_i) > MINIMUM_ACCURACY
386406

407+
assert vfs.dir_size(index_uri) > 0
408+
Index.delete_index(uri=index_uri, config={})
409+
assert vfs.dir_size(index_uri) == 0
410+
387411

388412
def test_ingestion_multiple_workers(tmp_path):
413+
vfs = tiledb.VFS()
414+
389415
source_uri = siftsmall_inputs_file
390416
queries_uri = siftsmall_query_file
391417
gt_uri = siftsmall_groundtruth_file
@@ -432,8 +458,14 @@ def test_ingestion_multiple_workers(tmp_path):
432458
_, result = index_ram.query(queries, k=k, nprobe=nprobe, mode=Mode.LOCAL)
433459
assert accuracy(result, gt_i) > MINIMUM_ACCURACY
434460

461+
assert vfs.dir_size(index_uri) > 0
462+
Index.delete_index(uri=index_uri, config={})
463+
assert vfs.dir_size(index_uri) == 0
464+
435465

436466
def test_ingestion_external_ids_numpy(tmp_path):
467+
vfs = tiledb.VFS()
468+
437469
source_uri = siftsmall_inputs_file
438470
queries_uri = siftsmall_query_file
439471
gt_uri = siftsmall_groundtruth_file
@@ -474,8 +506,14 @@ def test_ingestion_external_ids_numpy(tmp_path):
474506
_, result = index_ram.query(queries, k=k, nprobe=nprobe)
475507
assert accuracy(result, gt_i, external_ids_offset) > MINIMUM_ACCURACY
476508

509+
assert vfs.dir_size(index_uri) > 0
510+
Index.delete_index(uri=index_uri, config={})
511+
assert vfs.dir_size(index_uri) == 0
512+
477513

478514
def test_ingestion_with_updates(tmp_path):
515+
vfs = tiledb.VFS()
516+
479517
dataset_dir = os.path.join(tmp_path, "dataset")
480518
k = 10
481519
size = 1000
@@ -528,8 +566,14 @@ def test_ingestion_with_updates(tmp_path):
528566
_, result = index.query(queries, k=k, nprobe=20)
529567
assert accuracy(result, gt_i, updated_ids=updated_ids) == 1.0
530568

569+
assert vfs.dir_size(index_uri) > 0
570+
Index.delete_index(uri=index_uri, config={})
571+
assert vfs.dir_size(index_uri) == 0
572+
531573

532574
def test_ingestion_with_batch_updates(tmp_path):
575+
vfs = tiledb.VFS()
576+
533577
dataset_dir = os.path.join(tmp_path, "dataset")
534578
k = 10
535579
size = 100000
@@ -590,8 +634,14 @@ def test_ingestion_with_batch_updates(tmp_path):
590634
_, result = index.query(queries, k=k, nprobe=nprobe)
591635
assert accuracy(result, gt_i, updated_ids=updated_ids) > 0.99
592636

637+
assert vfs.dir_size(index_uri) > 0
638+
Index.delete_index(uri=index_uri, config={})
639+
assert vfs.dir_size(index_uri) == 0
640+
593641

594642
def test_ingestion_with_updates_and_timetravel(tmp_path):
643+
vfs = tiledb.VFS()
644+
595645
dataset_dir = os.path.join(tmp_path, "dataset")
596646
k = 10
597647
size = 1000
@@ -819,8 +869,14 @@ def test_ingestion_with_updates_and_timetravel(tmp_path):
819869
_, result = index.query(queries, k=k, nprobe=partitions)
820870
assert accuracy(result, gt_i, updated_ids=updated_ids) == 0.0
821871

872+
assert vfs.dir_size(index_uri) > 0
873+
Index.delete_index(uri=index_uri, config={})
874+
assert vfs.dir_size(index_uri) == 0
875+
822876

823877
def test_ingestion_with_additions_and_timetravel(tmp_path):
878+
vfs = tiledb.VFS()
879+
824880
dataset_dir = os.path.join(tmp_path, "dataset")
825881
k = 100
826882
size = 100
@@ -868,6 +924,10 @@ def test_ingestion_with_additions_and_timetravel(tmp_path):
868924
_, result = index.query(queries, k=k, nprobe=partitions, opt_l=k * 2)
869925
assert 0.45 < accuracy(result, gt_i)
870926

927+
assert vfs.dir_size(index_uri) > 0
928+
Index.delete_index(uri=index_uri, config={})
929+
assert vfs.dir_size(index_uri) == 0
930+
871931

872932
def test_ivf_flat_ingestion_tdb_random_sampling_policy(tmp_path):
873933
dataset_dir = os.path.join(tmp_path, "dataset")

0 commit comments

Comments
 (0)