Commit 46b299b

fix(database): handle deletion of multiple records in chunks
* Implemented chunking for delete operations to avoid SQL variable limits.
* Ensured that the deletion process is efficient and does not exceed SQLite's constraints.
* Updated logging to reflect the number of records deleted.
1 parent bdf98ff commit 46b299b
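
For readers skimming the commit, the pattern it applies can be sketched in isolation with nothing but the standard sqlite3 module. The table name items and the default chunk size below are illustrative placeholders, not the library's actual schema or configuration:

import sqlite3

def delete_in_chunks(conn: sqlite3.Connection, rowids: list[int], chunk_size: int = 500) -> int:
    # Delete rows by rowid in chunks so that no single statement exceeds
    # SQLite's bound-parameter limit (SQLITE_MAX_VARIABLE_NUMBER).
    cur = conn.cursor()
    deleted = 0
    for i in range(0, len(rowids), chunk_size):
        chunk = rowids[i : i + chunk_size]
        placeholders = ",".join("?" * len(chunk))
        cur.execute(f"DELETE FROM items WHERE rowid IN ({placeholders})", chunk)
        deleted += cur.rowcount
    conn.commit()
    return deleted

Each chunk issues one parameterized DELETE, and cur.rowcount is accumulated so the caller still learns how many rows were removed in total.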

2 files changed: 62 additions, 51 deletions

benchmarks/runner.py

Lines changed: 46 additions & 45 deletions
@@ -29,50 +29,51 @@ def run_benchmark_suite(
     )
     client = SQLiteVecClient(table="benchmark", db_path=db_path)

-    # Create table
-    dim = config["dimension"]
-    distance = config["distance"]
-    client.create_table(dim=dim, distance=distance)
-
-    # Generate data
-    texts = generate_texts(dataset_size)
-    embeddings = generate_embeddings(dataset_size, dim)
-    metadata = generate_metadata(dataset_size)
-
-    # Benchmark: Add
-    print(f" Benchmarking add ({dataset_size} records)...")
-    results.append(benchmark_add(client, texts, embeddings, metadata))
-
-    # Get rowids for subsequent operations
-    rowids = list(range(1, dataset_size + 1))
-
-    # Benchmark: Get Many
-    print(f" Benchmarking get_many ({dataset_size} records)...")
-    results.append(benchmark_get_many(client, rowids))
-
-    # Benchmark: Similarity Search
-    print(" Benchmarking similarity_search...")
-    query_emb = [0.5] * dim
-    iterations = config["similarity_search"]["iterations"]
-    for top_k in config["similarity_search"]["top_k_values"]:
-        results.append(
-            benchmark_similarity_search(client, query_emb, top_k, iterations)
-        )
-
-    # Benchmark: Update Many
-    print(f" Benchmarking update_many ({dataset_size} records)...")
-    new_texts = [f"updated_{i}" for i in range(dataset_size)]
-    results.append(benchmark_update_many(client, rowids, new_texts))
-
-    # Benchmark: Get All
-    print(f" Benchmarking get_all ({dataset_size} records)...")
-    batch_size = config["batch_size"]
-    results.append(benchmark_get_all(client, dataset_size, batch_size))
-
-    # Benchmark: Delete Many
-    print(f" Benchmarking delete_many ({dataset_size} records)...")
-    results.append(benchmark_delete_many(client, rowids))
-
-    client.close()
+    try:
+        # Create table
+        dim = config["dimension"]
+        distance = config["distance"]
+        client.create_table(dim=dim, distance=distance)
+
+        # Generate data
+        texts = generate_texts(dataset_size)
+        embeddings = generate_embeddings(dataset_size, dim)
+        metadata = generate_metadata(dataset_size)
+
+        # Benchmark: Add
+        print(f" Benchmarking add ({dataset_size} records)...")
+        results.append(benchmark_add(client, texts, embeddings, metadata))
+
+        # Get rowids for subsequent operations
+        rowids = list(range(1, dataset_size + 1))
+
+        # Benchmark: Get Many
+        print(f" Benchmarking get_many ({dataset_size} records)...")
+        results.append(benchmark_get_many(client, rowids))
+
+        # Benchmark: Similarity Search
+        print(" Benchmarking similarity_search...")
+        query_emb = [0.5] * dim
+        iterations = config["similarity_search"]["iterations"]
+        for top_k in config["similarity_search"]["top_k_values"]:
+            results.append(
+                benchmark_similarity_search(client, query_emb, top_k, iterations)
+            )
+
+        # Benchmark: Update Many
+        print(f" Benchmarking update_many ({dataset_size} records)...")
+        new_texts = [f"updated_{i}" for i in range(dataset_size)]
+        results.append(benchmark_update_many(client, rowids, new_texts))
+
+        # Benchmark: Get All
+        print(f" Benchmarking get_all ({dataset_size} records)...")
+        batch_size = config["batch_size"]
+        results.append(benchmark_get_all(client, dataset_size, batch_size))
+
+        # Benchmark: Delete Many
+        print(f" Benchmarking delete_many ({dataset_size} records)...")
+        results.append(benchmark_delete_many(client, rowids))
+    finally:
+        client.close()

     return results
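
The runner change wraps the benchmark body in try/finally so that client.close() runs even when a step raises and the temporary database is not left open. As a hedged aside, if SQLiteVecClient only guarantees a close() method (which the call above implies), the same cleanup could also be expressed with contextlib.closing; this is an illustrative alternative, not what the commit does:

from contextlib import closing

with closing(SQLiteVecClient(table="benchmark", db_path=db_path)) as client:
    client.create_table(dim=dim, distance=distance)
    # ... run the benchmark steps ...
# close() is called automatically here, even if a step raised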

sqlite_vec_client/base.py

Lines changed: 16 additions & 6 deletions
@@ -456,15 +456,25 @@ def delete_many(self, rowids: list[int]) -> int:
         if not rowids:
             return 0
         logger.debug(f"Deleting {len(rowids)} records")
-        placeholders = ",".join(["?"] * len(rowids))
+
+        # SQLite has a limit on SQL variables (typically 999 or 32766)
+        # Split into chunks to avoid "too many SQL variables" error
+        chunk_size = 500
         cur = self.connection.cursor()
-        cur.execute(
-            f"DELETE FROM {self.table} WHERE rowid IN ({placeholders})",
-            rowids,
-        )
+        deleted_count = 0
+
+        for i in range(0, len(rowids), chunk_size):
+            chunk = rowids[i : i + chunk_size]
+            placeholders = ",".join(["?"] * len(chunk))
+            cur.execute(
+                f"DELETE FROM {self.table} WHERE rowid IN ({placeholders})",
+                chunk,
+            )
+            deleted_count += cur.rowcount
+
         if not self._in_transaction:
             self.connection.commit()
-        deleted_count = cur.rowcount
+
         logger.info(f"Deleted {deleted_count} records from table '{self.table}'")
         return deleted_count
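
For reference, the error the new comment mentions is straightforward to reproduce with the standard sqlite3 module; whether it appears at 999 or 32766 parameters depends on how the linked SQLite library was compiled, which is why a conservative chunk size of 500 stays safely under both defaults:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE items (value TEXT)")
rowids = list(range(1, 50_001))  # far beyond any default parameter limit
placeholders = ",".join("?" * len(rowids))
try:
    # One bound parameter per rowid in a single statement
    conn.execute(f"DELETE FROM items WHERE rowid IN ({placeholders})", rowids)
except sqlite3.OperationalError as exc:
    print(exc)  # e.g. "too many SQL variables"

Chunking the rowids as delete_many now does keeps every statement within the limit, while accumulating cur.rowcount preserves the total deleted count reported in the log.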
