Skip to content

Commit 042d093

Browse files
committed
Add optimize/VACUUM integration test and docs
1 parent 7b72554 commit 042d093

File tree

2 files changed

+148
-7
lines changed

2 files changed

+148
-7
lines changed

README.md

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -267,19 +267,37 @@ order by distance;
267267

268268
### Space Reclamation with Optimize
269269

270-
Reclaim disk space after deleting vectors:
270+
`optimize` compacts vec shadow tables. To shrink the database file:
271271

272272
```sql
273-
-- Delete vectors
274-
delete from vec_examples where rowid in (2, 4, 6, 8, 10);
273+
-- Before creating vec tables: enable autovacuum and apply it (recommended)
274+
PRAGMA auto_vacuum = FULL; -- or INCREMENTAL
275+
VACUUM; -- activates the setting
275276

276-
-- Reclaim space by compacting shadow tables
277-
insert into vec_examples(vec_examples) values('optimize');
277+
-- Use WAL for better concurrency
278+
PRAGMA journal_mode = WAL;
279+
```
280+
281+
After deletes, reclaim space:
282+
283+
```sql
284+
-- Compact shadow tables
285+
INSERT INTO vec_examples(vec_examples) VALUES('optimize');
278286

279-
-- Verify deletion
280-
select count(*) from vec_examples; -- Returns 15
287+
-- Flush WAL
288+
PRAGMA wal_checkpoint(TRUNCATE);
289+
290+
-- Reclaim freed pages (if using auto_vacuum=INCREMENTAL)
291+
PRAGMA incremental_vacuum;
292+
293+
-- If you did NOT enable autovacuum, run VACUUM (after checkpoint) to shrink the file.
294+
-- With autovacuum on, VACUUM is optional.
295+
VACUUM;
281296
```
282297

298+
`VACUUM` should not corrupt vec tables; a checkpoint first is recommended when
299+
using WAL so the rewrite starts from a clean state.
300+
283301
## Sponsors
284302

285303
> [!NOTE]
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import os
2+
import pytest
3+
4+
5+
def load_vec_extension(db):
    """Load the locally built ``vec0`` extension into the connection *db*.

    The calling test is skipped when *db* has no ``load_extension`` method,
    i.e. the SQLite binding was built without extension-loading support.

    The entry point ``sqlite3_vec_init`` is passed by keyword: the standard
    library ``sqlite3`` module only accepts ``entrypoint`` as a keyword
    argument (Python 3.12+), so a positional second argument always raises
    ``TypeError`` and the explicit entry point would never be used. Bindings
    whose ``load_extension`` takes only a path raise ``TypeError`` for the
    keyword; in that case the extension is loaded by path alone and SQLite
    derives the entry point from the file name.
    """
    if not hasattr(db, "load_extension"):
        pytest.skip("SQLite build does not support loading extensions")
    if hasattr(db, "enable_load_extension"):
        db.enable_load_extension(True)
    # No file suffix: SQLite appends the platform-specific one itself.
    ext = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "dist", "vec0"))
    try:
        # Explicit entrypoint to avoid relying on default name
        db.load_extension(ext, entrypoint="sqlite3_vec_init")
    except TypeError:
        # This binding's load_extension() does not take an entrypoint
        db.load_extension(ext)
17+
18+
19+
def pragma_int(db, sql):
    """Execute *sql* on *db* and return the first column of the first row.

    Used for single-value queries such as ``PRAGMA page_count`` or
    ``select count(*) ...``. The statement must produce at least one row.
    """
    return db.execute(sql).fetchone()[0]
21+
22+
23+
def test_optimize_reclaims_pages_with_autovacuum_incremental(tmp_path):
    """Optimize compacts vec0 chunks and incremental vacuum frees the pages.

    With ``auto_vacuum = INCREMENTAL`` enabled before any vec table exists,
    deleting half of the rows and running ``optimize`` should halve the number
    of chunk rows, and a full ``PRAGMA incremental_vacuum`` afterwards should
    leave no pages on the freelist.
    """
    try:
        import pysqlite3 as sqlite3  # uses bundled modern SQLite with extension loading
    except ImportError:  # fallback if not available
        import sqlite3

    db_path = tmp_path / "optimize_reclaim.db"

    db = sqlite3.connect(str(db_path))
    try:
        db.row_factory = sqlite3.Row

        # Enable autovacuum before creating vec tables; VACUUM is safe here because
        # the database only has SQLite system tables at this point.
        db.execute("PRAGMA auto_vacuum = INCREMENTAL")
        db.execute("VACUUM")
        db.execute("PRAGMA journal_mode = WAL")

        load_vec_extension(db)

        # Use a modest chunk_size so we create several chunks and can reclaim them
        db.execute("create virtual table v using vec0(vector float[1], chunk_size=64)")

        # Insert 256 rows (four chunks at chunk_size=64)
        db.executemany(
            "insert into v(rowid, vector) values(?, ?)",
            ((i, b"\x11\x11\x11\x11") for i in range(1, 257)),
        )
        db.commit()
        chunk_rows_after_insert = pragma_int(db, "select count(*) from v_chunks")

        # Delete half the rows to create free space inside vec shadow tables
        db.execute("delete from v where rowid > 128")
        db.commit()
        chunk_rows_after_delete = pragma_int(db, "select count(*) from v_chunks")

        # Compact vec shadow tables and reclaim file pages with autovacuum
        db.execute("insert into v(v) values('optimize')")
        db.commit()
        db.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        # incremental_vacuum frees pages as the statement is stepped, so the
        # result must be drained for the whole freelist to be released.
        db.execute("PRAGMA incremental_vacuum").fetchall()
        free_pages_after_vacuum = pragma_int(db, "PRAGMA freelist_count")
        chunk_rows_after_optimize = pragma_int(db, "select count(*) from v_chunks")

        # Initially 256 rows at chunk_size 64 -> 4 chunk rows. After deleting half,
        # optimize should compact to 2 chunk rows.
        assert chunk_rows_after_insert == 4
        assert chunk_rows_after_delete == 4
        assert chunk_rows_after_optimize == 2
        # A completed incremental vacuum returns every freed page to the OS
        assert free_pages_after_vacuum == 0
    finally:
        db.close()
70+
71+
72+
def test_optimize_then_vacuum_allows_future_writes(tmp_path):
    """VACUUM after optimize keeps vec0 shadow tables usable for new inserts.

    The database runs in WAL mode without autovacuum. After deleting half the
    rows and compacting, a full ``VACUUM`` must not grow the database, and
    subsequent inserts must land in a consistent set of chunks.
    """
    try:
        import pysqlite3 as sqlite3  # uses bundled modern SQLite with extension loading
    except ImportError:
        import sqlite3

    db_path = tmp_path / "vacuum_safe.db"

    db = sqlite3.connect(str(db_path))
    try:
        db.row_factory = sqlite3.Row
        load_vec_extension(db)

        db.execute("PRAGMA journal_mode = WAL")
        db.execute("create virtual table v using vec0(vector float[1], chunk_size=8)")

        # 32 rows -> 4 chunks at chunk_size=8
        db.executemany(
            "insert into v(rowid, vector) values(?, ?)",
            ((i, b"\x11\x11\x11\x11") for i in range(1, 33)),
        )
        db.commit()

        # Delete half, then compact
        db.execute("delete from v where rowid > 16")
        db.commit()
        db.execute("insert into v(v) values('optimize')")
        db.commit()

        # Checkpoint before VACUUM; capture size/page count
        db.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        size_before_vacuum = pragma_int(db, "PRAGMA page_count")
        disk_bytes_before = os.stat(db_path).st_size

        # VACUUM should preserve shadow table consistency
        db.execute("VACUUM")
        db.execute("PRAGMA journal_mode = WAL")
        # In WAL mode the rewritten pages sit in the WAL until a checkpoint, so
        # flush again before measuring; otherwise the main file size is stale.
        db.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        size_after_vacuum = pragma_int(db, "PRAGMA page_count")
        disk_bytes_after = os.stat(db_path).st_size

        # Insert more rows after VACUUM; expect no blob-open failures
        db.executemany(
            "insert into v(rowid, vector) values(?, ?)",
            ((i, b"\x22\x22\x22\x22") for i in range(17, 25)),
        )
        db.commit()

        # Row count and chunk rows should be consistent (3 chunks cover 24 rows)
        assert pragma_int(db, "select count(*) from v") == 24
        assert pragma_int(db, "select count(*) from v_chunks") == 3
        # File/page count should not grow; should shrink when pages are freed
        assert size_after_vacuum <= size_before_vacuum
        assert disk_bytes_after <= disk_bytes_before
    finally:
        db.close()

0 commit comments

Comments
 (0)