Skip to content

Commit 8bd55db

Browse files
Merge pull request #11 from sqliteai/fix-rebuild
fix(sentences): remove sentences together with the document
2 parents 2cd0927 + adda288 commit 8bd55db

File tree

2 files changed

+65
-9
lines changed

2 files changed

+65
-9
lines changed

src/sqlite_rag/repository.py

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ def document_exists_by_hash(self, hash: str) -> bool:
104104
return cursor.fetchone() is not None
105105

106106
def remove_document(self, document_id: str) -> bool:
107-
"""Remove document and its chunks by document ID"""
107+
"""Remove document and its related resources by document ID"""
108108
cursor = self._conn.cursor()
109109

110110
# Check if document exists
@@ -114,11 +114,29 @@ def remove_document(self, document_id: str) -> bool:
114114
if cursor.fetchone()["total"] == 0:
115115
return False
116116

117-
# Remove chunks first
117+
# Delete sentences
118118
cursor.execute(
119-
"DELETE FROM chunks_fts WHERE rowid IN (SELECT rowid FROM chunks WHERE document_id = ?)",
119+
"""
120+
DELETE FROM sentences
121+
WHERE chunk_id IN (
122+
SELECT id FROM chunks WHERE document_id = ?
123+
)
124+
""",
120125
(document_id,),
121126
)
127+
128+
# Delete chunks FTS
129+
cursor.execute(
130+
"""
131+
DELETE FROM chunks_fts
132+
WHERE rowid IN (
133+
SELECT rowid FROM chunks WHERE document_id = ?
134+
)
135+
""",
136+
(document_id,),
137+
)
138+
139+
# Delete chunks
122140
cursor.execute("DELETE FROM chunks WHERE document_id = ?", (document_id,))
123141

124142
# Remove document

tests/test_repository.py

Lines changed: 44 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from sqlite_rag.models.chunk import Chunk
22
from sqlite_rag.models.document import Document
3+
from sqlite_rag.models.sentence import Sentence
34
from sqlite_rag.repository import Repository
45

56

@@ -153,35 +154,72 @@ def test_remove_document_success(self, db_conn):
153154
conn, settings = db_conn
154155
repo = Repository(conn, settings)
155156

156-
# Add a document with chunks
157+
# Add a document with chunks and sentences
157158
doc = Document(
158159
content="Test document content.",
159160
uri="test.txt",
160161
metadata={"author": "test"},
161162
)
162-
doc.chunks = [
163-
Chunk(content="Chunk 1", embedding=b"\x00" * 384),
164-
Chunk(content="Chunk 2", embedding=b"\x00" * 384),
163+
chunk1 = Chunk(content="Chunk 1", embedding=b"\x00" * 384)
164+
chunk1.sentences = [
165+
Sentence(
166+
content="Sentence 1",
167+
embedding=b"\x00" * 384,
168+
start_offset=0,
169+
end_offset=10,
170+
),
171+
Sentence(
172+
content="Sentence 2",
173+
embedding=b"\x00" * 384,
174+
start_offset=11,
175+
end_offset=20,
176+
),
177+
]
178+
chunk2 = Chunk(content="Chunk 2", embedding=b"\x00" * 384)
179+
chunk2.sentences = [
180+
Sentence(
181+
content="Sentence 3",
182+
embedding=b"\x00" * 384,
183+
start_offset=0,
184+
end_offset=10,
185+
),
165186
]
187+
doc.chunks = [chunk1, chunk2]
166188
doc_id = repo.add_document(doc)
167189

168-
# Verify document and chunks exist
190+
# Verify document, chunks, and sentences exist
169191
cursor = conn.cursor()
170192
cursor.execute("SELECT COUNT(*) FROM documents WHERE id = ?", (doc_id,))
171193
assert cursor.fetchone()[0] == 1
172194
cursor.execute("SELECT COUNT(*) FROM chunks WHERE document_id = ?", (doc_id,))
173195
assert cursor.fetchone()[0] == 2
196+
cursor.execute(
197+
"""
198+
SELECT COUNT(*) FROM sentences
199+
WHERE chunk_id IN (SELECT id FROM chunks WHERE document_id = ?)
200+
""",
201+
(doc_id,),
202+
)
203+
assert cursor.fetchone()[0] == 3
174204

175205
# Remove document
176206
success = repo.remove_document(doc_id)
177207

178208
assert success is True
179209

180-
# Verify document and chunks are removed
210+
# Verify document, chunks, and sentences are removed
181211
cursor.execute("SELECT COUNT(*) FROM documents WHERE id = ?", (doc_id,))
182212
assert cursor.fetchone()[0] == 0
183213
cursor.execute("SELECT COUNT(*) FROM chunks WHERE document_id = ?", (doc_id,))
184214
assert cursor.fetchone()[0] == 0
215+
cursor.execute(
216+
"""
217+
SELECT COUNT(*) FROM sentences
218+
WHERE chunk_id IN (SELECT id FROM chunks WHERE document_id = ?)
219+
""",
220+
(doc_id,),
221+
)
222+
assert cursor.fetchone()[0] == 0
185223

186224
def test_remove_document_not_found(self, db_conn):
187225
conn, settings = db_conn

0 commit comments

Comments
 (0)