Skip to content

Commit fa06d14

Browse files
author
Daniele Briggi
committed
fix(tests): engine tests
1 parent 20731f3 commit fa06d14

File tree

7 files changed

+328
-191
lines changed

7 files changed

+328
-191
lines changed

src/sqlite_rag/engine.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -22,12 +22,12 @@ def __init__(
2222
conn: sqlite3.Connection,
2323
settings: Settings,
2424
chunker: Chunker,
25-
sentence_chunker: SentenceSplitter,
25+
sentence_splitter: SentenceSplitter,
2626
):
2727
self._conn = conn
2828
self._settings = settings
2929
self._chunker = chunker
30-
self._sentence_chunker = sentence_chunker
30+
self._sentence_splitter = sentence_splitter
3131
self._logger = Logger()
3232

3333
def load_model(self):
@@ -55,7 +55,7 @@ def process(self, document: Document) -> Document:
5555
chunk.title = document.get_title()
5656
chunk.embedding = self.generate_embedding(chunk.get_embedding_text())
5757

58-
sentences = self._sentence_chunker.split(chunk)
58+
sentences = self._sentence_splitter.split(chunk)
5959
for sentence in sentences:
6060
sentence.embedding = self.generate_embedding(sentence.content)
6161
chunk.sentences = sentences
@@ -225,7 +225,7 @@ def search(
225225
return results
226226

227227
def search_sentences(
228-
self, query: str, chunk_id: int, k: int
228+
self, query: str, chunk_id: int, top_k: int
229229
) -> List[SentenceResult]:
230230
query_embedding = self.generate_embedding(query)
231231

@@ -250,7 +250,8 @@ def search_sentences(
250250
FROM {vector_scan_type}('sentences', 'embedding', :query_embedding) AS v
251251
JOIN sentences ON sentences.rowid = v.rowid
252252
WHERE sentences.chunk_id = :chunk_id
253-
LIMIT :k
253+
ORDER BY rank_number ASC
254+
LIMIT :top_k
254255
)
255256
SELECT
256257
sentence_id,
@@ -260,11 +261,10 @@ def search_sentences(
260261
rank_number,
261262
distance
262263
FROM vec_matches
263-
ORDER BY rank_number ASC
264264
""", # nosec B608
265265
{
266266
"query_embedding": query_embedding,
267-
"k": k,
267+
"top_k": top_k,
268268
"chunk_id": chunk_id,
269269
},
270270
)
@@ -283,7 +283,7 @@ def search_sentences(
283283
)
284284
)
285285

286-
return sentences[:k]
286+
return sentences[:top_k]
287287

288288
def versions(self) -> dict:
289289
"""Get versions of the loaded extensions."""

src/sqlite_rag/sentence_splitter.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,23 +10,23 @@ class SentenceSplitter:
1010

1111
def split(self, chunk: Chunk) -> List[Sentence]:
1212
"""Split chunk into sentences."""
13-
sentence_chunks = []
13+
sentences = []
1414

15-
sentences = self._split_into_sentences(chunk.content)
15+
sentences_text = self._split_into_sentences(chunk.content)
1616
start_offset = 0
1717
end_offset = 0
18-
for sentence in sentences:
19-
start_offset = chunk.content.index(sentence, end_offset)
20-
end_offset = start_offset + len(sentence)
18+
for sentence_text in sentences_text:
19+
start_offset = chunk.content.index(sentence_text, end_offset)
20+
end_offset = start_offset + len(sentence_text)
2121

22-
sentence_chunk = Sentence(
23-
content=sentence,
22+
sentence = Sentence(
23+
content=sentence_text,
2424
start_offset=start_offset,
2525
end_offset=end_offset,
2626
)
27-
sentence_chunks.append(sentence_chunk)
27+
sentences.append(sentence)
2828

29-
return sentence_chunks
29+
return sentences
3030

3131
def _split_into_sentences(self, text: str) -> List[str]:
3232
"""Split into focused segments for semantic matching."""

src/sqlite_rag/sqliterag.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def __init__(self, connection: sqlite3.Connection, settings: Settings):
3131
self._conn,
3232
settings,
3333
chunker=self._chunker,
34-
sentence_chunker=SentenceSplitter(),
34+
sentence_splitter=SentenceSplitter(),
3535
)
3636
self._extractor = Extractor()
3737

@@ -332,7 +332,7 @@ def search(
332332
# Refine chunks with top sentences
333333
for result in results:
334334
result.sentences = self._engine.search_sentences(
335-
semantic_query, result.chunk_id, k=self._settings.top_k_sentences
335+
semantic_query, result.chunk_id, top_k=self._settings.top_k_sentences
336336
)
337337

338338
return results

tests/conftest.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from sqlite_rag.chunker import Chunker
77
from sqlite_rag.database import Database
88
from sqlite_rag.engine import Engine
9+
from sqlite_rag.sentence_splitter import SentenceSplitter
910
from sqlite_rag.settings import Settings
1011

1112

@@ -28,7 +29,12 @@ def db_conn():
2829
def engine(db_conn) -> Engine:
2930
conn, settings = db_conn
3031

31-
engine = Engine(conn, settings, chunker=Chunker(conn, settings))
32+
engine = Engine(
33+
conn,
34+
settings,
35+
chunker=Chunker(conn, settings),
36+
sentence_splitter=SentenceSplitter(),
37+
)
3238
engine.load_model()
3339
engine.quantize()
3440
engine.create_new_context()

0 commit comments

Comments
 (0)