Skip to content

Commit 187d2ec

Browse files
author
Daniele Briggi
committed
refactor(search): embed query once per search
1 parent 012b3e7 commit 187d2ec

File tree

9 files changed

+532
-196
lines changed

9 files changed

+532
-196
lines changed

src/sqlite_rag/database.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
8888
"""
8989
)
9090

91-
# TODO: remove sequence
9291
cursor.execute(
9392
"""
9493
CREATE TABLE IF NOT EXISTS sentences (

src/sqlite_rag/engine.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import re
23
import sqlite3
34
from pathlib import Path
45
from typing import List
@@ -123,11 +124,38 @@ def free_context(self) -> None:
123124

124125
cursor.execute("SELECT llm_context_free();")
125126

126-
def search(
127-
self, semantic_query: str, fts_query, top_k: int = 10
127+
def search(self, query, top_k: int = 10) -> list[DocumentResult]:
128+
"""Semantic search and full-text search sorted with Reciprocal Rank Fusion
129+
with top matching sentences to highlight."""
130+
semantic_query = query
131+
if self._settings.use_prompt_templates:
132+
semantic_query = self._settings.prompt_template_retrieval_query.format(
133+
content=query
134+
)
135+
136+
# Clean up and split into words
137+
# '*' is used to match while typing
138+
fts_query = " ".join(re.findall(r"\b\w+\b", query.lower())) + "*"
139+
140+
query_embedding = self.generate_embedding(semantic_query)
141+
142+
results = self.search_documents(query_embedding, fts_query, top_k=top_k)
143+
144+
# Refine chunks with top sentences
145+
for result in results:
146+
result.sentences = self.search_sentences(
147+
query_embedding, result.chunk_id, top_k=self._settings.top_k_sentences
148+
)
149+
150+
return results
151+
152+
def search_documents(
153+
self, query_embedding: bytes, fts_query: str, top_k: int
128154
) -> list[DocumentResult]:
129155
"""Semantic search and full-text search sorted with Reciprocal Rank Fusion."""
130-
query_embedding = self.generate_embedding(semantic_query)
156+
# invalid query
157+
if query_embedding == b"" or fts_query.strip() == "":
158+
return []
131159

132160
vector_scan_type = (
133161
"vector_quantize_scan"
@@ -180,7 +208,7 @@ def search(
180208
documents.content as document_content,
181209
documents.metadata,
182210
chunks.id AS chunk_id,
183-
chunks.content AS snippet,
211+
chunks.content AS chunk_content,
184212
vec_rank,
185213
fts_rank,
186214
combined_rank,
@@ -212,7 +240,7 @@ def search(
212240
metadata=json.loads(row["metadata"]) if row["metadata"] else {},
213241
),
214242
chunk_id=row["chunk_id"],
215-
snippet=row["snippet"],
243+
chunk_content=row["chunk_content"],
216244
vec_rank=row["vec_rank"],
217245
fts_rank=row["fts_rank"],
218246
combined_rank=row["combined_rank"],
@@ -225,10 +253,9 @@ def search(
225253
return results
226254

227255
def search_sentences(
228-
self, query: str, chunk_id: int, top_k: int
256+
self, query_embedding: bytes, chunk_id: int, top_k: int
229257
) -> List[SentenceResult]:
230-
query_embedding = self.generate_embedding(query)
231-
258+
"""Semantic search for sentences within a chunk."""
232259
vector_scan_type = (
233260
"vector_quantize_scan_stream"
234261
if self._settings.quantize_scan

src/sqlite_rag/formatters.py

Lines changed: 7 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66

77
import typer
88

9-
from sqlite_rag.models.sentence_result import SentenceResult
10-
119
from .models.document_result import DocumentResult
1210

1311

@@ -82,81 +80,6 @@ def _format_uri_display(self, uri: str, icon: str, max_width: int = 75) -> str:
8280
uri_display = f"{icon} ...{uri[-available_width:]}"
8381
return uri_display
8482

85-
def _build_sentence_preview(
86-
self,
87-
chunk_content: str,
88-
sentences: List[SentenceResult],
89-
max_chars: int = 400,
90-
) -> str:
91-
"""Build preview from top 3 ranked sentences with [...] for gaps.
92-
93-
Args:
94-
chunk_content: The full chunk text
95-
sentences: List of SentenceResult objects (should already be sorted by rank)
96-
max_chars: Maximum total characters for preview
97-
98-
Returns:
99-
Preview string with top sentences and [...] separators.
100-
Falls back to truncated chunk_content if sentences have no offsets.
101-
"""
102-
103-
# Take top 3 sentences (they should already be sorted by rank/distance)
104-
top_sentences = sentences[:3] if sentences else []
105-
106-
if not top_sentences:
107-
# Fallback: no sentences, return truncated chunk content
108-
return chunk_content[:max_chars]
109-
110-
# Filter sentences that have offset information
111-
sentences_with_offsets = [
112-
s
113-
for s in top_sentences
114-
if s.start_offset is not None and s.end_offset is not None
115-
]
116-
117-
if not sentences_with_offsets:
118-
# Fallback: sentences exist but no offset information, return truncated chunk content
119-
return chunk_content[:max_chars]
120-
121-
# Sort by start_offset to maintain document order
122-
sentences_with_offsets.sort(
123-
key=lambda s: s.start_offset if s.start_offset is not None else -1
124-
)
125-
126-
preview_parts = []
127-
total_chars = 0
128-
prev_end_offset = None
129-
130-
for sentence in sentences_with_offsets:
131-
# Extract sentence text using offsets
132-
sentence_text = chunk_content[
133-
sentence.start_offset : sentence.end_offset
134-
].strip()
135-
136-
# Calculate remaining budget including potential separator
137-
separator_len = len(" [...] ") if preview_parts else 0
138-
remaining = max_chars - total_chars - separator_len
139-
140-
if remaining <= 0:
141-
break
142-
143-
# Truncate sentence if needed
144-
if len(sentence_text) > remaining:
145-
sentence_text = sentence_text[: remaining - 3] + "..."
146-
147-
# Check if there's a gap > 10 chars from previous sentence
148-
if prev_end_offset is not None and sentence.start_offset is not None:
149-
gap_size = sentence.start_offset - prev_end_offset
150-
if gap_size > 10:
151-
preview_parts.append("[...]")
152-
total_chars += len(" [...] ")
153-
154-
preview_parts.append(sentence_text)
155-
total_chars += len(sentence_text)
156-
prev_end_offset = sentence.end_offset
157-
158-
return " ".join(preview_parts)
159-
16083

16184
class BoxedFormatter(SearchResultFormatter):
16285
"""Base class for boxed result formatters."""
@@ -176,11 +99,8 @@ def _format_single_result(self, doc: DocumentResult, idx: int) -> None:
17699
"""Format a single result with box layout."""
177100
icon = self._get_file_icon(doc.document.uri or "")
178101

179-
# Use sentence-based preview if sentences are available
180-
if doc.sentences:
181-
snippet_text = self._build_sentence_preview(doc.snippet, doc.sentences)
182-
else:
183-
snippet_text = doc.snippet
102+
# Get snippet from DocumentResult (handles sentence-based preview automatically)
103+
snippet_text = doc.get_preview(max_chars=400)
184104

185105
snippet_lines = self._clean_and_wrap_snippet(
186106
snippet_text, width=75, max_length=400
@@ -250,11 +170,8 @@ def _format_single_result(self, doc: DocumentResult, idx: int) -> None:
250170
"""Format a single result with box layout including sentence summary."""
251171
icon = self._get_file_icon(doc.document.uri or "")
252172

253-
# Use sentence-based preview if sentences are available
254-
if doc.sentences:
255-
snippet_text = self._build_sentence_preview(doc.snippet, doc.sentences)
256-
else:
257-
snippet_text = doc.snippet
173+
# Get snippet from DocumentResult (handles sentence-based preview automatically)
174+
snippet_text = doc.get_preview(max_chars=400)
258175

259176
snippet_lines = self._clean_and_wrap_snippet(
260177
snippet_text, width=75, max_length=400
@@ -305,7 +222,7 @@ def _format_single_result(self, doc: DocumentResult, idx: int) -> None:
305222
sentence.start_offset is not None
306223
and sentence.end_offset is not None
307224
):
308-
sentence_text = doc.snippet[
225+
sentence_text = doc.chunk_content[
309226
sentence.start_offset : sentence.end_offset
310227
].strip()
311228
# Truncate and clean for display
@@ -364,13 +281,8 @@ def _print_table_header(self) -> None:
364281

365282
def _print_table_row(self, idx: int, doc: DocumentResult) -> None:
366283
"""Print a single table row."""
367-
# Use sentence-based preview if sentences are available
368-
if doc.sentences:
369-
snippet = self._build_sentence_preview(
370-
doc.snippet, doc.sentences, max_chars=52
371-
)
372-
else:
373-
snippet = doc.snippet
284+
# Get snippet from DocumentResult (handles sentence-based preview automatically)
285+
snippet = doc.get_preview(max_chars=52)
374286

375287
# Clean snippet display
376288
snippet = snippet.replace("\n", " ").replace("\r", "")

src/sqlite_rag/models/document_result.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ class DocumentResult:
99
document: Document
1010

1111
chunk_id: int
12-
snippet: str
12+
chunk_content: str
1313

1414
combined_rank: float
1515
vec_rank: float | None = None
@@ -20,3 +20,67 @@ class DocumentResult:
2020

2121
# highlight sentences
2222
sentences: list[SentenceResult] = field(default_factory=list)
23+
24+
def get_preview(
    self, top_k_sentences: int = 3, max_chars: int = 400, gap: str = "[...]"
) -> str:
    """Build a preview from the top ranked sentences, marking gaps.

    Args:
        top_k_sentences: Number of top sentences to include in the preview.
        max_chars: Maximum total characters for the preview.
        gap: Marker inserted where non-adjacent sentences are joined.

    Returns:
        Preview string with top sentences separated by the gap marker.
        Falls back to truncated chunk_content if there are no sentences
        or the sentences carry no offset information.
    """
    top_sentences = self.sentences[:top_k_sentences] if self.sentences else []

    if not top_sentences:
        # Fallback: no sentences, return truncated chunk content
        return self.chunk_content[:max_chars]

    # Keep only sentences that carry offset information
    sentences_with_offsets = [
        s
        for s in top_sentences
        if s.start_offset is not None and s.end_offset is not None
    ]

    if not sentences_with_offsets:
        # Fallback: sentences exist but no offsets, return truncated content
        return self.chunk_content[:max_chars]

    # Sort by start_offset to maintain document order
    # (offsets are guaranteed non-None by the filter above)
    sentences_with_offsets.sort(key=lambda s: s.start_offset)

    preview_parts = []
    total_chars = 0
    prev_end_offset = None

    for sentence in sentences_with_offsets:
        sentence_text = self.chunk_content[
            sentence.start_offset : sentence.end_offset
        ].strip()

        # Budget for the joining space plus the gap marker; derived from
        # `gap` so a custom marker is accounted for correctly.
        separator_len = (len(gap) + 1) if preview_parts else 0
        remaining = max_chars - total_chars - separator_len

        if remaining <= 0:
            break

        # Insert the gap marker when sentences are not adjacent
        # (more than 10 characters apart in the source chunk).
        if prev_end_offset is not None and sentence.start_offset is not None:
            gap_size = sentence.start_offset - prev_end_offset
            if gap_size > 10:
                preview_parts.append(gap)
                total_chars += len(gap)

        preview_parts.append(sentence_text)
        total_chars += len(sentence_text)
        prev_end_offset = sentence.end_offset

    preview = " ".join(preview_parts)

    # Hard cap: the budget above is approximate, so enforce max_chars here.
    return preview[: max_chars - 3] + "..." if len(preview) > max_chars else preview

src/sqlite_rag/sqliterag.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import re
21
import sqlite3
32
from dataclasses import asdict
43
from pathlib import Path
@@ -317,25 +316,7 @@ def search(
317316
if new_context:
318317
self._engine.create_new_context()
319318

320-
semantic_query = query
321-
if self._settings.use_prompt_templates:
322-
semantic_query = self._settings.prompt_template_retrieval_query.format(
323-
content=query
324-
)
325-
326-
# Clean up and split into words
327-
# '*' is used to match while typing
328-
fts_query = " ".join(re.findall(r"\b\w+\b", query.lower())) + "*"
329-
330-
results = self._engine.search(semantic_query, fts_query, top_k=top_k)
331-
332-
# Refine chunks with top sentences
333-
for result in results:
334-
result.sentences = self._engine.search_sentences(
335-
semantic_query, result.chunk_id, top_k=self._settings.top_k_sentences
336-
)
337-
338-
return results
319+
return self._engine.search(query, top_k=top_k)
339320

340321
def get_settings(self) -> dict:
341322
"""Get settings and more useful information"""

0 commit comments

Comments
 (0)