Commit 2d22ad5
Author: Daniele Briggi
Message: fix(tests): consider prompts
1 parent: a90e534
11 files changed: +147 -18 lines changed

pyproject.toml
Lines changed: 1 addition & 1 deletion

@@ -35,7 +35,7 @@ dependencies = [
 dev = [
     "pytest",
     "pytest-mock",
-    "pytest-cov",
+    "pytest-cov==6.3.0",
     "black",
     "flake8",
     "bandit",

src/sqlite_rag/chunker.py
Lines changed: 16 additions & 3 deletions

@@ -11,12 +11,15 @@ def __init__(self, conn: sqlite3.Connection, settings: Settings):
         self._conn = conn
         self._settings = settings

-    def chunk(self, text: str) -> list[Chunk]:
+    def chunk(self, text: str, metadata: dict = {}) -> list[Chunk]:
         """Chunk text using Recursive Character Text Splitter."""
+        chunks = []
         if self._get_token_count(text) <= self._settings.chunk_size:
-            return [Chunk(content=text)]
+            chunks = [Chunk(content=text)]
+        else:
+            chunks = self._recursive_split(text)

-        return self._recursive_split(text)
+        return self._enrich_chunk(chunks, metadata)

     def _get_token_count(self, text: str) -> int:
         """Get token count using SQLite AI extension."""

@@ -190,3 +193,13 @@ def _get_overlap_text(self, text: str, max_overlap_tokens: int) -> str:

         # If even single word is too large, return empty
         return ""
+
+    def _enrich_chunk(self, chunks: List[Chunk], metadata: dict) -> List[Chunk]:
+        """Add extra information to chunk which may improve the model embeddings."""
+        for chunk in chunks:
+            if "title" in metadata:
+                chunk.title = metadata["title"]
+            elif "title" in metadata.get("generated", {}):
+                chunk.title = metadata["generated"]["title"]
+
+        return chunks
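
The metadata path gives chunks a title that later feeds the embedding prompt. A minimal usage sketch, assuming a connection with the sqlite-ai extension loaded (chunking calls the extension for token counting) and a Settings object built like the test fixtures below; the setup itself is illustrative, not part of the commit:

    import sqlite3

    from sqlite_rag.chunker import Chunker
    from sqlite_rag.settings import Settings

    conn = sqlite3.connect(":memory:")  # real use needs the sqlite-ai extension loaded
    settings = Settings("test-model", use_prompt_templates=False)
    chunker = Chunker(conn, settings)

    # An explicit "title" key takes precedence over metadata["generated"]["title"].
    chunks = chunker.chunk("Some document text.", {"title": "My Doc"})
    assert all(chunk.title == "My Doc" for chunk in chunks)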

src/sqlite_rag/cli.py
Lines changed: 14 additions & 1 deletion

@@ -149,6 +149,15 @@ def configure_settings(
     use_gpu: Optional[bool] = typer.Option(
         None, help="Whether to allow sqlite-ai extension to use the GPU"
     ),
+    no_prompt_templates: bool = typer.Option(
+        False,
+        "--no-prompt-templates",
+        help="Disable prompt templates for embedding generation",
+    ),
+    prompt_template_retrieval_document: Optional[str] = typer.Option(
+        None,
+        help="Template for retrieval document prompts. Supported placeholders are `{title}` and `{content}`",
+    ),
     prompt_template_retrieval_query: Optional[str] = typer.Option(
         None,
         help="Template for retrieval query prompts, use `{content}` as placeholder",

@@ -176,9 +185,13 @@ def configure_settings(
         "weight_fts": weight_fts,
         "weight_vec": weight_vec,
         "use_gpu": use_gpu,
+        "use_prompt_templates": (
+            False if no_prompt_templates else None
+        ),  # Set only if True
+        "prompt_template_retrieval_document": prompt_template_retrieval_document,
         "prompt_template_retrieval_query": prompt_template_retrieval_query,
     }
-
+    print(updates)
    # Filter out None values (unset options)
    updates = {k: v for k, v in updates.items() if v is not None}
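
A hypothetical exercise of the new options through Typer's test runner, in the same style as the integration tests; the `app` import name is an assumption:

    from typer.testing import CliRunner

    from sqlite_rag.cli import app  # assumed Typer app name

    runner = CliRunner()
    result = runner.invoke(
        app,
        [
            "configure",
            "--prompt-template-retrieval-document",
            "title: {title} | text: {content}",
        ],
    )
    assert result.exit_code == 0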

src/sqlite_rag/engine.py
Lines changed: 11 additions & 5 deletions

@@ -35,7 +35,7 @@ def load_model(self):
         )

     def process(self, document: Document) -> Document:
-        chunks = self._chunker.chunk(document.content)
+        chunks = self._chunker.chunk(document.content, document.metadata)
         chunks = self.generate_embedding(chunks)
         document.chunks = chunks
         return document

@@ -46,12 +46,18 @@ def generate_embedding(self, chunks: list[Chunk]) -> list[Chunk]:
         for chunk in chunks:
             cursor = self._conn.cursor()

-            try:
-                cursor.execute(
-                    "SELECT llm_embed_generate(?) AS embedding", (chunk.content,)
+            # Format using the prompt template if available
+            content = chunk.content
+            if self._settings.use_prompt_templates:
+                title = chunk.title if chunk.title else "none"
+                content = self._settings.prompt_template_retrieval_document.format(
+                    title=title, content=chunk.content
                 )
+
+            try:
+                cursor.execute("SELECT llm_embed_generate(?) AS embedding", (content,))
             except sqlite3.Error as e:
-                print(f"Error generating embedding for chunk\n: ```{chunk.content}```")
+                print(f"Error generating embedding for chunk\n: ```{content}```")
                 raise e

             result = cursor.fetchone()
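
The formatting step is easy to see in isolation. A small sketch of what generate_embedding now passes to llm_embed_generate, using the default document template from Settings; the helper function is illustrative:

    DOC_TEMPLATE = "title: {title} | text: {content}"

    def format_for_embedding(content: str, title: str | None) -> str:
        # Mirrors the engine: a missing title falls back to the literal "none".
        return DOC_TEMPLATE.format(title=title or "none", content=content)

    print(format_for_embedding("Hello world", None))
    # title: none | text: Hello world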

src/sqlite_rag/models/chunk.py
Lines changed: 2 additions & 0 deletions

@@ -8,3 +8,5 @@ class Chunk:
     content: str = ""
     embedding: str | bytes = b""
     core_start_pos: int = 0
+
+    title: str | None = None
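
With the new field a chunk can carry an optional title; a quick illustration:

    from sqlite_rag.models.chunk import Chunk

    untitled = Chunk(content="plain text")                # title defaults to None
    titled = Chunk(content="plain text", title="Intro")   # set by _enrich_chunk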

src/sqlite_rag/settings.py
Lines changed: 6 additions & 0 deletions

@@ -31,6 +31,7 @@ class Settings:
         "distance=cosine"  # e.g. distance=metric,other=value,...
     )

+    # It includes the overlap size but not the prompt template length
     chunk_size: int = 384
     # Tokens overlap between chunks
     chunk_overlap: int = 48

@@ -53,8 +54,13 @@ class Settings:
     # Some models are trained to work better with specific prompts
     # depending on the task. For example, Gemma models work better
     # when the prompt includes a task description.
+    # More: https://huggingface.co/unsloth/embeddinggemma-300m-GGUF#prompt-instructions
     #

+    use_prompt_templates: bool = True
+
+    # Template to index documents for retrieval, use `{title}` with the title or the string `"none"`
+    prompt_template_retrieval_document: str = "title: {title} | text: {content}"
     prompt_template_retrieval_query: str = "task: search result | query: {content}"
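
With these defaults, the two templates render as follows (plain str.format, values illustrative):

    doc_template = "title: {title} | text: {content}"
    query_template = "task: search result | query: {content}"

    print(doc_template.format(title="none", content="SQLite is a database."))
    # title: none | text: SQLite is a database.

    print(query_template.format(content="what is sqlite?"))
    # task: search result | query: what is sqlite?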

src/sqlite_rag/sqliterag.py
Lines changed: 1 addition & 1 deletion

@@ -272,7 +272,7 @@ def search(
         if new_context:
             self._engine.create_new_context()

-        if self._settings.prompt_template_retrieval_query:
+        if self._settings.use_prompt_templates:
             query = self._settings.prompt_template_retrieval_query.format(content=query)

         return self._engine.search(query, top_k=top_k)
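
The query side now keys off the use_prompt_templates switch instead of checking whether the query template string is non-empty. A condensed sketch of the guard, with the surrounding search code assumed:

    def format_query(settings, query: str) -> str:
        # Wrap the query only when prompt templates are enabled.
        if settings.use_prompt_templates:
            return settings.prompt_template_retrieval_query.format(content=query)
        return query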

tests/integration/test_cli.py
Lines changed: 1 addition & 2 deletions

@@ -29,8 +29,7 @@ def test_search_exact_match(self):
                 "configure",
                 "--model-path",
                 str(model_path),
-                "--prompt-template-retrieval-query",
-                "",
+                "--no-prompt-templates",
                 "--other-vector-options",
                 "distance=cosine",
             ],

tests/test_chunker.py
Lines changed: 24 additions & 3 deletions

@@ -42,7 +42,7 @@ def mock_conn():
 @pytest.fixture
 def chunker_large(mock_conn):
     """Fixture providing a chunker with large chunk size."""
-    settings = Settings("test-model")
+    settings = Settings("test-model", use_prompt_templates=False)
     settings.chunk_size = 100
     settings.chunk_overlap = 20
     return Chunker(mock_conn, settings)

@@ -51,7 +51,7 @@ def chunker_large(mock_conn):
 @pytest.fixture
 def chunker_small(mock_conn):
     """Fixture providing a chunker with small chunk size."""
-    settings = Settings("test-model")
+    settings = Settings("test-model", use_prompt_templates=False)
     settings.chunk_size = 25
     settings.chunk_overlap = 5
     return Chunker(mock_conn, settings)

@@ -60,7 +60,7 @@ def chunker_small(mock_conn):
 @pytest.fixture
 def chunker_tiny(mock_conn):
     """Fixture providing a chunker with tiny chunk size."""
-    settings = Settings("test-model")
+    settings = Settings("test-model", use_prompt_templates=False)
     settings.chunk_size = 8
     settings.chunk_overlap = 2
     return Chunker(mock_conn, settings)

@@ -85,6 +85,27 @@ def test_empty_text(self, chunker_large):
         assert len(chunks) == 1
         assert chunks[0].content == ""

+    def test_chunk_enrichness_with_input_title(self, chunker_large):
+        """Test that chunk enrichment adds metadata correctly."""
+        text = "This is a test chunk."
+        metadata = {"title": "Test Title"}
+
+        chunks = chunker_large.chunk(text, metadata)
+
+        assert len(chunks) == 1
+        assert chunks[0].content == text
+        assert chunks[0].title == "Test Title"
+
+    def test_chunk_enrichness_with_generated_title(self, chunker_large):
+        text = "# My title\n\nThis is a paragraph to test chunk."
+        metadata = {"generated": {"title": "My title"}}
+
+        chunks = chunker_large.chunk(text, metadata)
+
+        assert len(chunks) == 1
+        assert chunks[0].content == text
+        assert chunks[0].title == "My title"
+

 class TestParagraphSplitting:
     """Test cases for paragraph-level splitting."""

tests/test_engine.py
Lines changed: 35 additions & 0 deletions

@@ -18,6 +18,41 @@ def test_generate_embedding(self, engine):
         assert result_chunks[0].embedding is not None
         assert isinstance(result_chunks[0].embedding, bytes)

+    @pytest.mark.parametrize("use_prompt_templates", [True, False])
+    def test_generate_embedding_with_prompt_template(
+        self, mocker, use_prompt_templates
+    ):
+        # Arrange
+        mock_conn = mocker.Mock()
+        mock_cursor = mocker.Mock()
+        mock_cursor.fetchone.return_value = {"embedding": b"fake_embedding"}
+        mock_conn.cursor.return_value = mock_cursor
+
+        settings = Settings(
+            use_prompt_templates=use_prompt_templates,
+            prompt_template_retrieval_document="Title: {title}\nContent: {content}",
+        )
+
+        engine = Engine(mock_conn, settings, mocker.Mock())
+
+        chunk = Chunk(
+            content="Test content",
+            title="Test Title",
+        )
+
+        # Act
+        engine.generate_embedding([chunk])
+
+        # Assert - verify cursor.execute was called with formatted template
+        expected_content = (
+            "Title: Test Title\nContent: Test content"
+            if use_prompt_templates
+            else "Test content"
+        )
+        mock_cursor.execute.assert_called_with(
+            "SELECT llm_embed_generate(?) AS embedding", (expected_content,)
+        )
+
     def test_search_with_empty_database(self, engine):
         results = engine.search("nonexistent query", top_k=5)
