feat(context): add context usage

Daniele Briggi · Daniele Briggi · commit 0b47386dd43d · 2025-11-18T13:52:26.000Z
diff --git a/src/sqlite_rag/cli.py b/src/sqlite_rag/cli.py
@@ -461,7 +461,7 @@ def spinner() -> None:
     stats_line = f"{elapsed_time:.3f} seconds"
     if token_count > 0 and elapsed_time > 0:
         tokens_per_sec = token_count / elapsed_time
-        stats_line = f"{stats_line} ({token_count} tokens, {tokens_per_sec:.2f} tok/s)"
+        stats_line = f"{stats_line} ({token_count} tokens, {tokens_per_sec:.2f} tok/s, usage: {rag.context_used()} tokens)"
     typer.echo(stats_line)
 
 
diff --git a/src/sqlite_rag/engine.py b/src/sqlite_rag/engine.py
@@ -101,6 +101,13 @@ def quantize_cleanup(self) -> None:
 
         conn.commit()
 
+    def context_used(self) -> int:
+        """Get the current context usage reported by llm_context_used() (a token count, per the CLI output)."""
+        cursor = self._text_generation_model.ensure_loaded().cursor()
+
+        cursor.execute("SELECT llm_context_used();")
+        return cursor.fetchone()[0]
+
     def free_context(self) -> None:
         """Release resources associated with the current context."""
         cursor = self._embedding_model.ensure_loaded().cursor()
@@ -313,7 +320,7 @@ def create_new_chat(self) -> None:
 
     def ask(self, query: str) -> sqlite3.Cursor:
         """Generate an answer to the query using the LLM."""
-        results = self.search(query, top_k=3)
+        results = self.search(query, top_k=10)
 
         context = ""
         for result in results:
@@ -323,16 +330,15 @@ def ask(self, query: str) -> sqlite3.Cursor:
             if result.combined_rank > self._settings.results_threshold:
                 self._logger.debug("\r\b - taken")
                 # TODO: how to improve context limit?
-                preview = result.document.content[:5000].replace("\n", "\\n")
+                preview = result.document.content[:5000]
                 context += f"{preview}\n\n"
 
         prompt = query
         if context != "":
             # prompt = f"""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say you that don't know. Use three sentences maximum and keep the answer coincise.
             # prompt = prompt = f"""Answer the question based only on the following documents. Answer with the summary of the documents provided. Do **NOT** include any introductory phrases, titles, or prefixes such as "Answer:", "The answer is:", "Final Answer:", or "Based on the context,". Start your response with the answer itself:"""
             prompt = f"""Answer the question based on the following documents.
-Answer with the summary of the documents provided.
-Do **NOT** include any introductory phrases, titles, or prefixes such as "Answer:", "The answer is:", "Final Answer:", or "Based on the context,". Start your response with the answer itself:
+Do **NOT** include any introductory phrases, titles, or prefixes such as "The documents explain", "Answer:", "The answer is:", "Final Answer:", or "Based on the context,". Start your response with the answer itself:
 
 {context}
 
diff --git a/src/sqlite_rag/settings.py b/src/sqlite_rag/settings.py
@@ -101,11 +101,11 @@ class Settings:
 
     # Weights for combining FTS and vector search results
     weight_fts: float = field(
-        default=1.5,
+        default=1.0,
         metadata={"help": "Weight applied to full text search scores"},
     )
     weight_vec: float = field(
-        default=1.0,
+        default=1.5,
         metadata={"help": "Weight applied to vector similarity scores"},
     )
 
diff --git a/src/sqlite_rag/sqliterag.py b/src/sqlite_rag/sqliterag.py
@@ -339,6 +339,10 @@ def quantize_cleanup(self) -> None:
         """Clean up quantization structures"""
         self._engine.quantize_cleanup()
 
+    def context_used(self) -> int:
+        """Get the current context usage in tokens (delegates to the engine)."""
+        return self._engine.context_used()
+
     def close(self) -> None:
         """Free up resources"""
         self._embedding_model.unload()

Original file line number	Diff line number	Diff line change
`@@ -101,11 +101,11 @@ class Settings:`
`101`	`101`
`102`	`102`	`# Weights for combining FTS and vector search results`
`103`	`103`	`weight_fts: float = field(`
`104`		`- default=1.5,`
	`104`	`+ default=1.0,`
`105`	`105`	`metadata={"help": "Weight applied to full text search scores"},`
`106`	`106`	`)`
`107`	`107`	`weight_vec: float = field(`
`108`		`- default=1.0,`
	`108`	`+ default=1.5,`
`109`	`109`	`metadata={"help": "Weight applied to vector similarity scores"},`
`110`	`110`	`)`
`111`	`111`