Skip to content

Commit 0b47386

Browse files
author
Daniele Briggi
committed
feat(context): add context usage
1 parent 7c3eff3 commit 0b47386

File tree

4 files changed

+17
-7
lines changed

4 files changed

+17
-7
lines changed

src/sqlite_rag/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ def spinner() -> None:
461461
stats_line = f"{elapsed_time:.3f} seconds"
462462
if token_count > 0 and elapsed_time > 0:
463463
tokens_per_sec = token_count / elapsed_time
464-
stats_line = f"{stats_line} ({token_count} tokens, {tokens_per_sec:.2f} tok/s)"
464+
stats_line = f"{stats_line} ({token_count} tokens, {tokens_per_sec:.2f} tok/s, usage: {rag.context_used()} tokens)"
465465
typer.echo(stats_line)
466466

467467

src/sqlite_rag/engine.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,13 @@ def quantize_cleanup(self) -> None:
101101

102102
conn.commit()
103103

104+
def context_used(self) -> int:
105+
"""Get the number of tokens currently used in the LLM context."""
106+
cursor = self._text_generation_model.ensure_loaded().cursor()
107+
108+
cursor.execute("SELECT llm_context_used();")
109+
return cursor.fetchone()[0]
110+
104111
def free_context(self) -> None:
105112
"""Release resources associated with the current context."""
106113
cursor = self._embedding_model.ensure_loaded().cursor()
@@ -313,7 +320,7 @@ def create_new_chat(self) -> None:
313320

314321
def ask(self, query: str) -> sqlite3.Cursor:
315322
"""Generate an answer to the query using the LLM."""
316-
results = self.search(query, top_k=3)
323+
results = self.search(query, top_k=10)
317324

318325
context = ""
319326
for result in results:
@@ -323,16 +330,15 @@ def ask(self, query: str) -> sqlite3.Cursor:
323330
if result.combined_rank > self._settings.results_threshold:
324331
self._logger.debug("\r\b - taken")
325332
# TODO: how to improve context limit?
326-
preview = result.document.content[:5000].replace("\n", "\\n")
333+
preview = result.document.content[:5000]
327334
context += f"{preview}\n\n"
328335

329336
prompt = query
330337
if context != "":
331338
# prompt = f"""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
332339
# prompt = prompt = f"""Answer the question based only on the following documents. Answer with the summary of the documents provided. Do **NOT** include any introductory phrases, titles, or prefixes such as "Answer:", "The answer is:", "Final Answer:", or "Based on the context,". Start your response with the answer itself:"""
333340
prompt = f"""Answer the question based on the following documents.
334-
Answer with the summary of the documents provided.
335-
Do **NOT** include any introductory phrases, titles, or prefixes such as "Answer:", "The answer is:", "Final Answer:", or "Based on the context,". Start your response with the answer itself:
341+
Do **NOT** include any introductory phrases, titles, or prefixes such as "The documents explain", "Answer:", "The answer is:", "Final Answer:", or "Based on the context,". Start your response with the answer itself:
336342
337343
{context}
338344

src/sqlite_rag/settings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,11 @@ class Settings:
101101

102102
# Weights for combining FTS and vector search results
103103
weight_fts: float = field(
104-
default=1.5,
104+
default=1.0,
105105
metadata={"help": "Weight applied to full text search scores"},
106106
)
107107
weight_vec: float = field(
108-
default=1.0,
108+
default=1.5,
109109
metadata={"help": "Weight applied to vector similarity scores"},
110110
)
111111

src/sqlite_rag/sqliterag.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,10 @@ def quantize_cleanup(self) -> None:
339339
"""Clean up quantization structures"""
340340
self._engine.quantize_cleanup()
341341

342+
def context_used(self) -> int:
343+
"""Get the number of tokens currently used in the LLM context."""
344+
return self._engine.context_used()
345+
342346
def close(self) -> None:
343347
"""Free up resources"""
344348
self._embedding_model.unload()

0 commit comments

Comments
 (0)