Skip to content

Commit b583d09

Browse files
author
Daniele Briggi
committed
refact(settings): store on database and manage changes
1 parent a4be6d5 commit b583d09

File tree

17 files changed

+463
-178
lines changed

17 files changed

+463
-178
lines changed

.github/workflows/test.yaml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ jobs:
88
test:
99
runs-on: ubuntu-latest
1010
strategy:
11+
max-parallel: 1
1112
matrix:
1213
python-version: ["3.10", "3.11", "3.12"]
1314

@@ -19,15 +20,33 @@ jobs:
1920
uses: actions/setup-python@v5
2021
with:
2122
python-version: ${{ matrix.python-version }}
22-
cache: 'pip'
2323

2424
- name: Install dependencies
2525
run: |
2626
pip install .[dev]
2727
28+
# Cache the downloaded model between workflows
29+
- name: Restore GGUF model cache
30+
uses: actions/cache@v4
31+
id: cache-model
32+
with:
33+
path: ${{ vars.HF_MODEL_LOCAL_PATH }}
34+
# Change the HF_GGUF_UPDATE_DATE variable to force update the cache
35+
key: gguf-${{ vars.HF_MODEL_ID }}-${{ vars.HF_GGUF_FILE }}-${{ vars.HF_GGUF_UPDATE_DATE }}
36+
restore-keys: |
37+
gguf-${{ vars.HF_MODEL_ID }}-${{ vars.HF_GGUF_FILE }}-${{ vars.HF_GGUF_UPDATE_DATE }}-
38+
39+
- name: Download the model
40+
# Skip the download when the cache step (id: cache-model) restored the model
if: ${{ steps.cache-model.outputs.cache-hit != 'true' }}
41+
run: |
42+
pip install huggingface_hub
43+
chmod +x ./scripts/get_gguf_model.sh
44+
./scripts/get_gguf_model.sh "${{ vars.HF_MODEL_ID }}" "${{ vars.HF_GGUF_FILE }}" "${{ vars.HF_MODEL_LOCAL_PATH }}"
45+
2846
- name: Test
47+
# Using default directory for models
2948
run: |
30-
pytest ./tests
49+
pytest -v -m "not slow" ./tests
3150
3251
code-style:
3352
runs-on: ubuntu-latest

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
semsearch/
22
docs/
3+
samples/headlines
34

45
# LLM models
56
*.gguf
@@ -63,7 +64,7 @@ dmypy.json
6364
# SQLite database files
6465
*.db
6566
*.sqlite3
66-
*.so
67+
#*.so
6768

6869
# Environment variables
6970
.env

extensions/ai.so

4.06 MB
Binary file not shown.

extensions/vector.so

84.9 KB
Binary file not shown.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,6 @@ testpaths = [
4646
pythonpath = [
4747
"src"
4848
]
49+
markers = [
50+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
51+
]

scripts/get_gguf_model.sh

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/bin/bash

# Download a single GGUF file from a Hugging Face model repository.
#
# Usage: ./get_gguf_model.sh <HF_MODEL_ID> <HF_GGUF_FILE> [LOCAL_DIR]
#    or via environment variables: HF_MODEL_ID, HF_GGUF_FILE
#
# Example: ./get_gguf_model.sh Qwen/Qwen3-Embedding-0.6B-GGUF Qwen3-Embedding-0.6B-Q8_0.gguf

set -e

# Default download root; the optional third CLI argument overrides it.
LOCAL_DIR="./models"

# Priority: CLI arguments > environment variables
if [ $# -ge 2 ]; then
    HF_MODEL_ID="$1"
    HF_GGUF_FILE="$2"
    if [ $# -ge 3 ]; then
        LOCAL_DIR="$3"
    fi
elif [ -z "$HF_MODEL_ID" ] || [ -z "$HF_GGUF_FILE" ]; then
    echo "Error: Missing required parameters" >&2
    echo "Provide either:" >&2
    echo "  CLI: $0 <HF_MODEL_ID> <HF_GGUF_FILE> [LOCAL_DIR]" >&2
    echo "  ENV: HF_MODEL_ID and HF_GGUF_FILE environment variables" >&2
    exit 1
fi

# Resolve the CLI that will actually perform the download. Checking for one
# command and then running another would let the guard pass (or fail) for
# the wrong reason, so the command that is found is the command that is run.
if command -v hf &> /dev/null; then
    HF_CLI="hf"
elif command -v huggingface-cli &> /dev/null; then
    HF_CLI="huggingface-cli"
else
    echo "Error: Hugging Face CLI (hf) not found. Install with: pip install huggingface_hub" >&2
    exit 1
fi

# Quoted so that a directory containing spaces is not split into several paths
mkdir -p "$LOCAL_DIR"

# Download specific GGUF file
echo "Downloading $HF_GGUF_FILE from $HF_MODEL_ID..."
"$HF_CLI" download "$HF_MODEL_ID" "$HF_GGUF_FILE" --local-dir "$LOCAL_DIR/$HF_MODEL_ID"

echo "Downloaded to: $LOCAL_DIR/$HF_MODEL_ID/$HF_GGUF_FILE"

src/sqlite_rag/cli.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ def __call__(self, *args, **kwds):
2626
cli = CLI(app)
2727

2828

29+
@app.command()
30+
def set(settings: Optional[str] = typer.Argument(None)):
31+
"""Set the model and database path"""
32+
pass
33+
34+
2935
@app.command()
3036
def add(
3137
path: str = typer.Argument(..., help="File or directory path to add"),
@@ -44,11 +50,10 @@ def add(
4450
help="Optional metadata in JSON format to associate with the document",
4551
metavar="JSON",
4652
show_default=False,
47-
prompt="Metadata (JSON format, e.g. {'author': 'John Doe', 'date': '2023-10-01'}'",
4853
),
4954
):
5055
"""Add a file path to the database"""
51-
rag = SQLiteRag()
56+
rag = SQLiteRag.create()
5257
rag.add(
5358
path,
5459
recursive=recursive,
@@ -71,14 +76,14 @@ def add_text(
7176
),
7277
):
7378
"""Add a text to the database"""
74-
rag = SQLiteRag()
79+
rag = SQLiteRag.create()
7580
rag.add_text(text, uri=uri, metadata=json.loads(metadata or "{}"))
7681

7782

7883
@app.command("list")
7984
def list_documents():
8085
"""List all documents in the database"""
81-
rag = SQLiteRag()
86+
rag = SQLiteRag.create()
8287
documents = rag.list_documents()
8388

8489
if not documents:
@@ -108,7 +113,7 @@ def remove(
108113
yes: bool = typer.Option(False, "-y", "--yes", help="Skip confirmation prompt"),
109114
):
110115
"""Remove document by path or UUID"""
111-
rag = SQLiteRag()
116+
rag = SQLiteRag.create()
112117

113118
# Find the document first
114119
document = rag.find_document(identifier)
@@ -151,7 +156,7 @@ def rebuild(
151156
)
152157
):
153158
"""Rebuild embeddings and full-text index"""
154-
rag = SQLiteRag()
159+
rag = SQLiteRag.create()
155160

156161
typer.echo("Rebuild process...")
157162

@@ -169,14 +174,13 @@ def reset(
169174
yes: bool = typer.Option(False, "-y", "--yes", help="Skip confirmation prompt")
170175
):
171176
"""Reset/clear the entire database"""
172-
rag = SQLiteRag()
177+
rag = SQLiteRag.create()
173178

174179
# Show warning and ask for confirmation unless -y flag is used
175180
if not yes:
176181
typer.echo(
177182
"WARNING: This will permanently delete all documents and data from the database!"
178183
)
179-
typer.echo(f"Database file: {rag.settings.db_path}")
180184
typer.echo()
181185
confirm = typer.confirm("Are you sure you want to reset the entire database?")
182186
if not confirm:
@@ -203,7 +207,7 @@ def search(
203207
),
204208
):
205209
"""Search for documents using hybrid vector + full-text search"""
206-
rag = SQLiteRag()
210+
rag = SQLiteRag.create()
207211
results = rag.search(query, top_k=limit)
208212

209213
if not results:

src/sqlite_rag/database.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
8080

8181
cursor.execute(
8282
f"""
83-
SELECT vector_init('chunks', 'embedding', 'type={settings.vector_type},dimension={settings.embedding_dim}');
83+
SELECT vector_init('chunks', 'embedding', 'type={settings.vector_type},dimension={settings.embedding_dim},{settings.other_vector_config}');
8484
"""
8585
)
8686

src/sqlite_rag/engine.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ def search(self, query: str, limit: int = 10) -> list[DocumentResult]:
8080
query_embedding = self.generate_embedding([Chunk(content=query)])[0].embedding
8181

8282
# Clean up and split into words
83-
query = " ".join(re.findall(r"\b\w+\b", query.lower()))
83+
# '*' is used to match while typing
84+
query = " ".join(re.findall(r"\b\w+\b", query.lower())) + "*"
8485

8586
cursor.execute(
8687
# TODO: use vector_convert_XXX to convert the query to the correct type
@@ -137,8 +138,7 @@ def search(self, query: str, limit: int = 10) -> list[DocumentResult]:
137138
;
138139
""",
139140
{
140-
# '*' is used to match while typing
141-
"query": query + "*",
141+
"query": query,
142142
"query_embedding": query_embedding,
143143
"k": limit,
144144
# TODO: move to settings or constants
@@ -165,3 +165,12 @@ def search(self, query: str, limit: int = 10) -> list[DocumentResult]:
165165
)
166166
for row in rows
167167
]
168+
169+
def close(self):
170+
"""Close the database connection."""
171+
try:
172+
self._conn.execute("SELECT llm_model_free();")
173+
except sqlite3.ProgrammingError:
174+
# When connection is already closed the model
175+
# is already freed.
176+
pass

src/sqlite_rag/settings.py

Lines changed: 83 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,88 @@
1+
import json
2+
import sqlite3
3+
from dataclasses import asdict, dataclass, fields
4+
5+
6+
@dataclass
17
class Settings:
2-
def __init__(self, model_path_or_name: str, db_path: str = "sqliterag.db"):
3-
self.model_path_or_name = model_path_or_name
4-
self.db_path = db_path
8+
model_path_or_name: str = (
9+
"./models/Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf"
10+
)
11+
model_config: str = "n_ctx=384"
12+
13+
embedding_dim: int = 384
14+
vector_type: str = "FLOAT32"
15+
other_vector_config: str = "distance=cosine" # e.g. distance=metric,other=value,...
16+
17+
chunk_size: int = 12000
18+
# Token overlap between chunks
19+
chunk_overlap: int = 1200
20+
21+
# Whether to quantize the vector for faster search
22+
quantize_scan: bool = True
23+
24+
25+
class SettingsManager:
26+
def __init__(self, connection: sqlite3.Connection):
27+
self.connection = connection
28+
self._ensure_table_exists()
29+
30+
def _ensure_table_exists(self):
31+
cursor = self.connection.cursor()
32+
cursor.execute(
33+
"""
34+
CREATE TABLE IF NOT EXISTS settings (
35+
id TEXT PRIMARY KEY,
36+
settings JSON NOT NULL
37+
);
38+
"""
39+
)
40+
self.connection.commit()
41+
42+
def load_settings(self) -> Settings | None:
43+
cursor = self.connection.cursor()
44+
45+
cursor.execute("SELECT settings FROM settings LIMIT 1")
46+
row = cursor.fetchone()
47+
48+
if not row:
49+
return None
50+
51+
current_settings = json.loads(row[0])
52+
53+
# Start from defaults, update with values from db (ignore extra keys)
54+
defaults = Settings()
55+
valid_keys = {f.name for f in fields(Settings)}
56+
filtered = {k: v for k, v in current_settings.items() if k in valid_keys}
57+
58+
# Use defaults as base, update with valid properties
59+
settings_dict = {**asdict(defaults), **filtered}
60+
return Settings(**settings_dict)
61+
62+
def store(self, settings: Settings):
63+
cursor = self.connection.cursor()
564

6-
self.embedding_dim = 384
7-
self.vector_type = "FLOAT32"
65+
settings_json = json.dumps(asdict(settings))
866

9-
self.model_config = "n_ctx=384" # See: https://github.com/sqliteai/sqlite-ai/blob/e172b9c9b9d76435be635d1e02c1e88b3681cc6e/src/sqlite-ai.c#L51-L57
67+
# Upsert the settings
68+
cursor.execute(
69+
"""
70+
INSERT INTO settings (id, settings)
71+
VALUES ('1', ?)
72+
ON CONFLICT(id) DO UPDATE SET settings = excluded.settings;
73+
""",
74+
(settings_json,),
75+
)
1076

11-
self.chunk_size = 256 # Maximum tokens per chunk
12-
self.chunk_overlap = 32 # Token overlap between chunks
77+
self.connection.commit()
78+
return settings
1379

14-
self.quantize_scan = True # Whether to quantize the vector for faster search
80+
def has_critical_changes(
81+
self, new_settings: Settings, current_settings: Settings
82+
) -> bool:
83+
"""Check if the new settings have critical changes compared to the current settings."""
84+
return (
85+
new_settings.model_path_or_name != current_settings.model_path_or_name
86+
or new_settings.embedding_dim != current_settings.embedding_dim
87+
or new_settings.vector_type != current_settings.vector_type
88+
)

0 commit comments

Comments
 (0)