Skip to content

Commit 146e0e2

Browse files
author
Daniele Briggi
committed
feat: versions and settings commands
1 parent b583d09 commit 146e0e2

File tree

15 files changed

+393
-61
lines changed

15 files changed

+393
-61
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,7 @@
11
# sqlite-rag
2+
3+
## Installation
4+
5+
```bash
6+
pip install .[dev]
7+
```

bandit.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# https://bandit.readthedocs.io/en/latest/config.html
2-
skips: ['B101']
2+
skips: ['B101', 'B608']
33
exclude_dirs: ['tests']

extensions/ai.so

-4.06 MB
Binary file not shown.

extensions/vector.so

-84.9 KB
Binary file not shown.

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ dependencies = [
1212
"attrs",
1313
"typer",
1414
"huggingface_hub",
15-
"markitdown[all]"
15+
"markitdown[all]",
16+
"sqlite-ai",
17+
"sqliteai-vector"
1618
]
1719

1820
# .. or [dependency-groups] ?

src/sqlite_rag/chunker.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
class Chunker:
1010
def __init__(self, conn: sqlite3.Connection, settings: Settings):
1111
self._conn = conn
12-
self.settings = settings
12+
self._settings = settings
1313

1414
def chunk(self, text: str) -> list[Chunk]:
1515
"""Chunk text using Recursive Character Text Splitter."""
16-
if self._get_token_count(text) <= self.settings.chunk_size:
16+
if self._get_token_count(text) <= self._settings.chunk_size:
1717
return [Chunk(content=text)]
1818

1919
return self._recursive_split(text)
@@ -55,7 +55,7 @@ def _split_text_with_separators(
5555
"""Split text using hierarchical separators."""
5656
chunks = []
5757

58-
if self.settings.chunk_size <= self.settings.chunk_overlap:
58+
if self._settings.chunk_size <= self._settings.chunk_overlap:
5959
raise ValueError("Chunk size must be greater than chunk overlap.")
6060

6161
if not separators:
@@ -70,7 +70,7 @@ def _split_text_with_separators(
7070

7171
# Reserve space for overlap
7272
effective_chunk_size = max(
73-
1, self.settings.chunk_size - self.settings.chunk_overlap
73+
1, self._settings.chunk_size - self._settings.chunk_overlap
7474
)
7575

7676
splits = text.split(separator)
@@ -108,7 +108,7 @@ def _split_by_characters(self, text: str) -> List[Chunk]:
108108

109109
# Reserve space for overlap
110110
effective_chunk_size = max(
111-
1, self.settings.chunk_size - self.settings.chunk_overlap
111+
1, self._settings.chunk_size - self._settings.chunk_overlap
112112
)
113113

114114
total_tokens = self._get_token_count(text)
@@ -145,7 +145,7 @@ def _split_by_characters(self, text: str) -> List[Chunk]:
145145

146146
def _apply_overlap(self, chunks: List[Chunk]) -> List[Chunk]:
147147
"""Apply overlap between consecutive chunks."""
148-
if len(chunks) <= 1 or self.settings.chunk_overlap <= 0:
148+
if len(chunks) <= 1 or self._settings.chunk_overlap <= 0:
149149
return chunks
150150

151151
overlapped_chunks = [chunks[0]] # First chunk has no overlap
@@ -156,7 +156,7 @@ def _apply_overlap(self, chunks: List[Chunk]) -> List[Chunk]:
156156

157157
# Get overlap text from end of previous chunk
158158
overlap_text = self._get_overlap_text(
159-
prev_content, self.settings.chunk_overlap
159+
prev_content, self._settings.chunk_overlap
160160
)
161161

162162
if overlap_text:

src/sqlite_rag/cli.py

Lines changed: 86 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
import json
33
import shlex
44
import sys
5+
from dataclasses import replace
56
from typing import Optional
67

78
import typer
89

10+
from sqlite_rag.settings import Settings
11+
912
from .sqliterag import SQLiteRag
1013

1114

@@ -26,10 +29,89 @@ def __call__(self, *args, **kwds):
2629
cli = CLI(app)
2730

2831

29-
@app.command()
30-
def set(settings: Optional[str] = typer.Argument(None)):
31-
"""Set the model and database path"""
32-
pass
32+
@app.command("settings")
33+
def show_settings():
34+
"""Show current settings"""
35+
rag = SQLiteRag.create()
36+
current_settings = rag.get_settings()
37+
38+
typer.echo("Current settings:")
39+
for key, value in current_settings.items():
40+
typer.echo(f" {key}: {value}")
41+
42+
43+
# TODO: separate store settings from SQLiteRag.create()?
44+
@app.command("set")
45+
def set_settings(
46+
model_path_or_name: Optional[str] = typer.Option(
47+
None, help="Path to the embedding model file or Hugging Face model name"
48+
),
49+
model_config: Optional[str] = typer.Option(
50+
None, help="Model configuration parameters"
51+
),
52+
embedding_dim: Optional[int] = typer.Option(
53+
None, help="Dimension of the embedding vectors"
54+
),
55+
vector_type: Optional[str] = typer.Option(
56+
None, help="Vector storage type (FLOAT16, FLOAT32, etc.)"
57+
),
58+
other_vector_config: Optional[str] = typer.Option(
59+
None, help="Additional vector configuration"
60+
),
61+
chunk_size: Optional[int] = typer.Option(
62+
None, help="Size of text chunks for processing"
63+
),
64+
chunk_overlap: Optional[int] = typer.Option(
65+
None, help="Token overlap between consecutive chunks"
66+
),
67+
quantize_scan: Optional[bool] = typer.Option(
68+
None, help="Whether to quantize vector for faster search"
69+
),
70+
quantize_preload: Optional[bool] = typer.Option(
71+
None, help="Whether to preload quantized vectors in memory for faster search"
72+
),
73+
weight_fts: Optional[float] = typer.Option(
74+
None, help="Weight for full-text search results"
75+
),
76+
weight_vec: Optional[float] = typer.Option(
77+
None, help="Weight for vector search results"
78+
),
79+
):
80+
"""Change default settings for the RAG system.
81+
82+
Update model configuration, embedding parameters, chunking settings,
83+
and search weights. Only specify the options you want to change.
84+
Use 'sqlite-rag settings' to view current values.
85+
"""
86+
# Build updates dict from all provided parameters
87+
updates = {
88+
"model_path_or_name": model_path_or_name,
89+
"model_config": model_config,
90+
"embedding_dim": embedding_dim,
91+
"vector_type": vector_type,
92+
"other_vector_config": other_vector_config,
93+
"chunk_size": chunk_size,
94+
"chunk_overlap": chunk_overlap,
95+
"quantize_scan": quantize_scan,
96+
"quantize_preload": quantize_preload,
97+
"weight_fts": weight_fts,
98+
"weight_vec": weight_vec,
99+
}
100+
101+
# Filter out None values (unset options)
102+
updates = {k: v for k, v in updates.items() if v is not None}
103+
104+
if not updates:
105+
typer.echo("No settings provided to update.")
106+
show_settings()
107+
return
108+
109+
# Create new settings with updated fields
110+
new_settings = replace(Settings(), **updates)
111+
SQLiteRag.create(settings=new_settings)
112+
113+
show_settings()
114+
typer.echo("Settings updated.")
33115

34116

35117
@app.command()
@@ -42,14 +124,12 @@ def add(
42124
False,
43125
"--absolute-paths",
44126
help="Store absolute paths instead of relative paths",
45-
is_flag=True,
46127
),
47128
metadata: Optional[str] = typer.Option(
48129
None,
49130
"--metadata",
50131
help="Optional metadata in JSON format to associate with the document",
51132
metavar="JSON",
52-
show_default=False,
53133
),
54134
):
55135
"""Add a file path to the database"""
@@ -71,8 +151,6 @@ def add_text(
71151
"--metadata",
72152
help="Optional metadata in JSON format to associate with the document",
73153
metavar="JSON",
74-
show_default=False,
75-
prompt="Metadata (JSON format, e.g. {'author': 'John Doe', 'date': '2023-10-01'}'",
76154
),
77155
):
78156
"""Add a text to the database"""

src/sqlite_rag/database.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,40 @@
1+
import importlib
2+
import importlib.resources
13
import sqlite3
2-
from pathlib import Path
34

45
from .settings import Settings
56

67

78
class Database:
89
"""Database initialization and schema management for SQLiteRag."""
910

11+
@staticmethod
12+
def new_connection(db_path: str = "./sqliterag.sqlite") -> sqlite3.Connection:
13+
"""Create a new SQLite connection to the specified database path."""
14+
conn = sqlite3.connect(db_path)
15+
conn.row_factory = sqlite3.Row
16+
return conn
17+
1018
@staticmethod
1119
def initialize(conn: sqlite3.Connection, settings: Settings) -> sqlite3.Connection:
1220
"""Initialize the database with extensions and schema"""
1321
conn.enable_load_extension(True)
1422
try:
1523
conn.load_extension(
16-
str(Path(__file__).parent.parent.parent / "extensions" / "ai")
24+
str(importlib.resources.files("sqlite-vector.binaries") / "ai")
1725
)
1826
conn.load_extension(
19-
str(Path(__file__).parent.parent.parent / "extensions" / "vector")
27+
str(importlib.resources.files("sqlite-vector.binaries") / "vector")
2028
)
2129
except sqlite3.OperationalError as e:
2230
raise RuntimeError(
2331
"Failed to load extensions: "
2432
+ str(e)
2533
+ """\n
26-
Download from:
34+
Install via pip:
35+
pip install sqlite-ai sqliteai-vector
36+
37+
See more:
2738
sqlite-ai: https://github.com/sqliteai/sqlite-ai/releases
2839
sqlite-vector: https://github.com/sqliteai/sqlite-vector/releases
2940
"""

0 commit comments

Comments
 (0)