Skip to content

Commit 2cd0927

Browse files
Merge pull request #10 from sqliteai/highlight-sentence-in-results
Highlight sentence in results
2 parents 01a860c + c9ee5dd commit 2cd0927

25 files changed

+1561
-312
lines changed

.devcontainer/devcontainer.json

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,6 @@
11
{
22
"name": "Python 3.10",
33
"image": "mcr.microsoft.com/devcontainers/python:3.10",
4-
"runArgs": [
5-
"--runtime",
6-
"nvidia",
7-
"--gpus",
8-
"all",
9-
// optional but make sure CUDA workloads are available
10-
"--env",
11-
"NVIDIA_VISIBLE_DEVICES=all",
12-
// optional but make sure CUDA workloads are available
13-
"--env",
14-
"NVIDIA_DRIVER_CAPABILITIES=compute,utility"
15-
],
164
"customizations": {
175
"vscode": {
186
"extensions": [
@@ -26,13 +14,5 @@
2614
"hbenl.vscode-test-explorer"
2715
]
2816
}
29-
},
30-
"hostRequirements": {
31-
"gpu": "optional"
32-
},
33-
"remoteEnv": {
34-
// optional but make sure CUDA workloads are available
35-
"NVIDIA_VISIBLE_DEVICES": "all",
36-
"NVIDIA_DRIVER_CAPABILITIES": "compute,utility"
3717
}
3818
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"name": "Python 3.11",
3+
"image": "mcr.microsoft.com/devcontainers/python:3.11",
4+
"runArgs": [
5+
"--runtime",
6+
"nvidia",
7+
"--gpus",
8+
"all",
9+
// optional but make sure CUDA workloads are available
10+
"--env",
11+
"NVIDIA_VISIBLE_DEVICES=all",
12+
// optional but make sure CUDA workloads are available
13+
"--env",
14+
"NVIDIA_DRIVER_CAPABILITIES=compute,utility"
15+
],
16+
"customizations": {
17+
"vscode": {
18+
"extensions": [
19+
"ms-python.black-formatter",
20+
"ms-python.flake8",
21+
"ms-python.isort",
22+
"ms-python.vscode-pylance",
23+
"ms-python.python",
24+
"ms-python.debugpy",
25+
"ms-python.vscode-python-envs",
26+
"hbenl.vscode-test-explorer"
27+
]
28+
}
29+
},
30+
"hostRequirements": {
31+
"gpu": "optional"
32+
},
33+
"remoteEnv": {
34+
// optional but make sure CUDA workloads are available
35+
"NVIDIA_VISIBLE_DEVICES": "all",
36+
"NVIDIA_DRIVER_CAPABILITIES": "compute,utility"
37+
}
38+
}

.github/workflows/test.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ jobs:
4949
5050
- name: Test
5151
# Using default directory for models
52+
# COVERAGE_CORE=pytrace: Workaround for Python 3.11 segfault with SQLite extensions + C tracer
53+
# See: https://github.com/nedbat/coveragepy/issues/1665
54+
env:
55+
COVERAGE_CORE: ${{ matrix.python-version == '3.11' && 'pytrace' || '' }}
5256
run: |
5357
pytest --cov --cov-branch --cov-report=xml -v -m "not slow" ./tests
5458

src/sqlite_rag/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,12 +439,12 @@ def reset(
439439
def search(
440440
ctx: typer.Context,
441441
query: str,
442-
limit: int = typer.Option(10, help="Number of results to return"),
442+
limit: int = typer.Option(5, help="Number of results to return"),
443443
debug: bool = typer.Option(
444444
False,
445445
"-d",
446446
"--debug",
447-
help="Print extra debug information with modern formatting",
447+
help="Print extra debug information with sentence-level details",
448448
),
449449
peek: bool = typer.Option(
450450
False, "--peek", help="Print debug information using compact table format"

src/sqlite_rag/database.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,28 +76,50 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
7676
)
7777

7878
# TODO: this table is not ready for sqlite-sync because its id column uses AUTOINCREMENT
79-
cursor.execute(
79+
cursor.executescript(
8080
"""
8181
CREATE TABLE IF NOT EXISTS chunks (
8282
id INTEGER PRIMARY KEY AUTOINCREMENT,
8383
document_id TEXT,
8484
content TEXT,
85-
embedding BLOB,
86-
FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
85+
embedding BLOB
8786
);
87+
CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks (document_id);
8888
"""
8989
)
9090

91+
cursor.executescript(
92+
"""
93+
CREATE TABLE IF NOT EXISTS sentences (
94+
id TEXT PRIMARY KEY,
95+
chunk_id INTEGER,
96+
content TEXT,
97+
embedding BLOB,
98+
start_offset INTEGER,
99+
end_offset INTEGER
100+
);
101+
CREATE INDEX IF NOT EXISTS idx_sentences_chunk_id ON sentences (chunk_id);
102+
"""
103+
)
104+
91105
cursor.execute(
92106
"""
93107
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(content, content='chunks', content_rowid='id');
94108
"""
95109
)
96110

97111
cursor.execute(
98-
f"""
99-
SELECT vector_init('chunks', 'embedding', 'type={settings.vector_type},dimension={settings.embedding_dim},{settings.other_vector_options}');
100-
"""
112+
"""
113+
SELECT vector_init('chunks', 'embedding', ?);
114+
""",
115+
(settings.get_vector_init_options(),),
116+
)
117+
# TODO: sentences currently reuse the same vector options as chunks; decide whether they need different options
118+
cursor.execute(
119+
"""
120+
SELECT vector_init('sentences', 'embedding', ?);
121+
""",
122+
(settings.get_vector_init_options(),),
101123
)
102124

103125
conn.commit()

0 commit comments

Comments
 (0)