Skip to content

Commit 2989f31

Browse files
author
Daniele Briggi
committed
fix(metadata): passed by reference
chore(dependencies): remove attrs
1 parent 561061f commit 2989f31

File tree

8 files changed

+51 -16 lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# SQLite RAG
44

5-
[![Run Tests](https://github.com/sqliteai/sqlite-rag/actions/workflows/test.yaml/badge.svg?branch=main&event=release)](https://github.com/sqliteai/sqlite-rag/actions/workflows/test.yaml)
5+
[![Run Tests](https://github.com/sqliteai/sqlite-rag/actions/workflows/test.yaml/badge.svg)](https://github.com/sqliteai/sqlite-rag/actions/workflows/test.yaml)
66
[![codecov](https://codecov.io/github/sqliteai/sqlite-rag/graph/badge.svg?token=30KYPY7864)](https://codecov.io/github/sqliteai/sqlite-rag)
77
![PyPI - Version](https://img.shields.io/pypi/v/sqlite-rag?link=https%3A%2F%2Fpypi.org%2Fproject%2Fsqlite-rag%2F)
88
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/sqlite-rag?link=https%3A%2F%2Fpypi.org%2Fproject%2Fsqlite-rag)

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ classifiers = [
1717
"Operating System :: OS Independent",
1818
]
1919
dependencies = [
20-
"attrs",
2120
"typer",
2221
"huggingface_hub[hf_transfer]",
2322
"markitdown[docx]",

src/sqlite_rag/models/chunk.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from attr import dataclass
1+
from dataclasses import dataclass
22

33

44
@dataclass

src/sqlite_rag/models/document.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import hashlib
22
import re
3+
from dataclasses import dataclass, field
34
from datetime import datetime
45
from typing import Optional
56

6-
from attr import dataclass
7-
87
from .chunk import Chunk
98

109

@@ -15,11 +14,11 @@ class Document:
1514
id: str | None = None
1615
content: str = ""
1716
uri: str | None = None
18-
metadata: dict = {}
17+
metadata: dict = field(default_factory=dict)
1918
created_at: datetime | None = None
2019
updated_at: datetime | None = None
2120

22-
chunks: list["Chunk"] = []
21+
chunks: list["Chunk"] = field(default_factory=list)
2322

2423
def hash(self) -> str:
2524
"""Generate a hash for the document content using SHA-3 for maximum collision resistance"""

src/sqlite_rag/models/document_result.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from attr import dataclass
1+
from dataclasses import dataclass
22

33
from .document import Document
44

src/sqlite_rag/sqliterag.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def add(
103103
if use_relative_paths
104104
else str(file_path.absolute())
105105
)
106-
document = Document(content=content, uri=uri, metadata=metadata)
106+
document = Document(content=content, uri=uri, metadata=metadata.copy())
107107

108108
exists = self._repository.document_exists_by_hash(document.hash())
109109
if exists:
@@ -132,7 +132,7 @@ def add_text(
132132
"""Add a text content into the database"""
133133
self._ensure_initialized()
134134

135-
document = Document(content=text, uri=uri, metadata=metadata)
135+
document = Document(content=text, uri=uri, metadata=metadata.copy())
136136

137137
self._engine.create_new_context()
138138
document = self._engine.process(document)

tests/integration/test_engine.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33

44
import pytest
55

6-
from sqlite_rag.models.chunk import Chunk
7-
86

97
class TestEngine:
108
@pytest.mark.slow
@@ -20,8 +18,8 @@ def random_string(length=30):
2018
result_chunks = {}
2119
for i in range(1000):
2220
try:
23-
chunk = engine.generate_embeddings([Chunk(content=random_string())])
24-
result_chunks[chunk[0].embedding.hex()] = chunk[0]
21+
embedding = engine.generate_embedding(random_string())
22+
result_chunks[embedding.hex()] = embedding
2523
assert len(result_chunks) == i + 1
2624
except Exception as e:
2725
pytest.fail(f"Embedding generation failed on chunk {i}: {e}")

tests/test_sqlite_rag.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,39 @@ def test_add_file_with_metadata(self):
139139
doc = cursor.fetchone()
140140
assert doc
141141
assert doc[0] == "This is a test document with metadata."
142-
assert doc[1] == json.dumps(metadata)
142+
assert doc[1] == json.dumps(
143+
{
144+
**metadata,
145+
"generated": {"title": "This is a test document with metadata."},
146+
}
147+
)
148+
149+
def test_add_documents_with_generated_title(self):
150+
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as doc1:
151+
doc1.write("# Title 1\nThis is the second test document.")
152+
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as doc2:
153+
doc2.write("# Title 2\nThis is the second test document.")
154+
155+
doc3 = "# Title 3\nThis is the third test document."
156+
doc4 = "# Title 4\nThis is the fourth test document."
157+
158+
rag = SQLiteRag.create(db_path=":memory:")
159+
160+
rag.add(doc1.name)
161+
rag.add(doc2.name)
162+
rag.add_text(doc3)
163+
rag.add_text(doc4)
164+
165+
conn = rag._conn
166+
cursor = conn.execute("SELECT metadata FROM documents")
167+
docs = cursor.fetchall()
168+
assert len(docs) == 4
169+
170+
titles = [json.loads(doc[0]).get("generated", {}).get("title") for doc in docs]
171+
assert "Title 1" in titles
172+
assert "Title 2" in titles
173+
assert "Title 3" in titles
174+
assert "Title 4" in titles
143175

144176
def test_add_empty_file(self):
145177
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
@@ -229,7 +261,14 @@ def test_add_text_with_metadata(self):
229261
assert doc
230262
assert doc[0] == "This is a test document content with metadata."
231263
assert doc[1] == "test_doc_with_metadata.txt"
232-
assert doc[2] == json.dumps(metadata)
264+
assert doc[2] == json.dumps(
265+
{
266+
**metadata,
267+
"generated": {
268+
"title": "This is a test document content with metadata."
269+
},
270+
}
271+
)
233272

234273
def test_list_documents(self):
235274
rag = SQLiteRag.create(":memory:")

0 commit comments

Comments
 (0)