Skip to content

Commit 06fe8f4

Browse files
committed
fix: resolve SQLite embedding ValueError by using TypeDecorator (fixes #382)
The SQLite models inherited `embedding: list[float]` from their parent classes but never overrode it with an SQLAlchemy-compatible column type. When SQLModel tried to create the table columns, it found no SQLAlchemy type matching `list[float]` and raised `ValueError`. This commit replaces the `embedding_json` column + `embedding` property pattern with a `JSONEncodedList` `TypeDecorator` that maps `list[float]` to TEXT via JSON serialization, mirroring how the PostgreSQL models override the parent field with `Column(Vector())`.
1 parent 357aefc commit 06fe8f4

File tree

4 files changed

+47
-81
lines changed

4 files changed

+47
-81
lines changed

src/memu/database/sqlite/models.py

Lines changed: 29 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,39 @@
1111
import pendulum
1212
from pydantic import BaseModel
1313
from sqlalchemy import JSON, MetaData, String, Text
14+
from sqlalchemy.types import TypeDecorator
1415
from sqlmodel import Column, DateTime, Field, Index, SQLModel, func
1516

1617
from memu.database.models import CategoryItem, MemoryCategory, MemoryItem, MemoryType, Resource
1718

1819
logger = logging.getLogger(__name__)
1920

2021

22+
class JSONEncodedList(TypeDecorator):
    """Store a list of floats as a JSON-encoded TEXT column.

    SQLite has no native vector type, so embeddings are serialized to JSON
    strings for storage and deserialized back to ``list[float]`` on read.
    A stored value that is not a JSON array of numbers is treated as corrupt:
    it is logged at WARNING level and read back as ``None`` instead of raising.
    """

    impl = Text
    cache_ok = True

    def process_bind_param(self, value: list[float] | None, dialect: Any) -> str | None:
        """Serialize an embedding to JSON text before it is bound to a statement.

        Args:
            value: The embedding to store, or ``None`` to store SQL NULL.
            dialect: The active SQLAlchemy dialect (unused).

        Returns:
            The JSON text for ``value``, or ``None`` when ``value`` is ``None``.
        """
        if value is None:
            return None
        return json.dumps(value)

    def process_result_value(self, value: str | None, dialect: Any) -> list[float] | None:
        """Deserialize the stored JSON text back into a list of floats.

        Args:
            value: The raw TEXT column value, or ``None`` for SQL NULL.
            dialect: The active SQLAlchemy dialect (unused).

        Returns:
            The decoded embedding, or ``None`` when the column is NULL or the
            stored text is not a JSON array whose elements convert to float.
        """
        if value is None:
            return None
        try:
            decoded = json.loads(value)
            # Only a JSON array is a valid embedding. Without this check a JSON
            # string ("12" -> [1.0, 2.0]) or object ({"1": ...} -> [1.0]) would
            # be iterated and silently yield a bogus vector.
            if not isinstance(decoded, list):
                raise TypeError(f"expected a JSON array, got {type(decoded).__name__}")
            return [float(x) for x in decoded]
        except (json.JSONDecodeError, TypeError, ValueError) as exc:
            # Include the cause so corrupt rows can be diagnosed from the log.
            logger.warning("Failed to decode embedding JSON from SQLite: %s", exc)
            return None
45+
46+
2147
class TZDateTime(DateTime):
2248
"""DateTime type with timezone support."""
2349

@@ -52,27 +78,7 @@ class SQLiteResourceModel(SQLiteBaseModelMixin, Resource):
5278
modality: str = Field(sa_column=Column(String, nullable=False))
5379
local_path: str = Field(sa_column=Column(String, nullable=False))
5480
caption: str | None = Field(default=None, sa_column=Column(Text, nullable=True))
55-
# Store embedding as JSON string since SQLite doesn't have native vector type
56-
embedding_json: str | None = Field(default=None, sa_column=Column(Text, nullable=True))
57-
58-
@property
59-
def embedding(self) -> list[float] | None:
60-
"""Parse embedding from JSON string."""
61-
if self.embedding_json is None:
62-
return None
63-
try:
64-
return list(json.loads(self.embedding_json))
65-
except (json.JSONDecodeError, TypeError) as e:
66-
logger.warning("Failed to parse resource embedding JSON: %s", e)
67-
return None
68-
69-
@embedding.setter
70-
def embedding(self, value: list[float] | None) -> None:
71-
"""Serialize embedding to JSON string."""
72-
if value is None:
73-
self.embedding_json = None
74-
else:
75-
self.embedding_json = json.dumps(value)
81+
embedding: list[float] | None = Field(default=None, sa_column=Column(JSONEncodedList(), nullable=True))
7682

7783

7884
class SQLiteMemoryItemModel(SQLiteBaseModelMixin, MemoryItem):
@@ -81,59 +87,19 @@ class SQLiteMemoryItemModel(SQLiteBaseModelMixin, MemoryItem):
8187
resource_id: str | None = Field(sa_column=Column(String, nullable=True))
8288
memory_type: MemoryType = Field(sa_column=Column(String, nullable=False))
8389
summary: str = Field(sa_column=Column(Text, nullable=False))
84-
# Store embedding as JSON string since SQLite doesn't have native vector type
85-
embedding_json: str | None = Field(default=None, sa_column=Column(Text, nullable=True))
90+
embedding: list[float] | None = Field(default=None, sa_column=Column(JSONEncodedList(), nullable=True))
8691
happened_at: datetime | None = Field(default=None, sa_column=Column(DateTime, nullable=True))
8792
extra: dict[str, Any] = Field(default={}, sa_column=Column(JSON, nullable=True))
8893

89-
@property
90-
def embedding(self) -> list[float] | None:
91-
"""Parse embedding from JSON string."""
92-
if self.embedding_json is None:
93-
return None
94-
try:
95-
return list(json.loads(self.embedding_json))
96-
except (json.JSONDecodeError, TypeError) as e:
97-
logger.warning("Failed to parse memory item embedding JSON: %s", e)
98-
return None
99-
100-
@embedding.setter
101-
def embedding(self, value: list[float] | None) -> None:
102-
"""Serialize embedding to JSON string."""
103-
if value is None:
104-
self.embedding_json = None
105-
else:
106-
self.embedding_json = json.dumps(value)
107-
10894

10995
class SQLiteMemoryCategoryModel(SQLiteBaseModelMixin, MemoryCategory):
11096
"""SQLite memory category model."""
11197

11298
name: str = Field(sa_column=Column(String, nullable=False, index=True))
11399
description: str = Field(sa_column=Column(Text, nullable=False))
114-
# Store embedding as JSON string since SQLite doesn't have native vector type
115-
embedding_json: str | None = Field(default=None, sa_column=Column(Text, nullable=True))
100+
embedding: list[float] | None = Field(default=None, sa_column=Column(JSONEncodedList(), nullable=True))
116101
summary: str | None = Field(default=None, sa_column=Column(Text, nullable=True))
117102

118-
@property
119-
def embedding(self) -> list[float] | None:
120-
"""Parse embedding from JSON string."""
121-
if self.embedding_json is None:
122-
return None
123-
try:
124-
return list(json.loads(self.embedding_json))
125-
except (json.JSONDecodeError, TypeError) as e:
126-
logger.warning("Failed to parse category embedding JSON: %s", e)
127-
return None
128-
129-
@embedding.setter
130-
def embedding(self, value: list[float] | None) -> None:
131-
"""Serialize embedding to JSON string."""
132-
if value is None:
133-
self.embedding_json = None
134-
else:
135-
self.embedding_json = json.dumps(value)
136-
137103

138104
class SQLiteCategoryItemModel(SQLiteBaseModelMixin, CategoryItem):
139105
"""SQLite category-item relation model."""

src/memu/database/sqlite/repositories/memory_category_repo.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def list_categories(self, where: Mapping[str, Any] | None = None) -> dict[str, M
7070
id=row.id,
7171
name=row.name,
7272
description=row.description,
73-
embedding=self._normalize_embedding(row.embedding_json),
73+
embedding=row.embedding,
7474
summary=row.summary,
7575
created_at=row.created_at,
7676
updated_at=row.updated_at,
@@ -104,7 +104,7 @@ def clear_categories(self, where: Mapping[str, Any] | None = None) -> dict[str,
104104
id=row.id,
105105
name=row.name,
106106
description=row.description,
107-
embedding=self._normalize_embedding(row.embedding_json),
107+
embedding=row.embedding,
108108
summary=row.summary,
109109
created_at=row.created_at,
110110
updated_at=row.updated_at,
@@ -156,7 +156,7 @@ def get_or_create_category(
156156
id=existing.id,
157157
name=existing.name,
158158
description=existing.description,
159-
embedding=self._normalize_embedding(existing.embedding_json),
159+
embedding=existing.embedding,
160160
summary=existing.summary,
161161
created_at=existing.created_at,
162162
updated_at=existing.updated_at,
@@ -170,7 +170,7 @@ def get_or_create_category(
170170
row = self._memory_category_model(
171171
name=name,
172172
description=description,
173-
embedding_json=self._prepare_embedding(embedding),
173+
embedding=embedding,
174174
summary=None,
175175
created_at=now,
176176
updated_at=now,
@@ -230,7 +230,7 @@ def update_category(
230230
if description is not None:
231231
row.description = description
232232
if embedding is not None:
233-
row.embedding_json = self._prepare_embedding(embedding)
233+
row.embedding = embedding
234234
if summary is not None:
235235
row.summary = summary
236236
row.updated_at = self._now()
@@ -243,7 +243,7 @@ def update_category(
243243
id=row.id,
244244
name=row.name,
245245
description=row.description,
246-
embedding=self._normalize_embedding(row.embedding_json),
246+
embedding=row.embedding,
247247
summary=row.summary,
248248
created_at=row.created_at,
249249
updated_at=row.updated_at,

src/memu/database/sqlite/repositories/memory_item_repo.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def get_item(self, item_id: str) -> MemoryItem | None:
7575
resource_id=row.resource_id,
7676
memory_type=row.memory_type,
7777
summary=row.summary,
78-
embedding=self._normalize_embedding(row.embedding_json),
78+
embedding=row.embedding,
7979
created_at=row.created_at,
8080
updated_at=row.updated_at,
8181
**self._scope_kwargs_from(row),
@@ -106,7 +106,7 @@ def list_items(self, where: Mapping[str, Any] | None = None) -> dict[str, Memory
106106
resource_id=row.resource_id,
107107
memory_type=row.memory_type,
108108
summary=row.summary,
109-
embedding=self._normalize_embedding(row.embedding_json),
109+
embedding=row.embedding,
110110
created_at=row.created_at,
111111
updated_at=row.updated_at,
112112
**self._scope_kwargs_from(row),
@@ -151,7 +151,7 @@ def list_items_by_ref_ids(
151151
resource_id=row.resource_id,
152152
memory_type=row.memory_type,
153153
summary=row.summary,
154-
embedding=self._normalize_embedding(row.embedding_json),
154+
embedding=row.embedding,
155155
created_at=row.created_at,
156156
updated_at=row.updated_at,
157157
**self._scope_kwargs_from(row),
@@ -185,7 +185,7 @@ def clear_items(self, where: Mapping[str, Any] | None = None) -> dict[str, Memor
185185
resource_id=row.resource_id,
186186
memory_type=row.memory_type,
187187
summary=row.summary,
188-
embedding=self._normalize_embedding(row.embedding_json),
188+
embedding=row.embedding,
189189
created_at=row.created_at,
190190
updated_at=row.updated_at,
191191
**self._scope_kwargs_from(row),
@@ -257,7 +257,7 @@ def create_item(
257257
resource_id=resource_id,
258258
memory_type=memory_type,
259259
summary=summary,
260-
embedding_json=self._prepare_embedding(embedding),
260+
embedding=embedding,
261261
extra=extra if extra else {},
262262
created_at=now,
263263
updated_at=now,
@@ -338,7 +338,7 @@ def create_item_reinforce(
338338
resource_id=existing.resource_id,
339339
memory_type=existing.memory_type,
340340
summary=existing.summary,
341-
embedding=self._normalize_embedding(existing.embedding_json),
341+
embedding=existing.embedding,
342342
created_at=existing.created_at,
343343
updated_at=existing.updated_at,
344344
extra=existing.extra,
@@ -360,7 +360,7 @@ def create_item_reinforce(
360360
resource_id=resource_id,
361361
memory_type=memory_type,
362362
summary=summary,
363-
embedding_json=self._prepare_embedding(embedding),
363+
embedding=embedding,
364364
extra=item_extra,
365365
created_at=now,
366366
updated_at=now,
@@ -424,7 +424,7 @@ def update_item(
424424
if summary is not None:
425425
row.summary = summary
426426
if embedding is not None:
427-
row.embedding_json = self._prepare_embedding(embedding)
427+
row.embedding = embedding
428428

429429
# Merge extra and tool_record into existing extra dict
430430
current_extra = row.extra or {}
@@ -449,7 +449,7 @@ def update_item(
449449
resource_id=row.resource_id,
450450
memory_type=row.memory_type,
451451
summary=row.summary,
452-
embedding=self._normalize_embedding(row.embedding_json),
452+
embedding=row.embedding,
453453
extra=row.extra,
454454
created_at=row.created_at,
455455
updated_at=row.updated_at,

src/memu/database/sqlite/repositories/resource_repo.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def list_resources(self, where: Mapping[str, Any] | None = None) -> dict[str, Re
7676
modality=row.modality,
7777
local_path=row.local_path,
7878
caption=row.caption,
79-
embedding=self._normalize_embedding(row.embedding_json),
79+
embedding=row.embedding,
8080
created_at=row.created_at,
8181
updated_at=row.updated_at,
8282
**self._scope_kwargs_from(row),
@@ -111,7 +111,7 @@ def clear_resources(self, where: Mapping[str, Any] | None = None) -> dict[str, R
111111
modality=row.modality,
112112
local_path=row.local_path,
113113
caption=row.caption,
114-
embedding=self._normalize_embedding(row.embedding_json),
114+
embedding=row.embedding,
115115
created_at=row.created_at,
116116
updated_at=row.updated_at,
117117
**self._scope_kwargs_from(row),
@@ -163,7 +163,7 @@ def create_resource(
163163
modality=modality,
164164
local_path=local_path,
165165
caption=caption,
166-
embedding_json=self._prepare_embedding(embedding),
166+
embedding=embedding,
167167
created_at=now,
168168
updated_at=now,
169169
**user_data,

0 commit comments

Comments
 (0)