Skip to content

Commit f2fca84

Browse files
author
raold
committed
fix: Enable automatic embedding generation for vector search
- Changed the `enable_embeddings` default to true in `MemoryService`
- Added a `generate_embedding` parameter to the `create_memory` method
- Fixed vector format conversion for PostgreSQL (list to string)
- Created the `_format_vector` helper method for consistent formatting
- Fixed the `hybrid_search` function ambiguity issue
- Added the `vector_weight` parameter to hybrid search calls
- Vector and hybrid search now work correctly with embeddings
1 parent 7f0a43a commit f2fca84

File tree

4 files changed

+99
-10
lines changed

4 files changed

+99
-10
lines changed

app/services/memory_service.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@ class MemoryService:
2222
def __init__(self):
2323
"""Initialize PostgreSQL memory service"""
2424
db_url = os.getenv("DATABASE_URL", "postgresql://secondbrain:changeme@localhost/secondbrain")
25+
# Check if embeddings should be enabled (default: True for v4.2.0)
26+
enable_embeddings = os.getenv("ENABLE_EMBEDDINGS", "true").lower() == "true"
2527
self.service = MemoryServicePostgres(
2628
connection_string=db_url,
27-
enable_embeddings=False # Disabled by default for performance
29+
enable_embeddings=enable_embeddings # Enabled by default for v4.2.0 vector search
2830
)
2931
self._initialized = False
3032

@@ -41,7 +43,8 @@ async def create_memory(
4143
memory_type: str = "generic",
4244
importance_score: float = 0.5,
4345
tags: List[str] = None,
44-
metadata: Dict[str, Any] = None
46+
metadata: Dict[str, Any] = None,
47+
generate_embedding: bool = True
4548
) -> Dict[str, Any]:
4649
"""Create a new memory"""
4750
await self.initialize()
@@ -51,7 +54,7 @@ async def create_memory(
5154
importance_score=importance_score,
5255
tags=tags,
5356
metadata=metadata,
54-
generate_embedding=False
57+
generate_embedding=generate_embedding
5558
)
5659

5760
async def get_memory(self, memory_id: str) -> Optional[Dict[str, Any]]:
@@ -65,7 +68,8 @@ async def update_memory(
6568
content: Optional[str] = None,
6669
importance_score: Optional[float] = None,
6770
tags: Optional[List[str]] = None,
68-
metadata: Optional[Dict[str, Any]] = None
71+
metadata: Optional[Dict[str, Any]] = None,
72+
regenerate_embedding: bool = False
6973
) -> Optional[Dict[str, Any]]:
7074
"""Update a memory"""
7175
await self.initialize()
@@ -74,7 +78,8 @@ async def update_memory(
7478
content=content,
7579
importance_score=importance_score,
7680
tags=tags,
77-
metadata=metadata
81+
metadata=metadata,
82+
regenerate_embedding=regenerate_embedding
7883
)
7984

8085
async def delete_memory(self, memory_id: str) -> bool:

app/services/memory_service_postgres.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ async def search_memories(
264264
query=query,
265265
embedding=embedding,
266266
limit=limit,
267+
vector_weight=0.5, # Default weight
267268
min_score=min_score
268269
)
269270

app/storage/postgres_unified.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ async def create_memory(
126126
embedding_generated_at = None
127127

128128
if embedding:
129-
embedding_vector = np.array(embedding).tolist()
129+
# Convert embedding to PostgreSQL vector format
130+
embedding_vector = self._format_vector(embedding)
130131
embedding_model = memory.get("embedding_model", "text-embedding-ada-002")
131132
embedding_generated_at = datetime.utcnow()
132133

@@ -202,7 +203,9 @@ async def update_memory(
202203
if new_embedding:
203204
param_count += 1
204205
set_clauses.append(f"embedding = ${param_count}::vector")
205-
params.append(np.array(new_embedding).tolist())
206+
# Convert embedding to PostgreSQL vector format
207+
embedding_vector = self._format_vector(new_embedding)
208+
params.append(embedding_vector)
206209

207210
param_count += 1
208211
set_clauses.append(f"embedding_generated_at = ${param_count}")
@@ -322,7 +325,7 @@ async def vector_search(
322325
async with self.acquire() as conn:
323326
rows = await conn.fetch(
324327
query,
325-
np.array(embedding).tolist(),
328+
self._format_vector(embedding),
326329
container_id,
327330
min_similarity,
328331
limit
@@ -390,7 +393,7 @@ async def hybrid_search(
390393
rows = await conn.fetch(
391394
query_sql,
392395
query,
393-
np.array(embedding).tolist(),
396+
self._format_vector(embedding),
394397
limit,
395398
vector_weight,
396399
min_score
@@ -620,7 +623,7 @@ async def record_search(
620623
await conn.execute(
621624
query_sql,
622625
query,
623-
np.array(embedding).tolist() if embedding else None,
626+
self._format_vector(embedding) if embedding else None,
624627
results_count,
625628
[uuid.UUID(sid) for sid in selected_ids],
626629
search_type,
@@ -660,6 +663,10 @@ async def migrate_from_sqlite(self, sqlite_path: str):
660663

661664
# ==================== Helper Methods ====================
662665

666+
def _format_vector(self, embedding: List[float]) -> str:
667+
"""Convert embedding list to PostgreSQL vector format"""
668+
return f"[{','.join(str(x) for x in embedding)}]"
669+
663670
def _row_to_dict(self, row: asyncpg.Record) -> Dict[str, Any]:
664671
"""Convert database row to dictionary"""
665672
if not row:

scripts/create_hybrid_search.sql

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
-- Create hybrid search function for v4.2.0
2+
-- hybrid_search: blend vector similarity and full-text rank over `memories`.
--
-- Parameters:
--   query_text       text matched against memories.content_tsvector via
--                    plainto_tsquery('english', ...)
--   query_embedding  1536-dimension vector compared with memories.embedding
--                    using the pgvector `<=>` distance operator
--   result_limit     maximum number of rows returned (each sub-search keeps
--                    up to result_limit * 2 candidates before merging)
--   vector_weight    weight applied to similarity_score; the text rank gets
--                    (1 - vector_weight)
--   min_score        rows whose combined_score is below this are dropped
--
-- Returns one row per memory found by either sub-search, ordered by
-- combined_score descending. A memory found by only one sub-search gets 0
-- for the missing score. Soft-deleted rows (deleted_at IS NOT NULL) are
-- excluded from both sub-searches.
CREATE OR REPLACE FUNCTION hybrid_search(
    query_text TEXT,
    query_embedding vector(1536),
    result_limit INTEGER DEFAULT 10,
    vector_weight FLOAT DEFAULT 0.5,
    min_score FLOAT DEFAULT 0.0
)
RETURNS TABLE (
    id UUID,
    content TEXT,
    memory_type VARCHAR,
    importance_score FLOAT,
    tags TEXT[],
    metadata JSONB,
    created_at TIMESTAMPTZ,
    similarity_score FLOAT,
    text_rank FLOAT,
    combined_score FLOAT
) AS $$
BEGIN
    RETURN QUERY
    WITH vector_search AS (
        -- Nearest candidates by embedding distance; similarity = 1 - distance.
        SELECT
            m.id,
            m.content,
            m.memory_type,
            m.importance_score,
            m.tags,
            m.metadata,
            m.created_at,
            1 - (m.embedding <=> query_embedding) AS similarity_score
        FROM memories m
        WHERE m.deleted_at IS NULL
            AND m.embedding IS NOT NULL
        ORDER BY m.embedding <=> query_embedding
        LIMIT result_limit * 2
    ),
    text_search AS (
        -- Full-text candidates. ts_rank() returns REAL, while the declared
        -- result column is FLOAT (double precision); RETURN QUERY requires
        -- the types to match exactly, so cast here.
        SELECT
            m.id,
            m.content,
            m.memory_type,
            m.importance_score,
            m.tags,
            m.metadata,
            m.created_at,
            ts_rank(m.content_tsvector, plainto_tsquery('english', query_text))::FLOAT AS text_rank
        FROM memories m
        WHERE m.deleted_at IS NULL
            AND m.content_tsvector @@ plainto_tsquery('english', query_text)
        ORDER BY text_rank DESC
        LIMIT result_limit * 2
    ),
    combined AS (
        -- FULL OUTER JOIN keeps memories found by only one sub-search;
        -- COALESCE fills the missing side's columns and scores.
        SELECT
            COALESCE(v.id, t.id) AS id,
            COALESCE(v.content, t.content) AS content,
            COALESCE(v.memory_type, t.memory_type) AS memory_type,
            COALESCE(v.importance_score, t.importance_score) AS importance_score,
            COALESCE(v.tags, t.tags) AS tags,
            COALESCE(v.metadata, t.metadata) AS metadata,
            COALESCE(v.created_at, t.created_at) AS created_at,
            COALESCE(v.similarity_score, 0) AS similarity_score,
            COALESCE(t.text_rank, 0) AS text_rank,
            (COALESCE(v.similarity_score, 0) * vector_weight +
             COALESCE(t.text_rank, 0) * (1 - vector_weight)) AS combined_score
        FROM vector_search v
        FULL OUTER JOIN text_search t ON v.id = t.id
    )
    -- Column references are qualified with the CTE name so they cannot be
    -- confused with the function's output columns of the same name.
    SELECT * FROM combined
    WHERE combined.combined_score >= min_score
    ORDER BY combined.combined_score DESC
    LIMIT result_limit;
END;
$$ LANGUAGE plpgsql;

0 commit comments

Comments
 (0)