Skip to content

Commit 953872e

Browse files
committed
feat: update full text mem search
1 parent b3acc98 commit 953872e

File tree

1 file changed

+125
-0
lines changed

1 file changed

+125
-0
lines changed

src/memos/graph_dbs/polardb.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,6 +1450,131 @@ def get_subgraph(
14501450
def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]:
    """Return the ordered context chain that begins at node ``id``.

    Args:
        id: Identifier of the node the chain starts from.
        type: Relationship type to follow; defaults to ``"FOLLOWS"``.

    Raises:
        NotImplementedError: Always; this method has no implementation here.
    """
    raise NotImplementedError()
1453+
1454+
@timed
def search_by_fulltext(
    self,
    query_words: list[str],
    top_k: int = 10,
    scope: str | None = None,
    status: str | None = None,
    threshold: float | None = None,
    search_filter: dict | None = None,
    user_name: str | None = None,
    filter: dict | None = None,
    knowledgebase_ids: list[str] | None = None,
    tsvector_field: str = "properties_tsvector_zh",
    tsquery_config: str = "jiebaqry",
    **kwargs,
) -> list[dict]:
    """
    Run a PostgreSQL full-text search over the ``Memory`` table and rank the hits.

    The query words are OR-ed together into one tsquery (``"w1 | w2 | w3"``),
    matched against ``tsvector_field`` and ordered by ``ts_rank`` descending.

    Args:
        query_words: words to search for; blank entries are ignored. When no
            usable word remains, an empty list is returned without querying.
        top_k: maximum number of rows fetched (SQL ``LIMIT``)
        scope: memory type filter (``memory_type`` property)
        status: status filter, defaults to "activated" when not given
        threshold: minimum ``ts_rank`` score; applied to the rows already
            limited by ``top_k``, so fewer than ``top_k`` results may come back
        search_filter: additional property equality conditions (key -> value)
        user_name: username filter
        knowledgebase_ids: knowledgebase ids filter
        filter: filter conditions with 'and' or 'or' logic for search results.
        tsvector_field: full-text index column name, defaults to
            properties_tsvector_zh
        tsquery_config: full-text search configuration, defaults to jiebaqry
            (Chinese word segmentation)
        **kwargs: other parameters (e.g. cube_name); accepted but not used here

    Returns:
        list[dict]: result list, each item containing ``id`` and ``score``

    Raises:
        ValueError: if ``tsvector_field`` or ``tsquery_config`` is not a plain
            (optionally dot-qualified) identifier.
    """
    # Function-scope imports keep this block self-contained.
    import json
    import re

    # These two names are spliced into the SQL text (an identifier cannot be a
    # bound parameter), so refuse anything that is not a bare identifier.
    identifier_re = re.compile(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?")
    for label, name in (
        ("tsvector_field", tsvector_field),
        ("tsquery_config", tsquery_config),
    ):
        if not identifier_re.fullmatch(name):
            raise ValueError(f"Invalid {label}: {name!r}")

    # Blank words would leave a dangling "|" operator (a tsquery syntax error),
    # and an empty query can never match anything: skip the round trip.
    # NOTE(review): words containing tsquery operators (& | ! : parentheses)
    # are still passed through unchanged - confirm callers pre-tokenise.
    words = [word.strip() for word in (query_words or []) if word and word.strip()]
    if not words:
        return []
    tsquery_string = " | ".join(words)

    def as_agtype_text(value) -> str:
        """Render a Python value as the JSON-style text that is cast to agtype."""
        return json.dumps(value, ensure_ascii=False)

    property_equals = (
        "ag_catalog.agtype_access_operator(properties, {prop}::agtype) = {value}::agtype"
    )

    # Clauses and their bound values are collected side by side; the order of
    # where_params must follow the order of the %s placeholders in the clauses.
    where_clauses: list[str] = []
    where_params: list = []

    if scope:
        where_clauses.append(property_equals.format(prop="'\"memory_type\"'", value="%s"))
        where_params.append(as_agtype_text(str(scope)))

    # A missing/empty status falls back to "activated".
    where_clauses.append(property_equals.format(prop="'\"status\"'", value="%s"))
    where_params.append(as_agtype_text(str(status) if status else "activated"))

    # Build user_name filter with knowledgebase_ids support (OR relationship)
    # using the common helper; the helper returns ready-made SQL fragments.
    user_name_conditions = self._build_user_name_and_kb_ids_conditions_sql(
        user_name=user_name,
        knowledgebase_ids=knowledgebase_ids,
        default_user_name=self.config.user_name,
    )
    if user_name_conditions:
        if len(user_name_conditions) == 1:
            where_clauses.append(user_name_conditions[0])
        else:
            where_clauses.append(f"({' OR '.join(user_name_conditions)})")

    # Add search_filter conditions (property key is always bound).
    for key, value in (search_filter or {}).items():
        where_params.append(as_agtype_text(str(key)))
        if isinstance(value, (bool, int, float)):
            # Numbers/booleans are rendered inline exactly as before.
            where_clauses.append(property_equals.format(prop="%s", value=value))
        else:
            # Strings (and any other JSON-serialisable value) are bound instead
            # of being spliced into the statement.
            where_clauses.append(property_equals.format(prop="%s", value="%s"))
            where_params.append(as_agtype_text(value))

    # Build filter conditions using the common helper.
    where_clauses.extend(self._build_filter_conditions_sql(filter))

    # Full-text match condition goes last, so its parameter is last as well.
    where_clauses.append(f"{tsvector_field} @@ to_tsquery('{tsquery_config}', %s)")
    where_params.append(tsquery_string)

    where_clause = f"WHERE {' AND '.join(where_clauses)}"

    # int() keeps the LIMIT a plain number even if a caller passes something odd.
    query = f"""
        SELECT
            ag_catalog.agtype_access_operator(properties, '"id"'::agtype) AS old_id,
            ts_rank({tsvector_field}, to_tsquery('{tsquery_config}', %s)) as rank
        FROM "{self.db_name}_graph"."Memory"
        {where_clause}
        ORDER BY rank DESC
        LIMIT {int(top_k)};
    """

    # The SELECT list placeholder comes first in the statement text.
    params = [tsquery_string, *where_params]

    conn = self._get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(query, params)
            rows = cursor.fetchall()
    finally:
        self._return_connection(conn)

    output = []
    for old_id, rank in rows:
        score_val = float(rank)
        # Apply threshold filter if specified
        if threshold is None or score_val >= threshold:
            # NOTE(review): the id is the str() of the agtype value as returned
            # by the driver - confirm whether callers expect surrounding quotes.
            output.append({"id": str(old_id), "score": score_val})
    return output
14531578

14541579
@timed
14551580
def search_by_embedding(

0 commit comments

Comments
 (0)