Skip to content

Commit 4231c89

Browse files
whipser030, 黑布林, CaralHsi, fridayL
authored
patch: Use LIKE instead of TF-IDF for recall in the fallback (catch-all) PolarDB keyword search (#635)
* update reader and search strategy * set strategy reader and search config * fix install problem * fix * fix test * turn off graph recall * turn off graph recall * turn off graph recall * fix Searcher input bug * fix Searcher * fix Search * fix bug * adjust strategy reader * adjust strategy reader * adjust search config input * reformat code * re pr * format repair * fix time issue * develop feedback process * feedback handler configuration * upgrade feedback using * add threshold * update prompt * update prompt * fix handler * add feedback scheduler * add handler change node update * add handler change node update * add handler change node update * add handler change node update * fix interface input * add chunk and ratio filter * update stopwords * fix messages queue * add seach_by_keywords_LIKE --------- Co-authored-by: 黑布林 <[email protected]> Co-authored-by: CaralHsi <[email protected]> Co-authored-by: chunyu li <[email protected]>
1 parent f5eae2f commit 4231c89

File tree

3 files changed

+116
-10
lines changed

3 files changed

+116
-10
lines changed

src/memos/graph_dbs/polardb.py

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1528,7 +1528,97 @@ def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]:
15281528
raise NotImplementedError
15291529

15301530
@timed
1531-
def seach_by_keywords(
1531+
def seach_by_keywords_like(
1532+
self,
1533+
query_word: str,
1534+
scope: str | None = None,
1535+
status: str | None = None,
1536+
search_filter: dict | None = None,
1537+
user_name: str | None = None,
1538+
filter: dict | None = None,
1539+
knowledgebase_ids: list[str] | None = None,
1540+
**kwargs,
1541+
) -> list[dict]:
1542+
where_clauses = []
1543+
1544+
if scope:
1545+
where_clauses.append(
1546+
f"ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) = '\"{scope}\"'::agtype"
1547+
)
1548+
if status:
1549+
where_clauses.append(
1550+
f"ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"{status}\"'::agtype"
1551+
)
1552+
else:
1553+
where_clauses.append(
1554+
"ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype"
1555+
)
1556+
1557+
# Build user_name filter with knowledgebase_ids support (OR relationship) using common method
1558+
user_name_conditions = self._build_user_name_and_kb_ids_conditions_sql(
1559+
user_name=user_name,
1560+
knowledgebase_ids=knowledgebase_ids,
1561+
default_user_name=self.config.user_name,
1562+
)
1563+
1564+
# Add OR condition if we have any user_name conditions
1565+
if user_name_conditions:
1566+
if len(user_name_conditions) == 1:
1567+
where_clauses.append(user_name_conditions[0])
1568+
else:
1569+
where_clauses.append(f"({' OR '.join(user_name_conditions)})")
1570+
1571+
# Add search_filter conditions
1572+
if search_filter:
1573+
for key, value in search_filter.items():
1574+
if isinstance(value, str):
1575+
where_clauses.append(
1576+
f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = '\"{value}\"'::agtype"
1577+
)
1578+
else:
1579+
where_clauses.append(
1580+
f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = {value}::agtype"
1581+
)
1582+
1583+
# Build filter conditions using common method
1584+
filter_conditions = self._build_filter_conditions_sql(filter)
1585+
where_clauses.extend(filter_conditions)
1586+
1587+
# Build key
1588+
where_clauses.append("""(properties -> '"memory"')::text LIKE %s""")
1589+
where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
1590+
1591+
query = f"""
1592+
SELECT
1593+
ag_catalog.agtype_access_operator(properties, '"id"'::agtype) AS old_id,
1594+
agtype_object_field_text(properties, 'memory') as memory_text
1595+
FROM "{self.db_name}_graph"."Memory"
1596+
{where_clause}
1597+
"""
1598+
1599+
params = (query_word,)
1600+
logger.info(
1601+
f"[seach_by_keywords_LIKE start:] user_name: {user_name}, query: {query}, params: {params}"
1602+
)
1603+
conn = self._get_connection()
1604+
try:
1605+
with conn.cursor() as cursor:
1606+
cursor.execute(query, params)
1607+
results = cursor.fetchall()
1608+
output = []
1609+
for row in results:
1610+
oldid = row[0]
1611+
id_val = str(oldid)
1612+
output.append({"id": id_val})
1613+
logger.info(
1614+
f"[seach_by_keywords_LIKE end:] user_name: {user_name}, query: {query}, params: {params} recalled: {output}"
1615+
)
1616+
return output
1617+
finally:
1618+
self._return_connection(conn)
1619+
1620+
@timed
1621+
def seach_by_keywords_tfidf(
15321622
self,
15331623
query_words: list[str],
15341624
scope: str | None = None,
@@ -1603,7 +1693,9 @@ def seach_by_keywords(
16031693
"""
16041694

16051695
params = (tsquery_string,)
1606-
logger.info(f"[search_by_fulltext] query: {query}, params: {params}")
1696+
logger.info(
1697+
f"[seach_by_keywords_TFIDF start:] user_name: {user_name}, query: {query}, params: {params}"
1698+
)
16071699
conn = self._get_connection()
16081700
try:
16091701
with conn.cursor() as cursor:
@@ -1615,6 +1707,9 @@ def seach_by_keywords(
16151707
id_val = str(oldid)
16161708
output.append({"id": id_val})
16171709

1710+
logger.info(
1711+
f"[seach_by_keywords_TFIDF end:] user_name: {user_name}, query: {query}, params: {params} recalled: {output}"
1712+
)
16181713
return output
16191714
finally:
16201715
self._return_connection(conn)

src/memos/mem_feedback/feedback.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def __init__(self, config: MemFeedbackConfig):
7878
is_reorganize=self.is_reorganize,
7979
)
8080
self.searcher: Searcher = self.memory_manager.searcher
81+
self.DB_IDX_READY = False
8182

8283
def _batch_embed(self, texts: list[str], embed_bs: int = 5):
8384
embed_bs = 5
@@ -569,15 +570,24 @@ def process_keyword_replace(self, user_id: str, user_name: str, kwp_judge: dict
569570
original_word = kwp_judge.get("original")
570571
target_word = kwp_judge.get("target")
571572

572-
# retrieve
573-
lang = detect_lang(original_word)
574-
queries = self._tokenize_chinese(original_word) if lang == "zh" else original_word.split()
573+
if self.DB_IDX_READY:
574+
# retrieve
575+
lang = detect_lang(original_word)
576+
queries = (
577+
self._tokenize_chinese(original_word) if lang == "zh" else original_word.split()
578+
)
575579

576-
must_part = f"{' & '.join(queries)}" if len(queries) > 1 else queries[0]
577-
retrieved_ids = self.graph_store.seach_by_keywords([must_part], user_name=user_name)
578-
if len(retrieved_ids) < 1:
579-
retrieved_ids = self.graph_store.search_by_fulltext(
580-
queries, top_k=100, user_name=user_name
580+
must_part = f"{' & '.join(queries)}" if len(queries) > 1 else queries[0]
581+
retrieved_ids = self.graph_store.seach_by_keywords_tfidf(
582+
[must_part], user_name=user_name
583+
)
584+
if len(retrieved_ids) < 1:
585+
retrieved_ids = self.graph_store.search_by_fulltext(
586+
queries, top_k=100, user_name=user_name
587+
)
588+
else:
589+
retrieved_ids = self.graph_store.seach_by_keywords_like(
590+
f"%{original_word}%", user_name=user_name
581591
)
582592

583593
# filter by doc scope

src/memos/mem_feedback/simple_feedback.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,4 @@ def __init__(
2929
self.mem_reader = mem_reader
3030
self.searcher = searcher
3131
self.stopword_manager = StopwordManager
32+
self.DB_IDX_READY = False

0 commit comments

Comments
 (0)