feat: chat bot api (#302)

CaralHsi · web-flow · commit c688ead6cb2f · 2025-09-16T10:47:18.000+08:00
* fix: add safe guard when parsing node memory

* feat: add filter as a parameter in tree-text searcher

* feat: add filter for user and long-term memory

* feat: add filter in working memory

* add filter in task-parser

* feat: only mix-retrieve for vector-recall; TODO: mix reranker

* feat: add 'session_id' as an optional parameter for product api

* feat: api 1.0 finish

* maintain: update gitignore

* maintain: update gitignore

* feat: add 'type' in TextualMemory Sources

* feat: add annotation to item

* fix: add session_id to product add

* fix: test

* feat: [WIP] add filter in reranker

* fix: bug in recall
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py
@@ -217,7 +217,9 @@ def search_single(vec, filt=None):
         all_hits = []
         # Path A: without filter
         with ContextThreadPoolExecutor() as executor:
-            futures = [ex.submit(search_single, vec, None) for vec in query_embedding[:max_num]]
+            futures = [
+                executor.submit(search_single, vec, None) for vec in query_embedding[:max_num]
+            ]
             for f in concurrent.futures.as_completed(futures):
                 all_hits.extend(f.result() or [])
 
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
@@ -253,6 +253,7 @@ def _retrieve_from_working_memory(
             graph_results=items,
             top_k=top_k,
             parsed_goal=parsed_goal,
+            search_filter=search_filter,
         )
 
     # --- Path B
@@ -292,6 +293,7 @@ def _retrieve_from_long_term_and_user(
             graph_results=results,
             top_k=top_k,
             parsed_goal=parsed_goal,
+            search_filter=search_filter,
         )
 
     @timed
diff --git a/src/memos/reranker/cosine_local.py b/src/memos/reranker/cosine_local.py
@@ -49,6 +49,7 @@ def __init__(
         self,
         level_weights: dict[str, float] | None = None,
         level_field: str = "background",
+        **kwargs,
     ):
         self.level_weights = level_weights or {"topic": 1.0, "concept": 1.0, "fact": 1.0}
         self.level_field = level_field
diff --git a/src/memos/reranker/http_bge.py b/src/memos/reranker/http_bge.py
@@ -19,12 +19,35 @@
 if TYPE_CHECKING:
     from memos.memories.textual.item import TextualMemoryItem
 
+# Strip a leading "[...]" tag (e.g., "[2025-09-01] ..." or "[meta] ...")
+# before sending text to the reranker. This keeps inputs clean and
+# avoids misleading the model with bracketed prefixes.
 _TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")
 
 
 class HTTPBGEReranker(BaseReranker):
     """
-    HTTP-based BGE reranker. Mirrors your old MemoryReranker, but configurable.
+    HTTP-based BGE reranker.
+
+    This class sends (query, documents[]) to a remote HTTP endpoint that
+    performs cross-encoder-style re-ranking (e.g., BGE reranker) and returns
+    relevance scores. It then maps those scores back onto the original
+    TextualMemoryItem list and returns (item, score) pairs sorted by score.
+
+    Notes
+    -----
+    - The endpoint is expected to accept JSON:
+        {
+          "model": "<model-name>",
+          "query": "<query text>",
+          "documents": ["doc1", "doc2", ...]
+        }
+    - Two response shapes are supported:
+        1) {"results": [{"index": <int>, "relevance_score": <float>}, ...]}
+           where "index" refers to the *position in the documents array*.
+        2) {"data": [{"score": <float>}, ...]} (aligned by list order)
+    - If the service fails or responds unexpectedly, this falls back to
+      returning the original items with 0.0 scores (best-effort).
     """
 
     def __init__(
@@ -35,7 +58,22 @@ def __init__(
         timeout: int = 10,
         headers_extra: dict | None = None,
         rerank_source: list[str] | None = None,
+        **kwargs,
     ):
+        """
+        Parameters
+        ----------
+        reranker_url : str
+            HTTP endpoint for the reranker service.
+        token : str, optional
+            Bearer token for auth. If non-empty, added to the Authorization header.
+        model : str, optional
+            Model identifier understood by the server.
+        timeout : int, optional
+            Request timeout (seconds).
+        headers_extra : dict | None, optional
+            Additional headers to merge into the request headers.
+        """
         if not reranker_url:
             raise ValueError("reranker_url must not be empty")
         self.reranker_url = reranker_url
@@ -48,13 +86,37 @@ def __init__(
     def rerank(
         self,
         query: str,
-        graph_results: list,
+        graph_results: list[TextualMemoryItem],
         top_k: int,
+        search_filter: dict | None = None,
         **kwargs,
     ) -> list[tuple[TextualMemoryItem, float]]:
+        """
+        Rank candidate memories by relevance to the query.
+
+        Parameters
+        ----------
+        query : str
+            The search query.
+        graph_results : list[TextualMemoryItem]
+            Candidate items to re-rank. Each item is expected to have a
+            `.memory` str field; non-strings are ignored.
+        top_k : int
+            Return at most this many items.
+        search_filter : dict | None
+            Currently unused. Present to keep signature compatible.
+
+        Returns
+        -------
+        list[tuple[TextualMemoryItem, float]]
+            Re-ranked items with scores, sorted descending by score.
+        """
         if not graph_results:
             return []
 
+        # Build a mapping from "payload docs index" -> "original graph_results index"
+        # Only include items that have a non-empty string memory. This ensures that
+        # any index returned by the server can be mapped back correctly.
         documents = []
         if self.concat_source:
             documents = concat_original_source(graph_results, self.concat_source)
@@ -74,6 +136,7 @@ def rerank(
         payload = {"model": self.model, "query": query, "documents": documents}
 
         try:
+            # Make the HTTP request to the reranker service
             resp = requests.post(
                 self.reranker_url, headers=headers, json=payload, timeout=self.timeout
             )
@@ -83,9 +146,14 @@ def rerank(
             scored_items: list[tuple[TextualMemoryItem, float]] = []
 
             if "results" in data:
+                # Format:
+                # dict("results": [{"index": int, "relevance_score": float},
+                # ...])
                 rows = data.get("results", [])
                 for r in rows:
                     idx = r.get("index")
+                    # The returned index refers to 'documents' (i.e., our 'pairs' order),
+                    # so we must map it back to the original graph_results index.
                     if isinstance(idx, int) and 0 <= idx < len(graph_results):
                         score = float(r.get("relevance_score", r.get("score", 0.0)))
                         scored_items.append((graph_results[idx], score))
@@ -94,21 +162,28 @@ def rerank(
                 return scored_items[: min(top_k, len(scored_items))]
 
             elif "data" in data:
+                # Format: {"data": [{"score": float}, ...]} aligned by list order
                 rows = data.get("data", [])
+                # Build a list of scores aligned with our 'documents' (pairs)
                 score_list = [float(r.get("score", 0.0)) for r in rows]
 
                 if len(score_list) < len(graph_results):
                     score_list += [0.0] * (len(graph_results) - len(score_list))
                 elif len(score_list) > len(graph_results):
                     score_list = score_list[: len(graph_results)]
 
+                # Map back to original items using 'pairs'
                 scored_items = list(zip(graph_results, score_list, strict=False))
                 scored_items.sort(key=lambda x: x[1], reverse=True)
                 return scored_items[: min(top_k, len(scored_items))]
 
             else:
+                # Unexpected response schema: return a 0.0-scored fallback of the first top_k valid docs
+                # Note: we use 'pairs' to keep alignment with valid (string) docs.
                 return [(item, 0.0) for item in graph_results[:top_k]]
 
         except Exception as e:
+            # Network error, timeout, JSON decode error, etc.
+            # Degrade gracefully by returning first top_k valid docs with 0.0 score.
             logger.error(f"[HTTPBGEReranker] request failed: {e}")
             return [(item, 0.0) for item in graph_results[:top_k]]

Original file line number	Diff line number	Diff line change
`@@ -253,6 +253,7 @@ def _retrieve_from_working_memory(`
`253`	`253`	`graph_results=items,`
`254`	`254`	`top_k=top_k,`
`255`	`255`	`parsed_goal=parsed_goal,`
	`256`	`+ search_filter=search_filter,`
`256`	`257`	`)`
`257`	`258`
`258`	`259`	`# --- Path B`
`@@ -292,6 +293,7 @@ def _retrieve_from_long_term_and_user(`
`292`	`293`	`graph_results=results,`
`293`	`294`	`top_k=top_k,`
`294`	`295`	`parsed_goal=parsed_goal,`
	`296`	`+ search_filter=search_filter,`
`295`	`297`	`)`
`296`	`298`
`297`	`299`	`@timed`