Skip to content

Commit 2998969

Browse files
fridayLCaralHsi
andauthored
feat:add doc source reranker (#642)
* feat: update memos headers * feat: headers add * feat: update search agent * feat: update mem story * feat: update mem scheduler * feat: update deepsearch mem code * feat: update deepsearch agent * feat: update test code * fix: remove dup config * feat: dock search pipeline * fix: code test * feat: add test scripts * feat: add test * feat: update need_raw process * fix: add initter * fix: change agent search func name * feat: update logs and defined * feat: update full text mem search * feat: cp plugin to dev * feat: add one recall for fulltext retrieval * fix: set default for fulltext search * feat: add langchain chunk * feat: fix playground for query * feat: update file content memory extract * feat: update code * feat: update import * code: reformat suffix * feat: update file_id * remove langchain-text-splitters==1.0.0 * feat: add requirement * feat: make test * feat: fix markdown * feat: fix simple chunker * feat: add file sources * feat: add concat doc source --------- Co-authored-by: CaralHsi <[email protected]>
1 parent eb60331 commit 2998969

File tree

2 files changed

+107
-0
lines changed

2 files changed

+107
-0
lines changed
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# memos/reranker/strategies/concat_docsource.py
2+
from __future__ import annotations
3+
4+
import re
5+
6+
from typing import Any
7+
8+
from .base import BaseRerankerStrategy
9+
from .dialogue_common import DialogueRankingTracker
10+
11+
12+
# Matches a single leading bracketed tag (e.g. "[tag] ") together with any
# whitespace around it, anchored at the start of the string.
_TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")
13+
14+
15+
class ConcatDocSourceStrategy(BaseRerankerStrategy):
    """
    Concat document-source strategy.

    Each graph result becomes exactly one ranking document: its memory text
    (with one leading ``[tag]`` prefix stripped) followed, when the item has
    file-type sources with content, by a ``[Sources]:`` section holding the
    concatenated content of those sources.
    """

    @staticmethod
    def _source_field(source: Any, name: str) -> Any:
        """Read ``name`` from a source given as an object or as a mapping."""
        if isinstance(source, dict):
            return source.get(name)
        return getattr(source, name, None)

    @classmethod
    def _build_document(cls, item: Any) -> str:
        """Render a single graph result as the text handed to the reranker."""
        memory = getattr(item, "memory", None)
        if isinstance(memory, str):
            # Drop one leading "[...]" tag so it does not influence ranking.
            memory = _TAG1.sub("", memory)
        elif memory is None:
            # Avoid leaking the literal string "None" into the document.
            memory = ""
        else:
            memory = str(memory)

        # ``sources`` may be missing *or* explicitly None; treat both as empty.
        metadata = getattr(item, "metadata", None)
        sources = getattr(metadata, "sources", None) or []

        file_chunks = []
        for source in sources:
            if cls._source_field(source, "type") != "file":
                continue
            content = cls._source_field(source, "content")
            if content:  # skip None / empty content instead of raising
                file_chunks.append(str(content))

        # Chunks are joined without a separator, matching plain concatenation.
        chunk_text = "".join(file_chunks)
        if chunk_text:
            return f"{memory}\n\n[Sources]:\n{chunk_text}"
        return memory

    def prepare_documents(
        self,
        query: str,
        graph_results: list,
        top_k: int,
        **kwargs,
    ) -> tuple[DialogueRankingTracker, dict[str, Any], list[str]]:
        """
        Prepare one ranking document per graph result.

        Args:
            query: The search query (not used by this strategy).
            graph_results: List of graph results; each item is read for its
                ``memory`` attribute and, if present, ``metadata.sources``.
            top_k: Maximum number of items to return (not used here; the
                cut-off is applied in ``reconstruct_items``).

        Returns:
            tuple[DialogueRankingTracker, dict[str, Any], list[str]]:
                - tracker: A fresh, empty DialogueRankingTracker instance
                - original_items: An empty dict (this strategy does not fill it)
                - documents: Text documents, index-aligned with ``graph_results``
        """
        tracker = DialogueRankingTracker()
        original_items: dict[str, Any] = {}
        # One document per item, in order: reconstruct_items relies on
        # documents[i] belonging to graph_results[i].
        documents = [self._build_document(item) for item in (graph_results or [])]
        return tracker, original_items, documents

    def reconstruct_items(
        self,
        ranked_indices: list[int],
        scores: list[float],
        tracker: DialogueRankingTracker,
        original_items: dict[str, Any],
        top_k: int,
        **kwargs,
    ) -> list[tuple[Any, float]]:
        """
        Attach the ranked documents back onto their graph results.

        Args:
            ranked_indices: Indices into ``graph_results`` / ``documents``
            scores: Relevance scores, looked up by the same index
            tracker: DialogueRankingTracker instance (not used here)
            original_items: Dict of original items (not used here)
            top_k: Maximum number of items to return
            **kwargs: Must contain ``graph_results`` and ``documents`` as
                produced for / by ``prepare_documents``.

        Returns:
            List of (item, score) tuples sorted by score in descending order
            and truncated to ``top_k``. Each returned item is mutated in place:
            its ``memory`` is overwritten with the document text.
        """
        graph_results = kwargs.get("graph_results")
        documents = kwargs.get("documents")

        reconstructed_items = []
        for idx in ranked_indices:
            item = graph_results[idx]
            # The memory now carries the stripped text plus the sources section.
            item.memory = f"{documents[idx]}"
            # NOTE(review): scores is indexed by document index here; confirm
            # the caller does not pass scores parallel to ranked_indices.
            reconstructed_items.append((item, scores[idx]))

        reconstructed_items.sort(key=lambda pair: pair[1], reverse=True)
        return reconstructed_items[:top_k]

src/memos/reranker/strategies/factory.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import TYPE_CHECKING, Any, ClassVar
55

66
from .concat_background import ConcatBackgroundStrategy
7+
from .concat_docsource import ConcatDocSourceStrategy
78
from .single_turn import SingleTurnStrategy
89
from .singleturn_outmem import SingleTurnOutMemStrategy
910

@@ -19,6 +20,7 @@ class RerankerStrategyFactory:
1920
"single_turn": SingleTurnStrategy,
2021
"concat_background": ConcatBackgroundStrategy,
2122
"singleturn_outmem": SingleTurnOutMemStrategy,
23+
"concat_docsource": ConcatDocSourceStrategy,
2224
}
2325

2426
@classmethod

0 commit comments

Comments
 (0)