Skip to content

Commit dfb4020

Browse files
committed
fix(memory): improve search relevance and configuration defaults
1 parent 410c1b9 commit dfb4020

File tree

7 files changed

+74
-26
lines changed

7 files changed

+74
-26
lines changed

reme/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"ReMeFs",
1919
]
2020

21-
__version__ = "0.3.0.0a2"
21+
__version__ = "0.3.0.0a3"
2222

2323

2424
"""

reme/agent/chat/fs_cli.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ system_prompt_zh: |
9797
9898
### 🔍 检索工具
9999
在回答关于过往工作、决策、日期、人员、偏好或待办事项的问题之前:
100-
1. 对 MEMORY.md + memory/*.md 运行 `memory_search`
100+
1. 对 MEMORY.md + memory/*.md 运行 `memory_search`,没有搜索结果可以从不同角度多次尝试
101101
2. 如果你需要阅读每日笔记 `memory/YYYY-MM-DD.md`,可以使用读取工具访问它。
102102
103103
### 🛠️ 其他工具
@@ -108,6 +108,7 @@ system_prompt_zh: |
108108
- **write_tool** — 创建新文件
109109
- **execute_code** — 运行 Python 代码
110110
- **dashscope_search** — 网络搜索
111+
如果对于工具结果不满意,可以混合使用多种工具,或者单个工具不同的使用参数。
111112
112113
## 像人类一样回应 😊
113114
**何时使用表情回应:**

reme/core/memory_store/sqlite_memory_store.py

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -159,12 +159,13 @@ async def _create_tables(self) -> None:
159159
path UNINDEXED,
160160
source UNINDEXED,
161161
start_line UNINDEXED,
162-
end_line UNINDEXED
162+
end_line UNINDEXED,
163+
tokenize='trigram'
163164
)
164165
""",
165166
)
166167
self.fts_available = True
167-
logger.info("Created FTS5 table")
168+
logger.info("Created FTS5 table with trigram tokenizer")
168169

169170
self.conn.commit()
170171
cursor.close()
@@ -538,7 +539,10 @@ async def vector_search(
538539

539540
results = []
540541
for _, path, start, end, src, text, dist in cursor.fetchall():
541-
score = max(0.0, 1.0 - dist)
542+
# Convert L2 distance to similarity score
543+
# For normalized vectors, L2 distance range is [0, 2]
544+
# Map to [1, 0] score range (higher score = more similar)
545+
score = max(0.0, 1.0 - dist / 2.0)
542546
snippet = text
543547
results.append(
544548
MemorySearchResult(
@@ -548,7 +552,7 @@ async def vector_search(
548552
score=score,
549553
snippet=snippet,
550554
source=MemorySource(src),
551-
distance=dist,
555+
raw_metric=dist,
552556
),
553557
)
554558

@@ -568,7 +572,19 @@ def _sanitize_fts_query(self, query: str) -> str:
568572
- " (phrase search, needs escaping)
569573
- : (column filter)
570574
- ^ (start of line anchor, not standard FTS5)
571-
- Other special chars that may interfere
575+
- ' (single quote, causes syntax errors)
576+
- ` (backtick, can cause issues)
577+
- | (pipe, OR operator)
578+
- + (plus, FTS5 phrase concatenation operator)
579+
- - (minus, NOT operator)
580+
- = (equals, can cause issues)
581+
- < > (angle brackets, comparison operators)
582+
- ! (exclamation, NOT operator variant)
583+
- @ # $ % & (other special chars)
584+
- \\ (backslash, escape character)
585+
- / (slash, can interfere)
586+
- ; (semicolon, statement separator)
587+
- , (comma, can interfere with phrase parsing)
572588
573589
Args:
574590
query: Raw query string
@@ -580,8 +596,38 @@ def _sanitize_fts_query(self, query: str) -> str:
580596
return ""
581597

582598
# Remove FTS5 special characters that we don't want users to use
583-
# Keep only alphanumeric, spaces, and some safe punctuation
584-
special_chars = ["*", "?", ":", "^", "(", ")", "[", "]", "{", "}"]
599+
# Keep only alphanumeric, spaces, periods, and underscores
600+
special_chars = [
601+
"*",
602+
"?",
603+
":",
604+
"^",
605+
"(",
606+
")",
607+
"[",
608+
"]",
609+
"{",
610+
"}",
611+
"'",
612+
'"',
613+
"`",
614+
"|",
615+
"+",
616+
"-",
617+
"=",
618+
"<",
619+
">",
620+
"!",
621+
"@",
622+
"#",
623+
"$",
624+
"%",
625+
"&",
626+
"\\",
627+
"/",
628+
";",
629+
",",
630+
]
585631
cleaned = query
586632
for char in special_chars:
587633
cleaned = cleaned.replace(char, " ")
@@ -650,6 +696,7 @@ async def keyword_search(
650696
score=score,
651697
snippet=snippet,
652698
source=MemorySource(src),
699+
raw_metric=rank,
653700
),
654701
)
655702

reme/core/schema/memory_search_result.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class MemorySearchResult(BaseModel):
1414
score: float = Field(..., description="Relevance score of the search result")
1515
snippet: str = Field(..., description="Text snippet from the matched content")
1616
source: MemorySource = Field(..., description="Source of the memory data")
17-
distance: float | None = Field(None, description="Original distance value from vector search")
17+
raw_metric: float | None = Field(None, description="Raw metric value from search (e.g., distance, rank)")
1818
metadata: dict = Field(default_factory=dict, description="Additional metadata")
1919

2020
@property

reme/core/schema/service_config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ class FileWatcherConfig(BaseModel):
107107
suffix_filters: list[str] = Field(default_factory=list)
108108
recursive: bool = Field(default=False)
109109
debounce: int = Field(default=500)
110-
chunk_tokens: int = Field(default=400)
111-
chunk_overlap: int = Field(default=80)
110+
chunk_tokens: int = Field(default=1000)
111+
chunk_overlap: int = Field(default=100)
112112
memory_store: str = Field(default="default")
113113
scan_on_start: bool = Field(default=True)
114114

reme/reme_fs.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ def __init__(
5353
suffix_filters: list[str] | None = None,
5454
recursive: bool = False,
5555
debounce: int = 500,
56-
chunk_tokens: int = 400,
57-
chunk_overlap: int = 80,
56+
chunk_tokens: int = 1000,
57+
chunk_overlap: int = 100,
5858
scan_on_start: bool = True,
5959
default_file_watcher_config: dict | None = None,
6060
context_window_tokens: int = 128000,
@@ -182,16 +182,16 @@ async def summary(self, messages: list[Message | dict], date: str, language: str
182182
)
183183
return await summarizer.call(messages=messages, date=date, service_context=self.service_context)
184184

185-
async def memory_search(self, query: str, max_results: int = 10, min_score: float = 0.3) -> str:
185+
async def memory_search(self, query: str, max_results: int = 5, min_score: float = 0.1) -> str:
186186
"""
187187
Mandatory recall step: semantically search MEMORY.md + memory/*.md (and optional session transcripts)
188188
before answering questions about prior work, decisions, dates, people, preferences, or todos;
189189
returns top snippets with path + lines.
190190
191191
Args:
192192
query: The semantic search query to find relevant memory snippets
193-
max_results: Maximum number of search results to return (optional), default is 10
194-
min_score: Minimum similarity score threshold for results (optional), default is 0.3
193+
max_results: Maximum number of search results to return (optional), default is 5
194+
min_score: Minimum similarity score threshold for results (optional), default is 0.1
195195
196196
Returns:
197197
Search results as formatted string

reme/tool/fs/fs_memory_search.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def __init__(
1616
self,
1717
sources: list[MemorySource] | None = None,
1818
min_score: float = 0.1,
19-
max_results: int = 20,
19+
max_results: int = 5,
2020
hybrid_enabled: bool = True,
2121
hybrid_vector_weight: float = 0.7,
2222
hybrid_text_weight: float = 0.3,
@@ -52,11 +52,11 @@ def _build_tool_call(self) -> ToolCall:
5252
},
5353
"max_results": {
5454
"type": "integer",
55-
"description": "Maximum number of search results to return (optional)",
55+
"description": "Maximum number of search results to return (optional), default 5",
5656
},
5757
"min_score": {
5858
"type": "number",
59-
"description": "Minimum similarity score threshold for results (optional)",
59+
"description": "Minimum similarity score threshold for results (optional), default 0.1",
6060
},
6161
},
6262
"required": ["query"],
@@ -79,16 +79,16 @@ async def execute(self) -> str:
7979
vector_results = await self._search_vector(query, candidates)
8080

8181
# Log original vector results
82-
logger.debug("\n=== Vector Search Results ===")
82+
logger.info("\n=== Vector Search Results ===")
8383
for i, r in enumerate(vector_results[:10], 1):
8484
snippet_preview = (r.snippet[:100] + "...") if len(r.snippet) > 100 else r.snippet
85-
logger.debug(f"{i}. Score: {r.score:.4f} | Snippet: {snippet_preview}")
85+
logger.info(f"{i}. Score: {r.score:.4f} | Snippet: {snippet_preview}")
8686

8787
# Log original keyword results
88-
logger.debug("\n=== Keyword Search Results ===")
88+
logger.info("\n=== Keyword Search Results ===")
8989
for i, r in enumerate(keyword_results[:10], 1):
9090
snippet_preview = (r.snippet[:100] + "...") if len(r.snippet) > 100 else r.snippet
91-
logger.debug(f"{i}. Score: {r.score:.4f} | Snippet: {snippet_preview}")
91+
logger.info(f"{i}. Score: {r.score:.4f} | Snippet: {snippet_preview}")
9292

9393
if not keyword_results:
9494
results = [r for r in vector_results if r.score >= min_score][:max_results]
@@ -103,10 +103,10 @@ async def execute(self) -> str:
103103
)
104104

105105
# Log merged results
106-
logger.debug("\n=== Merged Hybrid Results ===")
106+
logger.info("\n=== Merged Hybrid Results ===")
107107
for i, r in enumerate(merged[:10], 1):
108108
snippet_preview = (r.snippet[:100] + "...") if len(r.snippet) > 100 else r.snippet
109-
logger.debug(f"{i}. Score: {r.score:.4f} | Snippet: {snippet_preview}")
109+
logger.info(f"{i}. Score: {r.score:.4f} | Snippet: {snippet_preview}")
110110

111111
results = [r for r in merged if r.score >= min_score][:max_results]
112112
else:

0 commit comments

Comments
 (0)