forked from CortexReach/memory-lancedb-pro
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadaptive-retrieval.ts
More file actions
102 lines (88 loc) · 4.38 KB
/
adaptive-retrieval.ts
File metadata and controls
102 lines (88 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/**
* Adaptive Retrieval
* Determines whether a query needs memory retrieval at all.
* Skips retrieval for greetings, commands, simple instructions, and system messages.
* Saves embedding API calls and reduces noise injection.
*/
// Prompts that never benefit from memory retrieval.
// A prompt is skipped as soon as any single pattern below matches it.
const SKIP_PATTERNS = [
  // Openers: a greeting or small talk at the very start of the prompt
  /^(hi|hello|hey|good\s*(morning|afternoon|evening|night)|greetings|yo|sup|howdy|what'?s up)\b/i,

  // A lone slash command with no arguments, e.g. /help or /recall
  /^\/[a-z][\w-]*\s*$/i,
  // Prompts that start with a common shell / tooling verb
  /^(run|build|test|ls|cd|git|npm|pip|docker|curl|cat|grep|find|make|sudo)\b/i,

  // A bare acknowledgement (word or emoji), optionally followed by "." or "!"
  /^(yes|no|yep|nope|ok|okay|sure|fine|thanks|thank you|thx|ty|got it|understood|cool|nice|great|good|perfect|awesome|👍|👎|✅|❌)\s*[.!]?$/i,

  // "Keep going" prompts in English and Chinese (simplified and traditional)
  /^(go ahead|continue|proceed|do it|start|begin|next|实施|實施|开始|開始|继续|繼續|好的|可以|行)\s*[.!]?$/i,

  // Nothing but emoji, zero-width joiners, variation selectors and whitespace
  /^[\p{Extended_Pictographic}\u200d\ufe0f\s]+$/u,

  // Heartbeat marker: deliberately unanchored so prefixed formats still match
  /HEARTBEAT/i,
  // System messages: anchored, the prompt must begin with "[System"
  /^\[System/i,

  // A single utility word, optionally followed by ".", "!" or "?"
  /^(ping|pong|test|debug)\s*[.!?]?$/i,
];
// Prompts with memory-related intent: these trigger retrieval no matter how
// short they are. A match anywhere in the prompt is sufficient.
const FORCE_RETRIEVE_PATTERNS = [
  // Explicit references to remembering or memory
  /\b(remember|recall|forgot|memory|memories)\b/i,
  // References to an earlier point in time
  /\b(last time|before|previously|earlier|yesterday|ago)\b/i,
  // Questions about the user's own personal details
  /\b(my (name|email|phone|address|birthday|preference))\b/i,
  // Questions about what was said earlier
  /\b(what did (i|we)|did i (tell|say|mention))\b/i,
  // Chinese equivalents (simplified and traditional) of the phrases above
  /(你记得|[你妳]記得|之前|上次|以前|还记得|還記得|提到过|提到過|说过|說過)/i,
];
/**
 * Strip OpenClaw-injected wrappers from a raw prompt so that the skip/force
 * rules see only the text that follows them.
 *
 * Removed, in this order:
 *  1. "Conversation info (untrusted metadata):" / "Sender (untrusted metadata):"
 *     blocks, each up to and including the blank line that ends it.
 *  2. A leading cron wrapper such as "[cron:<jobId> <jobName>] ".
 *  3. A leading timestamp such as "[Mon 2026-03-02 04:21 GMT+8] ".
 *
 * Stripping these lets command-style prompts (e.g. "[cron:...] run ...") be
 * recognised as commands, so auto-recall injection can be skipped (saves tokens).
 *
 * @param query The raw prompt text
 * @returns The prompt with wrappers removed and surrounding whitespace trimmed
 */
function normalizeQuery(query: string): string {
  // Global + multiline: every metadata block that starts a line is removed.
  const withoutMetadata = query
    .trim()
    .replace(/^(Conversation info|Sender) \(untrusted metadata\):[\s\S]*?\n\s*\n/gim, "");

  // Only a cron tag at the very start is removed.
  const withoutCron = withoutMetadata
    .trim()
    .replace(/^\[cron:[^\]]+\]\s*/i, "");

  // Only a timestamp at the very start (after the cron tag, if any) is removed.
  const withoutTimestamp = withoutCron
    .trim()
    .replace(/^\[[A-Za-z]{3}\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}\s[^\]]+\]\s*/, "");

  return withoutTimestamp.trim();
}
/**
 * Determine if a query should skip memory retrieval.
 *
 * Decision order (first rule that applies wins):
 *  1. A force-retrieve pattern matches and the prompt is not a bare slash
 *     command -> retrieve.
 *  2. The prompt has no digit and is shorter than 5 characters
 *     (2 when it contains CJK) -> skip.
 *  3. Any skip pattern matches -> skip.
 *  4. If `minLength` is a positive number: skip only when the prompt is
 *     shorter than `minLength` and contains no question mark; otherwise retrieve.
 *  5. Otherwise: skip when the prompt has no digit, is shorter than 13
 *     characters (3 when it contains CJK) and contains no question mark.
 *
 * Both the ASCII "?" and the full-width "？" (U+FF1F) count as a question mark.
 *
 * @param query The raw prompt text
 * @param minLength Optional minimum length override (if set to a positive
 *   number, it replaces the built-in threshold of rule 5)
 * @returns true if retrieval should be skipped, false if it should run
 */
export function shouldSkipRetrieval(query: string, minLength?: number): boolean {
  const trimmed = normalizeQuery(query);

  // Force retrieve if the query has memory-related intent. This is checked
  // FIRST, before any length check, so short CJK queries like "你记得吗"
  // aren't skipped.
  // Note: slash commands (e.g. /recall) take the SKIP path, not the FORCE path.
  const isSlashCmd = /^\/[a-z][\w-]*\s*$/i.test(trimmed);
  if (!isSlashCmd && FORCE_RETRIEVE_PATTERNS.some((p) => p.test(trimmed))) return false;

  const hasCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(trimmed);
  const hasDigit = /\d/.test(trimmed);
  // "\uff1f" is the full-width question mark used in CJK text. It is written
  // as an escape so it cannot be silently folded into the ASCII "?".
  const hasQuestionMark = trimmed.includes("?") || trimmed.includes("\uff1f");

  // Too short to be meaningful.
  // Strings containing digits (e.g. port 8080, issue #123) carry semantic
  // information and are exempt from this length cutoff.
  if (!hasDigit && trimmed.length < (hasCJK ? 2 : 5)) return true;

  // Skip if the query matches any skip pattern.
  if (SKIP_PATTERNS.some((p) => p.test(trimmed))) return true;

  // If the caller provides a custom minimum length, it replaces the built-in
  // threshold below (the digit exemption is not applied on this path).
  if (minLength !== undefined && minLength > 0) {
    return trimmed.length < minLength && !hasQuestionMark;
  }

  // Skip very short non-question messages (likely commands or affirmations).
  // CJK characters carry more meaning per character, so use a lower threshold.
  // Strings containing digits are exempt here as well.
  const defaultMinLength = hasCJK ? 3 : 13;
  if (!hasDigit && trimmed.length < defaultMinLength && !hasQuestionMark) return true;

  // Default: do retrieve.
  return false;
}