Skip to content

Commit 8f38371

Browse files
committed
fix: 标签查询类型安全 + 扩展性 + 测试覆盖
- 修复类型安全:BM25-only 结果显式添加 vector: undefined
- 增强扩展性:新增 tagPrefixes 配置(默认:proj/env/team/scope)
- 完善测试:4 个测试用例全部通过
  - BM25-only + mustContain 过滤
  - 类型安全验证
  - 自定义标签前缀
  - 动态配置更新

解决 #55 中提到的标签查询语义误报问题
1 parent e7fbcc1 commit 8f38371

File tree

3 files changed

+306
-35
lines changed

3 files changed

+306
-35
lines changed

package-lock.json

Lines changed: 8 additions & 34 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/retriever.ts

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ export interface RetrievalConfig {
7878
/** Maximum half-life multiplier from access reinforcement.
7979
* Prevents frequently accessed memories from becoming immortal. (default: 3) */
8080
maxHalfLifeMultiplier: number;
81+
/** Tag prefixes for exact-match queries (default: ["proj", "env", "team", "scope"]).
82+
* Queries containing these prefixes (e.g. "proj:AIF") will use BM25-only + mustContain
83+
* to avoid semantic false positives from vector search. */
84+
tagPrefixes: string[];
8185
}
8286

8387
export interface RetrievalContext {
@@ -119,6 +123,7 @@ export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
119123
timeDecayHalfLifeDays: 60,
120124
reinforcementFactor: 0.5,
121125
maxHalfLifeMultiplier: 3,
126+
tagPrefixes: ["proj", "env", "team", "scope"],
122127
};
123128

124129
// ============================================================================
@@ -320,18 +325,39 @@ function cosineSimilarity(a: number[], b: number[]): number {
320325
export class MemoryRetriever {
321326
private accessTracker: AccessTracker | null = null;
322327
private tierManager: TierManager | null = null;
328+
private tagQueryRegex: RegExp;
323329

324330
/**
 * Creates a retriever over the given store and embedder.
 *
 * @param store Memory store that is searched.
 * @param embedder Embedder held by the retriever.
 * @param config Retrieval settings; falls back to DEFAULT_RETRIEVAL_CONFIG.
 * @param decayEngine Optional decay engine; `null` when not supplied.
 */
constructor(
  private store: MemoryStore,
  private embedder: Embedder,
  private config: RetrievalConfig = DEFAULT_RETRIEVAL_CONFIG,
  private decayEngine: DecayEngine | null = null,
) {
  // Compile the tag-query regex once up front; updateConfig() recompiles it
  // when a new tagPrefixes list is supplied.
  const { tagPrefixes } = this.config;
  this.tagQueryRegex = this.buildTagQueryRegex(tagPrefixes);
}
330338

331339
/**
 * Installs the access tracker. Once set, retrieval reports the ids of
 * returned entries to it when the request source is "manual".
 *
 * @param tracker Tracker that receives the accessed entry ids.
 */
setAccessTracker(tracker: AccessTracker): void {
  this.accessTracker = tracker;
}
334342

343+
/**
 * Compiles the configured tag prefixes into a global regex that matches
 * `prefix:value` tokens such as "proj:AIF".
 *
 * The value must start with an ASCII letter or digit and may continue with up
 * to 63 letters, digits, ".", "_" or "-"; the trailing word boundary makes the
 * match end on a word character.
 *
 * Prefixes are trimmed and blank entries are discarded before compilation.
 * A blank entry would otherwise become an empty alternative in the group, and
 * the pattern would then match the ":value" tail of any "word:value" text,
 * misclassifying ordinary queries as tag queries. Duplicate prefixes are
 * collapsed as well.
 *
 * @param prefixes Tag prefixes, without the trailing colon.
 * @returns A regex carrying the `g` flag, or a never-matching regex when no
 *          usable prefix remains.
 */
private buildTagQueryRegex(prefixes: string[]): RegExp {
  const usable = Array.from(
    new Set((prefixes || []).map((p) => p.trim()).filter(Boolean)),
  );
  if (usable.length === 0) {
    // An empty negative lookahead can never succeed, so this matches nothing.
    return /(?!)/g;
  }
  // Escape regex metacharacters so prefixes are always matched literally.
  const escaped = usable.map((p) => p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
  const pattern = `\\b(?:${escaped.join("|")}):[A-Za-z0-9][A-Za-z0-9._-]{0,63}\\b`;
  return new RegExp(pattern, "g");
}
352+
353+
/**
 * Collects the distinct tag tokens (e.g. "proj:AIF") found in a query.
 *
 * Tokens keep the order of their first occurrence, and at most five are
 * returned.
 *
 * @param query Raw query text.
 * @returns Up to five unique, trimmed tag tokens; empty when none match.
 */
private extractTagTokens(query: string): string[] {
  const maxTags = 5;
  const seen = new Set<string>();
  for (const raw of query.match(this.tagQueryRegex) || []) {
    const token = raw.trim();
    if (token) {
      seen.add(token);
    }
  }
  return Array.from(seen).slice(0, maxTags);
}
360+
335361
private filterActiveResults<T extends MemorySearchResult>(results: T[]): T[] {
336362
return results.filter((result) =>
337363
isMemoryActiveAt(parseSmartMetadata(result.entry.metadata, result.entry)),
@@ -342,6 +368,28 @@ export class MemoryRetriever {
342368
const { query, limit, scopeFilter, category, source } = context;
343369
const safeLimit = clampInt(limit, 1, 20);
344370

371+
// Tag-style queries (e.g. "proj:AIF") should behave like exact filters.
372+
// Hybrid vector search tends to introduce semantic false positives for short tokens.
373+
const tags = this.extractTagTokens(query);
374+
if (tags.length > 0 && this.config.mode !== "vector" && this.store.hasFtsSupport) {
375+
const bm25 = await this.bm25OnlyRetrieval(
376+
query,
377+
safeLimit,
378+
scopeFilter,
379+
category,
380+
tags,
381+
);
382+
if (bm25.length > 0) {
383+
// Record access for reinforcement (manual recall only)
384+
if (this.accessTracker && source === "manual") {
385+
this.accessTracker.recordAccess(bm25.map((r) => r.entry.id));
386+
}
387+
return bm25;
388+
}
389+
// If there are no literal matches, fall back to normal retrieval so
390+
// users can still find related wording.
391+
}
392+
345393
let results: RetrievalResult[];
346394
if (this.config.mode === "vector" || !this.store.hasFtsSupport) {
347395
results = await this.vectorOnlyRetrieval(
@@ -367,6 +415,57 @@ export class MemoryRetriever {
367415
return results;
368416
}
369417

418+
/**
 * Runs a BM25-only search and pushes the hits through the post-processing
 * chain, without any vector component.
 *
 * Steps, in order: over-fetch from the store, filter by category, keep only
 * entries whose text contains every required token (case-sensitive substring
 * match), attach source metadata, apply recency boost, importance weight,
 * length normalization and time decay, drop results below
 * `config.hardMinScore`, optionally filter noise, apply MMR diversity, and
 * finally truncate to `limit`.
 *
 * @param query Query text passed to the store's BM25 search.
 * @param limit Maximum number of results to return.
 * @param scopeFilter Optional scope filter forwarded to the store.
 * @param category When set, only entries of this category are kept.
 * @param mustContain Tokens that must all appear literally in the entry text.
 * @returns At most `limit` results; `sources.vector` is always undefined.
 */
private async bm25OnlyRetrieval(
  query: string,
  limit: number,
  scopeFilter?: string[],
  category?: string,
  mustContain?: string[],
): Promise<RetrievalResult[]> {
  // Ask for more than `limit` so the filters below still leave candidates.
  const fetchSize = Math.max(limit * 4, 20);
  let hits = await this.store.bm25Search(query, fetchSize, scopeFilter);

  if (category) {
    hits = hits.filter((hit) => hit.entry.category === category);
  }

  const requiredTokens = mustContain || [];
  if (requiredTokens.length) {
    hits = hits.filter((hit) =>
      requiredTokens.every((token) => hit.entry.text.includes(token)),
    );
  }

  // Ranks are assigned after filtering, so they are contiguous from 1.
  const candidates = hits.map(
    (hit, position) =>
      ({
        ...hit,
        sources: {
          vector: undefined,
          bm25: { score: hit.score, rank: position + 1 },
          fused: { score: hit.score },
        },
      }) as RetrievalResult,
  );

  // Score adjustments: recency -> importance -> length -> time decay.
  const scored = this.applyTimeDecay(
    this.applyLengthNormalization(
      this.applyImportanceWeight(this.applyRecencyBoost(candidates)),
    ),
  );

  const minScore = this.config.hardMinScore;
  let survivors = scored.filter((item) => item.score >= minScore);
  if (this.config.filterNoise) {
    survivors = filterNoise(survivors, (item) => item.entry.text);
  }

  return this.applyMMRDiversity(survivors).slice(0, limit);
}
468+
370469
private async vectorOnlyRetrieval(
371470
query: string,
372471
limit: number,
@@ -1019,6 +1118,10 @@ export class MemoryRetriever {
10191118
// Update configuration
10201119
/**
 * Merges a partial configuration over the current one and keeps the cached
 * tag-query regex in sync with it.
 *
 * @param newConfig Fields to override; keys that are absent keep their
 *                  current value.
 */
updateConfig(newConfig: Partial<RetrievalConfig>): void {
  this.config = { ...this.config, ...newConfig };
  // Rebuild whenever the patch carries the key at all. A truthiness check
  // would skip an explicit `tagPrefixes: undefined`, which the spread above
  // still writes into the config, leaving the cached regex stale.
  if ("tagPrefixes" in newConfig) {
    this.tagQueryRegex = this.buildTagQueryRegex(this.config.tagPrefixes);
  }
}
10231126

10241127
// Get current configuration

0 commit comments

Comments
 (0)