@@ -47,15 +47,8 @@ export interface RetrievalConfig {
4747 * - "jina" (default): Authorization: Bearer, string[] documents, results[].relevance_score
4848 * - "siliconflow": same format as jina (alias, for clarity)
4949 * - "voyage": Authorization: Bearer, string[] documents, data[].relevance_score
50- * - "pinecone": Api-Key header, {text}[] documents, data[].score
51- * - "tei": Authorization: Bearer, string[] texts, top-level [{ index, score }] */
52- rerankProvider ?:
53- | "jina"
54- | "siliconflow"
55- | "voyage"
56- | "pinecone"
57- | "dashscope"
58- | "tei" ;
50+ * - "pinecone": Api-Key header, {text}[] documents, data[].score */
51+ rerankProvider ?: "jina" | "siliconflow" | "voyage" | "pinecone" | "dashscope" ;
5952 /**
6053 * Length normalization: penalize long entries that dominate via sheer keyword
6154 * density. Formula: score *= 1 / (1 + log2(charLen / anchor)).
@@ -85,6 +78,10 @@ export interface RetrievalConfig {
8578 /** Maximum half-life multiplier from access reinforcement.
8679 * Prevents frequently accessed memories from becoming immortal. (default: 3) */
8780 maxHalfLifeMultiplier : number ;
81+ /** Tag prefixes for exact-match queries (default: ["proj", "env", "team", "scope"]).
82+ * Queries containing these prefixes (e.g. "proj:AIF") will use BM25-only + mustContain
83+ * to avoid semantic false positives from vector search. */
84+ tagPrefixes : string [ ] ;
8885}
8986
9087export interface RetrievalContext {
@@ -126,6 +123,7 @@ export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
126123 timeDecayHalfLifeDays : 60 ,
127124 reinforcementFactor : 0.5 ,
128125 maxHalfLifeMultiplier : 3 ,
126+ tagPrefixes : [ "proj" , "env" , "team" , "scope" ] ,
129127} ;
130128
131129// ============================================================================
@@ -151,13 +149,7 @@ function clamp01WithFloor(value: number, floor: number): number {
151149// Rerank Provider Adapters
152150// ============================================================================
153151
/** Identifiers of the rerank API dialects that the request/response adapters switch on. */
type RerankProvider =
  | "jina"
  | "siliconflow"
  | "voyage"
  | "pinecone"
  | "dashscope";
161153
162154interface RerankItem {
163155 index : number ;
@@ -170,21 +162,10 @@ function buildRerankRequest(
170162 apiKey : string ,
171163 model : string ,
172164 query : string ,
173- candidates : string [ ] ,
165+ documents : string [ ] ,
174166 topN : number ,
175167) : { headers : Record < string , string > ; body : Record < string , unknown > } {
176168 switch ( provider ) {
177- case "tei" :
178- return {
179- headers : {
180- "Content-Type" : "application/json" ,
181- Authorization : `Bearer ${ apiKey } ` ,
182- } ,
183- body : {
184- query,
185- texts : candidates ,
186- } ,
187- } ;
188169 case "dashscope" :
189170 // DashScope wraps query+documents under `input` and does not use top_n.
190171 // Endpoint: https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
@@ -197,7 +178,7 @@ function buildRerankRequest(
197178 model,
198179 input : {
199180 query,
200- documents : candidates ,
181+ documents,
201182 } ,
202183 } ,
203184 } ;
@@ -211,7 +192,7 @@ function buildRerankRequest(
211192 body : {
212193 model,
213194 query,
214- documents : candidates . map ( ( text ) => ( { text } ) ) ,
195+ documents : documents . map ( ( text ) => ( { text } ) ) ,
215196 top_n : topN ,
216197 rank_fields : [ "text" ] ,
217198 } ,
@@ -225,7 +206,7 @@ function buildRerankRequest(
225206 body : {
226207 model,
227208 query,
228- documents : candidates ,
209+ documents,
229210 // Voyage uses top_k (not top_n) to limit reranked outputs.
230211 top_k : topN ,
231212 } ,
@@ -241,7 +222,7 @@ function buildRerankRequest(
241222 body : {
242223 model,
243224 query,
244- documents : candidates ,
225+ documents,
245226 top_n : topN ,
246227 } ,
247228 } ;
@@ -251,7 +232,7 @@ function buildRerankRequest(
251232/** Parse provider-specific response into unified format */
252233function parseRerankResponse (
253234 provider : RerankProvider ,
254- data : unknown ,
235+ data : Record < string , unknown > ,
255236) : RerankItem [ ] | null {
256237 const parseItems = (
257238 items : unknown ,
@@ -277,41 +258,31 @@ function parseRerankResponse(
277258 }
278259 return parsed . length > 0 ? parsed : null ;
279260 } ;
280- const objectData =
281- data && typeof data === "object" && ! Array . isArray ( data )
282- ? ( data as Record < string , unknown > )
283- : undefined ;
284261
285262 switch ( provider ) {
286- case "tei" :
287- return (
288- parseItems ( data , [ "score" , "relevance_score" ] ) ??
289- parseItems ( objectData ?. results , [ "score" , "relevance_score" ] ) ??
290- parseItems ( objectData ?. data , [ "score" , "relevance_score" ] )
291- ) ;
292263 case "dashscope" : {
293264 // DashScope: { output: { results: [{ index, relevance_score }] } }
294- const output = objectData ? .output as Record < string , unknown > | undefined ;
265+ const output = data . output as Record < string , unknown > | undefined ;
295266 if ( output ) {
296267 return parseItems ( output . results , [ "relevance_score" , "score" ] ) ;
297268 }
298269 // Fallback: try top-level results in case API format changes
299- return parseItems ( objectData ? .results , [ "relevance_score" , "score" ] ) ;
270+ return parseItems ( data . results , [ "relevance_score" , "score" ] ) ;
300271 }
301272 case "pinecone" : {
302273 // Pinecone: usually { data: [{ index, score, ... }] }
303274 // Also tolerate results[] with score/relevance_score for robustness.
304275 return (
305- parseItems ( objectData ? .data , [ "score" , "relevance_score" ] ) ??
306- parseItems ( objectData ? .results , [ "score" , "relevance_score" ] )
276+ parseItems ( data . data , [ "score" , "relevance_score" ] ) ??
277+ parseItems ( data . results , [ "score" , "relevance_score" ] )
307278 ) ;
308279 }
309280 case "voyage" : {
310281 // Voyage: usually { data: [{ index, relevance_score }] }
311282 // Also tolerate results[] for compatibility across gateways.
312283 return (
313- parseItems ( objectData ? .data , [ "relevance_score" , "score" ] ) ??
314- parseItems ( objectData ? .results , [ "relevance_score" , "score" ] )
284+ parseItems ( data . data , [ "relevance_score" , "score" ] ) ??
285+ parseItems ( data . results , [ "relevance_score" , "score" ] )
315286 ) ;
316287 }
317288 case "siliconflow" :
@@ -320,8 +291,8 @@ function parseRerankResponse(
320291 // Jina / SiliconFlow: usually { results: [{ index, relevance_score }] }
321292 // Also tolerate data[] for compatibility across gateways.
322293 return (
323- parseItems ( objectData ? .results , [ "relevance_score" , "score" ] ) ??
324- parseItems ( objectData ? .data , [ "relevance_score" , "score" ] )
294+ parseItems ( data . results , [ "relevance_score" , "score" ] ) ??
295+ parseItems ( data . data , [ "relevance_score" , "score" ] )
325296 ) ;
326297 }
327298 }
@@ -354,18 +325,39 @@ function cosineSimilarity(a: number[], b: number[]): number {
354325export class MemoryRetriever {
355326 private accessTracker : AccessTracker | null = null ;
356327 private tierManager : TierManager | null = null ;
328+ private tagQueryRegex : RegExp ;
357329
/**
 * Wire up the retriever's collaborators and precompile the tag-query regex.
 *
 * @param store - Memory store that searches are run against.
 * @param embedder - Embedder held for use by the retrieval methods.
 * @param config - Retrieval settings; defaults to DEFAULT_RETRIEVAL_CONFIG.
 * @param decayEngine - Optional decay engine; null when not supplied.
 */
constructor(
  private store: MemoryStore,
  private embedder: Embedder,
  private config: RetrievalConfig = DEFAULT_RETRIEVAL_CONFIG,
  private decayEngine: DecayEngine | null = null,
) {
  // Compile once up front so each query does not rebuild the pattern.
  const { tagPrefixes } = config;
  this.tagQueryRegex = this.buildTagQueryRegex(tagPrefixes);
}
364338
/**
 * Attach the access tracker that this retriever stores in `accessTracker`.
 *
 * @param tracker - Tracker instance to keep on this retriever.
 */
setAccessTracker(tracker: AccessTracker): void {
  this.accessTracker = tracker;
}
368342
/**
 * Compile the regex that recognises tag-style tokens such as "proj:AIF".
 *
 * A token is one of the configured prefixes, a colon, then 1-64 characters
 * starting with an alphanumeric and continuing with alphanumerics, ".", "_"
 * or "-". The regex is global and case-insensitive.
 *
 * @param prefixes - Tag prefixes to recognise. Blank, whitespace-only and
 *   non-string entries are ignored; surrounding whitespace is trimmed.
 * @returns The compiled regex, or a regex that never matches when no usable
 *   prefix remains.
 */
private buildTagQueryRegex(prefixes: string[]): RegExp {
  // A blank prefix would add an empty alternation branch, making the pattern
  // match any ":token" (e.g. "time:30"). Drop such entries before compiling.
  const usable = Array.isArray(prefixes)
    ? prefixes
        .filter((p): p is string => typeof p === "string")
        .map((p) => p.trim())
        .filter((p) => p.length > 0)
    : [];

  if (usable.length === 0) {
    // Empty negative lookahead: fails at every position, so nothing matches.
    return /(?!)/;
  }

  // Escape regex metacharacters so prefixes are matched literally.
  const escaped = Array.from(new Set(usable)).map((p) =>
    p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"),
  );
  const pattern = `\\b(?:${escaped.join("|")}):[A-Za-z0-9][A-Za-z0-9._-]{0,63}\\b`;
  return new RegExp(pattern, "gi");
}
352+
/**
 * Collect the distinct tag tokens (e.g. "proj:AIF") found in a query.
 *
 * Tokens are deduplicated case-insensitively, keeping the first spelling
 * seen, so that "proj:AIF" and "proj:aif" count as one tag. At most five
 * tokens are returned, in order of first appearance.
 *
 * @param query - Raw user query text.
 * @returns Up to five unique tag tokens; empty when the query has none.
 */
private extractTagTokens(query: string): string[] {
  if (typeof query !== "string" || query.length === 0) {
    return [];
  }

  // With a global regex, String.prototype.match returns every match (or null).
  const found = query.match(this.tagQueryRegex) ?? [];

  const seen = new Set<string>();
  const tokens: string[] = [];
  for (const raw of found) {
    const key = raw.toLowerCase();
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    tokens.push(raw);
    if (tokens.length === 5) {
      break;
    }
  }
  return tokens;
}
360+
369361 private filterActiveResults < T extends MemorySearchResult > ( results : T [ ] ) : T [ ] {
370362 return results . filter ( ( result ) =>
371363 isMemoryActiveAt ( parseSmartMetadata ( result . entry . metadata , result . entry ) ) ,
@@ -376,6 +368,28 @@ export class MemoryRetriever {
376368 const { query, limit, scopeFilter, category, source } = context ;
377369 const safeLimit = clampInt ( limit , 1 , 20 ) ;
378370
371+ // Tag-style queries (e.g. "proj:AIF") should behave like exact filters.
372+ // Hybrid vector search tends to introduce semantic false positives for short tokens.
373+ const tags = this . extractTagTokens ( query ) ;
374+ if ( tags . length > 0 && this . config . mode !== "vector" && this . store . hasFtsSupport ) {
375+ const bm25 = await this . bm25OnlyRetrieval (
376+ query ,
377+ safeLimit ,
378+ scopeFilter ,
379+ category ,
380+ tags ,
381+ ) ;
382+ if ( bm25 . length > 0 ) {
383+ // Record access for reinforcement (manual recall only)
384+ if ( this . accessTracker && source === "manual" ) {
385+ this . accessTracker . recordAccess ( bm25 . map ( ( r ) => r . entry . id ) ) ;
386+ }
387+ return bm25 ;
388+ }
389+ // If there are no literal matches, fall back to normal retrieval so
390+ // users can still find related wording.
391+ }
392+
379393 let results : RetrievalResult [ ] ;
380394 if ( this . config . mode === "vector" || ! this . store . hasFtsSupport ) {
381395 results = await this . vectorOnlyRetrieval (
@@ -401,6 +415,64 @@ export class MemoryRetriever {
401415 return results ;
402416 }
403417
/**
 * Run the scoring and filtering stages on a candidate list and cap its size.
 *
 * Stage order: recency boost, importance weight, length normalization,
 * time decay, hard minimum-score cut, optional noise filter, MMR diversity,
 * then truncation to `limit`.
 *
 * @param results - Candidates to post-process.
 * @param limit - Maximum number of results to return.
 * @returns The processed candidates, at most `limit` of them.
 */
private applyPostProcessingPipeline(
  results: RetrievalResult[],
  limit: number,
): RetrievalResult[] {
  // Score adjustments, each stage feeding the next.
  const boosted = this.applyRecencyBoost(results);
  const weighted = this.applyImportanceWeight(boosted);
  const normalized = this.applyLengthNormalization(weighted);
  const decayed = this.applyTimeDecay(normalized);

  // Drop anything below the configured floor, then optionally strip noise.
  let survivors = decayed.filter(
    (item) => item.score >= this.config.hardMinScore,
  );
  if (this.config.filterNoise) {
    survivors = filterNoise(survivors, (item) => item.entry.text);
  }

  return this.applyMMRDiversity(survivors).slice(0, limit);
}
435+
/**
 * Retrieve using BM25 full-text search only, with optional literal filtering.
 *
 * Over-fetches max(limit * 4, 20) hits from the store, keeps those matching
 * `category` (when given) whose text contains every `mustContain` string
 * (case-insensitive), tags each survivor with BM25-only source info, and
 * hands the list to the post-processing pipeline.
 *
 * @param query - Query text passed to the store's BM25 search.
 * @param limit - Maximum number of results to return.
 * @param scopeFilter - Optional scopes forwarded to the store search.
 * @param category - Optional category every result must equal.
 * @param mustContain - Optional substrings that must all occur in the entry text.
 * @returns Post-processed results, at most `limit` of them.
 */
private async bm25OnlyRetrieval(
  query: string,
  limit: number,
  scopeFilter?: string[],
  category?: string,
  mustContain?: string[],
): Promise<RetrievalResult[]> {
  const fetchSize = Math.max(limit * 4, 20);
  const hits = await this.store.bm25Search(query, fetchSize, scopeFilter);

  // Lower-case the required substrings once instead of once per hit.
  const needles = (mustContain || []).map((t) => t.toLowerCase());

  const candidates: RetrievalResult[] = [];
  for (const hit of hits) {
    if (category && hit.entry.category !== category) {
      continue;
    }
    if (needles.length > 0) {
      const haystack = hit.entry.text.toLowerCase();
      if (!needles.every((needle) => haystack.includes(needle))) {
        continue;
      }
    }
    candidates.push({
      ...hit,
      sources: {
        vector: undefined,
        // Rank is the 1-based position among hits that survived filtering.
        bm25: { score: hit.score, rank: candidates.length + 1 },
        fused: { score: hit.score },
      },
    } as RetrievalResult);
  }

  return this.applyPostProcessingPipeline(candidates, limit);
}
475+
404476 private async vectorOnlyRetrieval (
405477 query : string ,
406478 limit : number ,
@@ -687,7 +759,7 @@ export class MemoryRetriever {
687759 clearTimeout ( timeout ) ;
688760
689761 if ( response . ok ) {
690- const data : unknown = await response . json ( ) ;
762+ const data = ( await response . json ( ) ) as Record < string , unknown > ;
691763
692764 // Parse provider-specific response into unified format
693765 const parsed = parseRerankResponse ( provider , data ) ;
@@ -1053,6 +1125,10 @@ export class MemoryRetriever {
/**
 * Merge a partial configuration into the current one.
 *
 * Keys present in `newConfig` overwrite the current values. When
 * `tagPrefixes` is among them, the tag-query regex is rebuilt so that it
 * always reflects the stored prefixes.
 *
 * @param newConfig - Settings to overwrite; omitted keys keep their value.
 */
updateConfig(newConfig: Partial<RetrievalConfig>): void {
  this.config = { ...this.config, ...newConfig };
  // Test for key presence rather than truthiness: an explicit
  // `tagPrefixes: undefined` still overwrites the stored value via the
  // spread above, and the regex must not keep matching the old prefixes.
  if ("tagPrefixes" in newConfig) {
    this.tagQueryRegex = this.buildTagQueryRegex(this.config.tagPrefixes);
  }
}
10571133
10581134 // Get current configuration
0 commit comments