Skip to content

Commit 686f1ce

Browse files
committed
fix: extractTagTokens regex - add g flag and use non-capturing group
- Add the 'g' flag so that every tag in the query is matched (fixes multi-tag queries like 'proj:AIF env:prod').
- Change the capturing group (pattern) to a non-capturing group (?:pattern) to avoid returning bare prefixes.
- Add test cases for multi-tag extraction and BM25 false-positive filtering.

Addresses reviewer feedback in PR #212.
1 parent 471376d commit 686f1ce

File tree

2 files changed

+86
-1
lines changed

2 files changed

+86
-1
lines changed

src/retriever.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ export class MemoryRetriever {
421421
if (!this.config.tagPrefixes?.length) return [];
422422

423423
const pattern = this.config.tagPrefixes.join("|");
424-
const regex = new RegExp(`(${pattern}):[\\w-]+`, "i");
424+
const regex = new RegExp(`(?:${pattern}):[\\w-]+`, "gi");
425425
const matches = query.match(regex);
426426
return matches || [];
427427
}

test/retriever-tag-query.test.mjs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,5 +201,90 @@ describe("MemoryRetriever - Tag Query", () => {
201201
assert.equal(results2.length, 1);
202202
assert.equal(results2[0].entry.id, "2");
203203
});
204+
205+
it("should extract multiple tags from query", async () => {
206+
const entries = [
207+
{
208+
id: "1",
209+
text: "proj:AIF env:prod deployment notes",
210+
scope: "global",
211+
category: "fact",
212+
timestamp: Date.now(),
213+
vector: new Array(384).fill(0.1),
214+
},
215+
{
216+
id: "2",
217+
text: "proj:AIF env:dev testing",
218+
scope: "global",
219+
category: "fact",
220+
timestamp: Date.now(),
221+
vector: new Array(384).fill(0.1),
222+
},
223+
{
224+
id: "3",
225+
text: "proj:AIF only",
226+
scope: "global",
227+
category: "fact",
228+
timestamp: Date.now(),
229+
vector: new Array(384).fill(0.1),
230+
},
231+
];
232+
233+
const store = createMockStore(entries);
234+
const embedder = createMockEmbedder();
235+
const retriever = new MemoryRetriever(store, embedder, {
236+
...DEFAULT_RETRIEVAL_CONFIG,
237+
tagPrefixes: ["proj", "env"],
238+
});
239+
240+
const results = await retriever.retrieve({
241+
query: "proj:AIF env:prod",
242+
limit: 5,
243+
});
244+
245+
// Should only return entry that contains BOTH tags
246+
assert.equal(results.length, 1);
247+
assert.equal(results[0].entry.id, "1");
248+
assert.ok(results[0].entry.text.includes("proj:AIF"));
249+
assert.ok(results[0].entry.text.includes("env:prod"));
250+
});
251+
252+
it("should filter BM25 false positives with mustContain", async () => {
253+
const entries = [
254+
{
255+
id: "1",
256+
text: "proj:AIF exact tag match",
257+
scope: "global",
258+
category: "fact",
259+
timestamp: Date.now(),
260+
vector: new Array(384).fill(0.1),
261+
},
262+
{
263+
id: "2",
264+
text: "This proj is about AIF but not tagged",
265+
scope: "global",
266+
category: "fact",
267+
timestamp: Date.now(),
268+
vector: new Array(384).fill(0.1),
269+
},
270+
];
271+
272+
const store = createMockStore(entries);
273+
const embedder = createMockEmbedder();
274+
const retriever = new MemoryRetriever(store, embedder, {
275+
...DEFAULT_RETRIEVAL_CONFIG,
276+
tagPrefixes: ["proj"],
277+
});
278+
279+
const results = await retriever.retrieve({
280+
query: "proj:AIF",
281+
limit: 5,
282+
});
283+
284+
// Should only return entry with exact tag, not the one with separate words
285+
assert.equal(results.length, 1);
286+
assert.equal(results[0].entry.id, "1");
287+
assert.ok(!results.some((r) => r.entry.id === "2"));
288+
});
204289
});
205290
});

0 commit comments

Comments
 (0)