Skip to content

Commit df87b72

Browse files
fix: recall exact slug match as search candidate
When searching for a skill by its exact slug (e.g. 'ima-all-ai'), the result could be missing from the candidate pool because:

1. The vector search candidate pool (75-256) might not include the skill.
2. matchesExactTokens uses `.some()` (by design, see #27), so unrelated skills with common tokens like 'ai' inflate the exactMatches count.
3. When exactMatches >= limit, lexicalFallbackSkills is skipped entirely, along with its exact slug lookup via the by_slug index.

Fix: after all recall stages (vector + exactMatches + lexicalFallback), reconstruct the candidate slug from the query tokens and check whether it exists in the merged results. If it is missing, perform a single O(1) index lookup via the new `lookupExactSlug` internalQuery to ensure it enters the candidate pool. The recalled entry then participates in normal scoring and ranking — SLUG_EXACT_BOOST (1.4) makes it very likely to rank first, but the fix does not force any particular ranking.

Co-Authored-By: 戴硕 <daishuo@gmail.com>
1 parent 8d5a64b commit df87b72

File tree

3 files changed

+10576
-5
lines changed

3 files changed

+10576
-5
lines changed

convex/search.test.ts

Lines changed: 108 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ vi.mock('./lib/badges', () => ({
1717
Boolean(skill.badges?.highlighted),
1818
}))
1919

20+
// Loosely typed shape of a search result entry for this test file: only
// `skill.slug` and `skill._id` are typed precisely, while `version` and
// `owner` are deliberately left as `unknown`.
type SkillSearchEntry = {
21+
skill: { slug: string; _id: string }
22+
version: unknown
23+
ownerHandle: string | null
24+
owner: unknown
25+
}
26+
2027
type WrappedHandler = {
2128
_handler: (
2229
ctx: unknown,
@@ -186,7 +193,8 @@ describe('search helpers', () => {
186193
const runQuery = vi
187194
.fn()
188195
.mockResolvedValueOnce(vectorEntries) // hydrateResults
189-
.mockResolvedValueOnce(fallbackEntries) // lexicalFallbackSkills
196+
.mockResolvedValueOnce(fallbackEntries) // lexicalFallbackSkills (triggered because exactMatches=2 < limit=5)
197+
.mockResolvedValueOnce(null) // lookupExactSlug (no exact slug "foo")
190198

191199
const result = await searchSkillsHandler(
192200
{
@@ -196,12 +204,12 @@ describe('search helpers', () => {
196204
]),
197205
runQuery,
198206
},
199-
{ query: 'foo', limit: 2 },
207+
{ query: 'foo', limit: 5 },
200208
)
201209

202-
expect(result).toHaveLength(2)
210+
expect(result).toHaveLength(3)
203211
expect(result[0].skill.slug).toBe('foo-b')
204-
expect(new Set(result.map((entry: { skill: { _id: string } }) => entry.skill._id)).size).toBe(2)
212+
expect(new Set(result.map((entry: { skill: { _id: string } }) => entry.skill._id)).size).toBe(3)
205213
})
206214

207215
it('filters suspicious vector results in hydrateResults when requested', async () => {
@@ -515,6 +523,7 @@ describe('search helpers', () => {
515523
owner: null,
516524
}))
517525
}
526+
if ('slug' in args) return null // lookupExactSlug
518527
return [] // lexicalFallbackSkills
519528
})
520529

@@ -533,6 +542,101 @@ describe('search helpers', () => {
533542
expect(overlap).toHaveLength(0)
534543
})
535544

545+
// Regression test for exact-slug recall. The vector search yields 12 unrelated
// candidates (more than the requested limit of 10) and none of them is
// "ima-all-ai"; the test asserts that the handler still issues the slug lookup
// query, that the looked-up skill appears in the results, and that it ranks first.
it('recalls exact slug match even when vector search and lexical fallback both miss it', async () => {
546+
generateEmbeddingMock.mockResolvedValueOnce([0, 1, 2])
547+
548+
// Vector search returns candidates that don't include the exact slug "ima-all-ai".
549+
// All candidates have "ai" in their summary so matchesExactTokens (which uses `some`)
550+
// will pass them, making exactMatches >= limit and skipping lexicalFallback entirely.
// NOTE(review): the fixtures below only set slug/displayName containing "ai";
// whether makePublicSkill also produces a summary containing it is not visible
// here — confirm which field actually satisfies matchesExactTokens.
551+
const vectorEntries = Array.from({ length: 12 }, (_, i) => ({
552+
embeddingId: `skillEmbeddings:e${i}`,
553+
skill: makePublicSkill({
554+
id: `skills:other${i}`,
555+
slug: `other-ai-skill-${i}`,
556+
displayName: `Other AI Skill ${i}`,
557+
}),
558+
version: null,
559+
ownerHandle: 'owner',
560+
owner: { _id: 'users:owner', handle: 'owner', name: 'Owner' },
561+
}))
562+
563+
// The exact slug skill that should be recalled.
564+
const exactSlugSkill = {
565+
skill: makePublicSkill({
566+
id: 'skills:ima',
567+
slug: 'ima-all-ai',
568+
displayName: 'IMA All AI',
569+
downloads: 100,
570+
}),
571+
version: null,
572+
ownerHandle: 'owner',
573+
owner: { _id: 'users:owner', handle: 'owner', name: 'Owner' },
574+
}
575+
576+
let lookupSlugCalled = false
577+
// The mock ignores the query reference and tells the internal queries apart
// purely by which argument key is present.
const runQuery = vi.fn(async (_ref: unknown, args: Record<string, unknown>) => {
578+
if ('embeddingIds' in args) return vectorEntries // hydrateResults
579+
if ('slug' in args) {
580+
lookupSlugCalled = true
581+
return args.slug === 'ima-all-ai' ? exactSlugSkill : null // lookupExactSlug
582+
}
583+
return [] // lexicalFallbackSkills (should not be called)
584+
})
585+
586+
const result = await searchSkillsHandler(
587+
{
588+
// Every vector hit gets the same score (0.5), one hit per fixture entry.
vectorSearch: vi.fn().mockResolvedValue(
589+
vectorEntries.map((e) => ({ _id: e.embeddingId, _score: 0.5 })),
590+
),
591+
runQuery,
592+
},
593+
{ query: 'ima-all-ai', limit: 10 },
594+
)
595+
596+
expect(lookupSlugCalled).toBe(true)
597+
expect(result.some((e: { skill: { slug: string } }) => e.skill.slug === 'ima-all-ai')).toBe(true)
598+
// With SLUG_EXACT_BOOST + NAME_EXACT_BOOST, it should be ranked first.
599+
expect(result[0].skill.slug).toBe('ima-all-ai')
600+
})
601+
602+
// Guards against a redundant query: when the hydrated vector results already
// contain the skill whose slug equals the query ("ima-all-ai"), the handler
// must not call runQuery with a `slug` argument.
it('skips exact slug lookup when slug is already in merged results', async () => {
603+
generateEmbeddingMock.mockResolvedValueOnce([0, 1, 2])
604+
605+
// The single vector candidate is the exact-slug skill itself.
const imaEntry = {
606+
embeddingId: 'skillEmbeddings:ima',
607+
skill: makePublicSkill({
608+
id: 'skills:ima',
609+
slug: 'ima-all-ai',
610+
displayName: 'IMA All AI',
611+
}),
612+
version: null,
613+
ownerHandle: 'owner',
614+
owner: null,
615+
}
616+
617+
let lookupSlugCalled = false
618+
// Queries are distinguished by argument key: `embeddingIds` is the hydration
// query, `slug` is the exact-slug lookup, anything else gets an empty list
// (presumably the lexical fallback — the neighbouring test labels it so).
const runQuery = vi.fn(async (_ref: unknown, args: Record<string, unknown>) => {
619+
if ('embeddingIds' in args) return [imaEntry]
620+
if ('slug' in args) {
621+
lookupSlugCalled = true
622+
return null
623+
}
624+
return []
625+
})
626+
627+
// The return value is intentionally unused; only the side effect on
// lookupSlugCalled is asserted.
await searchSkillsHandler(
628+
{
629+
vectorSearch: vi.fn().mockResolvedValue([
630+
{ _id: 'skillEmbeddings:ima', _score: 0.9 },
631+
]),
632+
runQuery,
633+
},
634+
{ query: 'ima-all-ai', limit: 10 },
635+
)
636+
637+
expect(lookupSlugCalled).toBe(false)
638+
})
639+
536640
it('merges fallback matches without duplicate skill ids', () => {
537641
const primary = [
538642
{

convex/search.ts

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,25 @@ export const searchSkills: ReturnType<typeof action> = action({
200200
nonSuspiciousOnly: args.nonSuspiciousOnly,
201201
})) as SkillSearchEntry[])
202202

203-
const mergedMatches = mergeUniqueBySkillId(exactMatches, fallbackMatches)
203+
let mergedMatches = mergeUniqueBySkillId(exactMatches, fallbackMatches)
204+
205+
// Ensure an exact slug match is always recalled, even when the vector
206+
// candidate pool and lexical fallback both missed it. The candidate
207+
// slug is reconstructed from query tokens (e.g. "ima all ai" → "ima-all-ai").
208+
const candidateSlug = queryTokens.join('-')
209+
if (
210+
/^[a-z0-9][a-z0-9-]*$/.test(candidateSlug) &&
211+
!mergedMatches.some((e) => e.skill.slug === candidateSlug)
212+
) {
213+
const exactSlugEntry = (await ctx.runQuery(internal.search.lookupExactSlug, {
214+
slug: candidateSlug,
215+
highlightedOnly: args.highlightedOnly,
216+
nonSuspiciousOnly: args.nonSuspiciousOnly,
217+
})) as SkillSearchEntry | null
218+
if (exactSlugEntry) {
219+
mergedMatches = [exactSlugEntry, ...mergedMatches]
220+
}
221+
}
204222

205223
return mergedMatches
206224
.map((entry) => {
@@ -274,6 +292,36 @@ export const hydrateResults = internalQuery({
274292
},
275293
})
276294

295+
// Internal query that fetches a single skill by exact slug through the
// `by_slug` index and shapes it as a SkillSearchEntry.
//
// Args:
//   slug              - slug to look up; trimmed and lower-cased before use.
//   highlightedOnly   - when truthy, skills that are not highlighted yield null.
//   nonSuspiciousOnly - when truthy, skills flagged as suspicious yield null.
//
// Returns null when the normalized slug fails the slug pattern, when no skill
// matches, when the skill is soft-deleted, when a requested filter excludes it,
// when toPublicSkill returns a falsy value, or when no owner can be resolved.
// Otherwise returns the public skill with owner info and `version` set to null.
export const lookupExactSlug = internalQuery({
296+
args: {
297+
slug: v.string(),
298+
highlightedOnly: v.optional(v.boolean()),
299+
nonSuspiciousOnly: v.optional(v.boolean()),
300+
},
301+
handler: async (ctx, args): Promise<SkillSearchEntry | null> => {
302+
const slug = args.slug.trim().toLowerCase()
303+
// Accept only strings made of lowercase letters, digits and hyphens that
// start with a letter or digit; this also rejects the empty string, so no
// database read happens for input that cannot be a slug of this form.
if (!/^[a-z0-9][a-z0-9-]*$/.test(slug)) return null
304+
// NOTE(review): Convex `.unique()` throws if more than one document matches;
// this presumably relies on slugs being unique in `skills` — confirm.
const skill = await ctx.db
305+
.query('skills')
306+
.withIndex('by_slug', (q) => q.eq('slug', slug))
307+
.unique()
308+
if (!skill || skill.softDeletedAt) return null
309+
if (args.nonSuspiciousOnly && isSkillSuspicious(skill)) return null
310+
if (args.highlightedOnly && !isSkillHighlighted(skill)) return null
311+
const publicSkill = toPublicSkill(skill)
312+
if (!publicSkill) return null
313+
const getOwnerInfo = makeOwnerInfoGetter(ctx)
314+
const resolved = await getOwnerInfo(skill.ownerUserId)
315+
// Entries without a resolvable owner are dropped rather than returned
// with a missing owner.
if (!resolved.owner) return null
316+
return {
317+
skill: publicSkill,
318+
// No skill version is loaded by this lookup; the field is always null.
version: null as Doc<'skillVersions'> | null,
319+
ownerHandle: resolved.ownerHandle,
320+
owner: resolved.owner,
321+
}
322+
},
323+
})
324+
277325
export const lexicalFallbackSkills = internalQuery({
278326
args: {
279327
query: v.string(),

0 commit comments

Comments
 (0)