kentcdodds
diff --git a/‎docs/agents/project-context.md‎
Lines changed: 2 additions & 0 deletions b/‎docs/agents/project-context.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/agents/search-relevance.md‎
Lines changed: 13 additions & 0 deletions b/‎docs/agents/search-relevance.md‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎services/search-shared/src/search-shared.ts‎
Lines changed: 4 additions & 0 deletions b/‎services/search-shared/src/search-shared.ts‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎services/search-worker/src/index.test.ts‎
Lines changed: 7 additions & 1 deletion b/‎services/search-worker/src/index.test.ts‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎services/search-worker/src/index.ts‎
Lines changed: 8 additions & 2 deletions b/‎services/search-worker/src/index.ts‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎services/search-worker/src/search-results.test.ts‎
Lines changed: 91 additions & 0 deletions b/‎services/search-worker/src/search-results.test.ts‎
Lines changed: 91 additions & 0 deletions
diff --git a/‎services/search-worker/src/search-results.ts‎
Lines changed: 84 additions & 7 deletions b/‎services/search-worker/src/search-results.ts‎
Lines changed: 84 additions & 7 deletions
diff --git a/‎services/search-worker/src/search-service.test.ts‎
Lines changed: 68 additions & 3 deletions b/‎services/search-worker/src/search-service.test.ts‎
Lines changed: 68 additions & 3 deletions
@@ -36,6 +36,8 @@ reference:
   and run worker/package scripts with `npm run <script> --workspace <name>`.
 - The main site lives in `services/site`. Root `npm run dev`, `npm run build`,
   `npm run test`, and similar commands forward to that workspace.
+- Search worker relevance thresholds (minimum best score, relative ratio,
+  `noCloseMatches`): see [`search-relevance.md`](./search-relevance.md).
 - Playwright already launches Chromium with fake media permissions/device input
   plus `tests/sample.wav`. If an e2e needs recorded audio, drive the real
   recorder UI and keep the fake-audio setup in Playwright/helpers rather than
 
@@ -0,0 +1,13 @@
+# Search relevance tuning
+
+Worker: [`services/search-worker/src/search-results.ts`](../../services/search-worker/src/search-results.ts).
+
+| Constant | Role |
+|----------|------|
+| `SEARCH_CONFIDENCE_MIN_BEST_SCORE` (`0.013`) | If the best fused RRF score is below this, return empty `results` with `noCloseMatches: true`; the candidates are still returned (capped) as `lowRankingResults`. |
+| `SEARCH_CONFIDENCE_RELATIVE_RATIO` (`0.5`) | Keep only hits with `score >= maxScore * ratio` (then cap at `topK`). |
+| `SEARCH_LOW_RANKING_MAX` (`35`) | Maximum number of extra hits returned as `lowRankingResults` for the search page “Show low ranking results” control. |
+
+Fused scores are on a small scale: roughly 0.016 for a document ranked first in only one list (semantic or lexical) and roughly 0.035 for a document ranked highly in both. Adjust the constants in staging if results are over- or under-filtered.
+
+Site cache key prefix: `search:kcd:v3:` (payload includes `noCloseMatches` and `lowRankingResults`).
@@ -33,6 +33,10 @@ export type SearchWorkerSearchResponse =
 	| {
 			ok: true
 			results: Array<SearchResult>
+			/** Candidates below primary confidence / beyond topK; for optional UI. */
+			lowRankingResults?: Array<SearchResult>
+			/** True when candidates existed but none met confidence thresholds. */
+			noCloseMatches?: boolean
 	  }
 	| {
 			ok: false
 
@@ -21,7 +21,11 @@ function createEnv(): Env {
 function createService() {
 	return {
 		health: vi.fn(async () => ({ syncedAt: '2026-03-17T00:00:00.000Z' })),
-		search: vi.fn(async () => [{ id: 'blog:hello-world', score: 0.9 }]),
+		search: vi.fn(async () => ({
+			results: [{ id: 'blog:hello-world', score: 0.9 }],
+			lowRankingResults: [],
+			noCloseMatches: false,
+		})),
 		sync: vi.fn(async () => ({ syncedAt: '2026-03-17T00:00:00.000Z' })),
 	}
 }
@@ -91,6 +95,8 @@ test('search endpoint returns fused results', async () => {
 	expect(await response.json()).toEqual({
 		ok: true,
 		results: [{ id: 'blog:hello-world', score: 0.9 }],
+		lowRankingResults: [],
+		noCloseMatches: false,
 	})
 })
 
 
@@ -94,8 +94,14 @@ export async function handleRequest({
 			if (request.method !== 'POST') return methodNotAllowed()
 			const body = await parseJsonBody(request)
 			const parsed = searchRequestSchema.parse(body)
-			const results = await service.search(parsed)
-			return json({ ok: true, results })
+			const { results, lowRankingResults, noCloseMatches } =
+				await service.search(parsed)
+			return json({
+				ok: true,
+				results,
+				lowRankingResults,
+				noCloseMatches,
+			})
 		}
 
 		if (url.pathname === '/internal/sync') {
 
@@ -0,0 +1,91 @@
+import type { SearchResult } from '@kcd-internal/search-shared'
+import { expect, test } from 'vitest'
+import {
+	filterFusedResultsByConfidence,
+	fuseRankedResultsAll,
+	SEARCH_CONFIDENCE_MIN_BEST_SCORE,
+	SEARCH_CONFIDENCE_RELATIVE_RATIO,
+	type RankedDocResult,
+} from './search-results'
+
+function doc(id: string): SearchResult {
+	return { id, score: 0 }
+}
+
+test('filterFusedResultsByConfidence: empty input', () => {
+	expect(
+		filterFusedResultsByConfidence({ fusedSorted: [], topK: 10 }),
+	).toEqual({ results: [], lowRankingResults: [], noCloseMatches: false })
+})
+
+test('filterFusedResultsByConfidence: maxScore below minBestScore', () => {
+	const fused = [{ ...doc('a'), score: 0.01 }]
+	expect(
+		filterFusedResultsByConfidence({
+			fusedSorted: fused,
+			topK: 10,
+			minBestScore: SEARCH_CONFIDENCE_MIN_BEST_SCORE,
+		}),
+	).toEqual({
+		results: [],
+		lowRankingResults: [{ ...doc('a'), score: 0.01 }],
+		noCloseMatches: true,
+	})
+})
+
+test('filterFusedResultsByConfidence: drops tail below relative ratio', () => {
+	const fused = [
+		{ ...doc('strong'), score: 0.04 },
+		{ ...doc('weak'), score: 0.015 },
+	]
+	const threshold = 0.04 * 0.45
+	expect(0.015 < threshold).toBe(true)
+	expect(
+		filterFusedResultsByConfidence({
+			fusedSorted: fused,
+			topK: 10,
+			minBestScore: 0.01,
+			relativeRatio: 0.45,
+		}),
+	).toEqual({
+		results: [{ ...doc('strong'), score: 0.04 }],
+		lowRankingResults: [{ ...doc('weak'), score: 0.015 }],
+		noCloseMatches: false,
+	})
+})
+
+test('filterFusedResultsByConfidence: top hit always passes relative threshold when above minBest', () => {
+	const fused = [
+		{ ...doc('a'), score: 0.02 },
+		{ ...doc('b'), score: 0.019 },
+	]
+	expect(
+		filterFusedResultsByConfidence({
+			fusedSorted: fused,
+			topK: 10,
+			minBestScore: 0.015,
+			relativeRatio: 0.99,
+		}),
+	).toEqual({
+		results: [{ ...doc('a'), score: 0.02 }],
+		lowRankingResults: [{ ...doc('b'), score: 0.019 }],
+		noCloseMatches: false,
+	})
+})
+
+test('fuseRankedResultsAll ranks dual-signal doc above single-source', () => {
+	const fused = fuseRankedResultsAll({
+		semanticResults: [
+			{ rank: 0, result: { id: 'a', title: 'A', score: 0 } },
+			{ rank: 1, result: { id: 'b', title: 'B', score: 0 } },
+		],
+		lexicalResults: [
+			{ rank: 0, result: { id: 'a', title: 'A', score: 0 } },
+			{ rank: 1, result: { id: 'noise', title: 'N', score: 0 } },
+		],
+	})
+	expect(fused[0]?.id).toBe('a')
+	expect(filterFusedResultsByConfidence({ fusedSorted: fused, topK: 5 }).noCloseMatches).toBe(
+		false,
+	)
+})
@@ -164,6 +164,37 @@ export function fuseRankedResults({
 	lexicalResults: Array<RankedDocResult>
 	topK: number
 }) {
+	return fuseRankedResultsAll({ semanticResults, lexicalResults }).slice(
+		0,
+		topK,
+	)
+}
+
+/** Minimum best fused score needed to return primary results. RRF-style scores are small: typical single-list #1 ~0.016–0.019, strong dual ~0.035. */
+export const SEARCH_CONFIDENCE_MIN_BEST_SCORE = 0.013
+/** Keep hits within this fraction of the top fused score (drops weak tail). */
+export const SEARCH_CONFIDENCE_RELATIVE_RATIO = 0.5
+/** Max extra hits returned for optional “show low ranking” UI. */
+export const SEARCH_LOW_RANKING_MAX = 35
+
+function fuseMapToSortedResults(
+	fused: Map<string, { score: number; result: SearchResult }>,
+): Array<SearchResult> {
+	return [...fused.values()]
+		.sort((left, right) => right.score - left.score)
+		.map((entry) => ({
+			...entry.result,
+			score: entry.score,
+		}))
+}
+
+export function fuseRankedResultsAll({
+	semanticResults,
+	lexicalResults,
+}: {
+	semanticResults: Array<RankedDocResult>
+	lexicalResults: Array<RankedDocResult>
+}): Array<SearchResult> {
 	const rankConstant = 60
 	const weights = {
 		semantic: 1,
@@ -224,13 +255,59 @@ export function fuseRankedResults({
 	apply('semantic', semanticResults)
 	apply('lexical', lexicalResults)
 
-	return [...fused.values()]
-		.sort((left, right) => right.score - left.score)
-		.slice(0, topK)
-		.map((entry) => ({
-			...entry.result,
-			score: entry.score,
-		}))
+	return fuseMapToSortedResults(fused)
+}
+
+export function filterFusedResultsByConfidence({
+	fusedSorted,
+	topK,
+	minBestScore = SEARCH_CONFIDENCE_MIN_BEST_SCORE,
+	relativeRatio = SEARCH_CONFIDENCE_RELATIVE_RATIO,
+}: {
+	fusedSorted: Array<SearchResult>
+	topK: number
+	minBestScore?: number
+	relativeRatio?: number
+}): {
+	results: Array<SearchResult>
+	lowRankingResults: Array<SearchResult>
+	noCloseMatches: boolean
+} {
+	if (fusedSorted.length === 0) {
+		return { results: [], lowRankingResults: [], noCloseMatches: false }
+	}
+
+	const capLow = (items: Array<SearchResult>) =>
+		items.slice(0, SEARCH_LOW_RANKING_MAX)
+
+	const maxScore = fusedSorted[0]?.score ?? 0
+	if (!Number.isFinite(maxScore) || maxScore < minBestScore) {
+		return {
+			results: [],
+			lowRankingResults: capLow(fusedSorted),
+			noCloseMatches: true,
+		}
+	}
+
+	const threshold = maxScore * relativeRatio
+	const filtered = fusedSorted.filter((r) => r.score >= threshold)
+	if (filtered.length === 0) {
+		return {
+			results: [],
+			lowRankingResults: capLow(fusedSorted),
+			noCloseMatches: true,
+		}
+	}
+
+	const primary = filtered.slice(0, topK)
+	const primaryIds = new Set(primary.map((r) => r.id))
+	const lowRanking = fusedSorted.filter((r) => !primaryIds.has(r.id))
+
+	return {
+		results: primary,
+		lowRankingResults: capLow(lowRanking),
+		noCloseMatches: false,
+	}
 }
 
 export function normalizeYoutubeTimestampSeconds({
 
@@ -85,12 +85,13 @@ test('search fuses lexical matches with semantic matches', async () => {
 	}
 	const service = createSearchService(createEnv(), dependencies)
 
-	const results = await service.search({
+	const { results, noCloseMatches } = await service.search({
 		query: 'How do I use useFetcher in React?',
 		topK: 5,
 	})
 
 	expect(dependencies.ensureSchema).toHaveBeenCalled()
+	expect(noCloseMatches).toBe(false)
 	expect(results).toHaveLength(2)
 	expect(results[0]?.id).toBe('blog:react-hooks-pitfalls')
 	expect(results[1]?.id).toBe('blog:some-other-post')
@@ -119,7 +120,7 @@ test('search preserves YouTube timestamps from lexical matches', async () => {
 	}
 	const service = createSearchService(createEnv(), dependencies)
 
-	const results = await service.search({
+	const { results } = await service.search({
 		query: 'shallow rendering',
 		topK: 5,
 	})
@@ -152,14 +153,78 @@ test('search with SEARCH_LEXICAL_ONLY skips embedding and Vectorize', async () =
 	}
 	const service = createSearchService(env, dependencies)
 
-	const results = await service.search({ query: 'test', topK: 5 })
+	const { results } = await service.search({ query: 'test', topK: 5 })
 
 	expect(getEmbedding).not.toHaveBeenCalled()
 	expect(queryVectorize).not.toHaveBeenCalled()
 	expect(results).toHaveLength(1)
 	expect(results[0]?.id).toBe('blog:only-lexical')
 })
 
+test('search drops weak tail below relative confidence of top hit', async () => {
+	const dependencies = {
+		ensureSchema: vi.fn(async () => undefined),
+		queryLexicalMatches: vi.fn(async () => [
+			{
+				id: 'blog:strong:chunk:0',
+				type: 'blog',
+				slug: 'strong',
+				title: 'Strong',
+				url: '/blog/strong',
+				snippet: 's',
+			},
+		]),
+		getEmbedding: vi.fn(async () => [0.1, 0.2, 0.3]),
+		queryVectorize: vi.fn(async () => {
+			const deep = Array.from({ length: 55 }, (_, i) => ({
+				id: `blog:deep-${i}:chunk:0`,
+				score: 0.9 - i * 0.01,
+				metadata: {
+					type: 'blog',
+					slug: `deep-${i}`,
+					title: `Deep ${i}`,
+					url: `/blog/deep-${i}`,
+					snippet: 'd',
+				},
+			}))
+			return [
+				{
+					id: 'blog:strong:chunk:0',
+					score: 0.99,
+					metadata: {
+						type: 'blog',
+						slug: 'strong',
+						title: 'Strong',
+						url: '/blog/strong',
+						snippet: 's',
+					},
+				},
+				...deep,
+			]
+		}),
+		syncArtifacts: vi.fn(async () => ({
+			syncedAt: '2026-03-17T00:00:00.000Z',
+		})),
+		getSyncedAt: vi.fn(async () => '2026-03-17T00:00:00.000Z'),
+	}
+	const service = createSearchService(createEnv(), dependencies)
+
+	const { results, lowRankingResults, noCloseMatches } = await service.search({
+		query: 'test',
+		topK: 20,
+	})
+
+	expect(noCloseMatches).toBe(false)
+	expect(results.some((r) => r.id === 'blog:strong')).toBe(true)
+	expect(
+		results.filter((r) => r.id.startsWith('blog:deep-')).length,
+	).toBeLessThanOrEqual(2)
+	expect(results[0]?.id).toBe('blog:strong')
+	expect(
+		lowRankingResults.some((r) => r.id.startsWith('blog:deep-')),
+	).toBe(true)
+})
+
 test('search rejects overly long queries', async () => {
 	const dependencies = {
 		ensureSchema: vi.fn(async () => undefined),