Skip to content

Commit 95fbd65

Browse files
committed
refactor(search): reduce topK limit for search queries and adjust related components
- Updated the maximum value of topK from 20 to 15 in the search request schema and related functions.
- Adjusted default topK values in various components to align with the new limit, ensuring consistent behavior across the application.
- Modified tests to reflect the updated topK values, maintaining the integrity of search functionality.
1 parent 79d5042 commit 95fbd65

File tree

6 files changed

+13
-10
lines changed

6 files changed

+13
-10
lines changed

services/search-worker/src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import { type Env } from './env'
1010

1111
const searchRequestSchema = z.object({
1212
query: z.string(),
13-
topK: z.number().int().min(1).max(20).optional(),
13+
topK: z.number().int().min(1).max(15).optional(),
1414
})
1515

1616
const syncRequestSchema = z.object({

services/search-worker/src/search-service.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ test('search drops weak tail below relative confidence of top hit', async () =>
211211

212212
const { results, lowRankingResults, noCloseMatches } = await service.search({
213213
query: 'test',
214-
topK: 20,
214+
topK: 15,
215215
})
216216

217217
expect(noCloseMatches).toBe(false)

services/search-worker/src/search-service.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,14 @@ type SearchDependencies = {
6060
syncArtifacts: (args: { force?: boolean }) => Promise<{ syncedAt: string }>
6161
}
6262

63+
const SEARCH_TOP_K_MAX = 15
64+
const SEARCH_TOP_K_DEFAULT = 8
65+
6366
function clampTopK(topK: number | undefined) {
6467
if (typeof topK === 'number' && Number.isFinite(topK)) {
65-
return Math.max(1, Math.min(20, Math.floor(topK)))
68+
return Math.max(1, Math.min(SEARCH_TOP_K_MAX, Math.floor(topK)))
6669
}
67-
return 15
70+
return SEARCH_TOP_K_DEFAULT
6871
}
6972

7073
function isLexicalOnlySearch(env: Env) {

services/site/app/routes/search.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ export async function loader({ request }: Route.LoaderArgs) {
6666

6767
const searchPayload = searchKCD({
6868
query: normalizedQ,
69-
topK: 20,
69+
topK: 8,
7070
request,
7171
}).catch((e) => {
7272
console.error(e)

services/site/app/utils/search.server.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ function makeSearchCacheKey({
7676

7777
function clampTopK(topK: number | undefined) {
7878
if (typeof topK === 'number' && Number.isFinite(topK)) {
79-
return Math.max(1, Math.min(20, Math.floor(topK)))
79+
return Math.max(1, Math.min(15, Math.floor(topK)))
8080
}
81-
return 15
81+
return 8
8282
}
8383

8484
function normalizeCachedSearchPayload(
@@ -109,7 +109,7 @@ async function enrichResults(results: Array<SearchResult>) {
109109

110110
export async function searchKCD({
111111
query,
112-
topK = 15,
112+
topK = 8,
113113
request,
114114
timings,
115115
}: {

services/site/content/blog/building-semantic-search-on-my-content.mdx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,8 @@ I overfetch chunk-level matches (`safeTopK * 5`, capped at 20), because several
188188

189189
```ts
190190
// `K` means "how many nearest neighbors/results to return".
191-
const safeTopK = Math.max(1, Math.min(20, Math.floor(topK)))
192-
const rawTopK = Math.min(20, safeTopK * 5)
191+
const safeTopK = Math.max(1, Math.min(15, Math.floor(topK)))
192+
const rawTopK = Math.min(15, safeTopK * 5)
193193

194194
const [queryVector] = await getEmbeddings({ texts: [query] })
195195

0 commit comments

Comments
 (0)