Skip to content

Commit a4ca7f2

Browse files
committed
ack comments
1 parent 8f32a3f commit a4ca7f2

File tree

2 files changed

+58
-22
lines changed

2 files changed

+58
-22
lines changed

apps/docs/app/api/search/route.ts

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ export const runtime = 'nodejs'
77
export const revalidate = 0
88

99
/**
10-
* Semantic search API endpoint using vector embeddings + hybrid search
10+
* Hybrid search API endpoint
11+
* - English: Vector embeddings + keyword search
12+
* - Other languages: Keyword search only
1113
*/
1214
export async function GET(request: NextRequest) {
1315
try {
@@ -20,27 +22,51 @@ export async function GET(request: NextRequest) {
2022
return NextResponse.json([])
2123
}
2224

23-
const queryEmbedding = await generateSearchEmbedding(query)
2425
const candidateLimit = limit * 3
2526
const similarityThreshold = 0.6
2627

27-
const vectorResults = await db
28-
.select({
29-
chunkId: docsEmbeddings.chunkId,
30-
chunkText: docsEmbeddings.chunkText,
31-
sourceDocument: docsEmbeddings.sourceDocument,
32-
sourceLink: docsEmbeddings.sourceLink,
33-
headerText: docsEmbeddings.headerText,
34-
headerLevel: docsEmbeddings.headerLevel,
35-
similarity: sql<number>`1 - (${docsEmbeddings.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector)`,
36-
searchType: sql<string>`'vector'`,
37-
})
38-
.from(docsEmbeddings)
39-
.where(
40-
sql`1 - (${docsEmbeddings.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector) >= ${similarityThreshold}`
41-
)
42-
.orderBy(sql`${docsEmbeddings.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector`)
43-
.limit(candidateLimit)
28+
const localeMap: Record<string, string> = {
29+
en: 'english',
30+
es: 'spanish',
31+
fr: 'french',
32+
de: 'german',
33+
ja: 'simple', // PostgreSQL ships no built-in Japanese text search configuration; fall back to 'simple' (no stemming)
34+
zh: 'simple', // PostgreSQL ships no built-in Chinese text search configuration; fall back to 'simple' (no stemming)
35+
}
36+
const tsConfig = localeMap[locale] || 'simple'
37+
38+
const useVectorSearch = locale === 'en'
39+
let vectorResults: Array<{
40+
chunkId: string
41+
chunkText: string
42+
sourceDocument: string
43+
sourceLink: string
44+
headerText: string
45+
headerLevel: number
46+
similarity: number
47+
searchType: string
48+
}> = []
49+
50+
if (useVectorSearch) {
51+
const queryEmbedding = await generateSearchEmbedding(query)
52+
vectorResults = await db
53+
.select({
54+
chunkId: docsEmbeddings.chunkId,
55+
chunkText: docsEmbeddings.chunkText,
56+
sourceDocument: docsEmbeddings.sourceDocument,
57+
sourceLink: docsEmbeddings.sourceLink,
58+
headerText: docsEmbeddings.headerText,
59+
headerLevel: docsEmbeddings.headerLevel,
60+
similarity: sql<number>`1 - (${docsEmbeddings.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector)`,
61+
searchType: sql<string>`'vector'`,
62+
})
63+
.from(docsEmbeddings)
64+
.where(
65+
sql`1 - (${docsEmbeddings.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector) >= ${similarityThreshold}`
66+
)
67+
.orderBy(sql`${docsEmbeddings.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector`)
68+
.limit(candidateLimit)
69+
}
4470

4571
const keywordResults = await db
4672
.select({
@@ -50,13 +76,13 @@ export async function GET(request: NextRequest) {
5076
sourceLink: docsEmbeddings.sourceLink,
5177
headerText: docsEmbeddings.headerText,
5278
headerLevel: docsEmbeddings.headerLevel,
53-
similarity: sql<number>`ts_rank(${docsEmbeddings.chunkTextTsv}, plainto_tsquery('english', ${query}))`,
79+
similarity: sql<number>`ts_rank(${docsEmbeddings.chunkTextTsv}, plainto_tsquery(${tsConfig}, ${query}))`,
5480
searchType: sql<string>`'keyword'`,
5581
})
5682
.from(docsEmbeddings)
57-
.where(sql`${docsEmbeddings.chunkTextTsv} @@ plainto_tsquery('english', ${query})`)
83+
.where(sql`${docsEmbeddings.chunkTextTsv} @@ plainto_tsquery(${tsConfig}, ${query})`)
5884
.orderBy(
59-
sql`ts_rank(${docsEmbeddings.chunkTextTsv}, plainto_tsquery('english', ${query})) DESC`
85+
sql`ts_rank(${docsEmbeddings.chunkTextTsv}, plainto_tsquery(${tsConfig}, ${query})) DESC`
6086
)
6187
.limit(candidateLimit)
6288

apps/docs/lib/embeddings.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,15 @@ export async function generateSearchEmbedding(query: string): Promise<number[]>
2727
}
2828

2929
const data = await response.json()
30+
31+
// Validate response structure
32+
if (!data?.data || !Array.isArray(data.data) || data.data.length === 0) {
33+
throw new Error('OpenAI API returned invalid response structure: missing or empty data array')
34+
}
35+
36+
if (!data.data[0]?.embedding || !Array.isArray(data.data[0].embedding)) {
37+
throw new Error('OpenAI API returned invalid response structure: missing or invalid embedding')
38+
}
39+
3040
return data.data[0].embedding
3141
}

0 commit comments

Comments
 (0)