Skip to content

Commit 8f32a3f

Browse files
committed
feat(docs): added vector search
1 parent 26ec125 commit 8f32a3f

File tree

5 files changed

+141
-16
lines changed

5 files changed

+141
-16
lines changed

apps/docs/app/api/search/route.ts

Lines changed: 100 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,100 @@
1-
import { createFromSource } from 'fumadocs-core/search/server'
2-
import { source } from '@/lib/source'
3-
4-
export const revalidate = 3600 // Revalidate every hour
5-
6-
export const { GET } = createFromSource(source, {
7-
localeMap: {
8-
en: { language: 'english' },
9-
es: { language: 'spanish' },
10-
fr: { language: 'french' },
11-
de: { language: 'german' },
12-
// ja and zh are not supported by the stemmer library, so we'll skip language config for them
13-
ja: {},
14-
zh: {},
15-
},
16-
})
1+
import { sql } from 'drizzle-orm'
2+
import { type NextRequest, NextResponse } from 'next/server'
3+
import { db, docsEmbeddings } from '@/lib/db'
4+
import { generateSearchEmbedding } from '@/lib/embeddings'
5+
6+
// Next.js route segment config: run on the Node.js runtime (presumably because
// the Postgres driver needs Node APIs — confirm) and never cache responses,
// since every result depends on the request's query string.
export const runtime = 'nodejs'
export const revalidate = 0

/** Number of results returned when `limit` is missing or not a positive integer. */
const DEFAULT_LIMIT = 10
/** Upper bound on `limit`, so a single request cannot ask for an unbounded result set. */
const MAX_LIMIT = 50
/** Each search fetches this many times `limit` rows so de-duplication still leaves enough results. */
const CANDIDATE_MULTIPLIER = 3
/** Minimum `1 - distance` score a chunk needs to count as a vector match. */
const SIMILARITY_THRESHOLD = 0.6

/** Parse the `limit` query parameter, falling back to the default and clamping to `MAX_LIMIT`. */
function parseLimit(raw: string | null): number {
  const parsed = Number.parseInt(raw || String(DEFAULT_LIMIT), 10)
  if (!Number.isFinite(parsed) || parsed < 1) {
    return DEFAULT_LIMIT
  }
  return Math.min(parsed, MAX_LIMIT)
}

/**
 * Semantic search API endpoint using vector embeddings + hybrid search.
 *
 * Query parameters:
 * - `query` (or `q`): the search text; a blank value yields an empty array.
 * - `limit`: maximum number of results (default 10, clamped to 1..50).
 *
 * The handler embeds the query, runs a vector-similarity search and a
 * full-text keyword search over `docsEmbeddings`, interleaves the two ranked
 * lists (vector hit first, then keyword hit, per rank) while dropping
 * duplicate chunks, and returns the first `limit` entries.
 *
 * NOTE(review): the keyword search always uses the `'english'` text-search
 * configuration; a `locale` query parameter is not taken into account.
 *
 * @param request - Incoming request; only its URL search params are read.
 * @returns JSON array of `{ id, type, url, content, breadcrumbs }` objects.
 *   On any failure the error is logged and an empty array is returned, so the
 *   caller never sees an error response from this endpoint.
 */
export async function GET(request: NextRequest) {
  try {
    const searchParams = request.nextUrl.searchParams
    const query = (searchParams.get('query') || searchParams.get('q') || '').trim()
    const limit = parseLimit(searchParams.get('limit'))

    if (query.length === 0) {
      return NextResponse.json([])
    }

    const queryEmbedding = await generateSearchEmbedding(query)
    const candidateLimit = limit * CANDIDATE_MULTIPLIER

    // Serialize the embedding once and reuse the SQL fragments below instead
    // of rebuilding them for every select / where / orderBy clause.
    const queryVector = JSON.stringify(queryEmbedding)
    const distance = sql`${docsEmbeddings.embedding} <=> ${queryVector}::vector`
    const tsQuery = sql`plainto_tsquery('english', ${query})`
    const keywordRank = sql`ts_rank(${docsEmbeddings.chunkTextTsv}, ${tsQuery})`

    const chunkColumns = {
      chunkId: docsEmbeddings.chunkId,
      chunkText: docsEmbeddings.chunkText,
      sourceDocument: docsEmbeddings.sourceDocument,
      sourceLink: docsEmbeddings.sourceLink,
      headerText: docsEmbeddings.headerText,
      headerLevel: docsEmbeddings.headerLevel,
    }

    // The two searches are independent, so run them concurrently.
    const [vectorResults, keywordResults] = await Promise.all([
      db
        .select({
          ...chunkColumns,
          similarity: sql<number>`1 - (${distance})`,
          searchType: sql<string>`'vector'`,
        })
        .from(docsEmbeddings)
        .where(sql`1 - (${distance}) >= ${SIMILARITY_THRESHOLD}`)
        .orderBy(distance)
        .limit(candidateLimit),
      db
        .select({
          ...chunkColumns,
          similarity: sql<number>`${keywordRank}`,
          searchType: sql<string>`'keyword'`,
        })
        .from(docsEmbeddings)
        .where(sql`${docsEmbeddings.chunkTextTsv} @@ ${tsQuery}`)
        .orderBy(sql`${keywordRank} DESC`)
        .limit(candidateLimit),
    ])

    // Interleave both ranked lists, skipping chunks that were already taken.
    const seenIds = new Set<string>()
    const mergedResults: Array<(typeof vectorResults)[number]> = []
    const rounds = Math.max(vectorResults.length, keywordResults.length)

    for (let i = 0; i < rounds && mergedResults.length < limit; i++) {
      for (const candidate of [vectorResults[i], keywordResults[i]]) {
        if (candidate && !seenIds.has(candidate.chunkId)) {
          seenIds.add(candidate.chunkId)
          mergedResults.push(candidate)
        }
      }
    }

    const searchResults = mergedResults.slice(0, limit).map((result) => {
      // Strip only a trailing `.mdx` extension, not the first `.mdx` found anywhere in the path.
      const documentPath = result.sourceDocument.replace(/\.mdx$/, '')
      const breadcrumbs = documentPath
        .split('/')
        .filter((part) => part.length > 0)
        .map((part) => part.charAt(0).toUpperCase() + part.slice(1))

      return {
        id: result.chunkId,
        type: 'page' as const,
        url: result.sourceLink,
        content: result.headerText || documentPath,
        breadcrumbs,
      }
    })

    return NextResponse.json(searchResults)
  } catch (error) {
    // Deliberate best effort: log the failure and answer with "no results".
    console.error('Semantic search error:', error)

    return NextResponse.json([])
  }
}

apps/docs/lib/db.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import { db } from '@sim/db'
2+
import { docsEmbeddings } from '@sim/db/schema'
3+
4+
export { db, docsEmbeddings }

apps/docs/lib/embeddings.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/**
 * Generate an embedding vector for a search query using the OpenAI embeddings API.
 *
 * Sends the query to `https://api.openai.com/v1/embeddings` with the
 * `text-embedding-3-small` model and returns the embedding of the first entry
 * in the response's `data` array.
 *
 * @param query - Search text to embed.
 * @returns The embedding as an array of numbers.
 * @throws Error if `OPENAI_API_KEY` is not set, if the API answers with a
 *   non-OK status, or if the response body contains no numeric embedding vector.
 */
export async function generateSearchEmbedding(query: string): Promise<number[]> {
  const apiKey = process.env.OPENAI_API_KEY

  if (!apiKey) {
    throw new Error('OPENAI_API_KEY environment variable is required')
  }

  const response = await fetch('https://api.openai.com/v1/embeddings', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      input: query,
      model: 'text-embedding-3-small',
      encoding_format: 'float',
    }),
  })

  if (!response.ok) {
    const errorText = await response.text()
    throw new Error(`OpenAI API failed: ${response.status} ${response.statusText} - ${errorText}`)
  }

  // The body is external input: treat it as unknown and validate its shape
  // instead of dereferencing it blindly.
  const payload: unknown = await response.json()
  const embedding = (payload as { data?: Array<{ embedding?: unknown }> } | null)?.data?.[0]
    ?.embedding

  if (
    !Array.isArray(embedding) ||
    embedding.length === 0 ||
    !embedding.every((value) => typeof value === 'number')
  ) {
    throw new Error('OpenAI API returned an unexpected response: missing embedding vector')
  }

  return embedding
}

apps/docs/package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,19 @@
1111
"type-check": "tsc --noEmit"
1212
},
1313
"dependencies": {
14+
"@sim/db": "workspace:*",
1415
"@tabler/icons-react": "^3.31.0",
1516
"@vercel/og": "^0.6.5",
1617
"class-variance-authority": "^0.7.1",
1718
"clsx": "^2.1.1",
19+
"drizzle-orm": "^0.44.5",
1820
"fumadocs-core": "16.2.3",
1921
"fumadocs-mdx": "14.1.0",
2022
"fumadocs-ui": "16.2.3",
2123
"lucide-react": "^0.511.0",
2224
"next": "16.1.0-canary.21",
2325
"next-themes": "^0.4.6",
26+
"postgres": "^3.4.5",
2427
"react": "19.2.1",
2528
"react-dom": "19.2.1",
2629
"tailwind-merge": "^3.0.2"

bun.lock

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,19 @@
4444
"name": "docs",
4545
"version": "0.0.0",
4646
"dependencies": {
47+
"@sim/db": "workspace:*",
4748
"@tabler/icons-react": "^3.31.0",
4849
"@vercel/og": "^0.6.5",
4950
"class-variance-authority": "^0.7.1",
5051
"clsx": "^2.1.1",
52+
"drizzle-orm": "^0.44.5",
5153
"fumadocs-core": "16.2.3",
5254
"fumadocs-mdx": "14.1.0",
5355
"fumadocs-ui": "16.2.3",
5456
"lucide-react": "^0.511.0",
5557
"next": "16.1.0-canary.21",
5658
"next-themes": "^0.4.6",
59+
"postgres": "^3.4.5",
5760
"react": "19.2.1",
5861
"react-dom": "19.2.1",
5962
"tailwind-merge": "^3.0.2",

0 commit comments

Comments
 (0)