|
| 1 | +import {promises as fs} from "node:fs"; |
| 2 | +import path from "node:path"; |
| 3 | + |
| 4 | +import {buildDocUrl} from "../shared/docs-utils"; |
| 5 | + |
/** Absolute location of the search index JSON, resolved against the process working directory. */
const SEARCH_INDEX_PATH = path.join(process.cwd(), "public", "search-index.json");

/** One document record exactly as it is stored in the `entries` array of the index file. */
type RawSearchIndexEntry = {
  path: string;
  title: string;
  hierarchy: string[];
  summary: string;
  content: string;
};

/**
 * Top-level shape the index file is parsed into.
 * Only `entries` is read by the loader in this module.
 */
type SearchIndexFile = {
  generatedAt: string;
  total: number;
  entries: RawSearchIndexEntry[];
};

/** A ranked search hit handed back to callers of the search functions. */
export type SearchMatch = {
  path: string;
  title: string;
  hierarchy: string[];
  summary: string;
  /** A line of the document content that contains a query token, or `null` when no line does. */
  snippet: string | null;
  /** Relevance score; higher sorts first. */
  score: number;
  /** How many query tokens matched at least one field of the entry. */
  matchedTokens: number;
};

/**
 * A raw entry plus lowercase copies of the fields that are searched,
 * computed once at load time so queries do not re-lowercase them.
 */
type CachedEntry = RawSearchIndexEntry & {
  pathLower: string;
  titleLower: string;
  hierarchyLower: string[];
  contentLower: string;
};

/** Memoised load of the index; `null` until the first load starts or after a failed load. */
let searchIndexPromise: Promise<CachedEntry[]> | null = null;
| 40 | + |
/**
 * Reports whether a parsed JSON value carries the fields the loader reads:
 * string `path`, `title` and `content`, and a `hierarchy` array of strings.
 * `summary` is deliberately not checked because the loader never reads it.
 */
function hasSearchableFields(candidate: unknown): boolean {
  if (typeof candidate !== "object" || candidate === null) {
    return false;
  }
  const {path: docPath, title, content, hierarchy} = candidate as Record<string, unknown>;
  return (
    typeof docPath === "string" &&
    typeof title === "string" &&
    typeof content === "string" &&
    Array.isArray(hierarchy) &&
    hierarchy.every(segment => typeof segment === "string")
  );
}

/**
 * Reads the search index file from disk and prepares its entries for querying.
 *
 * Each entry is returned with lowercase copies of its path, title, hierarchy
 * segments and content.
 *
 * @returns Every entry of the index file, in file order, with the lowercase fields added.
 * @throws If the file cannot be read or is not valid JSON (errors from `fs.readFile` /
 *   `JSON.parse` propagate unchanged).
 * @throws {TypeError} If the parsed document has no `entries` array, or an entry lacks
 *   the fields that are lowercased here.
 */
async function loadSearchIndexInternal(): Promise<CachedEntry[]> {
  const raw = await fs.readFile(SEARCH_INDEX_PATH, "utf8");

  // JSON.parse is untyped, so the cast below is only a claim; the checks that
  // follow turn a malformed file into a descriptive error instead of an opaque
  // "cannot read properties of undefined".
  const parsed = JSON.parse(raw) as SearchIndexFile | null;
  const entries = parsed?.entries;
  if (!Array.isArray(entries)) {
    throw new TypeError(`Search index at ${SEARCH_INDEX_PATH} is malformed: expected an "entries" array`);
  }

  return entries.map((entry, index) => {
    if (!hasSearchableFields(entry)) {
      throw new TypeError(
        `Search index at ${SEARCH_INDEX_PATH} is malformed: entry ${index} must have string ` +
          `"path", "title" and "content" fields and a "hierarchy" array of strings`
      );
    }
    return {
      ...entry,
      pathLower: entry.path.toLowerCase(),
      titleLower: entry.title.toLowerCase(),
      hierarchyLower: entry.hierarchy.map(segment => segment.toLowerCase()),
      contentLower: entry.content.toLowerCase(),
    };
  });
}
| 52 | + |
/**
 * Returns the shared search index, starting the load on first use.
 *
 * The load promise is kept at module level so later calls reuse it. If the
 * load rejects, the stored promise is cleared before the error is rethrown,
 * which lets the next call try again.
 *
 * @returns The cached entries once the load has completed.
 * @throws Whatever the underlying load rejects with.
 */
export async function ensureSearchIndex(): Promise<CachedEntry[]> {
  if (searchIndexPromise !== null) {
    return searchIndexPromise;
  }

  const pending = loadSearchIndexInternal().catch(reason => {
    // Forget the failed attempt so a later call can retry.
    searchIndexPromise = null;
    throw reason;
  });
  searchIndexPromise = pending;

  return pending;
}
| 63 | + |
/**
 * Scores one index entry against the query tokens.
 *
 * For every token, points are added for each field that contains it as a
 * substring: title 6, path 4, any hierarchy segment 3, content 1. A token that
 * hits at least one field counts once towards `matchedTokens`. Matching runs
 * against the lowercased fields, so tokens must already be lowercase to match.
 *
 * @param entry The cached entry to score.
 * @param tokens The query tokens.
 * @returns `null` when no token matches anything; otherwise the summed field
 *   points plus the entry's install bias, and the number of matched tokens.
 */
function scoreEntry(entry: CachedEntry, tokens: string[]) {
  let fieldPoints = 0;
  let hits = 0;

  for (const token of tokens) {
    const tokenPoints =
      (entry.titleLower.includes(token) ? 6 : 0) +
      (entry.pathLower.includes(token) ? 4 : 0) +
      (entry.hierarchyLower.some(part => part.includes(token)) ? 3 : 0) +
      (entry.contentLower.includes(token) ? 1 : 0);

    if (tokenPoints > 0) {
      fieldPoints += tokenPoints;
      hits += 1;
    }
  }

  if (hits === 0) {
    return null;
  }

  // The path-based bias is only applied to entries that matched something.
  return {score: fieldPoints + getInstallBias(entry), matchedTokens: hits};
}
| 104 | + |
/**
 * Picks a short excerpt of the entry's content to show with a search hit.
 *
 * The content is scanned line by line; the first line that contains any token
 * (case-insensitively, provided the tokens are lowercase) and is non-empty
 * after trimming is returned. Lines longer than 200 characters are cut to at
 * most 199 characters followed by an ellipsis.
 *
 * @param entry The entry whose `content` is scanned.
 * @param tokens Lowercase query tokens.
 * @returns The trimmed (and possibly shortened) line, or `null` if no line matches.
 */
function buildSnippet(entry: CachedEntry, tokens: string[]): string | null {
  for (const line of entry.content.split(/\r?\n/)) {
    const lineLower = line.toLowerCase();
    if (!tokens.some(token => lineLower.includes(token))) {
      continue;
    }

    const trimmed = line.trim();
    if (trimmed.length === 0) {
      continue;
    }
    if (trimmed.length <= 200) {
      return trimmed;
    }

    // Cutting at a fixed UTF-16 offset can land between the two halves of a
    // surrogate pair (e.g. an emoji) and leave a lone high surrogate in the
    // output; back off by one code unit when that would happen.
    let cut = 199;
    const lastUnit = trimmed.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      cut -= 1;
    }
    return `${trimmed.slice(0, cut)}…`;
  }
  return null;
}
| 119 | + |
/**
 * Searches the documentation index for entries matching a free-text query.
 *
 * The query is lowercased and split on whitespace into tokens. Every index
 * entry is scored against the tokens, and entries that match nothing are
 * dropped. The remainder is ordered by score (descending), then by number of
 * matched tokens (descending), then by path.
 *
 * @param query Free-text query; a blank query yields no results and does not load the index.
 * @param limit Maximum number of matches to return. Values below zero are
 *   treated as zero rather than being passed to `slice`, where a negative end
 *   index would mean "all but the last N".
 * @returns At most `limit` matches, best first.
 * @throws Whatever loading the search index rejects with.
 */
export async function searchIndex(query: string, limit: number): Promise<SearchMatch[]> {
  // split(/\s+/) already removes all whitespace from the pieces, so only the
  // empty strings produced by leading/trailing whitespace need filtering out.
  const tokens = query
    .toLowerCase()
    .split(/\s+/)
    .filter(Boolean);

  if (tokens.length === 0) {
    return [];
  }

  const entries = await ensureSearchIndex();
  const ranked: {entry: (typeof entries)[number]; score: number; matchedTokens: number}[] = [];

  for (const entry of entries) {
    const result = scoreEntry(entry, tokens);
    if (result) {
      ranked.push({entry, score: result.score, matchedTokens: result.matchedTokens});
    }
  }

  ranked.sort(
    (a, b) =>
      b.score - a.score ||
      b.matchedTokens - a.matchedTokens ||
      a.entry.path.localeCompare(b.entry.path)
  );

  // Snippets require splitting and scanning the full content, so they are
  // built only for the entries that are actually returned.
  return ranked.slice(0, Math.max(0, limit)).map(({entry, score, matchedTokens}) => ({
    path: entry.path,
    title: entry.title,
    hierarchy: entry.hierarchy,
    summary: entry.summary,
    snippet: buildSnippet(entry, tokens),
    score,
    matchedTokens,
  }));
}
| 163 | + |
/**
 * Computes a path-based score bonus for an entry.
 *
 * The bonuses are independent and add up:
 * - 40 for a file directly under `platforms/` (e.g. "platforms/react.md");
 * - 50 for a file directly under `platforms/javascript/guides/`;
 * - 20 when any path segment is exactly "install";
 * - 25 when the file name, without a trailing ".md", is one of
 *   "install", "installation", "setup" or "getting-started".
 *
 * @param entry The entry whose lowercase path is inspected.
 * @returns The summed bonus, or 0 when no rule applies.
 */
function getInstallBias(entry: CachedEntry): number {
  const parts = entry.pathLower.split("/");
  const leaf = parts[parts.length - 1] ?? "";
  const stem = leaf.replace(/\.md$/, "");

  const underPlatforms = parts[0] === "platforms";
  const isPlatformRoot = underPlatforms && parts.length === 2;
  const isJsGuideRoot =
    underPlatforms && parts[1] === "javascript" && parts[2] === "guides" && parts.length === 4;
  const hasInstallSegment = parts.includes("install");
  const hasInstallName = ["install", "installation", "setup", "getting-started"].includes(stem);

  return (
    (isPlatformRoot ? 40 : 0) +
    (isJsGuideRoot ? 50 : 0) +
    (hasInstallSegment ? 20 : 0) +
    (hasInstallName ? 25 : 0)
  );
}
| 198 | + |
/**
 * Renders a match as a small Markdown block: a heading line, a link line and,
 * when present, the snippet on a third line.
 *
 * The heading is the hierarchy joined with " > ". When the hierarchy is empty
 * the title is used instead, so the heading is never a bare "# ".
 *
 * @param match The match to render.
 * @returns The lines joined with "\n".
 */
export function formatMatchAsBlock(match: SearchMatch): string {
  const heading = match.hierarchy.length > 0 ? match.hierarchy.join(" > ") : match.title;
  const lines = [`# ${heading}`, `[${match.title}](${match.path})`];

  if (match.snippet) {
    lines.push(match.snippet);
  }

  return lines.join("\n");
}
| 210 | + |
/**
 * Converts a match into the object returned to API consumers: every field of
 * the match is copied over and a `url` is added, obtained by passing the
 * match path to `buildDocUrl`.
 *
 * @param match The match to convert.
 * @returns A new object with the match fields plus `url`.
 */
export function mapMatchToResponse(match: SearchMatch) {
  const {path: docPath, title, hierarchy, summary, snippet, score, matchedTokens} = match;
  const url = buildDocUrl(docPath);

  return {
    path: docPath,
    title,
    hierarchy,
    summary,
    snippet,
    url,
    score,
    matchedTokens,
  };
}
0 commit comments