@@ -5,6 +5,11 @@ import * as fs from 'node:fs/promises';
55import * as path from 'node:path';
66import { getLogger } from './logger';
77
/**
 * Language-specific overrides for a single method entry.
 *
 * Stored in `MethodEntry.perLanguage`, keyed by language name. Both fields
 * are optional; search results fall back to the entry's top-level fields
 * when a value is missing.
 */
type PerLanguageData = {
  /** Method name to report for this language instead of the entry's `qualified` name. */
  method?: string;
  /** Usage example for this language; only included in results when present. */
  example?: string;
};
12+
813type MethodEntry = {
914 name: string;
1015 endpoint: string;
@@ -16,6 +21,7 @@ type MethodEntry = {
1621 params?: string[];
1722 response?: string;
1823 markdown?: string;
24+ perLanguage?: Record<string, PerLanguageData>;
1925};
2026
2127type ProseChunk = {
@@ -5564,6 +5570,8 @@ const EMBEDDED_METHODS: MethodEntry[] = [
55645570 },
55655571];
55665572
/**
 * README documents bundled with the module, one entry per language.
 *
 * `LocalDocsSearch.create` indexes each entry as prose under the source tag
 * `readme:<language>`, which lets `search` restrict README hits to the
 * requested language. Empty here; presumably filled in by code generation
 * (NOTE(review): confirm where this list is populated).
 */
const EMBEDDED_READMES: { language: string; content: string }[] = [];
5574+
55675575const INDEX_OPTIONS = {
55685576 fields: [
55695577 'name',
@@ -5578,13 +5586,15 @@ const INDEX_OPTIONS = {
55785586 storeFields: ['kind', '_original'],
55795587 searchOptions: {
55805588 prefix: true,
5581- fuzzy: 0.2 ,
5589+ fuzzy: 0.1 ,
55825590 boost: {
5583- name: 3,
5584- endpoint: 2,
5591+ name: 5,
5592+ stainlessPath: 3,
5593+ endpoint: 3,
5594+ qualified: 3,
55855595 summary: 2,
5586- qualified: 2,
55875596 content: 1,
5597+ description: 1,
55885598 } as Record<string, number>,
55895599 },
55905600};
@@ -5606,30 +5616,45 @@ export class LocalDocsSearch {
/**
 * Builds a ready-to-query search instance.
 *
 * Indexing happens in three steps:
 *   1. the embedded method entries,
 *   2. every embedded README, tagged `readme:<language>` so that prose hits
 *      can later be filtered by language,
 *   3. the contents of `opts.docsDir`, when one is supplied.
 *
 * @param opts.docsDir Optional directory handed to `loadDocsDirectory`.
 * @returns The populated `LocalDocsSearch` instance.
 */
static async create(opts?: { docsDir?: string }): Promise<LocalDocsSearch> {
  const instance = new LocalDocsSearch();
  instance.indexMethods(EMBEDDED_METHODS);
  for (const readme of EMBEDDED_READMES) {
    // The source tag carries the language; `search` parses it back out.
    instance.indexProse(readme.content, `readme:${readme.language}`);
  }
  if (opts?.docsDir) {
    await instance.loadDocsDirectory(opts.docsDir);
  }
  return instance;
}
56145627
5615- // Note: Language is accepted for interface consistency with remote search, but currently has no
5616- // effect since this local search only supports TypeScript docs.
56175628 search(props: {
56185629 query: string;
56195630 language?: string;
56205631 detail?: string;
56215632 maxResults?: number;
56225633 maxLength?: number;
56235634 }): SearchResult {
5624- const { query, detail = 'default', maxResults = 5, maxLength = 100_000 } = props;
5635+ const { query, language = 'typescript', detail = 'default', maxResults = 5, maxLength = 100_000 } = props;
56255636
56265637 const useMarkdown = detail === 'verbose' || detail === 'high';
56275638
5628- // Search both indices and merge results by score
5639+ // Search both indices and merge results by score.
5640+ // Filter prose hits so language-tagged content (READMEs and docs with
5641+ // frontmatter) only matches the requested language.
56295642 const methodHits = this.methodIndex
56305643 .search(query)
56315644 .map((hit) => ({ ...hit, _kind: 'http_method' as const }));
5632- const proseHits = this.proseIndex.search(query).map((hit) => ({ ...hit, _kind: 'prose' as const }));
5645+ const proseHits = this.proseIndex
5646+ .search(query)
5647+ .filter((hit) => {
5648+ const source = ((hit as Record<string, unknown>)['_original'] as ProseChunk | undefined)?.source;
5649+ if (!source) return true;
5650+ // Check for language-tagged sources: "readme:<lang>" or "lang:<lang>:<filename>"
5651+ let taggedLang: string | undefined;
5652+ if (source.startsWith('readme:')) taggedLang = source.slice('readme:'.length);
5653+ else if (source.startsWith('lang:')) taggedLang = source.split(':')[1];
5654+ if (!taggedLang) return true;
5655+ return taggedLang === language || (language === 'javascript' && taggedLang === 'typescript');
5656+ })
5657+ .map((hit) => ({ ...hit, _kind: 'prose' as const }));
56335658 const merged = [...methodHits, ...proseHits].sort((a, b) => b.score - a.score);
56345659 const top = merged.slice(0, maxResults);
56355660
@@ -5642,11 +5667,16 @@ export class LocalDocsSearch {
56425667 if (useMarkdown && m.markdown) {
56435668 fullResults.push(m.markdown);
56445669 } else {
5670+ // Use per-language data when available, falling back to the
5671+ // top-level fields (which are TypeScript-specific in the
5672+ // legacy codepath).
5673+ const langData = m.perLanguage?.[language];
56455674 fullResults.push({
5646- method: m.qualified,
5675+ method: langData?.method ?? m.qualified,
56475676 summary: m.summary,
56485677 description: m.description,
56495678 endpoint: `${m.httpMethod.toUpperCase()} ${m.endpoint}`,
5679+ ...(langData?.example ? { example: langData.example } : {}),
56505680 ...(m.params ? { params: m.params } : {}),
56515681 ...(m.response ? { response: m.response } : {}),
56525682 });
@@ -5717,7 +5747,19 @@ export class LocalDocsSearch {
57175747 this.indexProse(texts.join('\n\n'), file.name);
57185748 }
57195749 } else {
5720- this.indexProse(content, file.name);
5750+ // Parse optional YAML frontmatter for language tagging.
5751+ // Files with a "language" field in frontmatter will only
5752+ // surface in searches for that language.
5753+ //
5754+ // Example:
5755+ // ---
5756+ // language: python
5757+ // ---
5758+ // # Error handling in Python
5759+ // ...
5760+ const frontmatter = parseFrontmatter(content);
5761+ const source = frontmatter.language ? `lang:${frontmatter.language}:${file.name}` : file.name;
5762+ this.indexProse(content, source);
57215763 }
57225764 } catch (err) {
57235765 getLogger().warn({ err, file: file.name }, 'Failed to index docs file');
@@ -5795,3 +5837,12 @@ function extractTexts(data: unknown, depth = 0): string[] {
57955837 }
57965838 return [];
57975839}
5840+
/**
 * Extracts the `language` field from a leading YAML frontmatter block.
 *
 * This is a deliberately small parser, not a YAML implementation. It
 * recognises a block that opens with `---` on the first line and closes with
 * a later `---` line, and inside it a top-level `language: <value>` line.
 *
 * Handled on top of the plain `language: python` case:
 *   - CRLF line endings and a leading UTF-8 byte-order mark;
 *   - single- or double-quoted values (`language: "python"`);
 *   - a trailing YAML comment on an unquoted value (`language: go # note`);
 *   - an empty value (`language:`), which yields no language instead of
 *     picking up the text of the following line.
 *
 * @param markdown Full text of a markdown document.
 * @returns `{ language }` when a non-empty value is found, otherwise `{}`.
 */
function parseFrontmatter(markdown: string): { language?: string } {
  // The opening delimiter must be the very first line of the document.
  const block = /^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---/.exec(markdown);
  if (!block) return {};
  const body = block[1] ?? '';

  // `[ \t]*` rather than `\s*`: the separator must not run across a newline,
  // otherwise an empty value would capture the next frontmatter line.
  const field = /^language:[ \t]*(.*)$/m.exec(body);
  if (!field) return {};
  const raw = (field[1] ?? '').trim();

  const quoted = /^(["'])(.*)\1$/.exec(raw);
  const language = quoted
    ? (quoted[2] ?? '').trim()
    : raw.replace(/(?:^|\s+)#.*$/, '').trim(); // drop a trailing YAML comment

  return language ? { language } : {};
}
0 commit comments