Skip to content

Commit 477d18b

Browse files
chore(internal): improve local docs search for MCP servers
1 parent 98b896d commit 477d18b

File tree

2 files changed

+64
-22
lines changed

2 files changed

+64
-22
lines changed

packages/mcp-server/src/docs-search-tool.ts

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,29 +50,20 @@ export function setLocalSearch(search: LocalDocsSearch): void {
5050
_localSearch = search;
5151
}
5252

53-
const SUPPORTED_LANGUAGES = new Set(['http', 'typescript', 'javascript']);
54-
5553
async function searchLocal(args: Record<string, unknown>): Promise<unknown> {
5654
if (!_localSearch) {
5755
throw new Error('Local search not initialized');
5856
}
5957

6058
const query = (args['query'] as string) ?? '';
6159
const language = (args['language'] as string) ?? 'typescript';
62-
const detail = (args['detail'] as string) ?? 'verbose';
63-
64-
if (!SUPPORTED_LANGUAGES.has(language)) {
65-
throw new Error(
66-
`Local docs search only supports HTTP, TypeScript, and JavaScript. Got language="${language}". ` +
67-
`Use --docs-search-mode stainless-api for other languages, or set language to "http", "typescript", or "javascript".`,
68-
);
69-
}
60+
const detail = (args['detail'] as string) ?? 'default';
7061

7162
return _localSearch.search({
7263
query,
7364
language,
7465
detail,
75-
maxResults: 10,
66+
maxResults: 5,
7667
}).results;
7768
}
7869

packages/mcp-server/src/local-docs-search.ts

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ import * as fs from 'node:fs/promises';
55
import * as path from 'node:path';
66
import { getLogger } from './logger';
77

8+
type PerLanguageData = {
9+
method?: string;
10+
example?: string;
11+
};
12+
813
type MethodEntry = {
914
name: string;
1015
endpoint: string;
@@ -16,6 +21,7 @@ type MethodEntry = {
1621
params?: string[];
1722
response?: string;
1823
markdown?: string;
24+
perLanguage?: Record<string, PerLanguageData>;
1925
};
2026

2127
type ProseChunk = {
@@ -5564,6 +5570,8 @@ const EMBEDDED_METHODS: MethodEntry[] = [
55645570
},
55655571
];
55665572

5573+
const EMBEDDED_READMES: { language: string; content: string }[] = [];
5574+
55675575
const INDEX_OPTIONS = {
55685576
fields: [
55695577
'name',
@@ -5578,13 +5586,15 @@ const INDEX_OPTIONS = {
55785586
storeFields: ['kind', '_original'],
55795587
searchOptions: {
55805588
prefix: true,
5581-
fuzzy: 0.2,
5589+
fuzzy: 0.1,
55825590
boost: {
5583-
name: 3,
5584-
endpoint: 2,
5591+
name: 5,
5592+
stainlessPath: 3,
5593+
endpoint: 3,
5594+
qualified: 3,
55855595
summary: 2,
5586-
qualified: 2,
55875596
content: 1,
5597+
description: 1,
55885598
} as Record<string, number>,
55895599
},
55905600
};
@@ -5606,30 +5616,45 @@ export class LocalDocsSearch {
56065616
static async create(opts?: { docsDir?: string }): Promise<LocalDocsSearch> {
56075617
const instance = new LocalDocsSearch();
56085618
instance.indexMethods(EMBEDDED_METHODS);
5619+
for (const readme of EMBEDDED_READMES) {
5620+
instance.indexProse(readme.content, `readme:${readme.language}`);
5621+
}
56095622
if (opts?.docsDir) {
56105623
await instance.loadDocsDirectory(opts.docsDir);
56115624
}
56125625
return instance;
56135626
}
56145627

5615-
// Note: Language is accepted for interface consistency with remote search, but currently has no
5616-
// effect since this local search only supports TypeScript docs.
56175628
search(props: {
56185629
query: string;
56195630
language?: string;
56205631
detail?: string;
56215632
maxResults?: number;
56225633
maxLength?: number;
56235634
}): SearchResult {
5624-
const { query, detail = 'default', maxResults = 5, maxLength = 100_000 } = props;
5635+
const { query, language = 'typescript', detail = 'default', maxResults = 5, maxLength = 100_000 } = props;
56255636

56265637
const useMarkdown = detail === 'verbose' || detail === 'high';
56275638

5628-
// Search both indices and merge results by score
5639+
// Search both indices and merge results by score.
5640+
// Filter prose hits so language-tagged content (READMEs and docs with
5641+
// frontmatter) only matches the requested language.
56295642
const methodHits = this.methodIndex
56305643
.search(query)
56315644
.map((hit) => ({ ...hit, _kind: 'http_method' as const }));
5632-
const proseHits = this.proseIndex.search(query).map((hit) => ({ ...hit, _kind: 'prose' as const }));
5645+
const proseHits = this.proseIndex
5646+
.search(query)
5647+
.filter((hit) => {
5648+
const source = ((hit as Record<string, unknown>)['_original'] as ProseChunk | undefined)?.source;
5649+
if (!source) return true;
5650+
// Check for language-tagged sources: "readme:<lang>" or "lang:<lang>:<filename>"
5651+
let taggedLang: string | undefined;
5652+
if (source.startsWith('readme:')) taggedLang = source.slice('readme:'.length);
5653+
else if (source.startsWith('lang:')) taggedLang = source.split(':')[1];
5654+
if (!taggedLang) return true;
5655+
return taggedLang === language || (language === 'javascript' && taggedLang === 'typescript');
5656+
})
5657+
.map((hit) => ({ ...hit, _kind: 'prose' as const }));
56335658
const merged = [...methodHits, ...proseHits].sort((a, b) => b.score - a.score);
56345659
const top = merged.slice(0, maxResults);
56355660

@@ -5642,11 +5667,16 @@ export class LocalDocsSearch {
56425667
if (useMarkdown && m.markdown) {
56435668
fullResults.push(m.markdown);
56445669
} else {
5670+
// Use per-language data when available, falling back to the
5671+
// top-level fields (which are TypeScript-specific in the
5672+
// legacy codepath).
5673+
const langData = m.perLanguage?.[language];
56455674
fullResults.push({
5646-
method: m.qualified,
5675+
method: langData?.method ?? m.qualified,
56475676
summary: m.summary,
56485677
description: m.description,
56495678
endpoint: `${m.httpMethod.toUpperCase()} ${m.endpoint}`,
5679+
...(langData?.example ? { example: langData.example } : {}),
56505680
...(m.params ? { params: m.params } : {}),
56515681
...(m.response ? { response: m.response } : {}),
56525682
});
@@ -5717,7 +5747,19 @@ export class LocalDocsSearch {
57175747
this.indexProse(texts.join('\n\n'), file.name);
57185748
}
57195749
} else {
5720-
this.indexProse(content, file.name);
5750+
// Parse optional YAML frontmatter for language tagging.
5751+
// Files with a "language" field in frontmatter will only
5752+
// surface in searches for that language.
5753+
//
5754+
// Example:
5755+
// ---
5756+
// language: python
5757+
// ---
5758+
// # Error handling in Python
5759+
// ...
5760+
const frontmatter = parseFrontmatter(content);
5761+
const source = frontmatter.language ? `lang:${frontmatter.language}:${file.name}` : file.name;
5762+
this.indexProse(content, source);
57215763
}
57225764
} catch (err) {
57235765
getLogger().warn({ err, file: file.name }, 'Failed to index docs file');
@@ -5795,3 +5837,12 @@ function extractTexts(data: unknown, depth = 0): string[] {
57955837
}
57965838
return [];
57975839
}
5840+
5841+
/**
 * Extracts the `language` field from a leading YAML frontmatter block.
 *
 * Only a block that opens on the very first line of the document (an optional
 * UTF-8 byte-order mark is tolerated) and is closed by a line consisting solely
 * of `---` is recognised. Both LF and CRLF line endings are accepted.
 *
 * This is a deliberately small matcher rather than a YAML parser: it looks for
 * a top-level `language:` key and reads its scalar value, unwrapping single or
 * double quotes and dropping a trailing `# comment` on unquoted values.
 *
 * @param markdown - Raw file contents that may start with frontmatter.
 * @returns `{ language }` when a non-empty value is found, otherwise `{}`.
 */
function parseFrontmatter(markdown: string): { language?: string } {
  // The closing fence must be alone on its line so `----` or `---foo` inside
  // the body is not mistaken for the end of the block.
  const block = /^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/.exec(markdown);
  if (!block) return {};

  // Use [ \t]* instead of \s* after the colon: \s would cross the newline and
  // capture the following line as the value when `language:` is left empty.
  const field = /^language:[ \t]*(.*)$/m.exec(block[1] ?? '');
  if (!field) return {};

  const raw = (field[1] ?? '').trim();
  const quoted = /^(["'])(.*?)\1/.exec(raw);
  const value = (quoted ? quoted[2] ?? '' : raw.replace(/(?:^|\s+)#.*$/, '')).trim();

  return value ? { language: value } : {};
}

0 commit comments

Comments
 (0)