diff --git a/packages/cli/src/docs-mcp-server/docs.ts b/packages/cli/src/docs-mcp-server/docs.ts index e7166b4..516026e 100644 --- a/packages/cli/src/docs-mcp-server/docs.ts +++ b/packages/cli/src/docs-mcp-server/docs.ts @@ -20,6 +20,20 @@ export async function fetchDocsList() { } } +export async function fetchSdkDocsList() { + try { + const response = await fetch('https://langbase.com/docs/llms-sdk.txt'); + if (!response.ok) { + throw new Error('Failed to fetch docs'); + } + + const text = await response.text(); + return text; + } catch (error) { + throw new Error('Failed to fetch docs ' + JSON.stringify(error)); + } +} + /** * Fetches and converts a blog post to markdown * @@ -45,12 +59,14 @@ export async function fetchDocsPost(url: string): Promise { // Get the main content const content = document.body.textContent?.trim() || ''; + if (!content) { throw new Error('No content found in docs'); } return content; } catch (error) { + console.error('Error fetching docs:', error); throw new Error( `Failed to fetch docs: ${error instanceof Error ? error.message : 'Something went wrong. 
Please try again.'}` ); diff --git a/packages/cli/src/docs-mcp-server/index.ts b/packages/cli/src/docs-mcp-server/index.ts index a7ce20d..1cf76c2 100644 --- a/packages/cli/src/docs-mcp-server/index.ts +++ b/packages/cli/src/docs-mcp-server/index.ts @@ -1,8 +1,8 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import { z } from 'zod'; -import { fetchDocsList, fetchDocsPost } from './docs'; -import { getRelevanceScore } from '@/utils/get-score'; +import { fetchDocsList, fetchDocsPost, fetchSdkDocsList } from './docs'; +import { findRelevantLink } from '@/utils/get-relevent-link'; export async function docsMcpServer() { const server = new McpServer({ @@ -23,21 +23,46 @@ export async function docsMcpServer() { async ({ query }) => { const docs = await fetchDocsList(); // search through the docs and return the most relevent path based on the query - const docLines = docs.split('\n').filter(line => line.trim()); + const url = findRelevantLink(docs, query); + if (!url) { + return { + content: [ + { + type: 'text', + text: + 'No relevant documentation found for the query: ' + + query + } + ] + }; + } - // Score and sort the documentation entries - const scoredDocs = docLines - .map(line => ({ - line, - score: getRelevanceScore(line, query) - })) - .sort((a, b) => b.score - a.score) - .filter(doc => doc.score > 0) - .slice(0, 3); // Get top 3 most relevant results + return { + content: [ + { + type: 'text', + text: `This is the most relevant documentation for the query: ${url}` + } + ] + }; + } + ); - const hasRelevantDocs = scoredDocs.length === 0; + server.tool( + 'sdk-route-finder', + "Searches through all available SDK documentation routes and returns relevant paths based on the user's query. 
This tool helps navigate the documentation by finding the most appropriate sections that match the search criteria.", + { + query: z.string() + .describe(`A refined search term extracted from the user's question. + For example, if user asks 'How do I create a pipe?', the query would be 'SDK Pipe'. + This should be the specific concept or topic to search for in the documentation. + Treat keyword add as create if user ask for Eg. 'How do I add memory to pipe?' the query should be 'create memory'`) + }, + async ({ query }) => { + const docs = await fetchSdkDocsList(); + const url = findRelevantLink(docs, query); - if (hasRelevantDocs) { + if (!url) { return { content: [ { @@ -50,13 +75,11 @@ export async function docsMcpServer() { }; } - const results = scoredDocs.map(doc => doc.line).join('\n'); - return { content: [ { type: 'text', - text: results + text: `This is the most relevant documentation for the query: ${url}` } ] }; @@ -64,8 +87,8 @@ export async function docsMcpServer() { ); server.tool( - 'sdk-documentation-fetcher', - 'Fetches detailed SDK documentation, specializing in implementation guides for core features like pipes, memory, and tools. This is the primary source for the latest SDK documentation and should be consulted first for questions about creating or implementing SDK components. Use this tool for detailed step-by-step instructions on building pipes, configuring memory systems, and developing custom tools.', + 'sdk-docs-tool', + 'Always First Use sdk-route-finder to find the most relevant documentation and then use this tool to fetch the detailed documentation.Fetches detailed SDK documentation, specializing in implementation guides for core features like pipes, memory, and tools. This is the primary source for the latest SDK documentation and should be consulted first for questions about creating or implementing SDK components. 
Use this tool for detailed step-by-step instructions on building pipes, configuring memory systems, and developing custom tools.', { url: z .string() @@ -90,7 +113,7 @@ export async function docsMcpServer() { server.tool( 'examples-tool', - 'Fetches code examples and sample implementations from the documentation. Use this tool when users specifically request examples, sample code, or implementation demonstrations. This tool provides practical code snippets and complete working examples that demonstrate how to implement various features.', + 'Always first use docs-route-finder to find the most relevant documentation and then use this tool to fetch the detailed documentation. Fetches code examples and sample implementations from the documentation. Use this tool when users specifically request examples, sample code, or implementation demonstrations. This tool provides practical code snippets and complete working examples that demonstrate how to implement various features.', { url: z .string() @@ -115,7 +138,7 @@ export async function docsMcpServer() { server.tool( 'guide-tool', - 'Fetches detailed guides and tutorials from the documentation. Use this tool when users explicitly request guides, tutorials, or how-to content. This tool provides step-by-step instructions and practical examples for implementing various features.', + 'Always first use docs-route-finder to find the most relevant documentation and then use this tool to fetch the detailed documentation. Fetches detailed guides and tutorials from the documentation. Use this tool when users explicitly request guides, tutorials, or how-to content. This tool provides step-by-step instructions and practical examples for implementing various features.', { url: z .string() @@ -140,7 +163,7 @@ export async function docsMcpServer() { server.tool( 'api-reference-tool', - 'Fetches API reference documentation. 
Use this tool ONLY when the user explicitly asks about API endpoints, REST API calls, or programmatically creating/updating/deleting resources (like pipes, memory, etc.) through the API interface. For general SDK implementation questions, use the sdk-documentation-fetcher instead.', + 'Always first use docs-route-finder to find the most relevant documentation and then use this tool to fetch the detailed documentation. Fetches API reference documentation. Use this tool ONLY when the user explicitly asks about API endpoints, REST API calls, or programmatically creating/updating/deleting resources (like pipes, memory, etc.) through the API interface. For general SDK implementation questions, use the sdk-documentation-fetcher instead.', { url: z .string() diff --git a/packages/cli/src/utils/get-relevent-link.ts b/packages/cli/src/utils/get-relevent-link.ts new file mode 100644 index 0000000..5d05cb8 --- /dev/null +++ b/packages/cli/src/utils/get-relevent-link.ts @@ -0,0 +1,80 @@ +import { + extractDocBlocks, + extractDocMetadata, + getRelevanceScore +} from './get-score'; + +/** + * Main search function to find relevant documentation based on a query + * Returns the url of the most relevant documentation + * + * @param docs - The complete documentation text + * @param query - The search query + * @returns The path after '/docs/' in the top match's URL; 'docs/sdk' when nothing matches; undefined when the top match has no URL containing '/docs/' + */ +export const findRelevantLink = (docs: string, query: string) => { + // Get top 5 results + const searchResults = searchDocs(docs, query, 5); + const hasNoResults = searchResults.length === 0; + if (hasNoResults) { + return 'docs/sdk'; + } + + const url = searchResults[0].url?.split('/docs/')[1]; + return url; +}; + +/** + * Searches through documentation blocks to find relevant matches for a query + * + * @param docs - The complete documentation text containing multiple doc blocks + * @param query - The search query + * @param maxResults - Maximum number of results to return + * @returns Array of relevant docs with their 
scores, titles and URLs + */ +export const searchDocs = (docs: string, query: string, maxResults: number) => { + // Extract all document blocks safely + const docBlocks = extractDocBlocks(docs); + // Score each document block + const scoredDocs = docBlocks.map(docBlock => { + // Extract metadata + const metadata = extractDocMetadata(docBlock); + + // Calculate scores for title and content separately + let titleScore = 0; + const isTitleNoEmpty = metadata.title != ''; + + if (isTitleNoEmpty) { + titleScore = getRelevanceScore(metadata.title, query, true); + } else { + titleScore = 0; + } + + // Extract content from the doc block, handling potential code blocks with backticks + const contentMatch = /([\s\S]*?)<\/content>/i.exec(docBlock); + let content = ''; + + if (contentMatch) { + content = contentMatch[1].trim(); + } else { + content = ''; + } + + const contentScore = getRelevanceScore(content, query, false); + // Combined score with title weighted more heavily + const totalScore = titleScore + contentScore; + + return { + score: totalScore, + title: metadata.title || 'Untitled Document', + url: metadata.url || null + }; + }); + + // Sort by score (descending) and filter out irrelevant results + const filteredDocs = scoredDocs + .filter(doc => doc.score > 0) + .sort((a, b) => b.score - a.score) + .slice(0, maxResults); + return filteredDocs; +}; diff --git a/packages/cli/src/utils/get-score.ts b/packages/cli/src/utils/get-score.ts index 62e4e51..f811a57 100644 --- a/packages/cli/src/utils/get-score.ts +++ b/packages/cli/src/utils/get-score.ts @@ -1,25 +1,143 @@ /** - * Calculates a relevance score between a line of text and a search query. + * Enhanced relevance scoring algorithm that evaluates the similarity between a text and a search query + * with special handling for document metadata extraction. 
* - * The scoring algorithm works as follows: - * - If the entire search query is found within the line, returns a score of 3 (highest relevance) - * - Otherwise, adds 1 point for each individual search query word found in the line + * Scoring system: + * - Full query found in the text (case-insensitive substring): 10 points (highest priority) + * - Title match: 5 points per matching word in title + * - Content match: 1 point per matching word in content + * - Partial word matches (minimum 3 chars): 2.5 points in a title, 0.5 points in content + * - Word proximity bonus: 2 points when the first occurrences of the matched query words span fewer than 50 characters * - * @param line - The text line to check against the search query - * @param searchQuery - The search query to check against the line - * @returns A numerical score indicating relevance: 3 for exact matches, or the count of matching words + * @param text - The text to check against the search query (could be title or content) + * @param searchQuery - The search query to check against the text + * @param isTitle - Boolean indicating if the text is a title (for boosting score) + * @returns A numerical score indicating relevance */ -export const getRelevanceScore = (line: string, searchQuery: string) => { - const lowerLine = line.toLowerCase(); - const lowerQuery = searchQuery.toLowerCase(); - // Higher score for exact matches - if (lowerLine.includes(lowerQuery)) { - return 3; +export const getRelevanceScore = ( + text: string, + searchQuery: string, + isTitle: boolean +): number => { + if (!text || !searchQuery) return 0; + + const lowerText = text.toLowerCase(); + const lowerQuery = searchQuery.toLowerCase().trim(); + + // Early return for empty queries + if (lowerQuery.length === 0) return 0; + + let score = 0; + + // Highest score for exact matches (10 points) + const isPartialMatch = lowerText.includes(lowerQuery); + if (isPartialMatch) { + score += 10; + } + + // Score based on individual word matches + const queryWords = lowerQuery.split(/\s+/).filter(word => word.length > 0); + + // Track positions 
of matched words for proximity calculation + const matchPositions: number[] = []; + + for (const word of queryWords) { + // Skip very short words + const isWordShort = word.length < 2; + if (isWordShort) continue; + + const isPartialWordMatch = lowerText.includes(word); + if (isPartialWordMatch) { + // Full word match + if (isTitle) { + score += 5; + } else { + score += 1; + } + + // Store position for proximity calculation + const pos = lowerText.indexOf(word); + if (pos >= 0) matchPositions.push(pos); + } else if (word.length >= 3) { + // Partial word match for longer words (at least 3 chars) + for (const textWord of lowerText.split(/\s+/)) { + const isLongWord = textWord.length >= 3; + const isWordMatch = + textWord.includes(word) || word.includes(textWord); + if (isLongWord && isWordMatch) { + if (isTitle) { + score += 2.5; + } else { + score += 0.5; + } + break; + } + } + } } - // Score based on word matches - const queryWords = lowerQuery.split(' '); - return queryWords.reduce((score, word) => { - return score + (lowerLine.includes(word) ? 
1 : 0); - }, 0); + // Calculate word proximity bonus when multiple words matched + const isMultipleWordsMatched = matchPositions.length > 1; + if (isMultipleWordsMatched) { + matchPositions.sort((a, b) => a - b); + // If words appear within 50 chars of each other, give proximity bonus + const isProximityBonus = + matchPositions[matchPositions.length - 1] - matchPositions[0] < 50; + if (isProximityBonus) { + score += 2; + } + } + + return score; +}; + +/** + * Parse document metadata from structured document text + * This function handles document text that may contain code blocks with backticks + * + * @param docText - The document text containing metadata in a structured format + * @returns An object containing the title and URL if found + */ +export const extractDocMetadata = ( + docText: string +): { title: string | ''; url: string | '' } => { + const result = { title: '', url: '' }; + + // Extract metadata section + const metadataSection = docText.match(/([\s\S]*?)<\/metadata>/i); + const isMetadataSection = metadataSection && metadataSection[1]; + if (isMetadataSection) { + // Within the metadata section, extract title and url + const metadataContent = metadataSection[1]; + + // Extract title + const titleMatch = /([\s\S]*?)<\/title>/i.exec(metadataContent); + const isTitleMatch = titleMatch && titleMatch[1]; + if (isTitleMatch) { + result.title = titleMatch[1].trim(); + } + + // Extract URL + const urlMatch = /<url>([\s\S]*?)<\/url>/i.exec(metadataContent); + const isUrlMatch = urlMatch && urlMatch[1]; + if (isUrlMatch) { + result.url = urlMatch[1].trim(); + } + } + + return result; +}; + +/** + * Safely extracts document blocks from documentation text + * Handles potential code blocks with triple backticks + * + * @param docs - The complete documentation text + * @returns Array of document block objects + */ +export const extractDocBlocks = (docs: string): string[] => { + // Use regex to find all doc blocks, handling potential nested code blocks with 
backticks + const docPattern = /<doc>[\s\S]*?<\/doc>/g; + const matches = docs.match(docPattern) || []; + return matches; };