Document MCP Server tools

peteretelej · peteretelej · commit 32ddbafb3e50 · 2025-07-07T01:49:10.000+03:00
diff --git a/src/indexing.ts b/src/indexing.ts
@@ -186,7 +186,7 @@ export async function indexDirectories(paths: string[], config: Config): Promise
       if (config.verbose) {
         console.log(`Found ${files.length} files to process in ${path}`);
       }
-    } catch (error) {
+    } catch {
       // Continue with other directories even if one fails to scan
     }
   }
diff --git a/src/mcp.ts b/src/mcp.ts
@@ -54,17 +54,41 @@ Performance note: Initial indexing may take time for large directories, but subs
   },
   {
     name: 'search',
-    description: 'Search indexed content semantically',
+    description: `Perform semantic search across indexed files using natural language queries. This tool uses vector similarity to find the most relevant content, going beyond simple keyword matching to understand intent and context.
+
+When to use this tool:
+- Finding code examples, functions, or patterns ("error handling in Python", "JWT authentication implementation")
+- Locating documentation or explanations ("how to configure Redis", "API rate limiting guide")
+- Discovering similar functionality across files ("database connection patterns", "logging utilities")
+- Research and exploration of codebases ("machine learning models", "test utilities")
+- Finding files related to specific features or topics
+
+How semantic search works:
+- Searches by meaning and context, not just exact keywords
+- Finds conceptually related content even with different terminology
+- Returns files ranked by relevance with similarity scores
+- Groups results by file to avoid duplicates from multiple matching sections
+
+Response format:
+- Returns lightweight metadata including file paths, relevance scores, and chunk IDs
+- Use 'get_chunk' or 'get_content' tools to fetch actual content from search results
+- Chunks are sorted by relevance score within each file
+- Average similarity score calculated across all matching chunks per file
+
+Example queries:
+- "error handling patterns" (finds try/catch, error classes, logging)
+- "database migration scripts" (finds SQL, schema changes, migration files)
+- "authentication middleware" (finds auth logic, JWT handling, middleware functions)`,
     inputSchema: {
       type: 'object',
       properties: {
         query: {
           type: 'string',
-          description: 'Search query'
+          description: 'Natural language search query describing what you are looking for. Can be concepts, functionality, or specific technical terms.'
         },
         limit: {
           type: 'number',
-          description: 'Maximum number of results (default: 10)',
+          description: 'Maximum number of files to return (default: 10). Each file may contain multiple matching chunks.',
           default: 10
         }
       },
@@ -73,17 +97,40 @@ Performance note: Initial indexing may take time for large directories, but subs
   },
   {
     name: 'similar_files',
-    description: 'Find files similar to a given file',
+    description: `Find files that are semantically similar to a given reference file. This tool analyzes the content and context of a file to discover other files with related functionality, similar patterns, or comparable content.
+
+When to use this tool:
+- Discovering related implementations across a codebase ("find files similar to this authentication module")
+- Locating alternative approaches or patterns ("find other components like this React component")
+- Finding documentation or examples related to a specific file
+- Identifying code duplication or similar functionality that could be refactored
+- Exploring unfamiliar codebases by finding files similar to known examples
+- Locating test files, configuration files, or documentation related to a source file
+
+How similarity detection works:
+- Analyzes the semantic content of the reference file
+- Compares against all indexed files using vector similarity
+- Considers code patterns, function signatures, imports, and documentation
+- Returns files ranked by content similarity, not just filename or location similarity
+- Works across different file types and programming languages
+
+Use cases:
+- Code analysis: "Find files similar to this database model to understand the schema patterns"
+- Learning: "Show me other API controllers similar to this one"
+- Maintenance: "Find files with similar error handling patterns"
+- Architecture: "Locate other services that follow this microservice pattern"
+
+Note: The reference file must be indexed for this tool to work. If the file is not found in the index, an error will be returned.`,
     inputSchema: {
       type: 'object',
       properties: {
         file_path: {
           type: 'string',
-          description: 'Path to the file to find similar files for'
+          description: 'Absolute or relative path to the reference file. This file must have been previously indexed.'
         },
         limit: {
           type: 'number',
-          description: 'Maximum number of results (default: 10)',
+          description: 'Maximum number of similar files to return (default: 10). Results are sorted by similarity score.',
           default: 10
         }
       },
@@ -92,46 +139,133 @@ Performance note: Initial indexing may take time for large directories, but subs
   },
   {
     name: 'get_content',
-    description: 'Get file content',
+    description: `Retrieve the full content of a file or specific chunks within a file. This tool reads files directly from the filesystem and can optionally return only specific portions of indexed files.
+
+When to use this tool:
+- After performing a search, to retrieve the actual content of relevant files
+- Reading complete files that were identified through semantic search
+- Extracting specific sections of large files using chunk ranges
+- Accessing source code, documentation, or configuration files for analysis
+- Following up on search results with detailed content examination
+
+How chunk selection works:
+- If no chunks parameter is provided, returns the entire file content
+- Chunk ranges allow selective reading of large files (e.g., "2-5" returns chunks 2, 3, 4, and 5)
+- Single chunks can be specified (e.g., "3" returns only chunk 3)
+- Chunks are the same segments created during indexing for semantic search
+- Useful for large files where you only need specific sections identified by search
+
+File access:
+- Reads files directly from the filesystem (not from the search index)
+- Works with any readable file, whether indexed or not
+- Supports all text-based file formats
+- Preserves original formatting and content exactly as stored
+
+Workflow integration:
+1. Use 'search' to find relevant files and identify interesting chunk IDs
+2. Use 'get_content' to retrieve full file content or specific chunks
+3. Analyze the content to understand context and implementation details
+
+Performance note: For large files, using chunk ranges can be more efficient than reading entire files.`,
     inputSchema: {
       type: 'object',
       properties: {
         file_path: {
           type: 'string',
-          description: 'Path to the file to retrieve'
+          description: 'Absolute or relative path to the file to retrieve. File must be readable and text-based.'
         },
         chunks: {
           type: 'string',
-          description: 'Optional chunk range (e.g., "2-5")'
+          description: 'Optional chunk range specification. Examples: "3" (single chunk), "2-5" (chunks 2 through 5), "1-3" (first three chunks). Only works for indexed files.'
         }
       },
       required: ['file_path']
     }
   },
   {
     name: 'get_chunk',
-    description: 'Get content of a specific chunk by file path and chunk ID',
+    description: `Retrieve the content of a specific chunk from an indexed file. This tool provides precise access to individual text segments that were identified during semantic search, allowing efficient retrieval of only the most relevant content.
+
+When to use this tool:
+- After performing a 'search' operation, to fetch the actual content of specific chunks that matched your query
+- When you want to examine only the most relevant sections of a file rather than reading the entire file
+- For targeted content analysis where you need specific text segments identified by their chunk IDs
+- To build contextual responses using only the most semantically relevant portions of files
+- When working with large files and you only need particular sections
+
+How chunks work:
+- Files are divided into overlapping text segments during indexing for better search granularity
+- Each chunk represents a coherent section of text (typically 512 characters with overlap)
+- Chunk IDs are sequential strings ("0", "1", "2", etc.) within each file
+- Search results include chunk IDs for the most relevant sections
+- This tool retrieves the exact content that was semantically matched
+
+Typical workflow:
+1. Use 'search' to find files and get chunk IDs with high relevance scores
+2. Use 'get_chunk' to retrieve the specific content of the most relevant chunks
+3. Analyze or process only the most pertinent text segments
+
+Efficiency benefits:
+- Avoids transferring unnecessary content from large files
+- Provides precise access to semantically relevant text
+- Reduces token usage by fetching only needed sections
+- Enables focused analysis on the most important content
+
+Note: Both the file and the specific chunk must exist in the search index for this tool to work.`,
     inputSchema: {
       type: 'object',
       properties: {
         file_path: {
           type: 'string',
-          description: 'Path to the file'
+          description: 'Absolute or relative path to the indexed file containing the desired chunk.'
         },
         chunk_id: {
           type: 'string',
-          description: 'ID of the chunk to retrieve'
+          description: 'ID of the specific chunk to retrieve. This is typically obtained from search results and is a sequential string like "0", "1", "2", etc.'
         }
       },
       required: ['file_path', 'chunk_id']
     }
   },
   {
     name: 'server_info',
-    description: 'Get server information and status',
+    description: `Get comprehensive information about the directory indexer server status, configuration, and indexed content. This tool provides a complete overview of the current state of the semantic search system.
+
+When to use this tool:
+- To check if the indexer is properly set up and operational
+- Before starting work to understand what content is already indexed
+- To verify indexing operations completed successfully
+- When debugging search issues or unexpected results
+- To get an overview of available content for semantic search
+- To check system health and identify any configuration problems
+
+Information provided:
+- Server version and operational status
+- Total count of indexed directories, files, and searchable chunks
+- Database size and storage information
+- Most recent indexing timestamp
+- List of all indexed directories with individual statistics
+- File counts and chunk counts per directory
+- Indexing status for each directory (completed, failed, in progress)
+- Error reports and processing issues
+- System consistency checks between database components
+
+Status indicators:
+- Operational status of vector database (Qdrant) connection
+- Embedding service availability
+- Data consistency between SQLite metadata and vector storage
+- Recent errors or warnings that may affect search quality
+
+Use this tool to:
+- Verify setup before performing search operations
+- Understand the scope of available content
+- Troubleshoot search or indexing issues
+- Plan additional indexing operations
+- Monitor system health and performance`,
     inputSchema: {
       type: 'object',
-      properties: {}
+      properties: {},
+      additionalProperties: false
     }
   }
 ];
diff --git a/src/search.ts b/src/search.ts
@@ -47,7 +47,7 @@ export async function searchContent(query: string, options: SearchOptions = {}):
     const points = await qdrant.searchPoints(queryEmbedding, limit * 5);
     
     // Group points by file path
-    const fileGroups = new Map<string, Array<{ score: number; chunkId: string; content: string; parentDirectories: string[] }>>();
+    const fileGroups = new Map<string, Array<{ score: number; chunkId: string; parentDirectories: string[] }>>();
     
     for (const point of points) {
       const score = point.score ?? 0;
@@ -61,7 +61,6 @@ export async function searchContent(query: string, options: SearchOptions = {}):
       fileGroups.get(filePath)!.push({
         score,
         chunkId: point.payload.chunkId,
-        content: point.payload.content || '',
         parentDirectories: point.payload.parentDirectories
       });
     }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -186,7 +186,7 @@ export async function indexDirectories(paths: string[], config: Config): Promise` |
| `186` | `186` | `if (config.verbose) {` |
| `187` | `187` | ``console.log(`Found ${files.length} files to process in ${path}`);`` |
| `188` | `188` | `}` |
| `189` |  | `- } catch (error) {` |
|  | `189` | `+ } catch {` |
| `190` | `190` | `// Continue with other directories even if one fails to scan` |
| `191` | `191` | `}` |
| `192` | `192` | `}` |