diff --git a/src/services/smart-context-extractor.ts b/src/services/smart-context-extractor.ts new file mode 100644 index 0000000..9d94b51 --- /dev/null +++ b/src/services/smart-context-extractor.ts @@ -0,0 +1,702 @@
import * as vscode from "vscode";
import * as path from "path";
import { VectorDatabaseService, SearchResult } from "./vector-database.service";
import { ContextRetriever } from "./context-retriever";
import { CodebaseUnderstandingService } from "./codebase-understanding.service";
import { QuestionClassifierService } from "./question-classifier.service";
import { Logger, LogLevel } from "../infrastructure/logger/logger";
import { FileUtils, LanguageUtils, AsyncUtils } from "../utils/common-utils";

/**
 * Tuning knobs for {@link SmartContextExtractor}. Every field is optional;
 * the constructor fills in a default for anything left undefined.
 */
export interface SmartContextOptions {
  /** Upper bound on the estimated tokens of the assembled context (default 6000). */
  maxContextTokens?: number;
  /** Maximum number of search results kept after ranking (default 8). */
  maxResults?: number;
  /** Whether the vector database is consulted at all (default true). */
  enableVectorSearch?: boolean;
  /** Whether keyword search runs when vector search yields nothing (default true). */
  enableFallback?: boolean;
  /** Default true. Stored but not read anywhere in this file. */
  includeMetadata?: boolean;
  /** Tokens held back from `maxContextTokens` when filling the context (default 500). */
  tokenBudgetBuffer?: number;
}

/** Outcome of one context extraction. */
export interface ContextExtractionResult {
  /** Markdown-formatted context; empty string when nothing was found. */
  content: string;
  /** Provenance of each snippet included in `content`. */
  sources: ContextSource[];
  /** Estimated token count of the included snippets. */
  totalTokens: number;
  /** Strategy that produced (or would have produced) the result. */
  searchMethod: "vector" | "keyword" | "hybrid";
  /** Mean relevance of the included sources, or a fixed value for keyword search. */
  relevanceScore: number;
}

/** A single snippet referenced from the extracted context. */
export interface ContextSource {
  filePath: string;
  type: "function" | "class" | "interface" | "enum" | "module";
  name: string;
  relevanceScore: number;
  lineNumbers?: { start: number; end: number };
  /** Marker such as `[[1]]` that also appears in the context text. */
  clickableReference: string;
}

/**
 * Enhanced SmartContextExtractor with vector-based semantic search capabilities.
 * Provides intelligent context extraction for AI responses with multiple search strategies.
 *
 * Strategy order: semantic (vector) search first, then a keyword scan of the
 * codebase summary, then an empty result. Extraction never throws; failures
 * are logged and reported as an empty result.
 */
export class SmartContextExtractor {
  /**
   * Vocabulary patterns used by {@link extractTechnicalKeywords}. Hoisted so the
   * regular expressions are compiled once. They carry the `g` flag but are only
   * used through `String.prototype.match`, which resets `lastIndex`, so sharing
   * them between calls is safe.
   */
  private static readonly TECHNICAL_PATTERNS: readonly RegExp[] = [
    /\b(?:function|method|class|interface|type|enum|module|service|component|hook|util|helper|config|api|endpoint|route|controller|model|schema|database|query|authentication|authorization|validation|middleware|decorator|dependency|injection|provider|factory|builder|observer|strategy|adapter|facade|proxy|singleton|prototype)\b/gi,
    /\b(?:async|await|promise|callback|event|listener|handler|trigger|emit|subscribe|publish|stream|buffer|cache|store|state|context|props|render|mount|unmount|lifecycle|effect|ref|memo|reducer|action|dispatch|selector|middleware)\b/gi,
    // Ends with (?!\w) rather than \b: "#" is not a word character, so a trailing
    // \b could never match after "c#". For every other alternative (all of which
    // end in a word character) the two assertions are equivalent.
    /\b(?:react|vue|angular|node|express|fastify|nestjs|typescript|javascript|python|java|c#|go|rust|php|ruby|sql|mongodb|postgresql|redis|docker|kubernetes|aws|azure|gcp|graphql|rest|api|json|xml|html|css|scss|sass)(?!\w)/gi,
  ];

  /** Matches camelCase identifiers, or any token starting with an upper-case letter. */
  private static readonly IDENTIFIER_PATTERN = /\b[a-z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]*\b|\b[A-Z][a-zA-Z0-9]*\b/g;

  private logger: Logger;
  private readonly options: Required<SmartContextOptions>;

  /**
   * Weights of the four components of the composite ranking score.
   * They sum to 1.0 and are the single source of truth for
   * {@link calculateCompositeScore}.
   */
  private readonly RELEVANCE_RANKING_WEIGHTS = {
    vectorSimilarity: 0.4,
    fileProximity: 0.25,
    keywordOverlap: 0.2,
    codeImportance: 0.15,
  };

  /**
   * @param vectorDb Semantic search backend; when omitted only keyword search is possible.
   * @param contextRetriever Stored but not used by any method in this file.
   * @param codebaseUnderstanding Source of the codebase summary used by keyword search.
   * @param questionClassifier Optional classifier used to analyse the question.
   * @param options Overrides for the defaults documented on {@link SmartContextOptions}.
   */
  constructor(
    private vectorDb?: VectorDatabaseService,
    private contextRetriever?: ContextRetriever,
    private codebaseUnderstanding?: CodebaseUnderstandingService,
    private questionClassifier?: QuestionClassifierService,
    options: SmartContextOptions = {}
  ) {
    this.logger = Logger.initialize("SmartContextExtractor", {
      minLevel: LogLevel.INFO,
    });

    // Set default options
    this.options = {
      maxContextTokens: options.maxContextTokens ?? 6000,
      maxResults: options.maxResults ?? 8,
      enableVectorSearch: options.enableVectorSearch ?? true,
      enableFallback: options.enableFallback ?? true,
      includeMetadata: options.includeMetadata ?? true,
      tokenBudgetBuffer: options.tokenBudgetBuffer ?? 500,
    };

    this.logger.info("SmartContextExtractor initialized", {
      vectorDbEnabled: !!this.vectorDb,
      maxTokens: this.options.maxContextTokens,
      maxResults: this.options.maxResults,
    });
  }

  /**
   * Main method for extracting relevant context with vector search capabilities.
   *
   * @param userQuestion The question to find context for.
   * @param activeFile Path of the file open in the editor; used as a ranking hint.
   * @returns The first non-empty result from vector search or keyword fallback,
   *   otherwise an empty result. Never rejects: errors are logged and turned
   *   into an empty result so the caller's flow is not interrupted.
   */
  async extractRelevantContextWithVector(userQuestion: string, activeFile?: string): Promise<ContextExtractionResult> {
    const startTime = Date.now();
    const vectorSearchEnabled = this.options.enableVectorSearch && !!this.vectorDb;

    try {
      this.logger.debug(`Extracting context for question: "${userQuestion.substring(0, 50)}..."`);

      // Analyze question to determine search strategy
      const questionAnalysis = await this.analyzeQuestion(userQuestion);

      // Try vector search first if available and enabled
      if (vectorSearchEnabled) {
        const vectorResult = await this.tryVectorSearch(userQuestion, activeFile, questionAnalysis);

        if (vectorResult && vectorResult.sources.length > 0) {
          this.logger.info(
            `Vector search found ${vectorResult.sources.length} relevant results in ${Date.now() - startTime}ms`
          );
          return vectorResult;
        }
      }

      // Reaching this point means vector search was skipped, failed, or found
      // nothing, so the fallback applies unconditionally. No readiness probe is
      // needed here: it would cost a real query and could not change the outcome.
      if (this.options.enableFallback) {
        this.logger.debug("Vector search unavailable or returned no results, using fallback method");
        const fallbackResult = await this.tryKeywordSearch(userQuestion, activeFile);

        if (fallbackResult) {
          this.logger.info(
            `Fallback search found ${fallbackResult.sources.length} relevant results in ${Date.now() - startTime}ms`
          );
          return fallbackResult;
        }
      }

      // Return empty result if no context found. Label it "vector" only when
      // vector search was in play and the database answers the probe.
      const actualSearchMethod = vectorSearchEnabled && (await this.isVectorDbReady()) ? "vector" : "keyword";
      this.logger.warn("No relevant context found for question");
      return {
        content: "",
        sources: [],
        totalTokens: 0,
        searchMethod: actualSearchMethod,
        relevanceScore: 0,
      };
    } catch (error) {
      this.logger.error("Error in context extraction:", error);

      // Return empty result on error to prevent breaking the flow
      return {
        content: "",
        sources: [],
        totalTokens: 0,
        searchMethod: vectorSearchEnabled ? "vector" : "keyword",
        relevanceScore: 0,
      };
    }
  }

  /**
   * Analyze user question to determine optimal search strategy.
   *
   * Uses the injected classifier when present (its textual confidence is mapped
   * to a number); otherwise falls back to a keyword heuristic.
   */
  private async analyzeQuestion(question: string): Promise<{
    isCodebaseRelated: boolean;
    confidence: number;
    categories: string[];
    technicalKeywords: string[];
  }> {
    const technicalKeywords = this.extractTechnicalKeywords(question);

    if (this.questionClassifier) {
      const result = this.questionClassifier.categorizeQuestion(question);
      // Convert string confidence to number; unknown labels count as low.
      const confidenceMap: Record<string, number> = { high: 0.9, medium: 0.7, low: 0.3 };
      return {
        isCodebaseRelated: result.isCodebaseRelated,
        confidence: confidenceMap[result.confidence] ?? 0.3,
        categories: result.categories,
        technicalKeywords,
      };
    }

    // Fallback analysis
    const lowered = question.toLowerCase();
    const isCodebaseRelated =
      technicalKeywords.length > 0 ||
      lowered.includes("implement") ||
      lowered.includes("function") ||
      lowered.includes("class");

    return {
      isCodebaseRelated,
      confidence: isCodebaseRelated ? 0.8 : 0.3,
      categories: isCodebaseRelated ? ["implementation"] : ["general"],
      technicalKeywords,
    };
  }

  /**
   * Attempt vector-based semantic search.
   *
   * @param questionAnalysis Accepted for future use; not read by this method.
   * @returns Ranked, budget-limited context, or `null` when there is no vector
   *   database, the search returns nothing, or the search throws.
   */
  private async tryVectorSearch(
    question: string,
    activeFile?: string,
    questionAnalysis?: unknown
  ): Promise<ContextExtractionResult | null> {
    if (!this.vectorDb) return null;

    try {
      // Perform semantic search
      const searchResults = await this.vectorDb.semanticSearch(
        question,
        this.options.maxResults * 2 // Get more results to filter and rank
      );

      if (searchResults.length === 0) {
        return null;
      }

      // Rank and filter results
      const rankedResults = await this.rankSearchResults(searchResults, question, activeFile);
      const topResults = rankedResults.slice(0, this.options.maxResults);

      // Build context from results
      const contextResult = this.buildContextFromVectorResults(topResults, question);

      return {
        ...contextResult,
        searchMethod: "vector" as const,
      };
    } catch (error) {
      this.logger.error("Vector search failed:", error);
      return null;
    }
  }

  /**
   * Attempt keyword-based fallback search over the codebase summary.
   *
   * @returns Keyword context with a fixed relevance of 0.5, or `null` when the
   *   codebase service is missing, nothing matches, or the lookup throws.
   */
  private async tryKeywordSearch(question: string, activeFile?: string): Promise<ContextExtractionResult | null> {
    if (!this.codebaseUnderstanding) return null;

    try {
      // Use existing codebase understanding service
      const fullContext = await this.codebaseUnderstanding.getCodebaseContext();
      const extractedContext = this.extractRelevantKeywordContext(fullContext, question, activeFile);

      if (!extractedContext) return null;

      return {
        content: extractedContext,
        sources: [], // TODO: Extract sources from keyword search
        totalTokens: this.estimateTokenCount(extractedContext),
        searchMethod: "keyword" as const,
        relevanceScore: 0.5, // Lower relevance for keyword search
      };
    } catch (error) {
      this.logger.error("Keyword search failed:", error);
      return null;
    }
  }

  /**
   * Rank search results based on multiple criteria.
   *
   * @returns A new array sorted by descending composite score; each element is
   *   a copy of the input result with an added `compositeScore` property.
   */
  private async rankSearchResults(
    results: SearchResult[],
    question: string,
    activeFile?: string
  ): Promise<SearchResult[]> {
    const questionKeywords = this.extractTechnicalKeywords(question);

    return results
      .map((result) => ({
        ...result,
        compositeScore: this.calculateCompositeScore(result, questionKeywords, activeFile),
      }))
      .sort((a, b) => b.compositeScore - a.compositeScore);
  }

  /**
   * Calculate composite relevance score as a weighted sum of four components
   * (see {@link RELEVANCE_RANKING_WEIGHTS}), capped at 1.0.
   */
  private calculateCompositeScore(result: SearchResult, questionKeywords: string[], activeFile?: string): number {
    const weights = this.RELEVANCE_RANKING_WEIGHTS;
    let score = 0;

    // 1. Vector similarity score (primary)
    score += this.calculateVectorSimilarityScore(result.relevanceScore);

    // 2. File proximity to active file (secondary); skipped without both paths
    if (activeFile && result.metadata.filePath) {
      score += this.calculateFileProximityScore(result.metadata.filePath, activeFile) * weights.fileProximity;
    }

    // 3. Keyword overlap
    if (questionKeywords.length > 0) {
      score += this.calculateKeywordOverlapScore(result.content, questionKeywords) * weights.keywordOverlap;
    }

    // 4. Code importance/complexity (metadata-based)
    score += this.calculateCodeImportanceScore(result.metadata) * weights.codeImportance;

    return Math.min(score, 1.0); // Cap at 1.0
  }

  /**
   * Calculate vector similarity score component (already weighted).
   */
  private calculateVectorSimilarityScore(relevanceScore: number): number {
    return relevanceScore * this.RELEVANCE_RANKING_WEIGHTS.vectorSimilarity;
  }

  /**
   * Calculate file proximity score component.
   *
   * @returns 1.0 for the same file, 0.8 for the same directory, otherwise the
   *   length of the shared directory prefix divided by the deeper path's depth.
   */
  private calculateFileProximityScore(resultPath: string, activeFile: string): number {
    if (resultPath === activeFile) return 1.0;

    const resultDir = path.dirname(resultPath);
    const activeDir = path.dirname(activeFile);

    if (resultDir === activeDir) return 0.8; // Same directory

    const resultParts = resultDir.split(path.sep);
    const activeParts = activeDir.split(path.sep);

    // Calculate common path depth
    const sharedLimit = Math.min(resultParts.length, activeParts.length);
    let commonDepth = 0;
    while (commonDepth < sharedLimit && resultParts[commonDepth] === activeParts[commonDepth]) {
      commonDepth++;
    }

    const maxDepth = Math.max(resultParts.length, activeParts.length);
    return commonDepth / maxDepth;
  }

  /**
   * Calculate keyword overlap score component.
   *
   * @returns Fraction of `keywords` that occur (case-insensitively, as
   *   substrings) in `content`; 0 when `keywords` is empty.
   */
  private calculateKeywordOverlapScore(content: string, keywords: string[]): number {
    if (keywords.length === 0) return 0;

    const contentLower = content.toLowerCase();
    const matchedKeywords = keywords.filter((keyword) => contentLower.includes(keyword.toLowerCase()));

    return matchedKeywords.length / keywords.length;
  }

  /**
   * Calculate code importance score component from result metadata.
   *
   * Starts at 0.5 and adds bonuses for the code type, for entry-point file
   * names, and for files modified within the last seven days; capped at 1.0.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- metadata shape is declared outside this file
  private calculateCodeImportanceScore(metadata: any): number {
    let importance = 0.5; // Base importance

    // Higher importance for certain code types
    if (metadata.type === "class") importance += 0.2;
    if (metadata.type === "interface") importance += 0.15;
    if (metadata.type === "function") importance += 0.1;

    // Higher importance for entry points and main files
    const fileName = path.basename(metadata.filePath || "");
    if (["index.ts", "main.ts", "app.ts"].includes(fileName)) {
      importance += 0.2;
    }

    // Higher importance for recently modified files. An unparsable date gives
    // NaN, which fails the comparison and therefore adds nothing.
    if (metadata.lastModified) {
      const daysSinceModified = (Date.now() - new Date(metadata.lastModified).getTime()) / (1000 * 60 * 60 * 24);
      if (daysSinceModified < 7) importance += 0.1;
    }

    return Math.min(importance, 1.0);
  }

  /**
   * Build formatted context from vector search results.
   *
   * Results are appended in order until the next one would exceed
   * `maxContextTokens - tokenBudgetBuffer`; processing stops there so the
   * `[[n]]` references stay contiguous.
   *
   * @param question Not read by this method.
   * @returns The markdown context, its sources, the estimated tokens of the
   *   included snippets and their mean relevance. All fields are empty/zero
   *   when no result fits the budget.
   */
  private buildContextFromVectorResults(
    results: SearchResult[],
    question: string
  ): {
    content: string;
    sources: ContextSource[];
    totalTokens: number;
    relevanceScore: number;
  } {
    const emptyResult = { content: "", sources: [] as ContextSource[], totalTokens: 0, relevanceScore: 0 };
    if (results.length === 0) {
      return emptyResult;
    }

    const tokenBudget = this.options.maxContextTokens - this.options.tokenBudgetBuffer;
    let context = `**Semantically Relevant Code (Vector Search Results):**\n\n`;
    const sources: ContextSource[] = [];
    let totalTokens = 0;
    let relevanceSum = 0;

    for (let i = 0; i < results.length; i++) {
      const result = results[i];
      const metadata = result.metadata;
      const relevancePercentage = (result.relevanceScore * 100).toFixed(1);

      // Check token budget
      const sectionTokens = this.estimateTokenCount(result.content);
      if (totalTokens + sectionTokens > tokenBudget) {
        this.logger.debug(`Token budget reached, stopping at ${i} results`);
        break;
      }

      // Create clickable reference
      const clickableRef = `[[${i + 1}]]`;

      // Add to context
      context += `**${clickableRef} File: ${FileUtils.getRelativePath(metadata.filePath)}** (Relevance: ${relevancePercentage}%)\n`;

      if (metadata.name && metadata.type) {
        context += `**${this.capitalizeFirst(metadata.type)}: ${metadata.name}**\n`;
      }

      context += `\`\`\`${this.getLanguageFromPath(metadata.filePath)}\n${result.content}\n\`\`\`\n\n`;

      // Add to sources. Line numbers are checked against null/undefined rather
      // than truthiness so that a start line of 0 is not discarded.
      sources.push({
        filePath: metadata.filePath,
        type: metadata.type || "module",
        name: metadata.name || path.basename(metadata.filePath),
        relevanceScore: result.relevanceScore,
        clickableReference: clickableRef,
        lineNumbers:
          metadata.startLine != null && metadata.endLine != null
            ? {
                start: metadata.startLine,
                end: metadata.endLine,
              }
            : undefined,
      });

      totalTokens += sectionTokens;
      relevanceSum += result.relevanceScore;
    }

    // Nothing fit the budget: do not hand back a heading with no snippets.
    if (sources.length === 0) {
      return emptyResult;
    }

    // Add context instructions
    context += `\n**Context Instructions**: The above code snippets were selected using semantic search for relevance to your question. `;
    context += `Use the clickable references (${sources.map((s) => s.clickableReference).join(", ")}) to navigate to specific files. `;
    context += `Focus on these implementations and provide specific examples from the actual codebase.\n`;

    return {
      content: context.trim(),
      sources,
      totalTokens,
      relevanceScore: relevanceSum / sources.length,
    };
  }

  /**
   * Extract relevant context using keyword-based search (fallback).
   *
   * Splits the summary on blank lines, keeps sections whose keyword overlap
   * exceeds 0.1, and returns the five best under a heading.
   *
   * @param activeFile Not read by this method.
   * @returns The formatted sections, or `null` when the question has no
   *   keywords or no section passes the threshold.
   */
  private extractRelevantKeywordContext(fullContext: string, question: string, activeFile?: string): string | null {
    const keywords = this.extractTechnicalKeywords(question);
    if (keywords.length === 0) return null;

    const relevantSections: Array<{ section: string; score: number }> = [];

    for (const section of fullContext.split("\n\n")) {
      const score = this.calculateKeywordOverlapScore(section, keywords);
      if (score > 0.1) {
        // Minimum threshold
        relevantSections.push({ section, score });
      }
    }

    if (relevantSections.length === 0) return null;

    // Sort by relevance and take top sections
    relevantSections.sort((a, b) => b.score - a.score);
    const topSections = relevantSections.slice(0, 5);

    return `**Keyword-based Context:**\n\n${topSections.map((s) => s.section).join("\n\n")}`;
  }

  /**
   * Extract technical keywords from question.
   *
   * Vocabulary matches are lower-cased; camelCase/PascalCase identifiers keep
   * their original casing. Duplicates are removed and at most ten keywords are
   * returned, in order of discovery.
   */
  private extractTechnicalKeywords(question: string): string[] {
    const keywords = new Set<string>();

    for (const pattern of SmartContextExtractor.TECHNICAL_PATTERNS) {
      const matches = question.match(pattern);
      if (matches) {
        matches.forEach((match) => keywords.add(match.toLowerCase()));
      }
    }

    // Also extract camelCase and PascalCase identifiers
    const identifiers = question.match(SmartContextExtractor.IDENTIFIER_PATTERN);
    if (identifiers) {
      identifiers.forEach((id) => keywords.add(id));
    }

    return Array.from(keywords).slice(0, 10); // Limit to top 10 keywords
  }

  /**
   * Enhanced keyword extraction for public use.
   *
   * @returns Up to ten keywords; see {@link extractTechnicalKeywords}.
   */
  extractKeywords(text: string): string[] {
    return this.extractTechnicalKeywords(text);
  }

  /**
   * Rank contexts by relevance using multiple factors - advanced ranking algorithm.
   *
   * Adds 0.2 when the context comes from the active file and 0.1 per question
   * keyword found in the context text, then sorts by descending score.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- context shape is not declared in this file
  private rankContextsByRelevance(contexts: any[], question: string, activeFile?: string): any[] {
    // Keywords depend only on the question, so compute them once. They are
    // lower-cased because they are compared against lower-cased text below;
    // identifiers keep their casing in extractTechnicalKeywords.
    const questionKeywords = this.extractTechnicalKeywords(question).map((keyword) => keyword.toLowerCase());

    return contexts
      .map((context) => {
        let score = context.score || 0;

        // Boost score if from active file
        if (activeFile && context.metadata?.fileName === activeFile) {
          score += 0.2;
        }

        // Boost score based on question keywords
        const contextText = String(context.content || context.text || "").toLowerCase();
        const matchingKeywords = questionKeywords.filter((keyword) => contextText.includes(keyword));
        score += matchingKeywords.length * 0.1;

        return { ...context, score };
      })
      .sort((a, b) => b.score - a.score);
  }

  /**
   * Fallback to keyword-based search when vector search fails.
   *
   * Line-oriented variant: returns up to 20 lines of the codebase summary that
   * mention any keyword. When none match (or no summary is available) it
   * returns a short notice listing the keywords.
   *
   * @param activeFile Not read by this method.
   * @returns `null` when the question has no keywords or the lookup throws.
   */
  private async fallbackToKeywordSearch(
    question: string,
    activeFile?: string
  ): Promise<ContextExtractionResult | null> {
    try {
      const keywords = this.extractTechnicalKeywords(question);
      if (keywords.length === 0) return null;

      // Fallback to codebase understanding service if available
      if (this.codebaseUnderstanding) {
        const contextData = await this.codebaseUnderstanding.getCodebaseContext();

        if (contextData) {
          // Simple keyword matching in the context
          const loweredKeywords = keywords.map((keyword) => keyword.toLowerCase());
          const relevantLines = contextData.split("\n").filter((line) => {
            const loweredLine = line.toLowerCase();
            return loweredKeywords.some((keyword) => loweredLine.includes(keyword));
          });

          if (relevantLines.length > 0) {
            const content = relevantLines.slice(0, 20).join("\n"); // Limit to 20 lines

            return {
              content,
              sources: [
                {
                  filePath: "codebase-context",
                  type: "module",
                  name: "Codebase Context",
                  relevanceScore: 0.5,
                  clickableReference: "[Codebase Analysis]",
                },
              ],
              totalTokens: this.estimateTokenCount(content),
              searchMethod: "keyword",
              relevanceScore: 0.5,
            };
          }
        }
      }

      // Basic keyword-based response
      const notice = `Context not available. Question contains keywords: ${keywords.join(", ")}`;
      return {
        content: notice,
        sources: [],
        totalTokens: this.estimateTokenCount(notice),
        searchMethod: "keyword",
        relevanceScore: 0.2,
      };
    } catch (error) {
      this.logger.error("Keyword fallback search failed", error);
      return null;
    }
  }

  /**
   * Estimate token count for text (approximate).
   */
  private estimateTokenCount(text: string): number {
    // Rough estimation: ~4 characters per token
    return Math.ceil(text.length / 4);
  }

  /**
   * Get programming language from file path.
   */
  private getLanguageFromPath(filePath: string): string {
    return LanguageUtils.getLanguageFromPath(filePath);
  }

  /**
   * Capitalize first letter of string.
   */
  private capitalizeFirst(str: string): string {
    return str.charAt(0).toUpperCase() + str.slice(1);
  }

  /**
   * Get semantic similarity between two texts.
   *
   * With a vector database: 70% of the best relevance among the top five
   * stored matches for `query`, plus 30% of the word-overlap similarity between
   * `query` and `content`, capped at 1.0. The vector part depends only on
   * `query`; it serves as a baseline, not as a comparison with `content`.
   * Without a database, with no matches, or on error: word overlap only.
   */
  async getSemanticSimilarity(query: string, content: string): Promise<number> {
    // No separate readiness probe: it would issue an extra query, and a failing
    // database is already handled by the catch below.
    if (!this.vectorDb) {
      return this.calculateBasicTextSimilarity(query, content);
    }

    try {
      // Use existing vector search functionality to avoid N+1 queries
      // Search for similar content already in the database instead of indexing temporary snippets
      const existingResults = await this.vectorDb.semanticSearch(query, 5);

      if (existingResults.length === 0) {
        return this.calculateBasicTextSimilarity(query, content);
      }

      // Find the most similar existing content and use its relevance as a baseline
      const maxSimilarity = Math.max(...existingResults.map((r) => r.relevanceScore));

      // Apply text similarity to adjust the score based on actual content match
      const textSimilarity = this.calculateBasicTextSimilarity(query, content);

      // Combine vector and text similarities for a more accurate score
      return Math.min(1.0, maxSimilarity * 0.7 + textSimilarity * 0.3);
    } catch (error) {
      this.logger.error("Error calculating semantic similarity:", error);
      // Fallback to basic text similarity if vector DB fails
      return this.calculateBasicTextSimilarity(query, content);
    }
  }

  /**
   * Calculate basic text similarity as fallback when vector DB is unavailable.
   *
   * @returns Fraction of query words that contain, or are contained in, some
   *   content word (case-insensitive); 0 when the query has no words.
   */
  private calculateBasicTextSimilarity(query: string, content: string): number {
    // Splitting on whitespace yields "" for leading/trailing whitespace (and for
    // an empty string). Every string includes "", so an empty token would make
    // every word "match"; drop such tokens first.
    const tokenize = (text: string): string[] =>
      text
        .toLowerCase()
        .split(/\s+/)
        .filter((token) => token.length > 0);

    const queryWords = tokenize(query);
    if (queryWords.length === 0) return 0;

    const contentWords = tokenize(content);
    const matchingWords = queryWords.filter((word) =>
      contentWords.some((contentWord) => contentWord.includes(word) || word.includes(contentWord))
    );

    return matchingWords.length / queryWords.length;
  }

  /**
   * Check if vector database is ready for operations.
   *
   * Issues a one-result probe query, so each call costs a real search.
   */
  private async isVectorDbReady(): Promise<boolean> {
    if (!this.vectorDb) return false;

    try {
      // Check if vector database is initialized and operational
      // Use a simple query to test if the database is responsive
      await this.vectorDb.semanticSearch("test", 1);
      return true;
    } catch (error) {
      this.logger.debug("Vector DB not ready:", error);
      return false;
    }
  }

  /**
   * Get current configuration and stats.
   */
  getStats(): {
    vectorDbEnabled: boolean;
    fallbackEnabled: boolean;
    maxTokens: number;
    maxResults: number;
  } {
    return {
      vectorDbEnabled: !!this.vectorDb && this.options.enableVectorSearch,
      fallbackEnabled: this.options.enableFallback,
      maxTokens: this.options.maxContextTokens,
      maxResults: this.options.maxResults,
    };
  }

  /**
   * Update configuration.
   *
   * Only keys with a defined value are applied, so passing
   * `{ maxResults: undefined }` leaves the current value in place instead of
   * erasing it.
   */
  updateOptions(newOptions: Partial<SmartContextOptions>): void {
    const target = this.options as Record<string, unknown>;
    for (const [key, value] of Object.entries(newOptions)) {
      if (value !== undefined) {
        target[key] = value;
      }
    }
    this.logger.info("SmartContextExtractor options updated", this.options);
  }
}
diff --git a/src/services/smart-embedding-orchestrator.ts b/src/services/smart-embedding-orchestrator.ts new file mode 100644 index 0000000..9662bf2 --- /dev/null +++ b/src/services/smart-embedding-orchestrator.ts @@ -0,0 +1,782 @@ +import * as vscode from "vscode"; +import * as path from "path"; +import { VectorDatabaseService } from "./vector-database.service";
+import { VectorDbWorkerManager } from "./vector-db-worker-manager"; +import { VectorDbSyncService } from "./vector-db-sync.service"; +import { EmbeddingPhaseFactory, CreatedPhases } from "./embedding-phase-factory"; +import { EmbeddingConfigurationManager } from "./embedding-configuration"; +import { Logger, LogLevel } from "../infrastructure/logger/logger"; +import { FileUtils, AsyncUtils } from "../utils/common-utils"; + +export interface OrchestrationStats { + isInitialized: boolean; + phasesActive: { + immediate: boolean; + onDemand: boolean; + background: boolean; + bulk: boolean; + }; + embeddingProgress: { + totalFiles: number; + processedFiles: number; + queuedFiles: number; + failedFiles: number; + }; + performance: { + averageEmbeddingTime: number; + searchLatency: number; + memoryUsage: number; + }; + lastActivity: string | null; +} + +export interface UserActivity { + type: "file_opened" | "file_edited" | "file_deleted" | "file_renamed" | "question_asked" | "search_performed"; + filePath?: string; + oldFilePath?: string; // For renames + content?: string; + timestamp: number; +} + +/** + * SmartEmbeddingOrchestrator coordinates all embedding phases and manages + * the overall vector database experience in CodeBuddy. 
+ */ +export class SmartEmbeddingOrchestrator { + private logger: Logger; + private configManager: EmbeddingConfigurationManager; + private phaseFactory: EmbeddingPhaseFactory; + private phases: CreatedPhases | null = null; + private syncService: VectorDbSyncService | null = null; + private batchProcessingEnabled = true; + private readonly BATCH_SIZE = 10; + + private isInitialized = false; + private userActivityQueue: UserActivity[] = []; + private progressStatusBar: vscode.StatusBarItem; + + private stats: OrchestrationStats = { + isInitialized: false, + phasesActive: { + immediate: false, + onDemand: false, + background: false, + bulk: false, + }, + embeddingProgress: { + totalFiles: 0, + processedFiles: 0, + queuedFiles: 0, + failedFiles: 0, + }, + performance: { + averageEmbeddingTime: 0, + searchLatency: 0, + memoryUsage: 0, + }, + lastActivity: null, + }; + + constructor( + private context: vscode.ExtensionContext, + private vectorDb: VectorDatabaseService, + private workerManager: VectorDbWorkerManager + ) { + this.logger = Logger.initialize("SmartEmbeddingOrchestrator", { + minLevel: LogLevel.INFO, + }); + + this.configManager = EmbeddingConfigurationManager.getInstance(); + this.phaseFactory = EmbeddingPhaseFactory.getInstance(); + + // Create status bar item + this.progressStatusBar = vscode.window.createStatusBarItem(vscode.StatusBarAlignment.Right, 100); + this.progressStatusBar.command = "codebuddy.showEmbeddingStatus"; + + // Register commands + this.registerCommands(); + } + + /** + * Initialize the orchestrator and all embedding phases + */ + async initialize(): Promise { + if (this.isInitialized) { + this.logger.warn("Orchestrator already initialized"); + return; + } + + try { + this.logger.info("Initializing Smart Embedding Orchestrator"); + this.updateStatusBar("Initializing CodeBuddy AI...", true); + + // Step 1: Validate dependencies + await this.validateDependencies(); + + // Step 2: Create embedding phases + this.phases = await 
this.phaseFactory.createAllPhases({ + vectorDb: this.vectorDb, + workerManager: this.workerManager, + context: this.context, + enableBackgroundProcessing: true, + enableProgressReporting: true, + }); + + // Step 3: Create sync service (will be created when needed) + // Note: VectorDbSyncService integration will be handled separately + + // Step 4: Execute Phase 1 - Immediate Embedding + await this.executeImmediatePhase(); + + // Step 5: Setup Phase 2 - On-Demand Triggers + this.setupOnDemandPhase(); + + // Step 6: Start Phase 3 - Background Processing + this.startBackgroundPhase(); + + // Step 7: Register Phase 4 - Bulk Processing Command + this.registerBulkProcessingCommand(); + + // Step 8: Initialize sync service (when available) + if (this.syncService) { + await this.syncService.initialize(); + } + + // Step 9: Setup activity monitoring + this.setupActivityMonitoring(); + + this.isInitialized = true; + this.stats.isInitialized = true; + this.stats.lastActivity = new Date().toISOString(); + + this.updateStatusBar("CodeBuddy AI Ready", false); + this.logger.info("Smart Embedding Orchestrator initialized successfully"); + + // Show completion notification + vscode.window.showInformationMessage("🚀 CodeBuddy AI is ready with enhanced context understanding!"); + } catch (error) { + this.logger.error("Failed to initialize orchestrator:", error); + this.updateStatusBar("CodeBuddy AI Error", false); + + vscode.window.showErrorMessage( + `Failed to initialize CodeBuddy AI: ${error instanceof Error ? 
error.message : String(error)}` + ); + + throw error; + } + } + + /** + * Execute Phase 1: Immediate Embedding + */ + private async executeImmediatePhase(): Promise { + if (!this.phases) throw new Error("Phases not initialized"); + + try { + this.stats.phasesActive.immediate = true; + this.updateStatusBar("Indexing essential files...", true); + + await this.phases.immediate.embedEssentials(this.context, (phase, progress, details) => { + this.updateStatusBar(`${details} (${Math.round(progress)}%)`, true); + this.updateEmbeddingProgress(1, 0); + }); + + this.stats.phasesActive.immediate = false; + this.logger.info("Phase 1 (Immediate) completed successfully"); + } catch (error) { + this.stats.phasesActive.immediate = false; + this.logger.error("Phase 1 (Immediate) failed:", error); + throw error; + } + } + + /** + * Setup Phase 2: On-Demand Embedding + */ + private setupOnDemandPhase(): void { + if (!this.phases) throw new Error("Phases not initialized"); + + try { + this.phases.onDemand.setupTriggers(); + this.stats.phasesActive.onDemand = true; + this.logger.info("Phase 2 (On-Demand) setup completed"); + } catch (error) { + this.logger.error("Phase 2 (On-Demand) setup failed:", error); + // Don't throw - this is not critical + } + } + + /** + * Start Phase 3: Background Processing + */ + private startBackgroundPhase(): void { + if (!this.phases) throw new Error("Phases not initialized"); + + const config = this.configManager.getPhaseConfig("background"); + if (!config.enabled) { + this.logger.info("Background processing disabled in configuration"); + return; + } + + try { + this.phases.background.startBackgroundProcessing(); + this.stats.phasesActive.background = true; + this.logger.info("Phase 3 (Background) started successfully"); + } catch (error) { + this.logger.error("Phase 3 (Background) failed to start:", error); + // Don't throw - this is not critical + } + } + + /** + * Register Phase 4: Bulk Processing Command + */ + private 
registerBulkProcessingCommand(): void {
    // Phase 4 (bulk): registers the bulk command and marks the phase available.
    // Throws only when the phases are missing; a registration failure is
    // logged and swallowed.
    if (!this.phases) throw new Error("Phases not initialized");

    try {
      this.phases.bulk.registerBulkCommand(this.context);
      this.stats.phasesActive.bulk = true;
      this.logger.info("Phase 4 (Bulk) command registered");
    } catch (error) {
      // Deliberately best-effort: the bulk command is not critical.
      this.logger.error("Phase 4 (Bulk) registration failed:", error);
    }
  }

  /**
   * Installs the workspace and editor listeners that feed `recordActivity`.
   *
   * - file created  -> "file_opened" activity
   * - file changed  -> "file_edited" activity
   * - file deleted  -> "file_deleted" activity plus vector-DB cleanup
   * - a create within 500 ms of a delete with the same base name is treated as
   *   a rename and forwarded to `handleFileRenamed`
   * - active editor changed -> "file_opened" activity
   *
   * Both the watcher and the editor listener are added to
   * `context.subscriptions` so they are released with the extension.
   */
  private setupActivityMonitoring(): void {
    const fileWatcher = vscode.workspace.createFileSystemWatcher("**/*");

    fileWatcher.onDidCreate((uri) => {
      this.recordActivity({
        type: "file_opened",
        filePath: uri.fsPath,
        timestamp: Date.now(),
      });
    });

    fileWatcher.onDidChange((uri) => {
      this.recordActivity({
        type: "file_edited",
        filePath: uri.fsPath,
        timestamp: Date.now(),
      });
    });

    fileWatcher.onDidDelete((uri) => {
      this.recordActivity({
        type: "file_deleted",
        filePath: uri.fsPath,
        timestamp: Date.now(),
      });

      // Fire-and-forget: handleFileDeleted logs its own failures.
      void this.handleFileDeleted(uri.fsPath);
    });

    // Rename heuristic. The original authors assume a rename surfaces as a
    // delete followed closely by a create, so recent deletes are remembered
    // and matched against later creates.
    let recentDeletes: { path: string; timestamp: number }[] = [];

    fileWatcher.onDidDelete((uri) => {
      const now = Date.now();
      recentDeletes.push({ path: uri.fsPath, timestamp: now });
      // Forget deletes older than one second.
      recentDeletes = recentDeletes.filter((d) => now - d.timestamp < 1000);
    });

    fileWatcher.onDidCreate((uri) => {
      const now = Date.now();
      const createdName = path.basename(uri.fsPath);
      const possibleRename = recentDeletes.find(
        (d) => now - d.timestamp < 500 && path.basename(d.path) === createdName
      );

      if (possibleRename) {
        // Fire-and-forget: handleFileRenamed logs its own failures.
        void this.handleFileRenamed(possibleRename.path, uri.fsPath);
        recentDeletes = recentDeletes.filter((d) => d.path !== possibleRename.path);
      }
    });

    // Keep the returned Disposable: without it the listener outlives the extension.
    const editorListener = vscode.window.onDidChangeActiveTextEditor((editor) => {
      if (editor?.document) {
        this.recordActivity({
          type: "file_opened",
          filePath: editor.document.fileName,
          timestamp: Date.now(),
        });
      }
    });

    this.context.subscriptions.push(fileWatcher, editorListener);
  }

  /**
   * Appends an activity to the queue (capped at the 100 most recent entries),
   * stamps `stats.lastActivity` with the activity time as an ISO string, and
   * kicks off any on-demand embedding the activity calls for.
   */
  private recordActivity(activity: UserActivity): void {
    this.userActivityQueue.push(activity);
    this.stats.lastActivity = new Date(activity.timestamp).toISOString();

    if (this.userActivityQueue.length > 100) {
      this.userActivityQueue = this.userActivityQueue.slice(-100);
    }

    // Fire-and-forget: handleActivityTrigger catches and logs its own errors.
    void this.handleActivityTrigger(activity);
  }

  /**
   * Forwards an activity to the matching on-demand hook.
   *
   * Does nothing when the phases are missing or the activity has no
   * `filePath`. Errors from the hooks are logged and swallowed.
   *
   * NOTE(review): because of the `filePath` guard, the "question_asked" case
   * is only reached for activities that carry a file path; the one recorded by
   * `handleUserQuestion` does not, and that method calls `onUserQuestion`
   * itself.
   */
  private async handleActivityTrigger(activity: UserActivity): Promise<void> {
    if (!this.phases || !activity.filePath) return;

    try {
      switch (activity.type) {
        case "file_opened":
          await this.phases.onDemand.onFileOpened(activity.filePath);
          break;
        case "file_edited":
          await this.phases.onDemand.onFileEdited(activity.filePath);
          break;
        case "question_asked":
          if (activity.content) {
            await this.phases.onDemand.onUserQuestion(activity.content);
          }
          break;
      }
    } catch (error) {
      // Must never break the user experience.
      this.logger.error("Activity trigger failed:", error);
    }
  }

  /**
   * Records a "question_asked" activity, forwards the question to the
   * on-demand phase (when phases exist) and stores the elapsed time in
   * `stats.performance.searchLatency`.
   *
   * Failures are logged and swallowed; the latency is then left unchanged.
   *
   * @param question the user's question text
   */
  async handleUserQuestion(question: string): Promise<void> {
    this.recordActivity({
      type: "question_asked",
      content: question,
      timestamp: Date.now(),
    });

    const startTime = Date.now();

    try {
      if (this.phases) {
        await this.phases.onDemand.onUserQuestion(question);
      }

      this.stats.performance.searchLatency = Date.now() - startTime;
    } catch (error) {
      this.logger.error("Question handling failed:", error);
    }
  }

  /**
   * Summarises the 20 most recent activities.
   *
   * @returns `recentFiles` - distinct file paths in first-seen order;
   *          `activeDirectories` - distinct parent directories of those files;
   *          `questionFrequency` - number of "question_asked" activities.
   */
  getActivityPatterns(): {
    recentFiles: string[];
    activeDirectories: string[];
    questionFrequency: number;
  } {
    const recentActivities = this.userActivityQueue.slice(-20);

    // flatMap drops activities without a file path and narrows to string[].
    const recentFiles = [...new Set(recentActivities.flatMap((a) => (a.filePath ? [a.filePath] : [])))];
    const activeDirectories = [...new Set(recentFiles.map((f) => path.dirname(f)))];
    const questionFrequency = recentActivities.filter((a) => a.type === "question_asked").length;

    return { recentFiles, activeDirectories, questionFrequency };
  }

  /**
   * Adds the given counts to the running processed/failed file totals.
   */
  private updateEmbeddingProgress(processed: number, failed: number): void {
    const progress = this.stats.embeddingProgress;
    progress.processedFiles += processed;
    progress.failedFiles += failed;
  }

  /**
   * Shows `message` in the status bar: with a spinner while work is in
   * progress, with a check mark otherwise.
   *
   * Non-progress messages are hidden after 3 seconds, but only if the status
   * bar still shows that same message. A progress message posted in the
   * meantime is left visible.
   */
  private updateStatusBar(message: string, isProgress: boolean): void {
    const text = isProgress ? `$(sync~spin) ${message}` : `$(check) ${message}`;

    this.progressStatusBar.text = text;
    this.progressStatusBar.show();

    if (!isProgress) {
      setTimeout(() => {
        if (this.progressStatusBar.text === text) {
          this.progressStatusBar.hide();
        }
      }, 3000);
    }
  }

  /**
   * Verifies that the vector database and the worker manager report ready and
   * that a trivial semantic search succeeds.
   *
   * @throws Error naming the first dependency that is not ready, or wrapping
   *         the failure of the probe search.
   */
  private async validateDependencies(): Promise<void> {
    if (!this.vectorDb.isReady()) {
      throw new Error("VectorDatabaseService is not ready");
    }

    if (!this.workerManager.isReady()) {
      throw new Error("VectorDbWorkerManager is not ready");
    }

    // Probe the database with a minimal query.
    try {
      await this.vectorDb.semanticSearch("test", 1);
    } catch (error) {
      throw new Error(`Vector database test failed: ${error}`);
    }

    this.logger.debug("All dependencies validated successfully");
  }

  /**
   * Register
extension commands + */ + private registerCommands(): void { + // Show embedding status command + const statusCommand = vscode.commands.registerCommand("codebuddy.showEmbeddingStatus", () => + this.showEmbeddingStatus() + ); + + // Force reindex command + const reindexCommand = vscode.commands.registerCommand("codebuddy.forceReindex", () => this.forceReindex()); + + // Toggle background processing + const toggleBackgroundCommand = vscode.commands.registerCommand("codebuddy.toggleBackgroundProcessing", () => + this.toggleBackgroundProcessing() + ); + + this.context.subscriptions.push(statusCommand, reindexCommand, toggleBackgroundCommand, this.progressStatusBar); + } + + /** + * Show detailed embedding status + */ + private async showEmbeddingStatus(): Promise { + const stats = this.getStats(); + const activityPatterns = this.getActivityPatterns(); + + const statusMessage = ` +**CodeBuddy AI Status** + +**Initialization**: ${stats.isInitialized ? "✅ Ready" : "❌ Not Ready"} + +**Active Phases**: +- Immediate: ${stats.phasesActive.immediate ? "🔄 Active" : "✅ Complete"} +- On-Demand: ${stats.phasesActive.onDemand ? "✅ Active" : "❌ Inactive"} +- Background: ${stats.phasesActive.background ? "🔄 Active" : "❌ Inactive"} +- Bulk: ${stats.phasesActive.bulk ? 
"✅ Available" : "❌ Unavailable"} + +**Progress**: +- Files Processed: ${stats.embeddingProgress.processedFiles} +- Files Queued: ${stats.embeddingProgress.queuedFiles} +- Files Failed: ${stats.embeddingProgress.failedFiles} + +**Performance**: +- Search Latency: ${stats.performance.searchLatency}ms +- Memory Usage: ${Math.round(stats.performance.memoryUsage / 1024 / 1024)}MB + +**Activity**: +- Recent Files: ${activityPatterns.recentFiles.length} +- Active Directories: ${activityPatterns.activeDirectories.length} +- Recent Questions: ${activityPatterns.questionFrequency} + +**Last Activity**: ${stats.lastActivity || "None"} + `.trim(); + + vscode.window + .showInformationMessage(statusMessage, "View Logs", "Force Reindex", "Settings") + .then(async (selection) => { + switch (selection) { + case "View Logs": + vscode.commands.executeCommand("workbench.action.showOutputChannels"); + break; + case "Force Reindex": + await this.forceReindex(); + break; + case "Settings": + vscode.commands.executeCommand("workbench.action.openSettings", "codebuddy.smartEmbedding"); + break; + } + }); + } + + /** + * Force complete reindex + */ + private async forceReindex(): Promise { + if (!this.phases) { + vscode.window.showErrorMessage("Embedding phases not initialized"); + return; + } + + const confirmation = await vscode.window.showWarningMessage( + "This will reindex your entire codebase. 
Continue?", + { modal: true }, + "Yes, Reindex All" + ); + + if (confirmation) { + try { + await this.phases.bulk.processBulkEmbedding(); + } catch (error) { + vscode.window.showErrorMessage(`Reindex failed: ${error}`); + } + } + } + + /** + * Toggle background processing + */ + private async toggleBackgroundProcessing(): Promise { + const config = this.configManager.getPhaseConfig("background"); + const newState = !config.enabled; + + await this.configManager.updatePhaseConfig("background", { + enabled: newState, + }); + + if (newState && this.phases) { + this.startBackgroundPhase(); + vscode.window.showInformationMessage("Background processing enabled"); + } else { + this.stats.phasesActive.background = false; + vscode.window.showInformationMessage("Background processing disabled"); + } + } + + /** + * Get comprehensive orchestration statistics + */ + getStats(): OrchestrationStats { + // Update memory usage + this.stats.performance.memoryUsage = process.memoryUsage().heapUsed; + + return { ...this.stats }; + } + + /** + * Execute on-demand embedding phase + */ + private async executeOnDemandPhase(): Promise { + if (!this.phases?.onDemand) return; + + try { + this.stats.phasesActive.onDemand = true; + this.logger.info("Starting on-demand embedding phase"); + + // On-demand phase is triggered by file events, so we just ensure it's set up + this.phases.onDemand.setupTriggers(); + + this.logger.info("On-demand embedding phase setup completed"); + } catch (error) { + this.logger.error("On-demand phase execution failed:", error); + } finally { + this.stats.phasesActive.onDemand = false; + } + } + + /** + * Execute background embedding phase + */ + private async executeBackgroundPhase(): Promise { + if (!this.phases?.background) return; + + try { + this.stats.phasesActive.background = true; + this.logger.info("Starting background embedding phase"); + + // Background phase has startBackgroundProcessing method + this.phases.background.startBackgroundProcessing(); + + 
this.logger.info("Background embedding phase setup completed"); + } catch (error) { + this.logger.error("Background phase execution failed:", error); + } finally { + this.stats.phasesActive.background = false; + } + } + + /** + * Execute bulk embedding phase + */ + private async executeBulkPhase(): Promise { + if (!this.phases?.bulk) return; + + try { + this.stats.phasesActive.bulk = true; + this.logger.info("Starting bulk embedding phase"); + + // Bulk phase has processBulkEmbedding method + await this.phases.bulk.processBulkEmbedding(); + + this.logger.info("Bulk embedding phase completed"); + } catch (error) { + this.logger.error("Bulk phase execution failed:", error); + } finally { + this.stats.phasesActive.bulk = false; + } + } + + /** + * Handle file deletion by cleaning up vector database entries + */ + private async handleFileDeleted(filePath: string): Promise { + try { + await this.vectorDb.deleteByFile(filePath); + this.logger.info(`Cleaned up vector DB entries for deleted file: ${filePath}`); + } catch (error) { + this.logger.error(`Failed to clean up vector DB for deleted file ${filePath}:`, error); + } + } + + /** + * Handle file rename by updating vector database entries + */ + private async handleFileRenamed(oldPath: string, newPath: string): Promise { + try { + // Record the rename activity + this.recordActivity({ + type: "file_renamed", + oldFilePath: oldPath, + filePath: newPath, + timestamp: Date.now(), + }); + + // For now, delete old entries and re-index with new path + // In a more sophisticated implementation, we could update metadata in place + await this.vectorDb.deleteByFile(oldPath); + + // Re-index the file under its new path + if (await this.shouldProcessFile(newPath)) { + await this.processEmbeddingForFile(newPath); + } + + this.logger.info(`Updated vector DB for renamed file: ${oldPath} -> ${newPath}`); + } catch (error) { + this.logger.error(`Failed to handle file rename ${oldPath} -> ${newPath}:`, error); + } + } + + /** + * Check 
if a file should be processed for embedding + */ + private async shouldProcessFile(filePath: string): Promise { + // Check file extension and other criteria + const ext = path.extname(filePath).toLowerCase(); + const supportedExtensions = [ + ".ts", + ".js", + ".tsx", + ".jsx", + ".py", + ".java", + ".cpp", + ".c", + ".cs", + ".go", + ".rs", + ".php", + ".rb", + ]; + return supportedExtensions.includes(ext); + } + + /** + * Process files in batches for optimized performance + */ + private async processBatchEmbedding(files: string[]): Promise { + if (!this.batchProcessingEnabled) { + // Process sequentially if batch processing is disabled + for (const file of files) { + await this.processEmbeddingForFile(file); + } + return; + } + + // Process in batches + for (let i = 0; i < files.length; i += this.BATCH_SIZE) { + const batch = files.slice(i, i + this.BATCH_SIZE); + const batchPromises = batch.map((file) => this.processEmbeddingForFile(file)); + + try { + await Promise.all(batchPromises); + this.logger.info( + `Processed batch ${Math.floor(i / this.BATCH_SIZE) + 1} of ${Math.ceil(files.length / this.BATCH_SIZE)}` + ); + } catch (error) { + this.logger.error(`Batch processing failed for files ${i}-${i + batch.length}:`, error); + } + } + } + + /** + * Process embedding for a single file + */ + private async processEmbeddingForFile(filePath: string): Promise { + try { + // Use vector database service to embed the file + await this.vectorDb.initialize(); + // Processing logic would go here + this.logger.debug(`Embedded file: ${filePath}`); + } catch (error) { + this.logger.error(`Failed to embed file ${filePath}:`, error); + } + } + + /** + * Gracefully shutdown the orchestrator + */ + async dispose(): Promise { + this.logger.info("Disposing Smart Embedding Orchestrator"); + + try { + // Dispose phases + if (this.phases) { + EmbeddingPhaseFactory.disposePhases(this.phases); + } + + // Dispose sync service + if (this.syncService) { + this.syncService.dispose(); + } + + 
// Hide status bar + this.progressStatusBar.dispose(); + + this.isInitialized = false; + this.stats.isInitialized = false; + + this.logger.info("Smart Embedding Orchestrator disposed successfully"); + } catch (error) { + this.logger.error("Error during orchestrator disposal:", error); + } + } +} diff --git a/src/webview-providers/base.ts b/src/webview-providers/base.ts index 4d6e6d4..473515a 100644 --- a/src/webview-providers/base.ts +++ b/src/webview-providers/base.ts @@ -1,9 +1,6 @@ import * as vscode from "vscode"; import { Orchestrator } from "../agents/orchestrator"; -import { - FolderEntry, - IContextInfo, -} from "../application/interfaces/workspace.interface"; +import { FolderEntry, IContextInfo } from "../application/interfaces/workspace.interface"; import { IEventPayload } from "../emitter/interface"; import { Logger } from "../infrastructure/logger/logger"; import { AgentService } from "../services/agent-state"; @@ -15,8 +12,13 @@ import { InputValidator } from "../services/input-validator"; import { QuestionClassifierService } from "../services/question-classifier.service"; import { LogLevel } from "../services/telemetry"; import { WorkspaceService } from "../services/workspace-service"; -import { formatText } from "../utils/utils"; +import { formatText, getAPIKeyAndModel, getGenerativeAiModel } from "../utils/utils"; import { getWebviewContent } from "../webview/chat"; +import { VectorDatabaseService } from "../services/vector-database.service"; +import { VectorDbWorkerManager } from "../services/vector-db-worker-manager"; +import { SmartContextExtractor } from "../services/smart-context-extractor"; +import { SmartEmbeddingOrchestrator } from "../services/smart-embedding-orchestrator"; +import { ContextRetriever } from "../services/context-retriever"; let _view: vscode.WebviewView | undefined; export abstract class BaseWebViewProvider implements vscode.Disposable { @@ -36,11 +38,17 @@ export abstract class BaseWebViewProvider implements 
vscode.Disposable { private readonly codebaseUnderstanding: CodebaseUnderstandingService; private readonly inputValidator: InputValidator; + // Vector database components + protected readonly vectorDbWorkerManager: VectorDbWorkerManager; + protected readonly vectorDatabaseService: VectorDatabaseService; + protected readonly smartContextExtractor: SmartContextExtractor; + protected readonly smartEmbeddingOrchestrator: SmartEmbeddingOrchestrator; + constructor( private readonly _extensionUri: vscode.Uri, protected readonly apiKey: string, protected readonly generativeAiModel: string, - context: vscode.ExtensionContext, + context: vscode.ExtensionContext ) { this.fileManager = FileManager.initialize(context, "files"); this.fileService = FileService.getInstance(); @@ -55,9 +63,49 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { this.questionClassifier = QuestionClassifierService.getInstance(); this.codebaseUnderstanding = CodebaseUnderstandingService.getInstance(); this.inputValidator = InputValidator.getInstance(); + + // Initialize vector database components + const { apiKey: geminiApiKey } = getAPIKeyAndModel("Gemini"); + this.vectorDbWorkerManager = new VectorDbWorkerManager(context); + this.vectorDatabaseService = new VectorDatabaseService(context, geminiApiKey); + this.smartContextExtractor = new SmartContextExtractor( + this.vectorDatabaseService, + undefined, // contextRetriever will be set later if needed + this.codebaseUnderstanding, + this.questionClassifier + ); + this.smartEmbeddingOrchestrator = new SmartEmbeddingOrchestrator( + context, + this.vectorDatabaseService, + this.vectorDbWorkerManager + ); + // Don't register disposables here - do it lazily when webview is resolved } + /** + * Initialize vector database components for enhanced context extraction + */ + protected async initializeVectorComponents(): Promise { + try { + this.logger.info("Initializing vector database components..."); + + // Initialize the vector database 
worker manager + await this.vectorDbWorkerManager.initialize(); + + // Initialize the vector database service + await this.vectorDatabaseService.initialize(); + + // Start the smart embedding orchestrator for background processing + await this.smartEmbeddingOrchestrator.initialize(); + + this.logger.info("Vector database components initialized successfully"); + } catch (error) { + this.logger.error("Failed to initialize vector database components", error); + // Don't throw - continue with fallback functionality + } + } + registerDisposables() { // Only register once per instance if (this.disposables.length > 0) { @@ -69,26 +117,14 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { this.orchestrator.onThinking(this.handleModelResponseEvent.bind(this)), this.orchestrator.onUpdate(this.handleModelResponseEvent.bind(this)), this.orchestrator.onError(this.handleModelResponseEvent.bind(this)), - this.orchestrator.onSecretChange( - this.handleModelResponseEvent.bind(this), - ), - this.orchestrator.onActiveworkspaceUpdate( - this.handleGenericEvents.bind(this), - ), + this.orchestrator.onSecretChange(this.handleModelResponseEvent.bind(this)), + this.orchestrator.onActiveworkspaceUpdate(this.handleGenericEvents.bind(this)), this.orchestrator.onFileUpload(this.handleModelResponseEvent.bind(this)), - this.orchestrator.onStrategizing( - this.handleModelResponseEvent.bind(this), - ), - this.orchestrator.onConfigurationChange( - this.handleGenericEvents.bind(this), - ), + this.orchestrator.onStrategizing(this.handleModelResponseEvent.bind(this)), + this.orchestrator.onConfigurationChange(this.handleGenericEvents.bind(this)), this.orchestrator.onUserPrompt(this.handleUserPrompt.bind(this)), - this.orchestrator.onGetUserPreferences( - this.handleUserPreferences.bind(this), - ), - this.orchestrator.onUpdateThemePreferences( - this.handleThemePreferences.bind(this), - ), + this.orchestrator.onGetUserPreferences(this.handleUserPreferences.bind(this)), + 
this.orchestrator.onUpdateThemePreferences(this.handleThemePreferences.bind(this)) ); } @@ -111,9 +147,7 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { webviewView.webview.options = webviewOptions; if (!this.apiKey) { - vscode.window.showErrorMessage( - "API key not configured. Check your settings.", - ); + vscode.window.showErrorMessage("API key not configured. Check your settings."); return; } @@ -149,17 +183,12 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { // Update the provider's chatHistory array (this should be overridden in child classes) await this.updateProviderChatHistory(providerHistory); - this.logger.debug( - `Synchronized ${persistentHistory.length} chat messages from database`, - ); + this.logger.debug(`Synchronized ${persistentHistory.length} chat messages from database`); } else { this.logger.debug("No chat history found in database to synchronize"); } } catch (error) { - this.logger.warn( - "Failed to synchronize chat history from database:", - error, - ); + this.logger.warn("Failed to synchronize chat history from database:", error); // Don't throw - this is not critical for provider initialization } } @@ -171,16 +200,11 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { protected async updateProviderChatHistory(history: any[]): Promise { // Base implementation - child classes should override this // to update their specific chatHistory arrays - this.logger.debug( - "Base provider - no specific chat history array to update", - ); + this.logger.debug("Base provider - no specific chat history array to update"); } private async setWebviewHtml(view: vscode.WebviewView): Promise { - view.webview.html = getWebviewContent( - this.currentWebView?.webview!, - this._extensionUri, - ); + view.webview.html = getWebviewContent(this.currentWebView?.webview!, this._extensionUri); } private async getFiles() { @@ -228,10 +252,8 @@ export abstract class BaseWebViewProvider 
implements vscode.Disposable { private async publishWorkSpace(): Promise { try { - const filesAndDirs: IContextInfo = - await this.workspaceService.getContextInfo(true); - const workspaceFiles: Map | undefined = - filesAndDirs.workspaceFiles; + const filesAndDirs: IContextInfo = await this.workspaceService.getContextInfo(true); + const workspaceFiles: Map | undefined = filesAndDirs.workspaceFiles; if (!workspaceFiles) { this.logger.warn("There no files within the workspace"); return; @@ -258,23 +280,17 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { this.UserMessageCounter += 1; // Validate user input for security - const validation = this.inputValidator.validateInput( - message.message, - "chat", - ); + const validation = this.inputValidator.validateInput(message.message, "chat"); if (validation.blocked) { - this.logger.warn( - "User input blocked due to security concerns", - { - originalLength: message.message.length, - warnings: validation.warnings, - }, - ); + this.logger.warn("User input blocked due to security concerns", { + originalLength: message.message.length, + warnings: validation.warnings, + }); await this.sendResponse( "âš ī¸ Your message contains potentially unsafe content and has been blocked. Please rephrase your question in a more direct way.", - "bot", + "bot" ); break; } @@ -290,7 +306,7 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { if (validation.warnings.length > 2) { await this.sendResponse( "â„šī¸ Your message has been modified for security. 
Some content was filtered.", - "bot", + "bot" ); } } @@ -301,12 +317,9 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { // Check if we should prune history for performance if (this.UserMessageCounter % 10 === 0) { const stats = await this.getChatHistoryStats("agentId"); - if ( - stats.totalMessages > 100 || - stats.estimatedTokens > 16000 - ) { + if (stats.totalMessages > 100 || stats.estimatedTokens > 16000) { this.logger.info( - `High chat history usage detected: ${stats.totalMessages} messages, ${stats.estimatedTokens} tokens`, + `High chat history usage detected: ${stats.totalMessages} messages, ${stats.estimatedTokens} tokens` ); // Optionally trigger manual pruning here // await this.pruneHistoryManually("agentId", { maxMessages: 50, maxTokens: 8000 }); @@ -315,28 +328,20 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { response = await this.generateResponse( await this.enhanceMessageWithCodebaseContext(sanitizedMessage), - message.metaData, + message.metaData ); if (this.UserMessageCounter === 1) { await this.publishWorkSpace(); } if (response) { - console.log( - `[DEBUG] Response from generateResponse: ${response.length} characters`, - ); + console.log(`[DEBUG] Response from generateResponse: ${response.length} characters`); const formattedResponse = formatText(response); - console.log( - `[DEBUG] Formatted response: ${formattedResponse.length} characters`, - ); - console.log( - `[DEBUG] Original response ends with: "${response.slice(-100)}"`, - ); + console.log(`[DEBUG] Formatted response: ${formattedResponse.length} characters`); + console.log(`[DEBUG] Original response ends with: "${response.slice(-100)}"`); await this.sendResponse(formattedResponse, "bot"); } else { - console.log( - `[DEBUG] No response received from generateResponse`, - ); + console.log(`[DEBUG] No response received from generateResponse`); } break; } @@ -364,18 +369,14 @@ export abstract class BaseWebViewProvider implements 
vscode.Disposable { case "theme-change-event": // Handle theme change and store in user preferences this.logger.info(`Theme changed to: ${message.message}`); - this.orchestrator.publish( - "onUpdateThemePreferences", - message.message, - { - theme: message.message, - }, - ); + this.orchestrator.publish("onUpdateThemePreferences", message.message, { + theme: message.message, + }); break; default: throw new Error("Unknown command"); } - }), + }) ); } catch (error) { this.logger.error("Message handler failed", error); @@ -391,60 +392,75 @@ export abstract class BaseWebViewProvider implements vscode.Disposable { } public handleModelResponseEvent(event: IEventPayload) { - this.sendResponse( - formatText(event.message), - event.message === "folders" ? "bootstrap" : "bot", - ); + this.sendResponse(formatText(event.message), event.message === "folders" ? "bootstrap" : "bot"); } - abstract generateResponse( - message?: string, - metaData?: Record, - ): Promise; + abstract generateResponse(message?: string, metaData?: Record): Promise; - abstract sendResponse( - response: string, - currentChat?: string, - ): Promise; + abstract sendResponse(response: string, currentChat?: string): Promise; /** * Enhances user messages with codebase context if the question is codebase-related */ - private async enhanceMessageWithCodebaseContext( - message: string, - ): Promise { + private async enhanceMessageWithCodebaseContext(message: string): Promise { try { - const questionAnalysis = - this.questionClassifier.categorizeQuestion(message); + const questionAnalysis = this.questionClassifier.categorizeQuestion(message); if (!questionAnalysis.isCodebaseRelated) { - this.logger.debug( - "Question not codebase-related, returning original message", - ); + this.logger.debug("Question not codebase-related, returning original message"); return message; } this.logger.info( - `Detected codebase question with confidence: ${questionAnalysis.confidence}, categories: ${questionAnalysis.categories.join(", 
")}`, + `Detected codebase question with confidence: ${questionAnalysis.confidence}, categories: ${questionAnalysis.categories.join(", ")}` ); - // Get comprehensive codebase context - const codebaseContext = - await this.codebaseUnderstanding.getCodebaseContext(); + // First try vector-based semantic search for precise context + let vectorContext = ""; + let fallbackContext = ""; + + try { + const vectorResult = await this.smartContextExtractor.extractRelevantContextWithVector( + message, + vscode.window.activeTextEditor?.document.fileName + ); + + if (vectorResult.content && vectorResult.sources.length > 0) { + vectorContext = `\n**Semantic Context** (${vectorResult.searchMethod} search results):\n${vectorResult.sources + .map( + (source) => + `- **${source.filePath}** (relevance: ${source.relevanceScore.toFixed(2)}): ${source.clickableReference}` + ) + .join( + "\n" + )}\n\n**Context Content**:\n${vectorResult.content.substring(0, 2000)}${vectorResult.content.length > 2000 ? "..." : ""}`; + + this.logger.info( + `Vector search found ${vectorResult.sources.length} relevant sources with ${vectorResult.totalTokens} tokens` + ); + } + } catch (vectorError) { + this.logger.warn("Vector search failed, falling back to traditional context", vectorError); + } + + // Fallback to comprehensive codebase context if vector search didn't provide enough + if (!vectorContext) { + fallbackContext = await this.codebaseUnderstanding.getCodebaseContext(); + } - // Create enhanced prompt with codebase context + // Create enhanced prompt with both vector and fallback context const enhancedMessage = ` **User Question**: ${message} -**Codebase Context** (Automatically included because your question is related to understanding this codebase): +${vectorContext} -${codebaseContext} +${!vectorContext ? 
`**Codebase Context** (Automatically included because your question is related to understanding this codebase):\n\n${fallbackContext}` : ""} -**Instructions for AI**: Use the codebase context above to provide accurate, specific answers about this project. Reference actual files, patterns, and implementations found in the codebase analysis. Use the provided clickable file references (e.g., [[1]], [[2]]) so users can navigate directly to the source code. +**Instructions for AI**: Use the ${vectorContext ? "semantic context" : "codebase context"} above to provide accurate, specific answers about this project. Reference actual files, patterns, and implementations found in the analysis. Use the provided clickable file references (e.g., [[1]], [[2]]) so users can navigate directly to the source code. IMPORTANT: Please provide a complete response. Do not truncate your answer mid-sentence or mid-word. Ensure your response is fully finished before ending. `.trim(); - this.logger.debug("Enhanced message with codebase context"); + this.logger.debug("Enhanced message with vector/codebase context"); return enhancedMessage; } catch (error) { this.logger.error("Error enhancing message with codebase context", error); @@ -454,17 +470,23 @@ IMPORTANT: Please provide a complete response. 
Do not truncate your answer mid-s } public dispose(): void { - this.logger.debug( - `Disposing BaseWebViewProvider with ${this.disposables.length} disposables`, - ); + this.logger.debug(`Disposing BaseWebViewProvider with ${this.disposables.length} disposables`); + + // Dispose vector database components + try { + this.smartEmbeddingOrchestrator?.dispose(); + this.vectorDbWorkerManager?.dispose(); + } catch (error) { + this.logger.error("Error disposing vector database components", error); + } + this.disposables.forEach((d) => d.dispose()); this.disposables.length = 0; // Clear the array } async getContext(files: string[]) { try { - const filesContent: Map | undefined = - await this.fileService.getFilesContent(files); + const filesContent: Map | undefined = await this.fileService.getFilesContent(files); if (filesContent && filesContent.size > 0) { return Array.from(filesContent.values()).join("\n"); } @@ -484,15 +506,9 @@ IMPORTANT: Please provide a complete response. Do not truncate your answer mid-s maxTokens: number; maxAgeHours: number; preserveSystemMessages: boolean; - }>, + }> ): Promise { - return this.chatHistoryManager.formatChatHistory( - role, - message, - model, - key, - pruneConfig, - ); + return this.chatHistoryManager.formatChatHistory(role, message, model, key, pruneConfig); } // Get chat history stats for monitoring @@ -512,7 +528,7 @@ IMPORTANT: Please provide a complete response. Do not truncate your answer mid-s maxMessages?: number; maxTokens?: number; maxAgeHours?: number; - }, + } ): Promise { await this.chatHistoryManager.pruneHistoryForKey(key, config); }