From c4758746e5bc75c8560b357613d8dc7eeaa1e71a Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 2 May 2025 20:55:23 -0500 Subject: [PATCH 01/71] feat: apply changes from local main --- .../presentAssistantMessage.ts | 13 + src/core/prompts/system.ts | 4 + src/core/prompts/tools/codebase-search.ts | 19 + src/core/prompts/tools/index.ts | 10 + src/core/tools/codebaseSearchTool.ts | 155 +++++++ src/core/webview/ClineProvider.ts | 73 +++- .../webview/__tests__/ClineProvider.test.ts | 7 + src/core/webview/webviewMessageHandler.ts | 49 +++ src/exports/roo-code.d.ts | 30 ++ src/exports/types.ts | 30 ++ src/i18n/locales/en/tools.json | 5 +- src/package.json | 2 + src/schemas/index.ts | 40 ++ src/services/code-index/config-manager.ts | 226 ++++++++++ src/services/code-index/embedders/ollama.ts | 73 ++++ src/services/code-index/embedders/openai.ts | 129 ++++++ src/services/code-index/interfaces/config.ts | 30 ++ .../code-index/interfaces/embedder.ts | 21 + .../code-index/interfaces/file-processor.ts | 99 +++++ src/services/code-index/interfaces/index.ts | 4 + src/services/code-index/interfaces/manager.ts | 80 ++++ .../code-index/interfaces/vector-store.ts | 72 ++++ src/services/code-index/manager.ts | 152 +++++++ src/services/code-index/orchestrator.ts | 270 ++++++++++++ .../code-index/processors/file-watcher.ts | 255 ++++++++++++ src/services/code-index/processors/index.ts | 3 + src/services/code-index/processors/parser.ts | 355 ++++++++++++++++ src/services/code-index/processors/scanner.ts | 390 ++++++++++++++++++ src/services/code-index/search-service.ts | 56 +++ src/services/code-index/service-factory.ts | 124 ++++++ .../code-index/shared/supported-extensions.ts | 4 + src/services/code-index/state-manager.ts | 133 ++++++ src/services/code-index/vector-store/index.ts | 1 + .../code-index/vector-store/qdrant-client.ts | 210 ++++++++++ src/services/tree-sitter/index.ts | 2 + src/shared/ExtensionMessage.ts | 7 + src/shared/WebviewMessage.ts | 22 +- 
src/shared/embeddingModels.ts | 86 ++++ src/shared/tools.ts | 17 +- webview-ui/src/components/chat/ChatRow.tsx | 66 +++ .../components/chat/CodebaseSearchResult.tsx | 54 +++ .../components/settings/CodeIndexSettings.tsx | 314 ++++++++++++++ .../src/components/settings/SettingsView.tsx | 17 + .../src/context/ExtensionStateContext.tsx | 8 + webview-ui/src/i18n/locales/en/chat.json | 4 + 45 files changed, 3715 insertions(+), 6 deletions(-) create mode 100644 src/core/prompts/tools/codebase-search.ts create mode 100644 src/core/tools/codebaseSearchTool.ts create mode 100644 src/services/code-index/config-manager.ts create mode 100644 src/services/code-index/embedders/ollama.ts create mode 100644 src/services/code-index/embedders/openai.ts create mode 100644 src/services/code-index/interfaces/config.ts create mode 100644 src/services/code-index/interfaces/embedder.ts create mode 100644 src/services/code-index/interfaces/file-processor.ts create mode 100644 src/services/code-index/interfaces/index.ts create mode 100644 src/services/code-index/interfaces/manager.ts create mode 100644 src/services/code-index/interfaces/vector-store.ts create mode 100644 src/services/code-index/manager.ts create mode 100644 src/services/code-index/orchestrator.ts create mode 100644 src/services/code-index/processors/file-watcher.ts create mode 100644 src/services/code-index/processors/index.ts create mode 100644 src/services/code-index/processors/parser.ts create mode 100644 src/services/code-index/processors/scanner.ts create mode 100644 src/services/code-index/search-service.ts create mode 100644 src/services/code-index/service-factory.ts create mode 100644 src/services/code-index/shared/supported-extensions.ts create mode 100644 src/services/code-index/state-manager.ts create mode 100644 src/services/code-index/vector-store/index.ts create mode 100644 src/services/code-index/vector-store/qdrant-client.ts create mode 100644 src/shared/embeddingModels.ts create mode 100644 
webview-ui/src/components/chat/CodebaseSearchResult.tsx create mode 100644 webview-ui/src/components/settings/CodeIndexSettings.tsx diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts index ef2bb04963..2d93060bb3 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -32,6 +32,7 @@ import { checkpointSave } from "../checkpoints" import { formatResponse } from "../prompts/responses" import { validateToolUse } from "../tools/validateToolUse" import { Task } from "../task/Task" +import { codebaseSearchTool } from '../tools/codebaseSearchTool' /** * Processes and presents assistant message content to the user interface. @@ -185,6 +186,8 @@ export async function presentAssistantMessage(cline: Task) { return `[${block.name}]` case "switch_mode": return `[${block.name} to '${block.params.mode_slug}'${block.params.reason ? ` because: ${block.params.reason}` : ""}]` + case "codebase_search": // Add case for the new tool + return `[${block.name} for '${block.params.query}']` case "new_task": { const mode = block.params.mode ?? defaultModeSlug const message = block.params.message ?? 
"(no message)" @@ -402,6 +405,16 @@ export async function presentAssistantMessage(cline: Task) { case "list_files": await listFilesTool(cline, block, askApproval, handleError, pushToolResult, removeClosingTag) break + case "codebase_search": + await codebaseSearchTool( + cline, + block, + askApproval, + handleError, + pushToolResult, + removeClosingTag, + ) + break case "list_code_definition_names": await listCodeDefinitionNamesTool( cline, diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index 0fce1765a9..3bb6c58df4 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -27,6 +27,7 @@ import { markdownFormattingSection, } from "./sections" import { formatLanguage } from "../../shared/language" +import { CodeIndexManager } from "../../services/code-index/manager" async function generatePrompt( context: vscode.ExtensionContext, @@ -63,6 +64,8 @@ async function generatePrompt( : Promise.resolve(""), ]) + const codeIndexManager = CodeIndexManager.getInstance(context) + const basePrompt = `${roleDefinition} ${markdownFormattingSection()} @@ -73,6 +76,7 @@ ${getToolDescriptionsForMode( mode, cwd, supportsComputerUse, + codeIndexManager, effectiveDiffStrategy, browserViewportSize, mcpHub, diff --git a/src/core/prompts/tools/codebase-search.ts b/src/core/prompts/tools/codebase-search.ts new file mode 100644 index 0000000000..3b40e7478d --- /dev/null +++ b/src/core/prompts/tools/codebase-search.ts @@ -0,0 +1,19 @@ +export function getCodebaseSearchDescription(): string { + return `## codebase_search +Description: Search the codebase for relevant files based on a query. Use this when the user asks a question about the codebase that requires finding specific files or code snippets. +Parameters: +- query: (required) The natural language query to search for. +- limit: (optional) The maximum number of search results to return. Defaults to 10. 
+Usage: + +Your natural language query here +Number of results (optional) + + +Example: Searching for functions related to user authentication + +User login and password hashing +5 + +` +} diff --git a/src/core/prompts/tools/index.ts b/src/core/prompts/tools/index.ts index d3e75d7b09..0fc6066e30 100644 --- a/src/core/prompts/tools/index.ts +++ b/src/core/prompts/tools/index.ts @@ -20,6 +20,8 @@ import { getUseMcpToolDescription } from "./use-mcp-tool" import { getAccessMcpResourceDescription } from "./access-mcp-resource" import { getSwitchModeDescription } from "./switch-mode" import { getNewTaskDescription } from "./new-task" +import { getCodebaseSearchDescription } from "./codebase-search" +import { CodeIndexManager } from "../../../services/code-index/manager" // Map of tool names to their description functions const toolDescriptionMap: Record string | undefined> = { @@ -35,6 +37,7 @@ const toolDescriptionMap: Record string | undefined> attempt_completion: () => getAttemptCompletionDescription(), use_mcp_tool: (args) => getUseMcpToolDescription(args), access_mcp_resource: (args) => getAccessMcpResourceDescription(args), + codebase_search: () => getCodebaseSearchDescription(), switch_mode: () => getSwitchModeDescription(), new_task: (args) => getNewTaskDescription(args), insert_content: (args) => getInsertContentDescription(args), @@ -47,6 +50,7 @@ export function getToolDescriptionsForMode( mode: Mode, cwd: string, supportsComputerUse: boolean, + codeIndexManager: CodeIndexManager, diffStrategy?: DiffStrategy, browserViewportSize?: string, mcpHub?: McpHub, @@ -89,6 +93,11 @@ export function getToolDescriptionsForMode( // Add always available tools ALWAYS_AVAILABLE_TOOLS.forEach((tool) => tools.add(tool)) + // Conditionally exclude codebase_search if feature is disabled or not configured + if (!(codeIndexManager.isFeatureEnabled && codeIndexManager.isFeatureConfigured)) { + tools.delete("codebase_search") + } + // Map tool descriptions for allowed tools const 
descriptions = Array.from(tools).map((toolName) => { const descriptionFn = toolDescriptionMap[toolName] @@ -122,4 +131,5 @@ export { getSwitchModeDescription, getInsertContentDescription, getSearchAndReplaceDescription, + getCodebaseSearchDescription, } diff --git a/src/core/tools/codebaseSearchTool.ts b/src/core/tools/codebaseSearchTool.ts new file mode 100644 index 0000000000..3bb355f3c6 --- /dev/null +++ b/src/core/tools/codebaseSearchTool.ts @@ -0,0 +1,155 @@ +import * as vscode from "vscode" + +import { Task } from "../task/Task" +import { CodeIndexManager } from "../../services/code-index/manager" +import { getWorkspacePath } from "../../utils/path" +import { formatResponse } from "../prompts/responses" +import { t } from "../../i18n" +import { VectorStoreSearchResult } from "../../services/code-index/interfaces" +import { AskApproval, HandleError, PushToolResult, RemoveClosingTag, ToolUse } from '../../shared/tools' + +export async function codebaseSearchTool( + cline: Task, + block: ToolUse, + askApproval: AskApproval, + handleError: HandleError, + pushToolResult: PushToolResult, + removeClosingTag: RemoveClosingTag, +) { + const toolName = "codebase_search" + const workspacePath = getWorkspacePath() + + if (!workspacePath) { + // This case should ideally not happen if Cline is initialized correctly + await handleError(toolName, new Error("Could not determine workspace path.")) + return + } + + // --- Parameter Extraction and Validation --- + let query: string | undefined = block.params.query + let limitStr: string | undefined = block.params.limit + let limit: number = 5 // Default limit + + if (!query) { + cline.consecutiveMistakeCount++ + pushToolResult(await cline.sayAndCreateMissingParamError(toolName, "query")) + return + } + query = removeClosingTag("query", query) + + if (limitStr) { + limitStr = removeClosingTag("limit", limitStr) + limit = parseInt(limitStr, 10) + if (isNaN(limit) || limit <= 0) { + cline.consecutiveMistakeCount++ + await 
cline.say("text", `Invalid limit value: "${limitStr}". Using default ${10}.`) + limit = 10 + } + } + + // Extract optional sendResultsToUI parameter + + // --- Approval --- + const translationKey = "chat:codebaseSearch.wantsToSearch" + let approvalMessage: string + + approvalMessage = t(translationKey, { query, limit }) + + const approvalPayload = { + tool: "codebase_search", + approvalPrompt: approvalMessage, + query: query, + limit: limit, + isOutsideWorkspace: false, + } + + const didApprove = await askApproval("tool", JSON.stringify(approvalPayload)) + if (!didApprove) { + pushToolResult(formatResponse.toolDenied()) + return + } + + cline.consecutiveMistakeCount = 0 + + // --- Core Logic --- + try { + const context = cline.providerRef.deref()?.context + if (!context) { + throw new Error("Extension context is not available.") + } + + const manager = CodeIndexManager.getInstance(context) + + // Check if indexing is enabled and configured (using assumed properties/methods) + // @ts-expect-error Accessing private member _isEnabled + const isEnabled = manager.isEnabled ?? true // Assume enabled if property doesn't exist + // @ts-expect-error Accessing private member _isConfigured + const isConfigured = manager.isConfigured ? manager.isConfigured() : true // Assume configured if method doesn't exist + + if (!isEnabled) { + throw new Error("Code Indexing is disabled in the settings.") + } + if (!isConfigured) { + throw new Error("Code Indexing is not configured (Missing OpenAI Key or Qdrant URL).") + } + + const searchResults: VectorStoreSearchResult[] = await manager.searchIndex(query, limit) + + // 3. 
Format and push results + if (!searchResults || searchResults.length === 0) { + pushToolResult(`No relevant code snippets found for the query: "${query}"`) // Use simple string for no results + return + } + + const jsonResult = { + query, + results: [], + } as { + query: string + results: Array<{ + filePath: string + score: number + startLine: number + endLine: number + codeChunk: string + }> + } + + searchResults.forEach((result) => { + if (!result.payload) return + if (!("filePath" in result.payload)) return + + const relativePath = vscode.workspace.asRelativePath(result.payload.filePath, false) + + jsonResult.results.push({ + filePath: relativePath, + score: result.score, + startLine: result.payload.startLine, + endLine: result.payload.endLine, + codeChunk: result.payload.codeChunk.trim(), + }) + }) + + // Send results to UI + const payload = { tool: toolName, content: jsonResult } + await cline.say("text", JSON.stringify(payload)) + + // Push results to AI + const output = `Query: ${query} +Results: + +${jsonResult.results + .map( + (result) => `File path: ${result.filePath} +Score: ${result.score} +Lines: ${result.startLine}-${result.endLine} +Code Chunk: ${result.codeChunk} +`, + ) + .join("\n")}` + + pushToolResult(output) + } catch (error: any) { + await handleError(toolName, error) // Use the standard error handler + } +} diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index b4beed223e..ac441ece63 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -39,6 +39,8 @@ import WorkspaceTracker from "../../integrations/workspace/WorkspaceTracker" import { McpHub } from "../../services/mcp/McpHub" import { McpServerManager } from "../../services/mcp/McpServerManager" import { ShadowCheckpointService } from "../../services/checkpoints/ShadowCheckpointService" +import { CodeIndexManager } from "../../services/code-index/manager" +import type { IndexProgressUpdate } from 
"../../services/code-index/interfaces/manager" import { fileExistsAtPath } from "../../utils/fs" import { setTtsEnabled, setTtsSpeed } from "../../utils/tts" import { ContextProxy } from "../config/ContextProxy" @@ -53,6 +55,7 @@ import { telemetryService } from "../../services/telemetry/TelemetryService" import { getWorkspacePath } from "../../utils/path" import { webviewMessageHandler } from "./webviewMessageHandler" import { WebviewMessage } from "../../shared/WebviewMessage" +import { EMBEDDING_MODEL_PROFILES } from "../../shared/embeddingModels" /** * https://github.com/microsoft/vscode-webview-ui-toolkit-samples/blob/main/default/weather-webview/src/providers/WeatherViewProvider.ts @@ -73,6 +76,7 @@ export class ClineProvider extends EventEmitter implements private disposables: vscode.Disposable[] = [] private view?: vscode.WebviewView | vscode.WebviewPanel private clineStack: Task[] = [] + private codeIndexStatusSubscription?: vscode.Disposable private _workspaceTracker?: WorkspaceTracker // workSpaceTracker read-only for access outside this class public get workspaceTracker(): WorkspaceTracker | undefined { return this._workspaceTracker @@ -84,6 +88,7 @@ export class ClineProvider extends EventEmitter implements public readonly latestAnnouncementId = "may-21-2025-3-18" // Update for v3.18.0 announcement public readonly providerSettingsManager: ProviderSettingsManager public readonly customModesManager: CustomModesManager + public readonly codeIndexManager: CodeIndexManager constructor( readonly context: vscode.ExtensionContext, @@ -96,6 +101,11 @@ export class ClineProvider extends EventEmitter implements this.log("ClineProvider instantiated") ClineProvider.activeInstances.add(this) + this.codeIndexManager = CodeIndexManager.getInstance(context, this.contextProxy) + context.subscriptions.push(this.codeIndexManager) + // Start configuration loading (which might trigger indexing) in the background. 
+ // Don't await, allowing activation to continue immediately. + // Register this provider with the telemetry service to enable it to add // properties like mode and provider. telemetryService.setProvider(this) @@ -320,9 +330,22 @@ export class ClineProvider extends EventEmitter implements async resolveWebviewView(webviewView: vscode.WebviewView | vscode.WebviewPanel) { this.log("Resolving webview view") - if (!this.contextProxy.isInitialized) { - await this.contextProxy.initialize() - } + this.codeIndexManager + .loadConfiguration() + .then(() => { + this.updateGlobalState("codebaseIndexModels", EMBEDDING_MODEL_PROFILES) + + this.outputChannel.appendLine("CodeIndexManager configuration loaded successfully (async).") + }) + .catch((error) => { + console.error( + "[resolveWebviewView] Error during background CodeIndexManager configuration/indexing:", + error, + ) + this.outputChannel.appendLine( + `[Error] Background CodeIndexManager configuration/indexing failed: ${error.message || error}`, + ) + }) this.view = webviewView @@ -383,6 +406,23 @@ export class ClineProvider extends EventEmitter implements // and executes code based on the message that is recieved this.setWebviewMessageListener(webviewView.webview) + // Subscribe to code index status updates if the manager exists + if (this.codeIndexManager) { + this.codeIndexStatusSubscription = this.codeIndexManager.onProgressUpdate((update: IndexProgressUpdate) => { + this.postMessageToWebview({ + type: "indexingStatusUpdate", + values: { + systemStatus: update.systemStatus, + message: update.message, + processedBlockCount: update.processedBlockCount, + totalBlockCount: update.totalBlockCount, + }, + }) + }) + // Add the subscription to the main disposables array + this.disposables.push(this.codeIndexStatusSubscription) + } + // Logs show up in bottom panel > Debug Console //console.log("registering listener") @@ -826,6 +866,9 @@ export class ClineProvider extends EventEmitter implements 
vscode.window.showErrorMessage(t("common:errors.create_api_config")) return undefined } + + // Load CodeIndexManager configuration after provider settings are updated + await this.codeIndexManager.loadConfiguration() } async deleteProviderProfile(profileToDelete: ProviderSettingsEntry) { @@ -1239,6 +1282,8 @@ export class ClineProvider extends EventEmitter implements historyPreviewCollapsed, condensingApiConfigId, customCondensingPrompt, + codebaseIndexConfig, + codebaseIndexModels, } = await this.getState() const telemetryKey = process.env.POSTHOG_API_KEY @@ -1330,6 +1375,17 @@ export class ClineProvider extends EventEmitter implements historyPreviewCollapsed: historyPreviewCollapsed ?? false, condensingApiConfigId, customCondensingPrompt, + codebaseIndexModels: codebaseIndexModels ?? { + openai: {}, + ollama: {}, + }, + codebaseIndexConfig: codebaseIndexConfig ?? { + codebaseIndexEnabled: false, + codebaseIndexQdrantUrl: "", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderBaseUrl: "", + codebaseIndexEmbedderModelId: "", + }, } } @@ -1424,6 +1480,17 @@ export class ClineProvider extends EventEmitter implements // Explicitly add condensing settings condensingApiConfigId: stateValues.condensingApiConfigId, customCondensingPrompt: stateValues.customCondensingPrompt, + codebaseIndexModels: stateValues.codebaseIndexModels ?? { + openai: {}, + ollama: {}, + }, + codebaseIndexConfig: stateValues.codebaseIndexConfig ?? 
{ + codebaseIndexEnabled: false, + codebaseIndexQdrantUrl: "", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderBaseUrl: "", + codebaseIndexEmbedderModelId: "", + }, } } diff --git a/src/core/webview/__tests__/ClineProvider.test.ts b/src/core/webview/__tests__/ClineProvider.test.ts index 5af4a476b7..d4093d9072 100644 --- a/src/core/webview/__tests__/ClineProvider.test.ts +++ b/src/core/webview/__tests__/ClineProvider.test.ts @@ -387,6 +387,13 @@ describe("ClineProvider", () => { alwaysAllowReadOnly: false, alwaysAllowReadOnlyOutsideWorkspace: false, alwaysAllowWrite: false, + codebaseIndexConfig: { + codebaseIndexEnabled: false, + codebaseIndexQdrantUrl: "", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderBaseUrl: "", + codebaseIndexEmbedderModelId: "", + }, alwaysAllowWriteOutsideWorkspace: false, alwaysAllowExecute: false, alwaysAllowBrowser: false, diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 7c4e906849..5b595c5bc7 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -1320,5 +1320,54 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We await provider.postStateToWebview() break } + case "codebaseIndexConfig": { + const codebaseIndexConfig = message.values ?? { + codebaseIndexEnabled: false, + codebaseIndexQdrantUrl: "", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderBaseUrl: "", + codebaseIndexEmbedderModelId: "", + } + await updateGlobalState("codebaseIndexConfig", codebaseIndexConfig) + await provider.codeIndexManager.loadConfiguration() + await provider.postStateToWebview() + break + } + case "requestIndexingStatus": { + const status = provider.codeIndexManager!.getCurrentStatus() + provider.postMessageToWebview({ + type: "indexingStatusUpdate", + values: status, + }) + break + } + case "startIndexing": { + try { + const manager = provider.codeIndexManager! 
+ await manager.startIndexing() + // Optionally send a confirmation or rely on indexingStatusUpdate + } catch (error) { + provider.log(`Error starting indexing: ${error instanceof Error ? error.message : String(error)}`) + // Optionally send an error message back to the webview + } + break + } + case "clearIndexData": { + try { + const manager = provider.codeIndexManager! + await manager.clearIndexData() + provider.postMessageToWebview({ type: "indexCleared", values: { success: true } }) + } catch (error) { + provider.log(`Error clearing index data: ${error instanceof Error ? error.message : String(error)}`) + provider.postMessageToWebview({ + type: "indexCleared", + values: { + success: false, + error: error instanceof Error ? error.message : String(error), + }, + }) + } + break + } } } diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 9b2cb8469c..4012237db6 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -61,6 +61,33 @@ type GlobalSettings = { autoApprovalEnabled?: boolean | undefined alwaysAllowReadOnly?: boolean | undefined alwaysAllowReadOnlyOutsideWorkspace?: boolean | undefined + codebaseIndexModels?: + | { + openai?: + | { + [x: string]: { + dimension: number + } + } + | undefined + ollama?: + | { + [x: string]: { + dimension: number + } + } + | undefined + } + | undefined + codebaseIndexConfig?: + | { + codebaseIndexEnabled?: boolean | undefined + codebaseIndexQdrantUrl?: string | undefined + codebaseIndexEmbedderProvider?: ("openai" | "ollama") | undefined + codebaseIndexEmbedderBaseUrl?: string | undefined + codebaseIndexEmbedderModelId?: string | undefined + } + | undefined alwaysAllowWrite?: boolean | undefined alwaysAllowWriteOutsideWorkspace?: boolean | undefined writeDelayMs?: number | undefined @@ -260,6 +287,8 @@ type ProviderSettings = { vertexJsonCredentials?: string | undefined vertexProjectId?: string | undefined vertexRegion?: string | undefined + codeIndexOpenAiKey?: string | undefined + 
codeIndexQdrantApiKey?: string | undefined openAiBaseUrl?: string | undefined openAiApiKey?: string | undefined openAiLegacyFormat?: boolean | undefined @@ -584,6 +613,7 @@ type RooCodeEvents = { | "switch_mode" | "new_task" | "fetch_instructions" + | "codebase_search" ), string, ] diff --git a/src/exports/types.ts b/src/exports/types.ts index 52bdabc331..fb234a1b71 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -61,6 +61,33 @@ type GlobalSettings = { autoApprovalEnabled?: boolean | undefined alwaysAllowReadOnly?: boolean | undefined alwaysAllowReadOnlyOutsideWorkspace?: boolean | undefined + codebaseIndexModels?: + | { + openai?: + | { + [x: string]: { + dimension: number + } + } + | undefined + ollama?: + | { + [x: string]: { + dimension: number + } + } + | undefined + } + | undefined + codebaseIndexConfig?: + | { + codebaseIndexEnabled?: boolean | undefined + codebaseIndexQdrantUrl?: string | undefined + codebaseIndexEmbedderProvider?: ("openai" | "ollama") | undefined + codebaseIndexEmbedderBaseUrl?: string | undefined + codebaseIndexEmbedderModelId?: string | undefined + } + | undefined alwaysAllowWrite?: boolean | undefined alwaysAllowWriteOutsideWorkspace?: boolean | undefined writeDelayMs?: number | undefined @@ -261,6 +288,8 @@ type ProviderSettings = { awsUseProfile?: boolean | undefined awsCustomArn?: string | undefined vertexKeyFile?: string | undefined + codeIndexOpenAiKey?: string | undefined + codeIndexQdrantApiKey?: string | undefined vertexJsonCredentials?: string | undefined vertexProjectId?: string | undefined vertexRegion?: string | undefined @@ -596,6 +625,7 @@ type RooCodeEvents = { | "switch_mode" | "new_task" | "fetch_instructions" + | "codebase_search" ), string, ] diff --git a/src/i18n/locales/en/tools.json b/src/i18n/locales/en/tools.json index 70b0e8d964..b931edddbf 100644 --- a/src/i18n/locales/en/tools.json +++ b/src/i18n/locales/en/tools.json @@ -6,5 +6,8 @@ "definitionsOnly": " (definitions only)", "maxLines": " (max 
{{max}} lines)" }, - "toolRepetitionLimitReached": "Roo appears to be stuck in a loop, attempting the same action ({{toolName}}) repeatedly. This might indicate a problem with its current strategy. Consider rephrasing the task, providing more specific instructions, or guiding it towards a different approach." + "toolRepetitionLimitReached": "Roo appears to be stuck in a loop, attempting the same action ({{toolName}}) repeatedly. This might indicate a problem with its current strategy. Consider rephrasing the task, providing more specific instructions, or guiding it towards a different approach.", + "codebaseSearch": { + "approval": "Searching for '{{query}}' in codebase (limit: {{limit}})..." + } } diff --git a/src/package.json b/src/package.json index ce108f91f7..50375fa2d8 100644 --- a/src/package.json +++ b/src/package.json @@ -366,6 +366,8 @@ "node-cache": "^5.1.2", "node-ipc": "^12.0.0", "openai": "^4.78.1", + "async-mutex": "^0.5.0", + "p-limit": "^6.2.0", "os-name": "^6.0.0", "p-wait-for": "^5.0.2", "pdf-parse": "^1.1.1", diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 1233745e53..176b8c8b5a 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -223,6 +223,31 @@ export const modelInfoSchema = z.object({ export type ModelInfo = z.infer +/** + * Codebase Index Config + */ +export const codebaseIndexConfigSchema = z.object({ + codebaseIndexEnabled: z.boolean().optional(), + codebaseIndexQdrantUrl: z.string().optional(), + codebaseIndexEmbedderProvider: z.enum(["openai", "ollama"]).optional(), + codebaseIndexEmbedderBaseUrl: z.string().optional(), + codebaseIndexEmbedderModelId: z.string().optional(), +}) + +export type CodebaseIndexConfig = z.infer + +export const codebaseIndexModelsSchema = z.object({ + openai: z.record(z.string(), z.object({ dimension: z.number() })).optional(), + ollama: z.record(z.string(), z.object({ dimension: z.number() })).optional(), +}) + +export type CodebaseIndexModels = z.infer + +export const 
codebaseIndexProviderSchema = z.object({ + codeIndexOpenAiKey: z.string().optional(), + codeIndexQdrantApiKey: z.string().optional(), +}) + /** * HistoryItem */ @@ -636,6 +661,7 @@ export const providerSettingsSchema = z.object({ ...groqSchema.shape, ...chutesSchema.shape, ...litellmSchema.shape, + ...codebaseIndexProviderSchema.shape }) export type ProviderSettings = z.infer @@ -712,6 +738,9 @@ const providerSettingsRecord: ProviderSettingsRecord = { // Requesty requestyApiKey: undefined, requestyModelId: undefined, + // Code Index + codeIndexOpenAiKey: undefined, + codeIndexQdrantApiKey: undefined, // Reasoning enableReasoningEffort: undefined, reasoningEffort: undefined, @@ -758,6 +787,8 @@ export const globalSettingsSchema = z.object({ autoApprovalEnabled: z.boolean().optional(), alwaysAllowReadOnly: z.boolean().optional(), alwaysAllowReadOnlyOutsideWorkspace: z.boolean().optional(), + codebaseIndexModels: codebaseIndexModelsSchema.optional(), + codebaseIndexConfig: codebaseIndexConfigSchema.optional(), alwaysAllowWrite: z.boolean().optional(), alwaysAllowWriteOutsideWorkspace: z.boolean().optional(), writeDelayMs: z.number().optional(), @@ -828,6 +859,8 @@ export type GlobalSettings = z.infer type GlobalSettingsRecord = Record, undefined> const globalSettingsRecord: GlobalSettingsRecord = { + codebaseIndexModels: undefined, + codebaseIndexConfig: undefined, currentApiConfigName: undefined, listApiConfigMeta: undefined, pinnedApiConfigs: undefined, @@ -940,8 +973,12 @@ export type SecretState = Pick< | "groqApiKey" | "chutesApiKey" | "litellmApiKey" + | "codeIndexOpenAiKey" + | "codeIndexQdrantApiKey" > +export type CodeIndexSecrets = "codeIndexOpenAiKey" | "codeIndexQdrantApiKey" + type SecretStateRecord = Record, undefined> const secretStateRecord: SecretStateRecord = { @@ -962,6 +999,8 @@ const secretStateRecord: SecretStateRecord = { groqApiKey: undefined, chutesApiKey: undefined, litellmApiKey: undefined, + codeIndexOpenAiKey: undefined, + 
codeIndexQdrantApiKey: undefined, } export const SECRET_STATE_KEYS = Object.keys(secretStateRecord) as Keys[] @@ -1118,6 +1157,7 @@ export const toolNames = [ "switch_mode", "new_task", "fetch_instructions", + "codebase_search", ] as const export const toolNamesSchema = z.enum(toolNames) diff --git a/src/services/code-index/config-manager.ts b/src/services/code-index/config-manager.ts new file mode 100644 index 0000000000..5cc2014c54 --- /dev/null +++ b/src/services/code-index/config-manager.ts @@ -0,0 +1,226 @@ +import { ApiHandlerOptions } from "../../shared/api" +import { ContextProxy } from "../../core/config/ContextProxy" +import { EmbedderProvider } from "./interfaces/manager" +import { getModelDimension, getDefaultModelId } from "../../shared/embeddingModels" +import { CodeIndexConfig, PreviousConfigSnapshot } from "./interfaces/config" + +/** + * Manages configuration state and validation for the code indexing feature. + * Handles loading, validating, and providing access to configuration values. + */ +export class CodeIndexConfigManager { + private isEnabled: boolean = false + private embedderProvider: EmbedderProvider = "openai" + private modelId?: string + private openAiOptions?: ApiHandlerOptions + private ollamaOptions?: ApiHandlerOptions + private qdrantUrl?: string + private qdrantApiKey?: string + + constructor(private readonly contextProxy: ContextProxy) {} + + /** + * Loads persisted configuration from globalState. 
+ */ + public async loadConfiguration(): Promise<{ + configSnapshot: PreviousConfigSnapshot + currentConfig: { + isEnabled: boolean + isConfigured: boolean + embedderProvider: EmbedderProvider + modelId?: string + openAiOptions?: ApiHandlerOptions + ollamaOptions?: ApiHandlerOptions + qdrantUrl?: string + qdrantApiKey?: string + } + requiresRestart: boolean + requiresClear: boolean + }> { + console.log("[CodeIndexConfigManager] Loading configuration...") + + const previousConfigSnapshot: PreviousConfigSnapshot = { + enabled: this.isEnabled, + configured: this.isConfigured(), + embedderProvider: this.embedderProvider, + modelId: this.modelId, + openAiKey: this.openAiOptions?.openAiNativeApiKey, + ollamaBaseUrl: this.ollamaOptions?.ollamaBaseUrl, + qdrantUrl: this.qdrantUrl, + qdrantApiKey: this.qdrantApiKey, + } + + let codebaseIndexConfig = this.contextProxy?.getGlobalState("codebaseIndexConfig") ?? { + codebaseIndexEnabled: false, + codebaseIndexQdrantUrl: "", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderBaseUrl: "", + codebaseIndexEmbedderModelId: "", + } + + const { + codebaseIndexEnabled, + codebaseIndexQdrantUrl, + codebaseIndexEmbedderProvider, + codebaseIndexEmbedderBaseUrl, + codebaseIndexEmbedderModelId, + } = codebaseIndexConfig + + const openAiKey = this.contextProxy?.getSecret("codeIndexOpenAiKey") ?? "" + const qdrantApiKey = this.contextProxy?.getSecret("codeIndexQdrantApiKey") ?? "" + + this.isEnabled = codebaseIndexEnabled || false + this.qdrantUrl = codebaseIndexQdrantUrl + this.qdrantApiKey = qdrantApiKey ?? "" + this.openAiOptions = { openAiNativeApiKey: openAiKey } + + this.embedderProvider = codebaseIndexEmbedderProvider === "ollama" ? "ollama" : "openai" + this.modelId = codebaseIndexEmbedderModelId || undefined + + this.ollamaOptions = { + ollamaBaseUrl: codebaseIndexEmbedderBaseUrl, + } + + const previousModelId = + previousConfigSnapshot.modelId ?? 
getDefaultModelId(previousConfigSnapshot.embedderProvider) + const currentModelId = this.modelId ?? getDefaultModelId(this.embedderProvider) + const previousDimension = previousModelId + ? getModelDimension(previousConfigSnapshot.embedderProvider, previousModelId) + : undefined + const currentDimension = currentModelId ? getModelDimension(this.embedderProvider, currentModelId) : undefined + const requiresClear = + previousDimension !== undefined && currentDimension !== undefined && previousDimension !== currentDimension + console.log( + `[CodeIndexConfigManager] Dimension check: Previous=${previousDimension}, Current=${currentDimension}, Changed=${requiresClear}`, + ) + + return { + configSnapshot: previousConfigSnapshot, + currentConfig: { + isEnabled: this.isEnabled, + isConfigured: this.isConfigured(), + embedderProvider: this.embedderProvider, + modelId: this.modelId, + openAiOptions: this.openAiOptions, + ollamaOptions: this.ollamaOptions, + qdrantUrl: this.qdrantUrl, + qdrantApiKey: this.qdrantApiKey, + }, + requiresRestart: this._didConfigChangeRequireRestart(previousConfigSnapshot), + requiresClear, + } + } + + /** + * Checks if the service is properly configured based on the embedder type. + */ + public isConfigured(): boolean { + if (this.embedderProvider === "openai") { + return !!(this.openAiOptions?.openAiNativeApiKey && this.qdrantUrl) + } else if (this.embedderProvider === "ollama") { + // Ollama model ID has a default, so only base URL is strictly required for config + return !!(this.ollamaOptions?.ollamaBaseUrl && this.qdrantUrl) + } + return false // Should not happen if embedderProvider is always set correctly + } + + /** + * Determines if a configuration change requires restarting the indexing process. 
+ * @param prev The previous configuration snapshot + * @returns boolean indicating whether a restart is needed + */ + private _didConfigChangeRequireRestart(prev: PreviousConfigSnapshot): boolean { + const nowConfigured = this.isConfigured() // Recalculate based on current state + + // Check for transition from disabled/unconfigured to enabled+configured + const transitionedToReady = (!prev.enabled || !prev.configured) && this.isEnabled && nowConfigured + if (transitionedToReady) return true + + // If wasn't ready before and isn't ready now, no restart needed for config change itself + if (!prev.configured && !nowConfigured) return false + // If was disabled and still is, no restart needed + if (!prev.enabled && !this.isEnabled) return false + + // Check for changes in relevant settings if the feature is enabled (or was enabled) + if (this.isEnabled || prev.enabled) { + // Check for embedder type change + if (prev.embedderProvider !== this.embedderProvider) return true + if (prev.modelId !== this.modelId) return true // Any model change requires restart + + // Check OpenAI settings change if using OpenAI + if (this.embedderProvider === "openai") { + if (prev.openAiKey !== this.openAiOptions?.openAiNativeApiKey) return true + // Model ID check moved above + } + + // Check Ollama settings change if using Ollama + if (this.embedderProvider === "ollama") { + if (prev.ollamaBaseUrl !== this.ollamaOptions?.ollamaBaseUrl) { + return true + } + // Model ID check moved above + } + + // Check Qdrant settings changes + if (prev.qdrantUrl !== this.qdrantUrl || prev.qdrantApiKey !== this.qdrantApiKey) { + return true + } + } + + return false + } + + /** + * Gets the current configuration state. 
+ */ + public getConfig(): CodeIndexConfig { + return { + isEnabled: this.isEnabled, + isConfigured: this.isConfigured(), + embedderProvider: this.embedderProvider, + modelId: this.modelId, + openAiOptions: this.openAiOptions, + ollamaOptions: this.ollamaOptions, + qdrantUrl: this.qdrantUrl, + qdrantApiKey: this.qdrantApiKey, + } + } + + /** + * Gets whether the code indexing feature is enabled + */ + public get isFeatureEnabled(): boolean { + return this.isEnabled + } + + /** + * Gets whether the code indexing feature is properly configured + */ + public get isFeatureConfigured(): boolean { + return this.isConfigured() + } + + /** + * Gets the current embedder type (openai or ollama) + */ + public get currentEmbedderProvider(): EmbedderProvider { + return this.embedderProvider + } + + /** + * Gets the current Qdrant configuration + */ + public get qdrantConfig(): { url?: string; apiKey?: string } { + return { + url: this.qdrantUrl, + apiKey: this.qdrantApiKey, + } + } + + /** + * Gets the current model ID being used for embeddings. + */ + public get currentModelId(): string | undefined { + return this.modelId + } +} diff --git a/src/services/code-index/embedders/ollama.ts b/src/services/code-index/embedders/ollama.ts new file mode 100644 index 0000000000..56de8c014d --- /dev/null +++ b/src/services/code-index/embedders/ollama.ts @@ -0,0 +1,73 @@ +import { ApiHandlerOptions } from "../../../shared/api" +import { EmbeddingResponse, IEmbedder } from "../interfaces" + +/** + * Implements the IEmbedder interface using a local Ollama instance. 
+ */ +export class CodeIndexOllamaEmbedder implements IEmbedder { + private readonly baseUrl: string + private readonly defaultModelId: string + + constructor(options: ApiHandlerOptions) { + // Ensure ollamaBaseUrl and ollamaModelId exist on ApiHandlerOptions or add defaults + this.baseUrl = options.ollamaBaseUrl || "http://localhost:11434" + this.defaultModelId = options.ollamaModelId || "nomic-embed-text:latest" + } + + /** + * Creates embeddings for the given texts using the specified Ollama model. + * @param texts - An array of strings to embed. + * @param model - Optional model ID to override the default. + * @returns A promise that resolves to an EmbeddingResponse containing the embeddings and usage data. + */ + async createEmbeddings(texts: string[], model?: string): Promise { + const modelToUse = model || this.defaultModelId + const url = `${this.baseUrl}/api/embed` // Endpoint as specified + + try { + // Note: Standard Ollama API uses 'prompt' for single text, not 'input' for array. + // Implementing based on user's specific request structure. 
+ const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: modelToUse, + input: texts, // Using 'input' as requested + }), + }) + + if (!response.ok) { + let errorBody = "Could not read error body" + try { + errorBody = await response.text() + } catch (e) { + // Ignore error reading body + } + throw new Error( + `Ollama API request failed with status ${response.status} ${response.statusText}: ${errorBody}`, + ) + } + + const data = await response.json() + + // Extract embeddings using 'embeddings' key as requested + const embeddings = data.embeddings + if (!embeddings || !Array.isArray(embeddings)) { + throw new Error( + 'Invalid response structure from Ollama API: "embeddings" array not found or not an array.', + ) + } + + return { + embeddings: embeddings, + } + } catch (error: any) { + // Log the original error for debugging purposes + console.error("Ollama embedding failed:", error) + // Re-throw a more specific error for the caller + throw new Error(`Ollama embedding failed: ${error.message}`) + } + } +} diff --git a/src/services/code-index/embedders/openai.ts b/src/services/code-index/embedders/openai.ts new file mode 100644 index 0000000000..4d7e38193e --- /dev/null +++ b/src/services/code-index/embedders/openai.ts @@ -0,0 +1,129 @@ +import { OpenAI } from "openai" +import { OpenAiNativeHandler } from "../../../api/providers/openai-native" +import { ApiHandlerOptions } from "../../../shared/api" +import { IEmbedder, EmbeddingResponse } from "../interfaces" + +/** + * OpenAI implementation of the embedder interface with batching and rate limiting + */ +export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder { + private embeddingsClient: OpenAI + private readonly defaultModelId: string + + // Batching and retry constants + private static readonly MAX_BATCH_TOKENS = 100000 + private static readonly MAX_ITEM_TOKENS = 8191 + private static readonly 
MAX_RETRIES = 3 + private static readonly INITIAL_DELAY_MS = 500 + + /** + * Creates a new OpenAI embedder + * @param options API handler options + */ + constructor(options: ApiHandlerOptions & { openAiEmbeddingModelId?: string }) { + super(options) + const apiKey = this.options.openAiNativeApiKey ?? "not-provided" + this.embeddingsClient = new OpenAI({ apiKey }) + this.defaultModelId = options.openAiEmbeddingModelId || "text-embedding-3-small" + } + + /** + * Creates embeddings for the given texts with batching and rate limiting + * @param texts Array of text strings to embed + * @param model Optional model identifier + * @returns Promise resolving to embedding response + */ + async createEmbeddings(texts: string[], model?: string): Promise { + const modelToUse = model || this.defaultModelId + const allEmbeddings: number[][] = [] + const usage = { promptTokens: 0, totalTokens: 0 } + const remainingTexts = [...texts] + + while (remainingTexts.length > 0) { + const currentBatch: string[] = [] + let currentBatchTokens = 0 + const processedIndices: number[] = [] + + for (let i = 0; i < remainingTexts.length; i++) { + const text = remainingTexts[i] + const itemTokens = Math.ceil(text.length / 4) + + if (itemTokens > OpenAiEmbedder.MAX_ITEM_TOKENS) { + console.warn( + `Text at index ${i} exceeds maximum token limit (${itemTokens} > ${OpenAiEmbedder.MAX_ITEM_TOKENS}). 
Skipping.`, + ) + processedIndices.push(i) + continue + } + + if (currentBatchTokens + itemTokens <= OpenAiEmbedder.MAX_BATCH_TOKENS) { + currentBatch.push(text) + currentBatchTokens += itemTokens + processedIndices.push(i) + } else { + break + } + } + + // Remove processed items from remainingTexts (in reverse order to maintain correct indices) + for (let i = processedIndices.length - 1; i >= 0; i--) { + remainingTexts.splice(processedIndices[i], 1) + } + + if (currentBatch.length > 0) { + try { + const batchResult = await this._embedBatchWithRetries(currentBatch, modelToUse) + allEmbeddings.push(...batchResult.embeddings) + usage.promptTokens += batchResult.usage.promptTokens + usage.totalTokens += batchResult.usage.totalTokens + } catch (error) { + console.error("Failed to process batch:", error) + throw new Error("Failed to create embeddings: batch processing error") + } + } + } + + return { embeddings: allEmbeddings, usage } + } + + /** + * Helper method to handle batch embedding with retries and exponential backoff + * @param batchTexts Array of texts to embed in this batch + * @param model Model identifier to use + * @returns Promise resolving to embeddings and usage statistics + */ + private async _embedBatchWithRetries( + batchTexts: string[], + model: string, + ): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> { + for (let attempts = 0; attempts < OpenAiEmbedder.MAX_RETRIES; attempts++) { + try { + const response = await this.embeddingsClient.embeddings.create({ + input: batchTexts, + model: model, + }) + + return { + embeddings: response.data.map((item) => item.embedding), + usage: { + promptTokens: response.usage?.prompt_tokens || 0, + totalTokens: response.usage?.total_tokens || 0, + }, + } + } catch (error: any) { + const isRateLimitError = error?.status === 429 + const hasMoreAttempts = attempts < OpenAiEmbedder.MAX_RETRIES - 1 + + if (isRateLimitError && hasMoreAttempts) { + const delayMs = 
OpenAiEmbedder.INITIAL_DELAY_MS * Math.pow(2, attempts) + await new Promise((resolve) => setTimeout(resolve, delayMs)) + continue + } + + throw error + } + } + + throw new Error(`Failed to create embeddings after ${OpenAiEmbedder.MAX_RETRIES} attempts`) + } +} diff --git a/src/services/code-index/interfaces/config.ts b/src/services/code-index/interfaces/config.ts new file mode 100644 index 0000000000..a7dcff167f --- /dev/null +++ b/src/services/code-index/interfaces/config.ts @@ -0,0 +1,30 @@ +import { ApiHandlerOptions } from "../../../shared/api" // Adjust path if needed +import { EmbedderProvider } from "./manager" + +/** + * Configuration state for the code indexing feature + */ +export interface CodeIndexConfig { + isEnabled: boolean + isConfigured: boolean + embedderProvider: EmbedderProvider + modelId?: string + openAiOptions?: ApiHandlerOptions + ollamaOptions?: ApiHandlerOptions + qdrantUrl?: string + qdrantApiKey?: string +} + +/** + * Snapshot of previous configuration used to determine if a restart is required + */ +export type PreviousConfigSnapshot = { + enabled: boolean + configured: boolean + embedderProvider: EmbedderProvider + modelId?: string + openAiKey?: string + ollamaBaseUrl?: string + qdrantUrl?: string + qdrantApiKey?: string +} diff --git a/src/services/code-index/interfaces/embedder.ts b/src/services/code-index/interfaces/embedder.ts new file mode 100644 index 0000000000..b006773b3d --- /dev/null +++ b/src/services/code-index/interfaces/embedder.ts @@ -0,0 +1,21 @@ +/** + * Interface for code index embedders. + * This interface is implemented by both OpenAI and Ollama embedders. + */ +export interface IEmbedder { + /** + * Creates embeddings for the given texts. 
+ * @param texts Array of text strings to create embeddings for + * @param model Optional model ID to use for embeddings + * @returns Promise resolving to an EmbeddingResponse + */ + createEmbeddings(texts: string[], model?: string): Promise +} + +export interface EmbeddingResponse { + embeddings: number[][] + usage?: { + promptTokens: number + totalTokens: number + } +} diff --git a/src/services/code-index/interfaces/file-processor.ts b/src/services/code-index/interfaces/file-processor.ts new file mode 100644 index 0000000000..32ac02d76e --- /dev/null +++ b/src/services/code-index/interfaces/file-processor.ts @@ -0,0 +1,99 @@ +import * as vscode from "vscode" + +/** + * Interface for code file parser + */ +export interface ICodeParser { + /** + * Parses a code file into code blocks + * @param filePath Path to the file to parse + * @param options Optional parsing options + * @returns Promise resolving to array of code blocks + */ + parseFile( + filePath: string, + options?: { + minBlockLines?: number + maxBlockLines?: number + content?: string + fileHash?: string + }, + ): Promise +} + +/** + * Interface for directory scanner + */ +export interface IDirectoryScanner { + /** + * Scans a directory for code blocks + * @param directoryPath Path to the directory to scan + * @param options Optional scanning options + * @returns Promise resolving to scan results + */ + scanDirectory( + directory: string, + context?: vscode.ExtensionContext, + onError?: (error: Error) => void, + ): Promise<{ + codeBlocks: CodeBlock[] + stats: { + processed: number + skipped: number + } + }> +} + +/** + * Interface for file watcher + */ +export interface IFileWatcher { + /** + * Initializes the file watcher + */ + initialize(): Promise + + /** + * Disposes the file watcher + */ + dispose(): void + + /** + * Event emitted when a file starts processing + */ + onDidStartProcessing: vscode.Event + + /** + * Event emitted when a file finishes processing + */ + onDidFinishProcessing: vscode.Event 
+ + /** + * Processes a file + * @param filePath Path to the file to process + * @returns Promise resolving to processing result + */ + processFile(filePath: string): Promise +} + +export interface FileProcessingResult { + path: string + status: "success" | "skipped" | "error" + error?: Error + reason?: string +} + +/** + * Common types used across the code-index service + */ + +export interface CodeBlock { + file_path: string + identifier: string | null + type: string + start_line: number + end_line: number + content: string + fileHash: string + segmentHash: string +} diff --git a/src/services/code-index/interfaces/index.ts b/src/services/code-index/interfaces/index.ts new file mode 100644 index 0000000000..20dd55ad89 --- /dev/null +++ b/src/services/code-index/interfaces/index.ts @@ -0,0 +1,4 @@ +export * from "./embedder" +export * from "./vector-store" +export * from "./file-processor" +export * from "./manager" diff --git a/src/services/code-index/interfaces/manager.ts b/src/services/code-index/interfaces/manager.ts new file mode 100644 index 0000000000..1d3ce92c95 --- /dev/null +++ b/src/services/code-index/interfaces/manager.ts @@ -0,0 +1,80 @@ +import { VectorStoreSearchResult } from "./vector-store" +import * as vscode from "vscode" + +/** + * Interface for the code index manager + */ +export interface ICodeIndexManager { + /** + * Event emitted when progress is updated + */ + onProgressUpdate: vscode.Event<{ + systemStatus: IndexingState + fileStatuses: Record + message?: string + }> + + /** + * Current state of the indexing process + */ + readonly state: IndexingState + + /** + * Whether the code indexing feature is enabled + */ + readonly isFeatureEnabled: boolean + + /** + * Whether the code indexing feature is configured + */ + readonly isFeatureConfigured: boolean + + /** + * Loads configuration from storage + */ + loadConfiguration(): Promise + + /** + * Starts the indexing process + */ + startIndexing(): Promise + + /** + * Stops the file watcher + 
*/ + stopWatcher(): void + + /** + * Clears the index data + */ + clearIndexData(): Promise + + /** + * Searches the index + * @param query Query string + * @param limit Maximum number of results to return + * @returns Promise resolving to search results + */ + searchIndex(query: string, limit: number): Promise + + /** + * Gets the current status of the indexing system + * @returns Current status information + */ + getCurrentStatus(): { systemStatus: IndexingState; fileStatuses: Record; message?: string } + + /** + * Disposes of resources used by the manager + */ + dispose(): void +} + +export type IndexingState = "Standby" | "Indexing" | "Indexed" | "Error" +export type EmbedderProvider = "openai" | "ollama" + +export interface IndexProgressUpdate { + systemStatus: IndexingState + message?: string + processedBlockCount?: number + totalBlockCount?: number +} diff --git a/src/services/code-index/interfaces/vector-store.ts b/src/services/code-index/interfaces/vector-store.ts new file mode 100644 index 0000000000..cac7f54dc9 --- /dev/null +++ b/src/services/code-index/interfaces/vector-store.ts @@ -0,0 +1,72 @@ +/** + * Interface for vector database clients + */ +export interface IVectorStore { + /** + * Initializes the vector store + * @returns Promise resolving to boolean indicating if a new collection was created + */ + initialize(): Promise + + /** + * Upserts points into the vector store + * @param points Array of points to upsert + */ + upsertPoints( + points: Array<{ + id: string + vector: number[] + payload: Record + }>, + ): Promise + + /** + * Searches for similar vectors + * @param queryVector Vector to search for + * @param limit Maximum number of results to return + * @returns Promise resolving to search results + */ + search(queryVector: number[], limit?: number): Promise + + /** + * Deletes points by file path + * @param filePath Path of the file to delete points for + */ + deletePointsByFilePath(filePath: string): Promise + + /** + * Deletes points by 
multiple file paths + * @param filePaths Array of file paths to delete points for + */ + deletePointsByMultipleFilePaths(filePaths: string[]): Promise + + /** + * Clears all points from the collection + */ + clearCollection(): Promise + + /** + * Deletes the entire collection. + */ + deleteCollection(): Promise + + /** + * Checks if the collection exists + * @returns Promise resolving to boolean indicating if the collection exists + */ + collectionExists(): Promise +} + +export interface VectorStoreSearchResult { + id: string | number + score: number + payload?: Payload | null +} + +export interface Payload { + filePath: string + codeChunk: string + startLine: number + endLine: number + [key: string]: any +} diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts new file mode 100644 index 0000000000..799d706adc --- /dev/null +++ b/src/services/code-index/manager.ts @@ -0,0 +1,152 @@ +import * as vscode from "vscode" +import { getWorkspacePath } from "../../utils/path" +import { ContextProxy } from "../../core/config/ContextProxy" +import { VectorStoreSearchResult } from "./interfaces" +import { IndexingState } from "./interfaces/manager" +import { CodeIndexConfigManager } from "./config-manager" +import { CodeIndexStateManager } from "./state-manager" +import { CodeIndexServiceFactory } from "./service-factory" +import { CodeIndexSearchService } from "./search-service" +import { CodeIndexOrchestrator } from "./orchestrator" + +export class CodeIndexManager { + // --- Singleton Implementation --- + private static instances = new Map() // Map workspace path to instance + + // Specialized class instances + private readonly _configManager: CodeIndexConfigManager + private readonly _stateManager: CodeIndexStateManager + private readonly _serviceFactory: CodeIndexServiceFactory + private readonly _orchestrator: CodeIndexOrchestrator + private readonly _searchService: CodeIndexSearchService + + public static getInstance(context: 
vscode.ExtensionContext, contextProxy?: ContextProxy): CodeIndexManager { + const workspacePath = getWorkspacePath() // Assumes single workspace for now + if (!workspacePath) { + throw new Error("Cannot get CodeIndexManager instance without an active workspace.") + } + + if (!CodeIndexManager.instances.has(workspacePath) && contextProxy) { + CodeIndexManager.instances.set(workspacePath, new CodeIndexManager(workspacePath, context, contextProxy)) + } + return CodeIndexManager.instances.get(workspacePath)! + } + + public static disposeAll(): void { + CodeIndexManager.instances.forEach((instance) => instance.dispose()) + CodeIndexManager.instances.clear() + } + + private readonly workspacePath: string + private readonly context: vscode.ExtensionContext + + // Private constructor for singleton pattern + private constructor(workspacePath: string, context: vscode.ExtensionContext, contextProxy: ContextProxy) { + this.workspacePath = workspacePath + this.context = context + + // Initialize state manager first since other components depend on it + this._stateManager = new CodeIndexStateManager() + + // Initialize remaining specialized classes + this._configManager = new CodeIndexConfigManager(contextProxy) + this._serviceFactory = new CodeIndexServiceFactory(this._configManager, workspacePath) + this._orchestrator = new CodeIndexOrchestrator( + this._configManager, + this._stateManager, + this._serviceFactory, + context, + workspacePath, + ) + this._searchService = new CodeIndexSearchService( + this._configManager, + this._stateManager, + this._serviceFactory, + context, + ) + } + + // --- Public API --- + + public get onProgressUpdate() { + return this._stateManager.onProgressUpdate + } + + public get state(): IndexingState { + return this._orchestrator.state + } + + public get isFeatureEnabled(): boolean { + return this._configManager.isFeatureEnabled + } + + public get isFeatureConfigured(): boolean { + return this._configManager.isFeatureConfigured + } + + /** + * 
Loads persisted configuration from globalState. + */ + public async loadConfiguration(): Promise { + const { requiresRestart, requiresClear } = await this._configManager.loadConfiguration() + + if (requiresClear) { + console.log("[CodeIndexManager] Embedding dimension changed. Clearing existing index data...") + await this.clearIndexData() + // No need to explicitly set requiresRestart = true, as requiresClear implies a restart need. + } + + if (requiresRestart || requiresClear) { + console.log( + `[CodeIndexManager] Configuration change requires restart (Restart: ${requiresRestart}, Dimension Changed: ${requiresClear}). Starting indexing...`, + ) + await this.startIndexing() + } + } + + /** + * Initiates the indexing process (initial scan and starts watcher). + */ + + public async startIndexing(): Promise { + await this._orchestrator.startIndexing() + } + + /** + * Stops the file watcher and potentially cleans up resources. + */ + public stopWatcher(): void { + this._orchestrator.stopWatcher() + } + + /** + * Cleans up the manager instance. + */ + public dispose(): void { + this.stopWatcher() + this._stateManager.dispose() + console.log(`[CodeIndexManager] Disposed for workspace: ${this.workspacePath}`) + } + + /** + * Clears all index data by stopping the watcher, clearing the Qdrant collection, + * and deleting the cache file. 
+ */ + public async clearIndexData(): Promise { + await this._orchestrator.clearIndexData() + } + + // --- Private Helpers --- + + public getCurrentStatus() { + return this._stateManager.getCurrentStatus() + } + + public setWebviewProvider(provider: { postMessage: (msg: any) => void }) { + this._stateManager.setWebviewProvider(provider) + } + + public async searchIndex(query: string, limit: number): Promise { + return this._searchService.searchIndex(query, limit) + } +} diff --git a/src/services/code-index/orchestrator.ts b/src/services/code-index/orchestrator.ts new file mode 100644 index 0000000000..797a16de74 --- /dev/null +++ b/src/services/code-index/orchestrator.ts @@ -0,0 +1,270 @@ +import * as vscode from "vscode" +import { createHash } from "crypto" +import * as path from "path" +import { CodeIndexConfigManager } from "./config-manager" +import { CodeIndexStateManager, IndexingState } from "./state-manager" +import { CodeIndexServiceFactory } from "./service-factory" +import { FileProcessingResult, IFileWatcher, IVectorStore } from "./interfaces" +import { DirectoryScanner } from "./processors" + +/** + * Manages the code indexing workflow, coordinating between different services and managers. + */ +export class CodeIndexOrchestrator { + private _fileWatcher?: IFileWatcher + private _fileWatcherSubscriptions: vscode.Disposable[] = [] + private _isProcessing: boolean = false + private _scanner?: DirectoryScanner + private _vectorStore?: IVectorStore + + constructor( + private readonly configManager: CodeIndexConfigManager, + private readonly stateManager: CodeIndexStateManager, + private readonly serviceFactory: CodeIndexServiceFactory, + private readonly context: vscode.ExtensionContext, + private readonly workspacePath: string, + ) {} + + /** + * Resets the cache file to an empty state. 
+ */ + private async _resetCacheFile(): Promise { + try { + const cacheFileName = `roo-index-cache-${createHash("sha256").update(this.workspacePath).digest("hex")}.json` + const cachePath = vscode.Uri.joinPath(this.context.globalStorageUri, cacheFileName) + + try { + await vscode.workspace.fs.writeFile(cachePath, Buffer.from("{}", "utf-8")) + console.log(`[CodeIndexOrchestrator] Cache file reset (emptied) at ${cachePath.fsPath}`) + } catch (error) { + console.error("[CodeIndexOrchestrator] Failed to reset (empty) cache file:", error) + } + } catch (error) { + console.error("[CodeIndexOrchestrator] Unexpected error during cache file reset:", error) + } + } + + /** + * Starts the file watcher if not already running. + */ + private async _startWatcher(): Promise { + if (this._fileWatcher) { + console.log("[CodeIndexOrchestrator] File watcher already running.") + return + } + + if (!this.configManager.isFeatureConfigured) { + throw new Error("Cannot start watcher: Service not configured.") + } + + this.stateManager.setSystemState("Indexing", "Initializing file watcher...") + + try { + const services = this.serviceFactory.createServices(this.context) + this._fileWatcher = services.fileWatcher + await this._fileWatcher.initialize() + + this._fileWatcherSubscriptions = [ + this._fileWatcher.onDidStartProcessing((filePath: string) => { + this._updateFileStatus(filePath, "Processing", `Processing file: ${path.basename(filePath)}`) + }), + this._fileWatcher.onDidFinishProcessing((event: FileProcessingResult) => { + if (event.error) { + this._updateFileStatus(event.path, "Error") + console.error(`[CodeIndexOrchestrator] Error processing file ${event.path}:`, event.error) + } else { + this._updateFileStatus( + event.path, + "Indexed", + `Finished processing ${path.basename(event.path)}. 
Index up-to-date.`, + ) + } + + if (this.stateManager.state === "Indexing") { + this.stateManager.setSystemState("Indexed", "Index up-to-date.") + } + }), + ] + + console.log("[CodeIndexOrchestrator] File watcher started.") + } catch (error) { + console.error("[CodeIndexOrchestrator] Failed to start file watcher:", error) + throw error + } + } + + /** + * Updates the status of a file in the state manager. + */ + private _updateFileStatus(filePath: string, fileStatus: string, message?: string): void { + if (!this.configManager.isFeatureConfigured) { + console.warn( + "[CodeIndexOrchestrator] Ignoring file status update because system is not properly configured.", + ) + return + } + this.stateManager.updateFileStatus(filePath, fileStatus, message) + } + + /** + * Initiates the indexing process (initial scan and starts watcher). + */ + public async startIndexing(): Promise { + if (!this.configManager.isFeatureConfigured) { + this.stateManager.setSystemState("Standby", "Missing configuration. 
Save your settings to start indexing.") + console.warn("[CodeIndexOrchestrator] Start rejected: Missing configuration.") + return + } + + if ( + this._isProcessing || + (this.stateManager.state !== "Standby" && + this.stateManager.state !== "Error" && + this.stateManager.state !== "Indexed") + ) { + console.warn( + `[CodeIndexOrchestrator] Start rejected: Already processing or in state ${this.stateManager.state}.`, + ) + return + } + + this._isProcessing = true + this.stateManager.setSystemState("Indexing", "Initializing services...") + + try { + this.configManager.loadConfiguration() + const services = this.serviceFactory.createServices(this.context) + this._vectorStore = services.vectorStore + this._scanner = services.scanner + + const collectionCreated = await this._vectorStore.initialize() + + if (collectionCreated) { + await this._resetCacheFile() + console.log("[CodeIndexOrchestrator] Qdrant collection created; cache file emptied.") + } + + this.stateManager.setSystemState("Indexing", "Services ready. 
Starting workspace scan...") + + let cumulativeBlocksIndexed = 0 + let cumulativeBlocksFoundSoFar = 0 + + const handleFileParsed = (fileBlockCount: number) => { + cumulativeBlocksFoundSoFar += fileBlockCount + this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar) + } + + const handleBlocksIndexed = (indexedCount: number) => { + cumulativeBlocksIndexed += indexedCount + this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar) + } + + const result = await this._scanner.scanDirectory( + this.workspacePath, + this.context, + (batchError: Error) => { + console.error( + `[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`, + batchError, + ) + }, + handleBlocksIndexed, + handleFileParsed, + ) + + if (!result) { + throw new Error("Scan failed, is scanner initialized?") + } + + const { stats } = result + + console.log( + `[CodeIndexOrchestrator] Initial scan complete. Processed Files: ${stats.processed}, Skipped Files: ${stats.skipped}, Blocks Found: ${result.totalBlockCount}, Blocks Indexed: ${cumulativeBlocksIndexed}`, + ) + + await this._startWatcher() + + this.stateManager.setSystemState("Indexed", "Workspace scan and watcher started.") + } catch (error: any) { + console.error("[CodeIndexOrchestrator] Error during indexing:", error) + try { + await this._vectorStore?.clearCollection() + } catch (cleanupError) { + console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError) + } + + await this._resetCacheFile() + console.log("[CodeIndexOrchestrator] Cleared cache file due to scan error.") + + this.stateManager.setSystemState("Error", `Failed during initial scan: ${error.message || "Unknown error"}`) + this.stopWatcher() + } finally { + this._isProcessing = false + } + } + + /** + * Stops the file watcher and cleans up resources. 
+	 */
+	public stopWatcher(): void {
+		if (this._fileWatcher) {
+			this._fileWatcher.dispose()
+			this._fileWatcher = undefined
+			this._fileWatcherSubscriptions.forEach((sub) => sub.dispose())
+			this._fileWatcherSubscriptions = []
+			console.log("[CodeIndexOrchestrator] File watcher stopped.")
+
+			// Keep an existing "Error" state visible instead of overwriting it with "Standby".
+			if (this.stateManager.state !== "Error") {
+				this.stateManager.setSystemState("Standby", "File watcher stopped.")
+			}
+		}
+		// The processing flag is cleared even when no watcher was running.
+		this._isProcessing = false
+	}
+
+	/**
+	 * Clears all index data by stopping the watcher, deleting the vector collection,
+	 * and resetting the cache file.
+	 *
+	 * A failure to delete the vector collection is logged and reported through the
+	 * "Error" system state rather than thrown; the cache file is reset either way.
+	 * The processing flag is held for the whole operation so that startIndexing()
+	 * rejects while a clear is in flight.
+	 *
+	 * @returns Promise that resolves once the cache file has been reset
+	 */
+	public async clearIndexData(): Promise<void> {
+		console.log("[CodeIndexOrchestrator] Clearing code index data...")
+		this._isProcessing = true
+
+		try {
+			// stopWatcher() is synchronous and resets _isProcessing as a side effect,
+			// so the flag is re-asserted before the first await below.
+			this.stopWatcher()
+			this._isProcessing = true
+
+			try {
+				if (this.configManager.isFeatureConfigured) {
+					// A clear can be requested before indexing ever started; create the store on demand.
+					if (!this._vectorStore) {
+						const services = this.serviceFactory.createServices(this.context)
+						this._vectorStore = services.vectorStore
+					}
+
+					await this._vectorStore.deleteCollection()
+					console.log("[CodeIndexOrchestrator] Vector collection deleted.")
+				} else {
+					console.warn("[CodeIndexOrchestrator] Service not configured, skipping vector collection clear.")
+				}
+			} catch (error: unknown) {
+				const message = error instanceof Error ? error.message : String(error)
+				console.error("[CodeIndexOrchestrator] Failed to clear vector collection:", error)
+				this.stateManager.setSystemState("Error", `Failed to clear vector collection: ${message}`)
+			}
+
+			await this._resetCacheFile()
+			console.log("[CodeIndexOrchestrator] Cache file emptied.")
+
+			if (this.stateManager.state !== "Error") {
+				this.stateManager.setSystemState("Standby", "Index data cleared successfully.")
+				console.log("[CodeIndexOrchestrator] Code index data cleared successfully.")
+			}
+		} finally {
+			this._isProcessing = false
+		}
+	}
+
+	/**
+	 * Gets the current state of the indexing system.
+	 */
+	public get state(): IndexingState {
+		return this.stateManager.state
+	}
+}
diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts
new file mode 100644
index 0000000000..532f3e6ef9
--- /dev/null
+++ b/src/services/code-index/processors/file-watcher.ts
@@ -0,0 +1,255 @@
+import * as vscode from "vscode"
+import * as path from "path"
+import { createHash } from "crypto"
+import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController"
+import { getWorkspacePath } from "../../../utils/path"
+import { v5 as uuidv5 } from "uuid"
+import { scannerExtensions } from "../shared/supported-extensions"
+import { IFileWatcher, FileProcessingResult, IEmbedder, IVectorStore } from "../interfaces"
+import { codeParser } from "./parser"
+
+const QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479"
+const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB
+
+/**
+ * Implementation of the file watcher interface.
+ *
+ * Watches the workspace for created, changed and deleted files with a supported
+ * extension. Created/changed files are re-parsed, embedded and upserted into the
+ * vector store; deleted files have their points removed. A per-workspace hash
+ * cache in global storage is used to skip files whose content did not change.
+ */
+export class FileWatcher implements IFileWatcher {
+	private fileWatcher?: vscode.FileSystemWatcher
+	private ignoreController: RooIgnoreController
+	private cachePath: vscode.Uri
+	/** Maps a file path to the SHA-256 hash of the content that was last indexed. */
+	private fileHashes: Record<string, string> = {}
+
+	private readonly _onDidStartProcessing = new vscode.EventEmitter<string>()
+	private readonly _onDidFinishProcessing = new vscode.EventEmitter<FileProcessingResult>()
+
+	/**
+	 * Event emitted when a file starts processing
+	 */
+	public readonly onDidStartProcessing = this._onDidStartProcessing.event
+
+	/**
+	 * Event emitted when a file finishes processing
+	 */
+	public readonly onDidFinishProcessing = this._onDidFinishProcessing.event
+
+	/**
+	 * Creates a new file watcher
+	 * @param workspacePath Path to the workspace
+	 * @param context VS Code extension context
+	 * @param embedder Optional embedder
+	 * @param vectorStore Optional vector store
+	 */
+	constructor(
+		private workspacePath: string,
+		private context: vscode.ExtensionContext,
+		private embedder?: IEmbedder,
+		private vectorStore?: IVectorStore,
+	) {
+		this.ignoreController = new RooIgnoreController(workspacePath)
+
+		// Same cache file name as the directory scanner: one cache per workspace path.
+		this.cachePath = vscode.Uri.joinPath(
+			context.globalStorageUri,
+			`roo-index-cache-${createHash("sha256").update(workspacePath).digest("hex")}.json`,
+		)
+	}
+
+	/**
+	 * Initializes the file watcher: loads the ignore rules and the hash cache,
+	 * then starts watching the workspace for supported files.
+	 */
+	async initialize(): Promise<void> {
+		// The directory scanner initializes its RooIgnoreController before filtering;
+		// do the same here so validateAccess() is consulted with the rules loaded.
+		await this.ignoreController.initialize()
+
+		// Load cache
+		try {
+			const cacheData = await vscode.workspace.fs.readFile(this.cachePath)
+			// readFile yields a Uint8Array; decode explicitly, since Uint8Array#toString()
+			// would produce comma-separated byte values instead of text.
+			this.fileHashes = JSON.parse(Buffer.from(cacheData).toString("utf-8"))
+		} catch (error) {
+			console.log("No cache file found or error reading cache, starting fresh")
+			this.fileHashes = {}
+		}
+
+		// Create file watcher. scannerExtensions entries carry a leading dot (they are
+		// compared against path.extname() elsewhere), so strip it and put the dot in
+		// the glob itself: "**/*.{ts,js,...}".
+		const filePattern = new vscode.RelativePattern(
+			this.workspacePath,
+			`**/*.{${scannerExtensions.map((e) => e.substring(1)).join(",")}}`,
+		)
+		this.fileWatcher = vscode.workspace.createFileSystemWatcher(filePattern)
+
+		// Register event handlers
+		this.fileWatcher.onDidCreate(this.handleFileCreated.bind(this))
+		this.fileWatcher.onDidChange(this.handleFileChanged.bind(this))
+		this.fileWatcher.onDidDelete(this.handleFileDeleted.bind(this))
+	}
+
+	/**
+	 * Disposes the file watcher and both event emitters
+	 */
+	dispose(): void {
+		this.fileWatcher?.dispose()
+		this._onDidStartProcessing.dispose()
+		this._onDidFinishProcessing.dispose()
+	}
+
+	/**
+	 * Handles file creation events
+	 * @param uri URI of the created file
+	 */
+	private async handleFileCreated(uri: vscode.Uri): Promise<void> {
+		await this.processFile(uri.fsPath)
+	}
+
+	/**
+	 * Handles file change events
+	 * @param uri URI of the changed file
+	 */
+	private async handleFileChanged(uri: vscode.Uri): Promise<void> {
+		await this.processFile(uri.fsPath)
+	}
+
+	/**
+	 * Handles file deletion events by dropping the file from the hash cache and
+	 * removing its points from the vector store.
+	 * @param uri URI of the deleted file
+	 */
+	private async handleFileDeleted(uri: vscode.Uri): Promise<void> {
+		const filePath = uri.fsPath
+
+		// Delete from cache
+		if (this.fileHashes[filePath]) {
+			delete this.fileHashes[filePath]
+			await this.saveCache()
+		}
+
+		// Delete from vector store (best effort: a failure is logged, not thrown)
+		if (this.vectorStore) {
+			try {
+				await this.vectorStore.deletePointsByFilePath(filePath)
+				console.log(`[FileWatcher] Deleted points for removed file: ${filePath}`)
+			} catch (error) {
+				console.error(`[FileWatcher] Failed to delete points for ${filePath}:`, error)
+			}
+		}
+	}
+
+	/**
+	 * Processes a file and reports the outcome.
+	 *
+	 * Fires onDidStartProcessing before and onDidFinishProcessing after the work.
+	 * Never rejects: any failure is returned as a result with status "error".
+	 * @param filePath Path to the file to process
+	 * @returns Promise resolving to processing result
+	 */
+	async processFile(filePath: string): Promise<FileProcessingResult> {
+		this._onDidStartProcessing.fire(filePath)
+
+		let result: FileProcessingResult
+		try {
+			result = await this.indexFile(filePath)
+		} catch (error) {
+			result = {
+				path: filePath,
+				status: "error",
+				error: error instanceof Error ? error : new Error(String(error)),
+			}
+		}
+
+		this._onDidFinishProcessing.fire(result)
+		return result
+	}
+
+	/**
+	 * Re-indexes a single file: skips it when ignored, too large or unchanged,
+	 * otherwise replaces its points in the vector store and updates the hash cache.
+	 * @param filePath Path to the file to index
+	 * @returns The "skipped" or "success" result; failures are thrown to the caller
+	 */
+	private async indexFile(filePath: string): Promise<FileProcessingResult> {
+		// Check if file should be ignored
+		if (!this.ignoreController.validateAccess(filePath)) {
+			return { path: filePath, status: "skipped", reason: "File is ignored by .rooignore" }
+		}
+
+		// Check file size
+		const fileUri = vscode.Uri.file(filePath)
+		const fileStat = await vscode.workspace.fs.stat(fileUri)
+		if (fileStat.size > MAX_FILE_SIZE_BYTES) {
+			return { path: filePath, status: "skipped", reason: "File is too large" }
+		}
+
+		// Read file content (decoded the same way as in the directory scanner, so both
+		// compute identical hashes for identical files)
+		const fileBytes = await vscode.workspace.fs.readFile(fileUri)
+		const content = Buffer.from(fileBytes).toString("utf-8")
+
+		// Calculate hash and check if file has changed
+		const newHash = createHash("sha256").update(content).digest("hex")
+		if (this.fileHashes[filePath] === newHash) {
+			return { path: filePath, status: "skipped", reason: "File has not changed" }
+		}
+
+		// Delete old points; a failure here aborts processing so stale and fresh
+		// points for the same file are never mixed.
+		if (this.vectorStore) {
+			try {
+				await this.vectorStore.deletePointsByFilePath(filePath)
+				console.log(`[FileWatcher] Deleted existing points for changed file: ${filePath}`)
+			} catch (error) {
+				console.error(`[FileWatcher] Failed to delete points for ${filePath}:`, error)
+				throw error
+			}
+		}
+
+		// Parse file
+		const blocks = await codeParser.parseFile(filePath, { content, fileHash: newHash })
+
+		// Create embeddings and upsert points
+		if (this.embedder && this.vectorStore && blocks.length > 0) {
+			const texts = blocks.map((block) => block.content)
+			const { embeddings } = await this.embedder.createEmbeddings(texts)
+
+			const workspaceRoot = getWorkspacePath()
+			const points = blocks.map((block, index) => {
+				const normalizedAbsolutePath = path.normalize(path.resolve(workspaceRoot, block.file_path))
+
+				// Deterministic id: the same file path and start line always map to the same point.
+				const stableName = `${normalizedAbsolutePath}:${block.start_line}`
+				const pointId = uuidv5(stableName, QDRANT_CODE_BLOCK_NAMESPACE)
+
+				return {
+					id: pointId,
+					vector: embeddings[index],
+					payload: {
+						filePath: normalizedAbsolutePath,
+						codeChunk: block.content,
+						startLine: block.start_line,
+						endLine: block.end_line,
+					},
+				}
+			})
+
+			await this.vectorStore.upsertPoints(points)
+		}
+
+		// Update cache only after the vector store accepted the new points
+		this.fileHashes[filePath] = newHash
+		await this.saveCache()
+
+		return { path: filePath, status: "success" }
+	}
+
+	/**
+	 * Saves the cache to disk. Failures are logged and otherwise ignored.
+	 */
+	private async saveCache(): Promise<void> {
+		try {
+			await vscode.workspace.fs.writeFile(this.cachePath, Buffer.from(JSON.stringify(this.fileHashes, null, 2)))
+		} catch (error) {
+			console.error("Failed to save cache:", error)
+		}
+	}
+}
diff --git a/src/services/code-index/processors/index.ts b/src/services/code-index/processors/index.ts
new file mode 100644
index 0000000000..c244d9b875
--- /dev/null
+++ b/src/services/code-index/processors/index.ts
@@ -0,0 +1,3 @@
+export * from "./parser"
+export * from "./scanner"
+export * from "./file-watcher"
diff --git a/src/services/code-index/processors/parser.ts b/src/services/code-index/processors/parser.ts
new file mode 100644
index 0000000000..d106381b9f
--- /dev/null
+++ b/src/services/code-index/processors/parser.ts
@@ -0,0 +1,355 @@
+import { readFile } from "fs/promises"
+import { createHash } from "crypto"
+import * as path from "path"
+import * as treeSitter from "web-tree-sitter"
+import { LanguageParser, loadRequiredLanguageParsers } from "../../tree-sitter/languageParser"
+import { ICodeParser, CodeBlock } from "../interfaces"
+import { scannerExtensions } from "../shared/supported-extensions"
+
+const MAX_BLOCK_CHARS = 1000
+const MIN_BLOCK_CHARS = 100
+const MIN_CHUNK_REMAINDER_CHARS = 200 // Minimum characters for the *next* chunk after a split
+const MAX_CHARS_TOLERANCE_FACTOR = 1.15 // 15% tolerance for max chars
+
+/**
+ * Implementation of the code parser interface.
+ *
+ * Splits a source file into code blocks using the tree-sitter query captures of
+ * its language, falling back to plain line-based chunking when the query yields
+ * no captures.
+ */
+export class CodeParser implements ICodeParser {
+	private loadedParsers: LanguageParser = {}
+	/** In-flight parser loads keyed by file extension, so concurrent callers share one load. */
+	private pendingLoads: Map<string, Promise<LanguageParser>> = new Map()
+	// Markdown files are excluded because the current parser logic cannot effectively handle
+	// potentially large Markdown sections without a tree-sitter-like child node structure for chunking
+
+	/**
+	 * Parses a code file into code blocks
+	 * @param filePath Path to the file to parse
+	 * @param options Optional parsing options; when `content` is supplied the file is not
+	 *   read from disk, and `fileHash` (if given) is used instead of hashing the content
+	 * @returns Promise resolving to array of code blocks; empty for unsupported
+	 *   extensions and when the file cannot be read
+	 */
+	async parseFile(
+		filePath: string,
+		options?: {
+			content?: string
+			fileHash?: string
+		},
+	): Promise<CodeBlock[]> {
+		// Get file extension
+		const ext = path.extname(filePath).toLowerCase()
+
+		// Skip if not a supported language
+		if (!this.isSupportedLanguage(ext)) {
+			return []
+		}
+
+		// Get file content
+		let content: string
+		let fileHash: string
+
+		// Compare against undefined rather than truthiness: an empty string is valid
+		// caller-supplied content and must not trigger a read from disk.
+		if (options?.content !== undefined) {
+			content = options.content
+			fileHash = options.fileHash || this.createFileHash(content)
+		} else {
+			try {
+				content = await readFile(filePath, "utf8")
+				fileHash = this.createFileHash(content)
+			} catch (error) {
+				console.error(`Error reading file ${filePath}:`, error)
+				return []
+			}
+		}
+
+		// Parse the file
+		return this.parseContent(filePath, content, fileHash)
+	}
+
+	/**
+	 * Checks if a language is supported
+	 * @param extension File extension, including the leading dot
+	 * @returns Boolean indicating if the language is supported
+	 */
+	private isSupportedLanguage(extension: string): boolean {
+		return scannerExtensions.includes(extension)
+	}
+
+	/**
+	 * Creates a hash for a file
+	 * @param content File content
+	 * @returns Hex-encoded SHA-256 hash of the content
+	 */
+	private createFileHash(content: string): string {
+		return createHash("sha256").update(content).digest("hex")
+	}
+
+	/**
+	 * Parses file content into code blocks
+	 * @param filePath Path to the file
+	 * @param content File content
+	 * @param fileHash File hash
+	 * @returns Array of code blocks; empty when no parser is available for the extension
+	 */
+	private async parseContent(filePath: string, content: string, fileHash: string): Promise<CodeBlock[]> {
+		const ext = path.extname(filePath).slice(1).toLowerCase()
+
+		// Check if we already have the parser loaded
+		if (!this.loadedParsers[ext]) {
+			const pendingLoad = this.pendingLoads.get(ext)
+			if (pendingLoad) {
+				// Another call is already loading this parser; wait for it instead of loading twice.
+				try {
+					await pendingLoad
+				} catch (error) {
+					console.error(`Error in pending parser load for ${filePath}:`, error)
+					return []
+				}
+			} else {
+				const loadPromise = loadRequiredLanguageParsers([filePath])
+				this.pendingLoads.set(ext, loadPromise)
+				try {
+					const newParsers = await loadPromise
+					if (newParsers) {
+						this.loadedParsers = { ...this.loadedParsers, ...newParsers }
+					}
+				} catch (error) {
+					console.error(`Error loading language parser for ${filePath}:`, error)
+					return []
+				} finally {
+					this.pendingLoads.delete(ext)
+				}
+			}
+		}
+
+		const language = this.loadedParsers[ext]
+		if (!language) {
+			console.warn(`No parser available for file extension: ${ext}`)
+			return []
+		}
+
+		const tree = language.parser.parse(content)
+
+		// We don't need to get the query string from languageQueries since it's already loaded
+		// in the language object
+		const captures = language.query.captures(tree.rootNode)
+
+		// No captures: fall back to line-based chunking if the content is large enough
+		if (captures.length === 0) {
+			if (content.length >= MIN_BLOCK_CHARS) {
+				return this._performFallbackChunking(filePath, content, fileHash, MIN_BLOCK_CHARS, MAX_BLOCK_CHARS)
+			}
+			return []
+		}
+
+		const results: CodeBlock[] = []
+
+		// Breadth-first walk starting from the captured nodes. Oversized nodes are
+		// replaced by their children, which are appended to the end of the queue.
+		const queue: treeSitter.SyntaxNode[] = captures.map(
+			(capture: { node: treeSitter.SyntaxNode }) => capture.node,
+		)
+
+		for (let head = 0; head < queue.length; head++) {
+			const currentNode = queue[head]
+			const nodeText = currentNode.text
+
+			// Nodes smaller than MIN_BLOCK_CHARS are ignored
+			if (nodeText.length < MIN_BLOCK_CHARS) {
+				continue
+			}
+
+			if (nodeText.length > MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR) {
+				if (currentNode.children.length > 0) {
+					// Too large, but it has children: process them instead
+					queue.push(...currentNode.children)
+				} else {
+					// Too large leaf node: chunk it by lines
+					results.push(...this._chunkLeafNodeByLines(currentNode, filePath, fileHash, MIN_BLOCK_CHARS))
+				}
+				continue
+			}
+
+			// Node meets min chars and is within max chars (plus tolerance): create a block
+			const identifier =
+				currentNode.childForFieldName("name")?.text ||
+				currentNode.children.find((c) => c.type === "identifier")?.text ||
+				null
+			const start_line = currentNode.startPosition.row + 1
+			const end_line = currentNode.endPosition.row + 1
+			const segmentHash = createHash("sha256")
+				.update(`${filePath}-${start_line}-${end_line}-${nodeText}`)
+				.digest("hex")
+
+			results.push({
+				file_path: filePath,
+				identifier,
+				type: currentNode.type,
+				start_line,
+				end_line,
+				content: nodeText,
+				segmentHash,
+				fileHash,
+			})
+		}
+
+		return results
+	}
+
+	/**
+	 * Common helper function to chunk text by lines, avoiding tiny remainders.
+	 *
+	 * Lines are accumulated until adding the next one would exceed `maxChars`. If the
+	 * text left after that point is shorter than `minRemainderChars`, the split is moved
+	 * to an earlier line so that both sides are large enough. Chunks shorter than
+	 * `minChars` are dropped.
+	 *
+	 * @param lines Lines to chunk
+	 * @param filePath Path recorded on each block
+	 * @param fileHash File hash recorded on each block
+	 * @param baseStartLine 1-based line number of the first entry of `lines`
+	 * @param chunkType Value used as the `type` of each block
+	 * @param minChars Minimum chunk length for a chunk to be emitted
+	 * @param maxChars Length at which a chunk is closed
+	 * @param minRemainderChars Minimum length wanted for the text after a split
+	 * @returns Blocks whose start_line/end_line describe exactly the lines in `content`
+	 */
+	private _chunkTextByLines(
+		lines: string[],
+		filePath: string,
+		fileHash: string,
+		baseStartLine: number, // 1-based start line of the *first* line in the `lines` array
+		chunkType: string,
+		minChars: number,
+		maxChars: number,
+		minRemainderChars: number,
+	): CodeBlock[] {
+		const chunks: CodeBlock[] = []
+		let currentChunkLines: string[] = []
+		let currentChunkLength = 0
+		let chunkStartLineIndex = 0 // 0-based index within the `lines` array
+
+		// Length a line contributes to a chunk: its text plus the newline after it (none after the last line)
+		const lengthOfLine = (index: number) => lines[index].length + (index < lines.length - 1 ? 1 : 0)
+
+		const finalizeChunk = (endLineIndex: number) => {
+			if (currentChunkLength >= minChars && currentChunkLines.length > 0) {
+				const chunkContent = currentChunkLines.join("\n")
+				const startLine = baseStartLine + chunkStartLineIndex
+				const endLine = baseStartLine + endLineIndex
+				const segmentHash = createHash("sha256")
+					.update(`${filePath}-${startLine}-${endLine}-${chunkContent}`)
+					.digest("hex")
+
+				chunks.push({
+					file_path: filePath,
+					identifier: null, // Identifier is handled at a higher level if available
+					type: chunkType,
+					start_line: startLine,
+					end_line: endLine,
+					content: chunkContent,
+					segmentHash,
+					fileHash,
+				})
+			}
+			// Reset for the next chunk
+			currentChunkLines = []
+			currentChunkLength = 0
+			chunkStartLineIndex = endLineIndex + 1
+		}
+
+		for (let i = 0; i < lines.length; i++) {
+			const lineLength = lengthOfLine(i)
+
+			// The line still fits (or the chunk is empty): add it and move on
+			if (currentChunkLength === 0 || currentChunkLength + lineLength <= maxChars) {
+				currentChunkLines.push(lines[i])
+				currentChunkLength += lineLength
+				continue
+			}
+
+			// --- Re-balancing Logic ---
+			let splitIndex = i - 1 // Default split is *before* the current line
+
+			// Length of the text that would be left for the following chunk(s)
+			let remainderLength = 0
+			for (let j = i; j < lines.length; j++) {
+				remainderLength += lengthOfLine(j)
+			}
+
+			// Remainder too small and the current chunk is valid: look backwards for a split
+			// that leaves enough text on both sides
+			if (currentChunkLength >= minChars && remainderLength < minRemainderChars && currentChunkLines.length > 1) {
+				for (let k = i - 2; k >= chunkStartLineIndex; k--) {
+					const potentialChunkLength = lines.slice(chunkStartLineIndex, k + 1).join("\n").length + 1
+					const potentialNextChunkLength = lines.slice(k + 1).join("\n").length + 1
+
+					if (potentialChunkLength >= minChars && potentialNextChunkLength >= minRemainderChars) {
+						splitIndex = k
+						break
+					}
+				}
+				// If no better split is found, splitIndex remains i - 1
+			}
+			// --- End Re-balancing ---
+
+			if (splitIndex < i - 1) {
+				// The split moved backwards: shrink the chunk to the lines it really ends with,
+				// so its content matches the start/end lines recorded for it. None of these
+				// lines is the last line of the input, so each carries a trailing newline.
+				currentChunkLines = lines.slice(chunkStartLineIndex, splitIndex + 1)
+				currentChunkLength = currentChunkLines.reduce((total, text) => total + text.length + 1, 0)
+			}
+
+			finalizeChunk(splitIndex)
+
+			// Resume with the first line after the split (the loop increment adds one), so
+			// lines moved out of the finalized chunk become the start of the next chunk.
+			i = splitIndex
+		}
+
+		// Process the last remaining chunk
+		if (currentChunkLines.length > 0) {
+			finalizeChunk(lines.length - 1)
+		}
+
+		return chunks
+	}
+
+	/**
+	 * Chunks a whole file by lines; used when the language query produced no captures.
+	 * @param filePath Path to the file
+	 * @param content File content
+	 * @param fileHash File hash
+	 * @param minChars Minimum chunk length
+	 * @param maxChars Maximum chunk length
+	 * @returns Blocks of type "fallback_chunk", numbered from line 1
+	 */
+	private _performFallbackChunking(
+		filePath: string,
+		content: string,
+		fileHash: string,
+		minChars: number,
+		maxChars: number,
+	): CodeBlock[] {
+		const lines = content.split("\n")
+		return this._chunkTextByLines(
+			lines,
+			filePath,
+			fileHash,
+			1, // Fallback starts from line 1
+			"fallback_chunk",
+			minChars,
+			maxChars,
+			MIN_CHUNK_REMAINDER_CHARS,
+		)
+	}
+
+	/**
+	 * Chunks the text of an oversized leaf node by lines.
+	 * @param node Leaf node whose text exceeds the maximum block size
+	 * @param filePath Path to the file
+	 * @param fileHash File hash
+	 * @param minChars Minimum chunk length
+	 * @returns Blocks typed with the node's type, numbered from the node's first line
+	 */
+	private _chunkLeafNodeByLines(
+		node: treeSitter.SyntaxNode,
+		filePath: string,
+		fileHash: string,
+		minChars: number,
+	): CodeBlock[] {
+		const lines = node.text.split("\n")
+		const baseStartLine = node.startPosition.row + 1
+		return this._chunkTextByLines(
+			lines,
+			filePath,
+			fileHash,
+			baseStartLine,
+			node.type, // Use the node's type
+			minChars,
+			MAX_BLOCK_CHARS, // Use the global max
+			MIN_CHUNK_REMAINDER_CHARS,
+		)
+	}
+}
+
+// Export a singleton instance for convenience
+export const codeParser = new CodeParser()
diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts
new file mode 100644
index 0000000000..aad0763726
--- /dev/null
+++ b/src/services/code-index/processors/scanner.ts
@@ -0,0 +1,390 @@
+import { listFiles } from "../../glob/list-files"
+import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController"
+import { stat } from "fs/promises"
+import * as path from "path"
+import { getWorkspacePath } from "../../../utils/path"
+import { scannerExtensions } from "../shared/supported-extensions"
+import * as vscode from "vscode"
+import {
CodeBlock, ICodeParser, IEmbedder, IVectorStore, IDirectoryScanner } from "../interfaces"
+import { createHash } from "crypto"
+import { v5 as uuidv5 } from "uuid"
+import pLimit from "p-limit"
+import { Mutex } from "async-mutex"
+
+/**
+ * Scans a directory for supported source files, parses them into code blocks and
+ * indexes those blocks (embeddings + vector store upserts) in batches.
+ *
+ * A hash cache in the extension's global storage is used to skip files whose
+ * content did not change since the previous scan.
+ */
+export class DirectoryScanner implements IDirectoryScanner {
+	// Constants moved inside the class
+	private static readonly QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479"
+	private static readonly MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB
+	private static readonly MAX_LIST_FILES_LIMIT = 2_000
+	private static readonly BATCH_SEGMENT_THRESHOLD = 30 // Number of code segments to batch for embeddings/upserts
+	private static readonly MAX_BATCH_RETRIES = 3
+	private static readonly INITIAL_RETRY_DELAY_MS = 500
+	private static readonly PARSING_CONCURRENCY = 10
+	private static readonly BATCH_PROCESSING_CONCURRENCY = 10
+
+	constructor(
+		private readonly embedder: IEmbedder,
+		private readonly qdrantClient: IVectorStore,
+		private readonly codeParser: ICodeParser,
+	) {}
+
+	/**
+	 * Recursively scans a directory for code blocks in supported files.
+	 *
+	 * Files larger than MAX_FILE_SIZE_BYTES and files whose hash matches the cache are
+	 * skipped. Per-file and per-batch failures are reported through `onError` and do not
+	 * abort the scan.
+	 * @param directoryPath The directory to scan
+	 * @param context VS Code ExtensionContext for cache storage; without it no cache is read or written
+	 * @param onError Optional error handler callback
+	 * @param onBlocksIndexed Optional callback invoked with the block count of each successfully indexed batch
+	 * @param onFileParsed Optional callback invoked with the block count of each parsed file
+	 * @returns The parsed code blocks, processed/skipped file counts and the total
+	 *   number of blocks found in parsed files
+	 */
+	public async scanDirectory(
+		directoryPath: string,
+		context?: vscode.ExtensionContext,
+		onError?: (error: Error) => void,
+		onBlocksIndexed?: (indexedCount: number) => void,
+		onFileParsed?: (fileBlockCount: number) => void,
+	): Promise<{ codeBlocks: CodeBlock[]; stats: { processed: number; skipped: number }; totalBlockCount: number }> {
+		// Get all files recursively (handles .gitignore automatically)
+		const [allPaths] = await listFiles(directoryPath, true, DirectoryScanner.MAX_LIST_FILES_LIMIT)
+
+		// Filter out directories (marked with trailing '/')
+		const filePaths = allPaths.filter((p) => !p.endsWith("/"))
+
+		// Filter paths using .rooignore
+		const ignoreController = new RooIgnoreController(directoryPath)
+		await ignoreController.initialize()
+		const allowedPaths = ignoreController.filterPaths(filePaths)
+
+		// Filter by supported extensions
+		const supportedPaths = allowedPaths.filter((filePath) =>
+			scannerExtensions.includes(path.extname(filePath).toLowerCase()),
+		)
+
+		// Initialize cache
+		const cachePath = context?.globalStorageUri
+			? vscode.Uri.joinPath(
+					context.globalStorageUri,
+					`roo-index-cache-${createHash("sha256").update(directoryPath).digest("hex")}.json`,
+				)
+			: undefined
+		const oldHashes = cachePath ? await this.loadHashCache(cachePath) : {}
+		const newHashes: Record<string, string> = {}
+		const processedFiles = new Set<string>()
+		const codeBlocks: CodeBlock[] = []
+		let processedCount = 0
+		let skippedCount = 0
+
+		// Initialize parallel processing tools
+		const parseLimiter = pLimit(DirectoryScanner.PARSING_CONCURRENCY) // Concurrency for file parsing
+		const batchLimiter = pLimit(DirectoryScanner.BATCH_PROCESSING_CONCURRENCY) // Concurrency for batch processing
+		const mutex = new Mutex()
+
+		// Shared batch accumulators (protected by mutex)
+		let currentBatchBlocks: CodeBlock[] = []
+		let currentBatchTexts: string[] = []
+		let currentBatchFileInfos: { filePath: string; fileHash: string; isNew: boolean }[] = []
+		const activeBatchPromises: Promise<void>[] = []
+
+		// Initialize block counter
+		let totalBlockCount = 0
+
+		// Hands the accumulated batch to the batch limiter and starts a fresh one.
+		// Must be called while holding the mutex.
+		const queueCurrentBatch = () => {
+			const batchBlocks = currentBatchBlocks
+			const batchTexts = currentBatchTexts
+			const batchFileInfos = currentBatchFileInfos
+			currentBatchBlocks = []
+			currentBatchTexts = []
+			currentBatchFileInfos = []
+
+			activeBatchPromises.push(
+				batchLimiter(() =>
+					this.processBatch(batchBlocks, batchTexts, batchFileInfos, newHashes, onError, onBlocksIndexed),
+				),
+			)
+		}
+
+		// Process all files in parallel with concurrency control
+		const parsePromises = supportedPaths.map((filePath) =>
+			parseLimiter(async () => {
+				try {
+					// Check file size
+					const stats = await stat(filePath)
+					if (stats.size > DirectoryScanner.MAX_FILE_SIZE_BYTES) {
+						skippedCount++ // Skip large files
+						return
+					}
+
+					// Read file content
+					const fileBytes = await vscode.workspace.fs.readFile(vscode.Uri.file(filePath))
+					const content = Buffer.from(fileBytes).toString("utf-8")
+
+					// Calculate current hash
+					const currentFileHash = createHash("sha256").update(content).digest("hex")
+					processedFiles.add(filePath)
+
+					// Check against cache
+					if (oldHashes[filePath] === currentFileHash) {
+						// File is unchanged
+						newHashes[filePath] = currentFileHash
+						skippedCount++
+						return
+					}
+
+					// File is new or changed - parse it using the injected parser
+					const blocks = await this.codeParser.parseFile(filePath, { content, fileHash: currentFileHash })
+					const fileBlockCount = blocks.length
+					onFileParsed?.(fileBlockCount)
+					codeBlocks.push(...blocks)
+					processedCount++
+					// Count each file's blocks exactly once, matching what onFileParsed reported.
+					totalBlockCount += fileBlockCount
+
+					// Process embeddings if configured
+					let queuedAnyBlock = false
+					if (this.embedder && this.qdrantClient && blocks.length > 0) {
+						const release = await mutex.acquire()
+						try {
+							for (const block of blocks) {
+								const trimmedContent = block.content.trim()
+								if (!trimmedContent) {
+									continue
+								}
+
+								currentBatchBlocks.push(block)
+								currentBatchTexts.push(trimmedContent)
+								queuedAnyBlock = true
+
+								// One entry per file and batch is enough: the entry is only used to delete
+								// the file's old points and to record its hash once the batch succeeds.
+								if (!currentBatchFileInfos.some((info) => info.filePath === filePath)) {
+									currentBatchFileInfos.push({
+										filePath,
+										fileHash: currentFileHash,
+										isNew: !oldHashes[filePath],
+									})
+								}
+
+								// Check if batch threshold is met
+								if (currentBatchBlocks.length >= DirectoryScanner.BATCH_SEGMENT_THRESHOLD) {
+									queueCurrentBatch()
+								}
+							}
+						} finally {
+							release()
+						}
+					}
+
+					if (!queuedAnyBlock) {
+						// Nothing was handed to a batch (no blocks, or only whitespace blocks), so no
+						// batch will record this hash; record it here to avoid re-parsing next scan.
+						newHashes[filePath] = currentFileHash
+					}
+				} catch (error) {
+					console.error(`Error processing file ${filePath}:`, error)
+					if (onError) {
+						onError(error instanceof Error ? error : new Error(`Unknown error processing file ${filePath}`))
+					}
+				}
+			}),
+		)
+
+		// Wait for all parsing to complete
+		await Promise.all(parsePromises)
+
+		// Process any remaining items in batch
+		if (currentBatchBlocks.length > 0) {
+			const release = await mutex.acquire()
+			try {
+				queueCurrentBatch()
+			} finally {
+				release()
+			}
+		}
+
+		// Wait for all batch processing to complete
+		await Promise.all(activeBatchPromises)
+
+		// Handle deleted files (don't add them to newHashes)
+		if (cachePath) {
+			for (const cachedFilePath of Object.keys(oldHashes)) {
+				if (processedFiles.has(cachedFilePath)) {
+					continue
+				}
+
+				// File was deleted or is no longer supported/indexed
+				if (this.qdrantClient) {
+					try {
+						console.log(`[DirectoryScanner] Deleting points for deleted file: ${cachedFilePath}`)
+						await this.qdrantClient.deletePointsByFilePath(cachedFilePath)
+					} catch (error) {
+						console.error(`[DirectoryScanner] Failed to delete points for ${cachedFilePath}:`, error)
+						if (onError) {
+							onError(
+								error instanceof Error
+									? error
+									: new Error(`Unknown error deleting points for ${cachedFilePath}`),
+							)
+						}
+					}
+				}
+				// The file is implicitly removed from the cache because it's not added to newHashes
+			}
+
+			// Save the updated cache
+			await this.saveHashCache(cachePath, newHashes)
+		}
+
+		return {
+			codeBlocks,
+			stats: {
+				processed: processedCount,
+				skipped: skippedCount,
+			},
+			totalBlockCount,
+		}
+	}
+
+	/**
+	 * Loads the file hash cache.
+	 * @param cachePath Location of the cache file
+	 * @returns The cached path-to-hash map, or an empty map when the file is missing or unreadable
+	 */
+	private async loadHashCache(cachePath: vscode.Uri): Promise<Record<string, string>> {
+		try {
+			const fileData = await vscode.workspace.fs.readFile(cachePath)
+			return JSON.parse(Buffer.from(fileData).toString("utf-8"))
+		} catch (error) {
+			if (error instanceof vscode.FileSystemError && error.code === "FileNotFound") {
+				return {} // Cache file doesn't exist yet, return empty object
+			}
+			console.error("Error loading hash cache:", error)
+			return {} // Return empty on other errors to allow indexing to proceed
+		}
+	}
+
+	/**
+	 * Writes the file hash cache, creating its directory first.
+	 * Failures are logged and otherwise ignored.
+	 * @param cachePath Location of the cache file
+	 * @param hashes Path-to-hash map to persist
+	 */
+	private async saveHashCache(cachePath: vscode.Uri, hashes: Record<string, string>): Promise<void> {
+		try {
+			// Ensure directory exists
+			await vscode.workspace.fs.createDirectory(vscode.Uri.file(path.dirname(cachePath.fsPath)))
+			// Write file
+			await vscode.workspace.fs.writeFile(cachePath, Buffer.from(JSON.stringify(hashes, null, 2), "utf-8"))
+		} catch (error) {
+			console.error("Error saving hash cache:", error)
+			// Don't re-throw, as failure to save cache shouldn't block the main operation
+		}
+	}
+
+	/**
+	 * Indexes one batch of code blocks: deletes the existing points of modified files,
+	 * creates embeddings, upserts the new points and records the files' hashes.
+	 *
+	 * The whole sequence is retried up to MAX_BATCH_RETRIES times with exponential
+	 * backoff. A batch that still fails is reported through `onError`; the hashes of
+	 * its files are not recorded.
+	 *
+	 * NOTE(review): when the blocks of one modified file end up in two batches, each
+	 * batch deletes that file's existing points before upserting, so a later batch may
+	 * remove points an earlier batch just wrote - confirm whether a file's blocks
+	 * should be kept within a single batch.
+	 *
+	 * @param batchBlocks Blocks to index
+	 * @param batchTexts Text to embed for each block, index-aligned with `batchBlocks`
+	 * @param batchFileInfos Files the blocks come from
+	 * @param newHashes Hash map that receives the hashes of successfully indexed files
+	 * @param onError Optional error handler callback
+	 * @param onBlocksIndexed Optional callback invoked with the number of blocks indexed
+	 */
+	private async processBatch(
+		batchBlocks: CodeBlock[],
+		batchTexts: string[],
+		batchFileInfos: { filePath: string; fileHash: string; isNew: boolean }[],
+		newHashes: Record<string, string>,
+		onError?: (error: Error) => void,
+		onBlocksIndexed?: (indexedCount: number) => void,
+	): Promise<void> {
+		if (batchBlocks.length === 0) return
+
+		// Only modified files (not new ones) can have existing points to delete
+		const modifiedFilePaths = [
+			...new Set(batchFileInfos.filter((info) => !info.isNew).map((info) => info.filePath)),
+		]
+		const workspaceRoot = getWorkspacePath()
+
+		let attempts = 0
+		let success = false
+		let lastError: Error | null = null
+
+		while (attempts < DirectoryScanner.MAX_BATCH_RETRIES && !success) {
+			attempts++
+			try {
+				// --- Deletion Step ---
+				if (modifiedFilePaths.length > 0) {
+					console.log(
+						`[DirectoryScanner] Deleting existing points for ${modifiedFilePaths.length} file(s) in batch...`,
+					)
+					try {
+						await this.qdrantClient.deletePointsByMultipleFilePaths(modifiedFilePaths)
+					} catch (deleteError) {
+						console.error(
+							`[DirectoryScanner] Failed to delete points for ${modifiedFilePaths.length} files before upsert:`,
+							deleteError,
+						)
+						// Re-throw the error to stop processing this batch attempt
+						throw deleteError
+					}
+				}
+				// --- End Deletion Step ---
+
+				// Create embeddings for batch
+				const { embeddings } = await this.embedder.createEmbeddings(batchTexts)
+
+				// Prepare points for Qdrant
+				const points = batchBlocks.map((block, index) => {
+					// Ensure the block path is relative to the workspace root before resolving
+					const relativeBlockPath = path.isAbsolute(block.file_path)
+						? path.relative(workspaceRoot, block.file_path)
+						: block.file_path
+					const normalizedAbsolutePath = path.normalize(path.resolve(workspaceRoot, relativeBlockPath))
+
+					// Deterministic id: the same file path and start line always map to the same point
+					const stableName = `${normalizedAbsolutePath}:${block.start_line}`
+					const pointId = uuidv5(stableName, DirectoryScanner.QDRANT_CODE_BLOCK_NAMESPACE)
+
+					return {
+						id: pointId,
+						vector: embeddings[index],
+						payload: {
+							filePath: normalizedAbsolutePath, // Store normalized absolute path
+							codeChunk: block.content,
+							startLine: block.start_line,
+							endLine: block.end_line,
+						},
+					}
+				})
+
+				// Upsert points to Qdrant
+				await this.qdrantClient.upsertPoints(points)
+				onBlocksIndexed?.(batchBlocks.length)
+
+				// Update hashes for successfully processed files in this batch
+				for (const fileInfo of batchFileInfos) {
+					newHashes[fileInfo.filePath] = fileInfo.fileHash
+				}
+				success = true
+				console.log(`[DirectoryScanner] Successfully processed batch of ${batchBlocks.length} blocks.`)
+			} catch (error) {
+				lastError = error instanceof Error ? error : new Error(String(error))
+				console.error(`[DirectoryScanner] Error processing batch (attempt ${attempts}):`, error)
+
+				if (attempts < DirectoryScanner.MAX_BATCH_RETRIES) {
+					// Exponential backoff: 500ms, 1000ms, ...
+					const delay = DirectoryScanner.INITIAL_RETRY_DELAY_MS * Math.pow(2, attempts - 1)
+					console.log(`[DirectoryScanner] Retrying batch in ${delay}ms...`)
+					await new Promise((resolve) => setTimeout(resolve, delay))
+				}
+			}
+		}
+
+		if (!success && lastError) {
+			console.error(
+				`[DirectoryScanner] Failed to process batch after ${DirectoryScanner.MAX_BATCH_RETRIES} attempts`,
+			)
+			if (onError) {
+				onError(
+					new Error(
+						`Failed to process batch after ${DirectoryScanner.MAX_BATCH_RETRIES} attempts: ${lastError.message}`,
+					),
+				)
+			}
+		}
+	}
+}
diff --git a/src/services/code-index/search-service.ts b/src/services/code-index/search-service.ts
new file mode 100644
index 0000000000..f3d38f602c
--- /dev/null
+++ b/src/services/code-index/search-service.ts
@@ -0,0 +1,56 @@
+import * as vscode from "vscode"
+import { VectorStoreSearchResult } from "./interfaces" +import { CodeIndexConfigManager } from "./config-manager" +import { CodeIndexStateManager } from "./state-manager" +import { CodeIndexServiceFactory } from "./service-factory" + +/** + * Service responsible for searching the code index. + */ +export class CodeIndexSearchService { + constructor( + private readonly configManager: CodeIndexConfigManager, + private readonly stateManager: CodeIndexStateManager, + private readonly serviceFactory: CodeIndexServiceFactory, + private readonly context: vscode.ExtensionContext, + ) {} + + /** + * Searches the code index for relevant content. + * @param query The search query + * @param limit Maximum number of results to return + * @returns Array of search results + * @throws Error if the service is not properly configured or ready + */ + public async searchIndex(query: string, limit: number): Promise { + if (!this.configManager.isFeatureEnabled || !this.configManager.isFeatureConfigured) { + throw new Error("Code index feature is disabled or not configured.") + } + + const currentState = this.stateManager.getCurrentStatus().systemStatus + if (currentState !== "Indexed" && currentState !== "Indexing") { + // Allow search during Indexing too + throw new Error(`Code index is not ready for search. 
Current state: ${currentState}`) + } + + try { + // Get services from factory + const { embedder, vectorStore } = this.serviceFactory.createServices(this.context) + + // Generate embedding for query + const embeddingResponse = await embedder.createEmbeddings([query]) + const vector = embeddingResponse?.embeddings[0] + if (!vector) { + throw new Error("Failed to generate embedding for query.") + } + + // Perform search + const results = await vectorStore.search(vector, limit) + return results + } catch (error) { + console.error("[CodeIndexSearchService] Error during search:", error) + this.stateManager.setSystemState("Error", `Search failed: ${(error as Error).message}`) + throw error // Re-throw the error after setting state + } + } +} diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts new file mode 100644 index 0000000000..8a05214be3 --- /dev/null +++ b/src/services/code-index/service-factory.ts @@ -0,0 +1,124 @@ +import * as vscode from "vscode" +import { OpenAiEmbedder } from "./embedders/openai" +import { CodeIndexOllamaEmbedder } from "./embedders/ollama" +import { EmbedderProvider, getDefaultModelId, getModelDimension } from "../../shared/embeddingModels" +import { QdrantVectorStore } from "./vector-store/qdrant-client" +import { codeParser, DirectoryScanner, FileWatcher } from "./processors" +import { ICodeParser, IEmbedder, IFileWatcher, IVectorStore } from "./interfaces" +import { CodeIndexConfigManager } from "./config-manager" + +/** + * Factory class responsible for creating and configuring code indexing service dependencies. + */ +export class CodeIndexServiceFactory { + constructor( + private readonly configManager: CodeIndexConfigManager, + private readonly workspacePath: string, + ) {} + + /** + * Creates an embedder instance based on the current configuration. 
+ */ + protected createEmbedder(): IEmbedder { + const config = this.configManager.getConfig() + + const provider = config.embedderProvider as EmbedderProvider + + if (provider === "openai") { + if (!config.openAiOptions?.openAiNativeApiKey) { + throw new Error("OpenAI configuration missing for embedder creation") + } + return new OpenAiEmbedder(config.openAiOptions) // Reverted temporarily + } else if (provider === "ollama") { + if (!config.ollamaOptions?.ollamaBaseUrl) { + throw new Error("Ollama configuration missing for embedder creation") + } + return new CodeIndexOllamaEmbedder(config.ollamaOptions) // Reverted temporarily + } + + throw new Error(`Invalid embedder type configured: ${config.embedderProvider}`) + } + + /** + * Creates a vector store instance using the current configuration. + */ + protected createVectorStore(): IVectorStore { + const config = this.configManager.getConfig() + + const provider = config.embedderProvider as EmbedderProvider + const defaultModel = getDefaultModelId(provider) + // Determine the modelId based on the provider and config, using apiModelId + const modelId = + provider === "openai" + ? (config.openAiOptions?.apiModelId ?? defaultModel) + : (config.ollamaOptions?.apiModelId ?? defaultModel) + + const vectorSize = getModelDimension(provider, modelId) + + if (vectorSize === undefined) { + throw new Error( + `Could not determine vector dimension for model '${modelId}'. Check model profiles or config.`, + ) + } + + if (!config.qdrantUrl) { + // This check remains important + throw new Error("Qdrant URL missing for vector store creation") + } + + // Assuming constructor is updated: new QdrantVectorStore(workspacePath, url, vectorSize, apiKey?) + return new QdrantVectorStore(this.workspacePath, config.qdrantUrl, vectorSize, config.qdrantApiKey) + } + + /** + * Creates a directory scanner instance with its required dependencies. 
+ */ + protected createDirectoryScanner( + embedder: IEmbedder, + vectorStore: IVectorStore, + parser: ICodeParser, + ): DirectoryScanner { + return new DirectoryScanner(embedder, vectorStore, parser) + } + + /** + * Creates a file watcher instance with its required dependencies. + */ + protected createFileWatcher( + context: vscode.ExtensionContext, + embedder: IEmbedder, + vectorStore: IVectorStore, + ): IFileWatcher { + return new FileWatcher(this.workspacePath, context, embedder, vectorStore) + } + + /** + * Creates all required service dependencies if the service is properly configured. + * @throws Error if the service is not properly configured + */ + public createServices(context: vscode.ExtensionContext): { + embedder: IEmbedder + vectorStore: IVectorStore + parser: ICodeParser + scanner: DirectoryScanner + fileWatcher: IFileWatcher + } { + if (!this.configManager.isFeatureConfigured) { + throw new Error("Cannot create services: Code indexing is not properly configured") + } + + const embedder = this.createEmbedder() + const vectorStore = this.createVectorStore() + const parser = codeParser + const scanner = this.createDirectoryScanner(embedder, vectorStore, parser) + const fileWatcher = this.createFileWatcher(context, embedder, vectorStore) + + return { + embedder, + vectorStore, + parser, + scanner, + fileWatcher, + } + } +} diff --git a/src/services/code-index/shared/supported-extensions.ts b/src/services/code-index/shared/supported-extensions.ts new file mode 100644 index 0000000000..91e3d29c83 --- /dev/null +++ b/src/services/code-index/shared/supported-extensions.ts @@ -0,0 +1,4 @@ +import { extensions as allExtensions } from "../../tree-sitter" + +// Filter out markdown extensions for the scanner +export const scannerExtensions = allExtensions.filter((ext) => ext !== ".md" && ext !== ".markdown") diff --git a/src/services/code-index/state-manager.ts b/src/services/code-index/state-manager.ts new file mode 100644 index 0000000000..64062b4338 --- 
/dev/null +++ b/src/services/code-index/state-manager.ts @@ -0,0 +1,133 @@ +import * as vscode from "vscode" + +export type IndexingState = "Standby" | "Indexing" | "Indexed" | "Error" + +export class CodeIndexStateManager { + private _systemStatus: IndexingState = "Standby" + private _statusMessage: string = "" + private _fileStatuses: Record = {} + private _processedBlockCount: number = 0 + private _totalBlockCount: number = 0 + private _progressEmitter = new vscode.EventEmitter>() + + // Webview provider reference for status updates + private webviewProvider?: { postMessage: (msg: any) => void } + + constructor() { + // Initialize with default state + } + + // --- Public API --- + + public readonly onProgressUpdate = this._progressEmitter.event + + public get state(): IndexingState { + return this._systemStatus + } + + public setWebviewProvider(provider: { postMessage: (msg: any) => void }) { + this.webviewProvider = provider + } + + public getCurrentStatus() { + return { + systemStatus: this._systemStatus, + fileStatuses: this._fileStatuses, + message: this._statusMessage, + processedBlockCount: this._processedBlockCount, + totalBlockCount: this._totalBlockCount, + } + } + + // --- State Management --- + + public setSystemState(newState: IndexingState, message?: string): void { + const stateChanged = + newState !== this._systemStatus || (message !== undefined && message !== this._statusMessage) + + if (stateChanged) { + this._systemStatus = newState + if (message !== undefined) { + this._statusMessage = message + } + + // Reset progress counters if moving to a non-indexing state or starting fresh + if (newState !== "Indexing") { + this._processedBlockCount = 0 + this._totalBlockCount = 0 + // Optionally clear the message or set a default for non-indexing states + if (newState === "Standby" && message === undefined) this._statusMessage = "Ready." + if (newState === "Indexed" && message === undefined) this._statusMessage = "Index up-to-date." 
+ if (newState === "Error" && message === undefined) this._statusMessage = "An error occurred." + } + + this.postStatusUpdate() + this._progressEmitter.fire(this.getCurrentStatus()) + console.log( + `[CodeIndexStateManager] System state changed to: ${this._systemStatus}${ + message ? ` (${message})` : "" + }`, + ) + } + } + + public updateFileStatus(filePath: string, fileStatus: string, message?: string): void { + let stateChanged = false + + if (this._fileStatuses[filePath] !== fileStatus) { + this._fileStatuses[filePath] = fileStatus + stateChanged = true + } + + // Update overall message ONLY if indexing and message is provided + if (message && this._systemStatus === "Indexing" && message !== this._statusMessage) { + this._statusMessage = message + stateChanged = true + console.log(`[CodeIndexStateManager] Status message updated during indexing: ${this._statusMessage}`) + } + + if (stateChanged) { + this.postStatusUpdate() + this._progressEmitter.fire(this.getCurrentStatus()) + } + } + + private postStatusUpdate() { + if (this.webviewProvider) { + this.webviewProvider.postMessage({ + type: "indexingStatusUpdate", + values: this.getCurrentStatus(), + }) + } + } + + public reportBlockIndexingProgress(processedBlocks: number, totalBlocks: number): void { + const progressChanged = processedBlocks !== this._processedBlockCount || totalBlocks !== this._totalBlockCount + + // Update if progress changes OR if the system wasn't already in 'Indexing' state + if (progressChanged || this._systemStatus !== "Indexing") { + this._processedBlockCount = processedBlocks + this._totalBlockCount = totalBlocks + + const message = `Indexed ${this._processedBlockCount} / ${this._totalBlockCount} blocks found` + const oldStatus = this._systemStatus + const oldMessage = this._statusMessage + + this._systemStatus = "Indexing" // Ensure state is Indexing + this._statusMessage = message + + // Only fire update if status, message or progress actually changed + if (oldStatus !== 
this._systemStatus || oldMessage !== this._statusMessage || progressChanged) { + this.postStatusUpdate() + this._progressEmitter.fire(this.getCurrentStatus()) + console.log( + `[CodeIndexStateManager] Block Progress: ${message} (${this._processedBlockCount}/${this._totalBlockCount})`, + ) + } + } + } + + public dispose(): void { + this._progressEmitter.dispose() + } +} diff --git a/src/services/code-index/vector-store/index.ts b/src/services/code-index/vector-store/index.ts new file mode 100644 index 0000000000..d42ea841c3 --- /dev/null +++ b/src/services/code-index/vector-store/index.ts @@ -0,0 +1 @@ +export * from "./qdrant-client" diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts new file mode 100644 index 0000000000..000afa7f5a --- /dev/null +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -0,0 +1,210 @@ +import { QdrantClient } from "@qdrant/js-client-rest" +import { createHash } from "crypto" +import * as path from "path" +import { getWorkspacePath } from "../../../utils/path" +import { IVectorStore } from "../interfaces/vector-store" +import { Payload, VectorStoreSearchResult } from "../interfaces" + +/** + * Qdrant implementation of the vector store interface + */ +export class QdrantVectorStore implements IVectorStore { + private readonly QDRANT_URL = "http://localhost:6333" + private readonly vectorSize!: number + private readonly DISTANCE_METRIC = "Cosine" + + private client: QdrantClient + private readonly collectionName: string + + /** + * Creates a new Qdrant vector store + * @param workspacePath Path to the workspace + * @param url Optional URL to the Qdrant server + */ + constructor(workspacePath: string, url: string, vectorSize: number, apiKey?: string) { + this.client = new QdrantClient({ + url: url ?? 
this.QDRANT_URL, + apiKey, + headers: { + "User-Agent": "Roo-Code", + }, + }) + + // Generate collection name from workspace path + const hash = createHash("sha256").update(workspacePath).digest("hex") + this.vectorSize = vectorSize + this.collectionName = `ws-${hash.substring(0, 16)}` + } + + /** + * Initializes the vector store + * @returns Promise resolving to boolean indicating if a new collection was created + */ + async initialize(): Promise { + try { + let created = false + const collections = await this.client.getCollections() + const collectionExists = collections.collections.some( + (collection) => collection.name === this.collectionName, + ) + + if (!collectionExists) { + await this.client.createCollection(this.collectionName, { + vectors: { + size: this.vectorSize, + distance: this.DISTANCE_METRIC, + }, + }) + created = true + } + return created + } catch (error) { + console.error("Failed to initialize Qdrant collection:", error) + throw error + } + } + + /** + * Upserts points into the vector store + * @param points Array of points to upsert + */ + async upsertPoints( + points: Array<{ + id: string + vector: number[] + payload: Record + }>, + ): Promise { + try { + await this.client.upsert(this.collectionName, { + points, + wait: true, + }) + } catch (error) { + console.error("Failed to upsert points:", error) + throw error + } + } + + /** + * Checks if a payload is valid + * @param payload Payload to check + * @returns Boolean indicating if the payload is valid + */ + private isPayloadValid(payload: Record): payload is Payload { + return "filePath" in payload && "codeChunk" in payload && "startLine" in payload && "endLine" in payload + } + + /** + * Searches for similar vectors + * @param queryVector Vector to search for + * @param limit Maximum number of results to return + * @returns Promise resolving to search results + */ + async search(queryVector: number[], limit: number = 10): Promise { + try { + const result = await 
this.client.search(this.collectionName, { + vector: queryVector, + limit, + }) + result.filter((r) => this.isPayloadValid(r.payload!)) + + return result as VectorStoreSearchResult[] + } catch (error) { + console.error("Failed to search points:", error) + throw error + } + } + + /** + * Deletes points by file path + * @param filePath Path of the file to delete points for + */ + async deletePointsByFilePath(filePath: string): Promise { + return this.deletePointsByMultipleFilePaths([filePath]) + } + + async deletePointsByMultipleFilePaths(filePaths: string[]): Promise { + if (filePaths.length === 0) { + return + } + + try { + const workspaceRoot = getWorkspacePath() + const normalizedPaths = filePaths.map((filePath) => { + const absolutePath = path.resolve(workspaceRoot, filePath) + return path.normalize(absolutePath) + }) + + const filter = { + should: normalizedPaths.map((normalizedPath) => ({ + key: "filePath", + match: { + value: normalizedPath, + }, + })), + } + + await this.client.delete(this.collectionName, { + filter, + wait: true, + }) + } catch (error) { + console.error("Failed to delete points by file paths:", error) + throw error + } + } + + /** + * Deletes the entire collection. 
+ */ + async deleteCollection(): Promise { + try { + // Check if collection exists before attempting deletion to avoid errors + if (await this.collectionExists()) { + await this.client.deleteCollection(this.collectionName) + console.log(`[QdrantVectorStore] Collection ${this.collectionName} deleted.`) + } else { + console.log(`[QdrantVectorStore] Collection ${this.collectionName} does not exist, skipping deletion.`) + } + } catch (error) { + console.error(`[QdrantVectorStore] Failed to delete collection ${this.collectionName}:`, error) + throw error // Re-throw to allow calling code to handle it + } + } + + /** + * Clears all points from the collection + */ + async clearCollection(): Promise { + try { + await this.client.delete(this.collectionName, { + filter: { + must: [], + }, + wait: true, + }) + } catch (error) { + console.error("Failed to clear collection:", error) + throw error + } + } + + /** + * Checks if the collection exists + * @returns Promise resolving to boolean indicating if the collection exists + */ + async collectionExists(): Promise { + try { + // Prefer direct API call if supported + await this.client.getCollection(this.collectionName) + return true + } catch (error: any) { + if (error?.response?.status === 404) { + return false + } + console.error("Error checking collection existence:", error) + return false + } + } +} diff --git a/src/services/tree-sitter/index.ts b/src/services/tree-sitter/index.ts index 590a517b09..51eccad17c 100644 --- a/src/services/tree-sitter/index.ts +++ b/src/services/tree-sitter/index.ts @@ -90,6 +90,8 @@ const extensions = [ "erb", ].map((e) => `.${e}`) +export { extensions } + export async function parseSourceCodeDefinitionsForFile( filePath: string, rooIgnoreController?: RooIgnoreController, diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 8870dcaee0..e3d3d2d045 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -71,10 +71,13 @@ export interface 
ExtensionMessage { | "vsCodeSetting" | "condenseTaskContextResponse" | "singleRouterModelFetchResponse" + | "indexingStatusUpdate" + | "indexCleared" text?: string action?: | "chatButtonClicked" | "mcpButtonClicked" + | "codebaseIndexConfig" | "settingsButtonClicked" | "historyButtonClicked" | "promptsButtonClicked" @@ -173,6 +176,8 @@ export type ExtensionState = Pick< | "enhancementApiConfigId" | "condensingApiConfigId" | "customCondensingPrompt" + | "codebaseIndexConfig" + | "codebaseIndexModels" > & { version: string clineMessages: ClineMessage[] @@ -219,6 +224,7 @@ export interface ClineSayTool { | "editedExistingFile" | "appliedDiff" | "newFileCreated" + | "codebase_search" | "readFile" | "fetchInstructions" | "listFilesTopLevel" @@ -245,6 +251,7 @@ export interface ClineSayTool { startLine?: number endLine?: number lineNumber?: number + query?: string } // Must keep in sync with system prompt. diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index af9b637980..85a12aa238 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -135,6 +135,12 @@ export interface WebviewMessage { | "toggleApiConfigPin" | "setHistoryPreviewCollapsed" | "condenseTaskContextRequest" + | "requestIndexingStatus" + | "startIndexing" + | "clearIndexData" + | "indexingStatusUpdate" + | "indexCleared" + | "codebaseIndexConfig" text?: string disabled?: boolean askResponse?: ClineAskResponse @@ -183,4 +189,18 @@ export const checkoutRestorePayloadSchema = z.object({ export type CheckpointRestorePayload = z.infer -export type WebViewMessagePayload = CheckpointDiffPayload | CheckpointRestorePayload +export interface IndexingStatusPayload { + state: "Standby" | "Indexing" | "Indexed" | "Error" + message: string +} + +export interface IndexClearedPayload { + success: boolean + error?: string +} + +export type WebViewMessagePayload = + | CheckpointDiffPayload + | CheckpointRestorePayload + | IndexingStatusPayload + | IndexClearedPayload diff --git 
a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts new file mode 100644 index 0000000000..abccd017d7 --- /dev/null +++ b/src/shared/embeddingModels.ts @@ -0,0 +1,86 @@ +/** + * Defines profiles for different embedding models, including their dimensions. + */ + +export type EmbedderProvider = "openai" | "ollama" // Add other providers as needed + +export interface EmbeddingModelProfile { + dimension: number + // Add other model-specific properties if needed, e.g., context window size +} + +export type EmbeddingModelProfiles = { + [provider in EmbedderProvider]?: { + [modelId: string]: EmbeddingModelProfile + } +} + +// Example profiles - expand this list as needed +export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = { + openai: { + "text-embedding-3-small": { dimension: 1536 }, + "text-embedding-3-large": { dimension: 3072 }, + "text-embedding-ada-002": { dimension: 1536 }, + }, + ollama: { + "nomic-embed-text": { dimension: 768 }, + "mxbai-embed-large": { dimension: 1024 }, + "all-minilm": { dimension: 384 }, + // Add default Ollama model if applicable, e.g.: + // 'default': { dimension: 768 } // Assuming a default dimension + }, +} + +/** + * Retrieves the embedding dimension for a given provider and model ID. + * @param provider The embedder provider (e.g., "openai"). + * @param modelId The specific model ID (e.g., "text-embedding-3-small"). + * @returns The dimension size or undefined if the model is not found. 
+ */ +export function getModelDimension(provider: EmbedderProvider, modelId: string): number | undefined { + const providerProfiles = EMBEDDING_MODEL_PROFILES[provider] + if (!providerProfiles) { + console.warn(`Provider not found in profiles: ${provider}`) + return undefined + } + + const modelProfile = providerProfiles[modelId] + if (!modelProfile) { + // Don't warn here, as it might be a custom model ID not in our profiles + // console.warn(`Model not found for provider ${provider}: ${modelId}`) + return undefined // Or potentially return a default/fallback dimension? + } + + return modelProfile.dimension +} + +/** + * Gets the default *specific* embedding model ID based on the provider. + * Does not include the provider prefix. + * Currently defaults to OpenAI's 'text-embedding-3-small'. + * TODO: Make this configurable or more sophisticated. + * @param provider The embedder provider. + * @returns The default specific model ID for the provider (e.g., "text-embedding-3-small"). + */ +export function getDefaultModelId(provider: EmbedderProvider): string { + // Simple default logic for now + if (provider === "openai") { + return "text-embedding-3-small" + } + if (provider === "ollama") { + // Choose a sensible default for Ollama, e.g., the first one listed or a specific one + const ollamaModels = EMBEDDING_MODEL_PROFILES.ollama + const defaultOllamaModel = ollamaModels && Object.keys(ollamaModels)[0] + if (defaultOllamaModel) { + return defaultOllamaModel + } + // Fallback if no Ollama models are defined (shouldn't happen with the constant) + console.warn("No default Ollama model found in profiles.") + // Return a placeholder or throw an error, depending on desired behavior + return "unknown-default" // Placeholder specific model ID + } + + // Fallback for unknown providers + console.warn(`Unknown provider for default model ID: ${provider}. 
Falling back to OpenAI default.`) + return "text-embedding-3-small" +} diff --git a/src/shared/tools.ts b/src/shared/tools.ts index 1a6eb84ad7..e4c412d92f 100644 --- a/src/shared/tools.ts +++ b/src/shared/tools.ts @@ -63,6 +63,8 @@ export const toolParamNames = [ "ignore_case", "start_line", "end_line", + "query", + "limit", ] as const export type ToolParamName = (typeof toolParamNames)[number] @@ -101,6 +103,11 @@ export interface InsertCodeBlockToolUse extends ToolUse { params: Partial, "path" | "line" | "content">> } +export interface CodebaseSearchToolUse extends ToolUse { + name: "codebase_search" + params: Partial, "query" | "limit">> +} + export interface SearchFilesToolUse extends ToolUse { name: "search_files" params: Partial, "path" | "regex" | "file_pattern">> @@ -181,6 +188,7 @@ export const TOOL_DISPLAY_NAMES: Record = { new_task: "create new task", insert_content: "insert content", search_and_replace: "search and replace", + codebase_search: "codebase search", } as const export type { ToolGroup } @@ -188,7 +196,14 @@ export type { ToolGroup } // Define available tool groups. 
export const TOOL_GROUPS: Record = { read: { - tools: ["read_file", "fetch_instructions", "search_files", "list_files", "list_code_definition_names"], + tools: [ + "read_file", + "fetch_instructions", + "search_files", + "list_files", + "list_code_definition_names", + "codebase_search", + ], }, edit: { tools: ["apply_diff", "write_to_file", "insert_content", "search_and_replace"], diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 328e917a4b..a819978531 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -34,6 +34,7 @@ import { CommandExecution } from "./CommandExecution" import { CommandExecutionError } from "./CommandExecutionError" import { AutoApprovedRequestLimitWarning } from "./AutoApprovedRequestLimitWarning" import { CondensingContextRow, ContextCondenseRow } from "./ContextCondenseRow" +import CodebaseSearchResult from "./CodebaseSearchResult" interface ChatRowProps { message: ClineMessage @@ -351,6 +352,71 @@ export const ChatRowContent = ({ /> ) + case "codebase_search": { + if (message.type === "say") { + let parsed: { + query: string + results: Array<{ + filePath: string + score: number + startLine: number + endLine: number + codeChunk: string + }> + } | null = null + if (typeof tool.content === "object" && tool.content !== null) { + parsed = tool.content as { + query: string + results: Array<{ + filePath: string + score: number + startLine: number + endLine: number + codeChunk: string + }> + } + } else { + console.error("codebase_search content is not a valid object:", tool.content) + parsed = null + } + + const query = parsed?.query || "" + const results = parsed?.results || [] + + return ( +
+
+ {t("chat:codebaseSearch.didSearch", { + query, + limit: tool.limit, + count: results.length, + })} +
+ {results.map((result, idx) => ( + + ))} +
+ ) + } else if (message.type === "ask") { + return ( +
+ {toolIcon("search")} + + {t("chat:codebaseSearch.wantsToSearch", { query: tool.query, limit: tool.limit })} + +
+ ) + } + return null + } case "newFileCreated": return ( <> diff --git a/webview-ui/src/components/chat/CodebaseSearchResult.tsx b/webview-ui/src/components/chat/CodebaseSearchResult.tsx new file mode 100644 index 0000000000..9187efda74 --- /dev/null +++ b/webview-ui/src/components/chat/CodebaseSearchResult.tsx @@ -0,0 +1,54 @@ +import React, { useState } from "react" +import CodeBlock from "../common/CodeBlock" + +interface CodebaseSearchResultProps { + filePath: string + score: number + startLine: number + endLine: number + snippet: string + language: string +} + +const CodebaseSearchResult: React.FC = ({ + filePath, + score, + startLine, + endLine, + snippet, + language, +}) => { + const [isCollapsed, setIsCollapsed] = useState(true) + + const toggleCollapse = () => { + setIsCollapsed(!isCollapsed) + } + + return ( +
+
+ {filePath} + + Lines: {startLine}-{endLine} + +
+ {!isCollapsed && ( + <> +
Score: {score.toFixed(2)}
+ + + )} +
+ ) +} + +export default CodebaseSearchResult diff --git a/webview-ui/src/components/settings/CodeIndexSettings.tsx b/webview-ui/src/components/settings/CodeIndexSettings.tsx new file mode 100644 index 0000000000..eb719defd5 --- /dev/null +++ b/webview-ui/src/components/settings/CodeIndexSettings.tsx @@ -0,0 +1,314 @@ +import React, { useState, useEffect } from "react" +import * as ProgressPrimitive from "@radix-ui/react-progress" +import { Database } from "lucide-react" +import { vscode } from "../../utils/vscode" +import { VSCodeCheckbox, VSCodeTextField, VSCodeButton } from "@vscode/webview-ui-toolkit/react" +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select" +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, + AlertDialogTrigger, +} from "@/components/ui/alert-dialog" +import { Section } from "./Section" +import { SectionHeader } from "./SectionHeader" +import { SetCachedStateField } from "./types" +import { ExtensionStateContextType } from "@/context/ExtensionStateContext" +import { ApiConfiguration } from "../../../../src/shared/api" +import { CodebaseIndexConfig, CodebaseIndexModels } from "../../../../src/schemas" +import { EmbedderProvider } from "../../../../src/shared/embeddingModels" +interface CodeIndexSettingsProps { + codebaseIndexModels: CodebaseIndexModels | undefined + codebaseIndexConfig: CodebaseIndexConfig | undefined + apiConfiguration: ApiConfiguration + setCachedStateField: SetCachedStateField + setApiConfigurationField: (field: K, value: ApiConfiguration[K]) => void +} + +interface IndexingStatusUpdateMessage { + type: "indexingStatusUpdate" + values: { + systemStatus: string + message?: string + processedBlockCount: number + totalBlockCount: number + } +} + +export const CodeIndexSettings: React.FC = ({ + codebaseIndexModels, + codebaseIndexConfig, + 
apiConfiguration, + setCachedStateField, + setApiConfigurationField, +}) => { + const [indexingStatus, setIndexingStatus] = useState({ + systemStatus: "Standby", + message: "", + processedBlockCount: 0, + totalBlockCount: 0, + }) + + // Safely calculate available models for current provider + const currentProvider = codebaseIndexConfig?.codebaseIndexEmbedderProvider + const modelsForProvider = + currentProvider === "openai" || currentProvider === "ollama" + ? codebaseIndexModels?.[currentProvider] + : codebaseIndexModels?.openai + const availableModelIds = Object.keys(modelsForProvider || {}) + + useEffect(() => { + // Request initial indexing status from extension host + vscode.postMessage({ type: "requestIndexingStatus" }) + + // Set up interval for periodic status updates + + // Set up message listener for status updates + const handleMessage = (event: MessageEvent) => { + if (event.data.type === "indexingStatusUpdate") { + setIndexingStatus({ + ...event.data.values, + message: event.data.values.message || "", + }) + } + } + + window.addEventListener("message", handleMessage) + + // Cleanup function + return () => { + window.removeEventListener("message", handleMessage) + } + }, [codebaseIndexConfig, codebaseIndexModels]) + return ( + <> + +
+ + Codebase Indexing +
+
+
+ + setCachedStateField("codebaseIndexConfig", { + ...codebaseIndexConfig, + codebaseIndexEnabled: e.target.checked, + }) + }> + Enable Codebase Indexing + + + {codebaseIndexConfig?.codebaseIndexEnabled && ( +
+
Embeddings Provider
+
+ +
+ +
Model:
+
+ +
+ + {codebaseIndexConfig?.codebaseIndexEmbedderProvider === "openai" && ( +
+ + setApiConfigurationField("codeIndexOpenAiKey", e.target.value) + }> + OpenAI Key: + +
+ )} + + {codebaseIndexConfig?.codebaseIndexEmbedderProvider === "ollama" && ( + <> +
+ + setCachedStateField("codebaseIndexConfig", { + ...codebaseIndexConfig, + codebaseIndexEmbedderBaseUrl: e.target.value, + }) + }> + Ollama URL: + +
+ + )} + +
+ + setCachedStateField("codebaseIndexConfig", { + ...codebaseIndexConfig, + codebaseIndexQdrantUrl: e.target.value, + }) + }> + Qdrant URL + +
+ +
+ setApiConfigurationField("codeIndexQdrantApiKey", e.target.value)}> + Qdrant Key: + +
+ +
+ + {indexingStatus.systemStatus} + {indexingStatus.systemStatus !== "Indexing" && indexingStatus.message + ? ` - ${indexingStatus.message}` + : ""} +
+ + {indexingStatus.systemStatus === "Indexing" && ( +
+

+ {indexingStatus.message || "Indexing in progress..."} +

+ 0 + ? (indexingStatus.processedBlockCount / indexingStatus.totalBlockCount) * + 100 + : indexingStatus.totalBlockCount === 0 && + indexingStatus.processedBlockCount === 0 + ? 100 + : 0 + }> + 0 + ? (indexingStatus.processedBlockCount / + indexingStatus.totalBlockCount) * + 100 + : indexingStatus.totalBlockCount === 0 && + indexingStatus.processedBlockCount === 0 + ? 100 + : 0) + }%)`, + }} + /> + +
+ )} + +
+ vscode.postMessage({ type: "startIndexing" })} // Added onClick + disabled={ + (codebaseIndexConfig?.codebaseIndexEmbedderProvider === "openai" && + !apiConfiguration.codeIndexOpenAiKey) || + (codebaseIndexConfig?.codebaseIndexEmbedderProvider === "ollama" && + (!codebaseIndexConfig.codebaseIndexEmbedderBaseUrl || + !codebaseIndexConfig.codebaseIndexEmbedderModelId)) || + !codebaseIndexConfig.codebaseIndexQdrantUrl || + indexingStatus.systemStatus === "Indexing" + }> + Start Indexing + + + + Clear Index Data + + + + Are you sure? + + This action cannot be undone. This will permanently delete your codebase + index data. + + + + Cancel + vscode.postMessage({ type: "clearIndexData" })} // Added onClick + > + Clear Data + + + + +
+
+ )} +
+ + ) +} diff --git a/webview-ui/src/components/settings/SettingsView.tsx b/webview-ui/src/components/settings/SettingsView.tsx index 90031c6924..57bc5a2c76 100644 --- a/webview-ui/src/components/settings/SettingsView.tsx +++ b/webview-ui/src/components/settings/SettingsView.tsx @@ -30,6 +30,7 @@ import { TelemetrySetting } from "@roo/shared/TelemetrySetting" import { ProviderSettings } from "@roo/shared/api" import { vscode } from "@/utils/vscode" +import { CodeIndexSettings } from "./CodeIndexSettings" import { ExtensionStateContextType, useExtensionState } from "@/context/ExtensionStateContext" import { AlertDialog, @@ -84,6 +85,7 @@ const sectionNames = [ "contextManagement", "terminal", "experimental", + "codeIndex", "language", "about", ] as const @@ -163,6 +165,8 @@ const SettingsView = forwardRef(({ onDone, t terminalCompressProgressBar, condensingApiConfigId, customCondensingPrompt, + codebaseIndexConfig, + codebaseIndexModels, } = cachedState const apiConfiguration = useMemo(() => cachedState.apiConfiguration ?? {}, [cachedState.apiConfiguration]) @@ -290,6 +294,7 @@ const SettingsView = forwardRef(({ onDone, t vscode.postMessage({ type: "updateCondensingPrompt", text: customCondensingPrompt || "" }) vscode.postMessage({ type: "upsertApiConfiguration", text: currentApiConfigName, apiConfiguration }) vscode.postMessage({ type: "telemetrySetting", text: telemetrySetting }) + vscode.postMessage({ type: "codebaseIndexConfig", values: codebaseIndexConfig }) setChangeDetected(false) } } @@ -367,6 +372,7 @@ const SettingsView = forwardRef(({ onDone, t { id: "contextManagement", icon: Database }, { id: "terminal", icon: SquareTerminal }, { id: "experimental", icon: FlaskConical }, + { id: "codeIndex", icon: Database }, { id: "language", icon: Globe }, { id: "about", icon: Info }, ], @@ -646,6 +652,17 @@ const SettingsView = forwardRef(({ onDone, t listApiConfigMeta={listApiConfigMeta ?? 
[]} /> )} + + {/* CodeIndex Section */} + {activeTab === "codeIndex" && ( + + )} {/* Language Section */} {activeTab === "language" && ( diff --git a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index 2ac1326189..9daa52dcab 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -185,6 +185,14 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode terminalCompressProgressBar: true, // Default to compress progress bar output historyPreviewCollapsed: false, // Initialize the new state (default to expanded) autoCondenseContextPercent: 100, + codebaseIndexConfig: { + codebaseIndexEnabled: false, + codebaseIndexQdrantUrl: "", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderBaseUrl: "", + codebaseIndexEmbedderModelId: "", + }, + codebaseIndexModels: { ollama: {}, openai: {} }, }) const [didHydrateState, setDidHydrateState] = useState(false) diff --git a/webview-ui/src/i18n/locales/en/chat.json b/webview-ui/src/i18n/locales/en/chat.json index 10d497f21b..ea5a175e3e 100644 --- a/webview-ui/src/i18n/locales/en/chat.json +++ b/webview-ui/src/i18n/locales/en/chat.json @@ -161,6 +161,10 @@ "wantsToSearch": "Roo wants to search this directory for {{regex}}:", "didSearch": "Roo searched this directory for {{regex}}:" }, + "codebaseSearch": { + "wantsToSearch": "Roo wants to search the codebase for '{{query}}' (limit: {{limit}}):", + "didSearch": "Found {{count}} result(s) for '{{query}}':" + }, "commandOutput": "Command Output", "response": "Response", "arguments": "Arguments", From 13029ef31030525dbff8a9ec38f3fc9a1ab22b95 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 2 May 2025 21:03:38 -0500 Subject: [PATCH 02/71] fix: add missing types --- src/shared/ExtensionMessage.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts 
index e3d3d2d045..1c3a8ae757 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -71,13 +71,13 @@ export interface ExtensionMessage { | "vsCodeSetting" | "condenseTaskContextResponse" | "singleRouterModelFetchResponse" - | "indexingStatusUpdate" - | "indexCleared" + | "indexingStatusUpdate" + | "indexCleared" + | "codebaseIndexConfig" text?: string action?: | "chatButtonClicked" | "mcpButtonClicked" - | "codebaseIndexConfig" | "settingsButtonClicked" | "historyButtonClicked" | "promptsButtonClicked" From 2fd22484e805029e940e1de9b8b4872f47d6eac6 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Sun, 4 May 2025 18:19:58 -0500 Subject: [PATCH 03/71] feat: deduplicate code blocks coming out of parser --- src/services/code-index/processors/parser.ts | 63 +++++++++++++------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/src/services/code-index/processors/parser.ts b/src/services/code-index/processors/parser.ts index d106381b9f..14f92f4ef7 100644 --- a/src/services/code-index/processors/parser.ts +++ b/src/services/code-index/processors/parser.ts @@ -89,6 +89,7 @@ export class CodeParser implements ICodeParser { */ private async parseContent(filePath: string, content: string, fileHash: string): Promise { const ext = path.extname(filePath).slice(1).toLowerCase() + const seenSegmentHashes = new Set() // Check if we already have the parser loaded if (!this.loadedParsers[ext]) { @@ -132,7 +133,15 @@ export class CodeParser implements ICodeParser { if (captures.length === 0) { if (content.length >= MIN_BLOCK_CHARS) { // Perform fallback chunking if content is large enough - return this._performFallbackChunking(filePath, content, fileHash, MIN_BLOCK_CHARS, MAX_BLOCK_CHARS) + const blocks = this._performFallbackChunking( + filePath, + content, + fileHash, + MIN_BLOCK_CHARS, + MAX_BLOCK_CHARS, + seenSegmentHashes, + ) + return blocks } else { // Return empty if content is too small for fallback return [] @@ -163,6 +172,7 @@ 
export class CodeParser implements ICodeParser { filePath, fileHash, MIN_BLOCK_CHARS, // Pass minChars as requested + seenSegmentHashes, ) results.push(...chunkedBlocks) } @@ -180,16 +190,19 @@ export class CodeParser implements ICodeParser { .update(`${filePath}-${start_line}-${end_line}-${content}`) .digest("hex") - results.push({ - file_path: filePath, - identifier, - type, - start_line, - end_line, - content, - segmentHash, - fileHash, - }) + if (!seenSegmentHashes.has(segmentHash)) { + seenSegmentHashes.add(segmentHash) + results.push({ + file_path: filePath, + identifier, + type, + start_line, + end_line, + content, + segmentHash, + fileHash, + }) + } } } // Nodes smaller than MIN_BLOCK_CHARS are ignored @@ -210,6 +223,7 @@ export class CodeParser implements ICodeParser { minChars: number, maxChars: number, minRemainderChars: number, + seenSegmentHashes: Set, ): CodeBlock[] { const chunks: CodeBlock[] = [] let currentChunkLines: string[] = [] @@ -225,16 +239,19 @@ export class CodeParser implements ICodeParser { .update(`${filePath}-${startLine}-${endLine}-${chunkContent}`) .digest("hex") - chunks.push({ - file_path: filePath, - identifier: null, // Identifier is handled at a higher level if available - type: chunkType, - start_line: startLine, - end_line: endLine, - content: chunkContent, - segmentHash, - fileHash, - }) + if (!seenSegmentHashes.has(segmentHash)) { + seenSegmentHashes.add(segmentHash) + chunks.push({ + file_path: filePath, + identifier: null, // Identifier is handled at a higher level if available + type: chunkType, + start_line: startLine, + end_line: endLine, + content: chunkContent, + segmentHash, + fileHash, + }) + } } // Reset for the next chunk currentChunkLines = [] @@ -316,6 +333,7 @@ export class CodeParser implements ICodeParser { fileHash: string, minChars: number, maxChars: number, + seenSegmentHashes: Set, ): CodeBlock[] { const lines = content.split("\n") return this._chunkTextByLines( @@ -327,6 +345,7 @@ export class CodeParser 
implements ICodeParser { minChars, maxChars, MIN_CHUNK_REMAINDER_CHARS, + seenSegmentHashes, ) } @@ -335,6 +354,7 @@ export class CodeParser implements ICodeParser { filePath: string, fileHash: string, minChars: number, // Note: This was previously used as max, now correctly used as min + seenSegmentHashes: Set, ): CodeBlock[] { const lines = node.text.split("\n") const baseStartLine = node.startPosition.row + 1 @@ -347,6 +367,7 @@ export class CodeParser implements ICodeParser { minChars, MAX_BLOCK_CHARS, // Use the global max MIN_CHUNK_REMAINDER_CHARS, + seenSegmentHashes, ) } } From 40c3652ef76a43b8c914d6ae94159228f715965b Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Sun, 4 May 2025 21:24:43 -0500 Subject: [PATCH 04/71] feat: implement a cache manager to improve cache handling --- src/services/code-index/cache-manager.ts | 111 ++++++++++++++++++ src/services/code-index/interfaces/cache.ts | 6 + .../code-index/interfaces/file-processor.ts | 4 +- src/services/code-index/manager.ts | 13 +- src/services/code-index/orchestrator.ts | 41 ++----- .../code-index/processors/file-watcher.ts | 40 +------ src/services/code-index/processors/scanner.ts | 96 +++++---------- src/services/code-index/search-service.ts | 4 +- src/services/code-index/service-factory.ts | 14 ++- 9 files changed, 190 insertions(+), 139 deletions(-) create mode 100644 src/services/code-index/cache-manager.ts create mode 100644 src/services/code-index/interfaces/cache.ts diff --git a/src/services/code-index/cache-manager.ts b/src/services/code-index/cache-manager.ts new file mode 100644 index 0000000000..f34299edb9 --- /dev/null +++ b/src/services/code-index/cache-manager.ts @@ -0,0 +1,111 @@ +import * as vscode from "vscode" +import { createHash } from "crypto" +import { ICacheManager } from "./interfaces/cache" +import debounce from "lodash.debounce" + +/** + * Manages the cache for code indexing + */ +export class CacheManager implements ICacheManager { + private cachePath: vscode.Uri + private 
fileHashes: Record = {} + private _debouncedSaveCache: () => void + + /** + * Creates a new cache manager + * @param context VS Code extension context + * @param workspacePath Path to the workspace + */ + constructor( + private context: vscode.ExtensionContext, + private workspacePath: string, + ) { + this.cachePath = vscode.Uri.joinPath( + context.globalStorageUri, + `roo-index-cache-${createHash("sha256").update(workspacePath).digest("hex")}.json`, + ) + this._debouncedSaveCache = debounce(async () => { + await this._performSave() + }, 1500) + } + + /** + * Initializes the cache manager by loading the cache file + */ + async initialize(): Promise { + try { + const cacheData = await vscode.workspace.fs.readFile(this.cachePath) + this.fileHashes = JSON.parse(cacheData.toString()) + } catch (error) { + console.log("No cache file found or error reading cache, starting fresh") + this.fileHashes = {} + } + } + + /** + * Saves the cache to disk + */ + private async _performSave(): Promise { + try { + await vscode.workspace.fs.writeFile(this.cachePath, Buffer.from(JSON.stringify(this.fileHashes, null, 2))) + } catch (error) { + console.error("Failed to save cache:", error) + } + } + + /** + * Saves the cache to disk (immediately) + */ + async saveCache(): Promise { + await this._performSave() + } + + /** + * Clears the cache file by deleting it + */ + async clearCacheFile(): Promise { + try { + await vscode.workspace.fs.delete(this.cachePath) + this.fileHashes = {} + console.log("Cache file cleared successfully") + } catch (error) { + console.error("Failed to clear cache file:", error) + } + } + + /** + * Gets the hash for a file path + * @param filePath Path to the file + * @returns The hash for the file or undefined if not found + */ + getHash(filePath: string): string | undefined { + return this.fileHashes[filePath] + } + + /** + * Updates the hash for a file path + * @param filePath Path to the file + * @param hash New hash value + */ + updateHash(filePath: string, 
hash: string): void { + this.fileHashes[filePath] = hash + this._debouncedSaveCache() + } + + /** + * Deletes the hash for a file path + * @param filePath Path to the file + */ + deleteHash(filePath: string): void { + delete this.fileHashes[filePath] + this._debouncedSaveCache() + } + + /** + * Gets a copy of all file hashes + * @returns A copy of the file hashes record + */ + getAllHashes(): Record { + return { ...this.fileHashes } + } +} diff --git a/src/services/code-index/interfaces/cache.ts b/src/services/code-index/interfaces/cache.ts new file mode 100644 index 0000000000..a2e62bcac1 --- /dev/null +++ b/src/services/code-index/interfaces/cache.ts @@ -0,0 +1,6 @@ +export interface ICacheManager { + getHash(filePath: string): string | undefined + updateHash(filePath: string, hash: string): void + deleteHash(filePath: string): void + getAllHashes(): Record +} diff --git a/src/services/code-index/interfaces/file-processor.ts b/src/services/code-index/interfaces/file-processor.ts index 32ac02d76e..b7edf99ee0 100644 --- a/src/services/code-index/interfaces/file-processor.ts +++ b/src/services/code-index/interfaces/file-processor.ts @@ -33,14 +33,16 @@ export interface IDirectoryScanner { */ scanDirectory( directory: string, - context?: vscode.ExtensionContext, onError?: (error: Error) => void, + onBlocksIndexed?: (indexedCount: number) => void, + onFileParsed?: (fileBlockCount: number) => void, ): Promise<{ codeBlocks: CodeBlock[] stats: { processed: number skipped: number } + totalBlockCount: number }> } diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index 799d706adc..c4282becd5 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -8,6 +8,7 @@ import { CodeIndexStateManager } from "./state-manager" import { CodeIndexServiceFactory } from "./service-factory" import { CodeIndexSearchService } from "./search-service" import { CodeIndexOrchestrator } from "./orchestrator" +import { 
CacheManager } from "./cache-manager" export class CodeIndexManager { // --- Singleton Implementation --- @@ -19,6 +20,7 @@ export class CodeIndexManager { private readonly _serviceFactory: CodeIndexServiceFactory private readonly _orchestrator: CodeIndexOrchestrator private readonly _searchService: CodeIndexSearchService + private readonly _cacheManager: CacheManager public static getInstance(context: vscode.ExtensionContext, contextProxy?: ContextProxy): CodeIndexManager { const workspacePath = getWorkspacePath() // Assumes single workspace for now @@ -33,7 +35,9 @@ export class CodeIndexManager { } public static disposeAll(): void { - CodeIndexManager.instances.forEach((instance) => instance.dispose()) + for (const instance of CodeIndexManager.instances.values()) { + instance.dispose() + } CodeIndexManager.instances.clear() } @@ -50,19 +54,22 @@ export class CodeIndexManager { // Initialize remaining specialized classes this._configManager = new CodeIndexConfigManager(contextProxy) - this._serviceFactory = new CodeIndexServiceFactory(this._configManager, workspacePath) + this._cacheManager = new CacheManager(context, workspacePath) + this._serviceFactory = new CodeIndexServiceFactory(this._configManager, workspacePath, this._cacheManager) this._orchestrator = new CodeIndexOrchestrator( this._configManager, this._stateManager, this._serviceFactory, context, workspacePath, + this._cacheManager, ) this._searchService = new CodeIndexSearchService( this._configManager, this._stateManager, this._serviceFactory, context, + this._cacheManager, ) } @@ -109,6 +116,7 @@ export class CodeIndexManager { */ public async startIndexing(): Promise { + await this._cacheManager.initialize() await this._orchestrator.startIndexing() } @@ -134,6 +142,7 @@ export class CodeIndexManager { */ public async clearIndexData(): Promise { await this._orchestrator.clearIndexData() + await this._cacheManager.clearCacheFile() } // --- Private Helpers --- diff --git 
a/src/services/code-index/orchestrator.ts b/src/services/code-index/orchestrator.ts index 797a16de74..a3a3439ec0 100644 --- a/src/services/code-index/orchestrator.ts +++ b/src/services/code-index/orchestrator.ts @@ -1,11 +1,11 @@ import * as vscode from "vscode" -import { createHash } from "crypto" import * as path from "path" import { CodeIndexConfigManager } from "./config-manager" import { CodeIndexStateManager, IndexingState } from "./state-manager" import { CodeIndexServiceFactory } from "./service-factory" import { FileProcessingResult, IFileWatcher, IVectorStore } from "./interfaces" import { DirectoryScanner } from "./processors" +import { CacheManager } from "./cache-manager" /** * Manages the code indexing workflow, coordinating between different services and managers. @@ -23,27 +23,9 @@ export class CodeIndexOrchestrator { private readonly serviceFactory: CodeIndexServiceFactory, private readonly context: vscode.ExtensionContext, private readonly workspacePath: string, + private readonly cacheManager: CacheManager, ) {} - /** - * Resets the cache file to an empty state. - */ - private async _resetCacheFile(): Promise { - try { - const cacheFileName = `roo-index-cache-${createHash("sha256").update(this.workspacePath).digest("hex")}.json` - const cachePath = vscode.Uri.joinPath(this.context.globalStorageUri, cacheFileName) - - try { - await vscode.workspace.fs.writeFile(cachePath, Buffer.from("{}", "utf-8")) - console.log(`[CodeIndexOrchestrator] Cache file reset (emptied) at ${cachePath.fsPath}`) - } catch (error) { - console.error("[CodeIndexOrchestrator] Failed to reset (empty) cache file:", error) - } - } catch (error) { - console.error("[CodeIndexOrchestrator] Unexpected error during cache file reset:", error) - } - } - /** * Starts the file watcher if not already running. 
*/ @@ -60,7 +42,7 @@ export class CodeIndexOrchestrator { this.stateManager.setSystemState("Indexing", "Initializing file watcher...") try { - const services = this.serviceFactory.createServices(this.context) + const services = this.serviceFactory.createServices(this.context, this.cacheManager) this._fileWatcher = services.fileWatcher await this._fileWatcher.initialize() @@ -133,15 +115,15 @@ export class CodeIndexOrchestrator { try { this.configManager.loadConfiguration() - const services = this.serviceFactory.createServices(this.context) + const services = this.serviceFactory.createServices(this.context, this.cacheManager) this._vectorStore = services.vectorStore this._scanner = services.scanner const collectionCreated = await this._vectorStore.initialize() if (collectionCreated) { - await this._resetCacheFile() - console.log("[CodeIndexOrchestrator] Qdrant collection created; cache file emptied.") + await this.cacheManager.clearCacheFile() + console.log("[CodeIndexOrchestrator] Qdrant collection created; cache cleared.") } this.stateManager.setSystemState("Indexing", "Services ready. 
Starting workspace scan...") @@ -161,7 +143,6 @@ export class CodeIndexOrchestrator { const result = await this._scanner.scanDirectory( this.workspacePath, - this.context, (batchError: Error) => { console.error( `[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`, @@ -193,8 +174,8 @@ export class CodeIndexOrchestrator { console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError) } - await this._resetCacheFile() - console.log("[CodeIndexOrchestrator] Cleared cache file due to scan error.") + await this.cacheManager.clearCacheFile() + console.log("[CodeIndexOrchestrator] Cleared cache due to scan error.") this.stateManager.setSystemState("Error", `Failed during initial scan: ${error.message || "Unknown error"}`) this.stopWatcher() @@ -235,7 +216,7 @@ export class CodeIndexOrchestrator { try { if (this.configManager.isFeatureConfigured) { if (!this._vectorStore) { - const services = this.serviceFactory.createServices(this.context) + const services = this.serviceFactory.createServices(this.context, this.cacheManager) this._vectorStore = services.vectorStore } @@ -249,8 +230,8 @@ export class CodeIndexOrchestrator { this.stateManager.setSystemState("Error", `Failed to clear vector collection: ${error.message}`) } - await this._resetCacheFile() - console.log("[CodeIndexOrchestrator] Cache file emptied.") + await this.cacheManager.clearCacheFile() + console.log("[CodeIndexOrchestrator] Cache cleared.") if (this.stateManager.state !== "Error") { this.stateManager.setSystemState("Standby", "Index data cleared successfully.") diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index 532f3e6ef9..2f8d2849d9 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -7,6 +7,7 @@ import { v5 as uuidv5 } from "uuid" import { scannerExtensions } from "../shared/supported-extensions" import { 
IFileWatcher, FileProcessingResult, IEmbedder, IVectorStore } from "../interfaces" import { codeParser } from "./parser" +import { CacheManager } from "../cache-manager" const QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479" const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB @@ -17,8 +18,6 @@ const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB export class FileWatcher implements IFileWatcher { private fileWatcher?: vscode.FileSystemWatcher private ignoreController: RooIgnoreController - private cachePath: vscode.Uri - private fileHashes: Record = {} private readonly _onDidStartProcessing = new vscode.EventEmitter() private readonly _onDidFinishProcessing = new vscode.EventEmitter() @@ -39,34 +38,22 @@ export class FileWatcher implements IFileWatcher { * @param context VS Code extension context * @param embedder Optional embedder * @param vectorStore Optional vector store + * @param cacheManager Cache manager */ constructor( private workspacePath: string, private context: vscode.ExtensionContext, + private readonly cacheManager: CacheManager, private embedder?: IEmbedder, private vectorStore?: IVectorStore, ) { this.ignoreController = new RooIgnoreController(workspacePath) - - this.cachePath = vscode.Uri.joinPath( - context.globalStorageUri, - `roo-index-cache-${createHash("sha256").update(workspacePath).digest("hex")}.json`, - ) } /** * Initializes the file watcher */ async initialize(): Promise { - // Load cache - try { - const cacheData = await vscode.workspace.fs.readFile(this.cachePath) - this.fileHashes = JSON.parse(cacheData.toString()) - } catch (error) { - console.log("No cache file found or error reading cache, starting fresh") - this.fileHashes = {} - } - // Create file watcher const filePattern = new vscode.RelativePattern( this.workspacePath, @@ -113,10 +100,7 @@ export class FileWatcher implements IFileWatcher { const filePath = uri.fsPath // Delete from cache - if (this.fileHashes[filePath]) { - delete this.fileHashes[filePath] - 
await this.saveCache() - } + this.cacheManager.deleteHash(filePath) // Delete from vector store if (this.vectorStore) { @@ -169,7 +153,7 @@ export class FileWatcher implements IFileWatcher { const newHash = createHash("sha256").update(content).digest("hex") // Check if file has changed - if (this.fileHashes[filePath] === newHash) { + if (this.cacheManager.getHash(filePath) === newHash) { const result = { path: filePath, status: "skipped" as const, @@ -222,8 +206,7 @@ export class FileWatcher implements IFileWatcher { } // Update cache - this.fileHashes[filePath] = newHash - await this.saveCache() + this.cacheManager.updateHash(filePath, newHash) const result = { path: filePath, @@ -241,15 +224,4 @@ export class FileWatcher implements IFileWatcher { return result } } - - /** - * Saves the cache to disk - */ - private async saveCache(): Promise { - try { - await vscode.workspace.fs.writeFile(this.cachePath, Buffer.from(JSON.stringify(this.fileHashes, null, 2))) - } catch (error) { - console.error("Failed to save cache:", error) - } - } } diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts index aad0763726..2009b43e3f 100644 --- a/src/services/code-index/processors/scanner.ts +++ b/src/services/code-index/processors/scanner.ts @@ -10,6 +10,7 @@ import { createHash } from "crypto" import { v5 as uuidv5 } from "uuid" import pLimit from "p-limit" import { Mutex } from "async-mutex" +import { CacheManager } from "../cache-manager" export class DirectoryScanner implements IDirectoryScanner { // Constants moved inside the class @@ -26,6 +27,7 @@ export class DirectoryScanner implements IDirectoryScanner { private readonly embedder: IEmbedder, private readonly qdrantClient: IVectorStore, private readonly codeParser: ICodeParser, + private readonly cacheManager: CacheManager, ) {} /** @@ -37,12 +39,12 @@ export class DirectoryScanner implements IDirectoryScanner { * @returns Promise<{codeBlocks: CodeBlock[], stats: 
{processed: number, skipped: number}}> Array of parsed code blocks and processing stats */ public async scanDirectory( - directoryPath: string, - context?: vscode.ExtensionContext, + directory: string, onError?: (error: Error) => void, onBlocksIndexed?: (indexedCount: number) => void, onFileParsed?: (fileBlockCount: number) => void, ): Promise<{ codeBlocks: CodeBlock[]; stats: { processed: number; skipped: number }; totalBlockCount: number }> { + const directoryPath = directory // Get all files recursively (handles .gitignore automatically) const [allPaths, _] = await listFiles(directoryPath, true, DirectoryScanner.MAX_LIST_FILES_LIMIT) @@ -63,15 +65,7 @@ export class DirectoryScanner implements IDirectoryScanner { return scannerExtensions.includes(ext) }) - // Initialize cache - const cachePath = context?.globalStorageUri - ? vscode.Uri.joinPath( - context.globalStorageUri, - `roo-index-cache-${createHash("sha256").update(directoryPath).digest("hex")}.json`, - ) - : undefined - const oldHashes = cachePath ? 
await this.loadHashCache(cachePath) : {} - const newHashes: Record = {} + // Initialize tracking variables const processedFiles = new Set() const codeBlocks: CodeBlock[] = [] let processedCount = 0 @@ -112,10 +106,9 @@ export class DirectoryScanner implements IDirectoryScanner { processedFiles.add(filePath) // Check against cache - const cachedFileHash = oldHashes[filePath] + const cachedFileHash = this.cacheManager.getHash(filePath) if (cachedFileHash === currentFileHash) { // File is unchanged - newHashes[filePath] = currentFileHash skippedCount++ return } @@ -145,7 +138,7 @@ export class DirectoryScanner implements IDirectoryScanner { currentBatchFileInfos.push({ filePath, fileHash: currentFileHash, - isNew: !oldHashes[filePath], + isNew: !this.cacheManager.getHash(filePath), }) } @@ -165,7 +158,6 @@ export class DirectoryScanner implements IDirectoryScanner { batchBlocks, batchTexts, batchFileInfos, - newHashes, onError, onBlocksIndexed, ), @@ -179,7 +171,7 @@ export class DirectoryScanner implements IDirectoryScanner { } } else { // Only update hash if not being processed in a batch - newHashes[filePath] = currentFileHash + await this.cacheManager.updateHash(filePath, currentFileHash) } } catch (error) { console.error(`Error processing file ${filePath}:`, error) @@ -207,7 +199,7 @@ export class DirectoryScanner implements IDirectoryScanner { // Queue final batch processing const batchPromise = batchLimiter(() => - this.processBatch(batchBlocks, batchTexts, batchFileInfos, newHashes, onError, onBlocksIndexed), + this.processBatch(batchBlocks, batchTexts, batchFileInfos, onError, onBlocksIndexed), ) activeBatchPromises.push(batchPromise) } finally { @@ -218,33 +210,29 @@ export class DirectoryScanner implements IDirectoryScanner { // Wait for all batch processing to complete await Promise.all(activeBatchPromises) - // Handle deleted files (don't add them to newHashes) - if (cachePath) { - for (const cachedFilePath of Object.keys(oldHashes)) { - if 
(!processedFiles.has(cachedFilePath)) { - // File was deleted or is no longer supported/indexed - if (this.qdrantClient) { - try { - console.log(`[DirectoryScanner] Deleting points for deleted file: ${cachedFilePath}`) - await this.qdrantClient.deletePointsByFilePath(cachedFilePath) - } catch (error) { - console.error(`[DirectoryScanner] Failed to delete points for ${cachedFilePath}:`, error) - if (onError) { - onError( - error instanceof Error - ? error - : new Error(`Unknown error deleting points for ${cachedFilePath}`), - ) - } - // Decide if we should re-throw or just log + // Handle deleted files + const oldHashes = this.cacheManager.getAllHashes() + for (const cachedFilePath of Object.keys(oldHashes)) { + if (!processedFiles.has(cachedFilePath)) { + // File was deleted or is no longer supported/indexed + if (this.qdrantClient) { + try { + console.log(`[DirectoryScanner] Deleting points for deleted file: ${cachedFilePath}`) + await this.qdrantClient.deletePointsByFilePath(cachedFilePath) + await this.cacheManager.deleteHash(cachedFilePath) + } catch (error) { + console.error(`[DirectoryScanner] Failed to delete points for ${cachedFilePath}:`, error) + if (onError) { + onError( + error instanceof Error + ? 
error + : new Error(`Unknown error deleting points for ${cachedFilePath}`), + ) } + // Decide if we should re-throw or just log } - // The file is implicitly removed from the cache because it's not added to newHashes } } - - // Save the updated cache - await this.saveHashCache(cachePath, newHashes) } return { @@ -257,36 +245,10 @@ export class DirectoryScanner implements IDirectoryScanner { } } - private async loadHashCache(cachePath: vscode.Uri): Promise> { - try { - const fileData = await vscode.workspace.fs.readFile(cachePath) - return JSON.parse(Buffer.from(fileData).toString("utf-8")) - } catch (error) { - if (error instanceof vscode.FileSystemError && error.code === "FileNotFound") { - return {} // Cache file doesn't exist yet, return empty object - } - console.error("Error loading hash cache:", error) - return {} // Return empty on other errors to allow indexing to proceed - } - } - - private async saveHashCache(cachePath: vscode.Uri, hashes: Record): Promise { - try { - // Ensure directory exists - await vscode.workspace.fs.createDirectory(vscode.Uri.file(path.dirname(cachePath.fsPath))) - // Write file - await vscode.workspace.fs.writeFile(cachePath, Buffer.from(JSON.stringify(hashes, null, 2), "utf-8")) - } catch (error) { - console.error("Error saving hash cache:", error) - // Don't re-throw, as failure to save cache shouldn't block the main operation - } - } - private async processBatch( batchBlocks: CodeBlock[], batchTexts: string[], batchFileInfos: { filePath: string; fileHash: string; isNew: boolean }[], - newHashes: Record, onError?: (error: Error) => void, onBlocksIndexed?: (indexedCount: number) => void, ): Promise { @@ -358,7 +320,7 @@ export class DirectoryScanner implements IDirectoryScanner { // Update hashes for successfully processed files in this batch for (const fileInfo of batchFileInfos) { - newHashes[fileInfo.filePath] = fileInfo.fileHash + await this.cacheManager.updateHash(fileInfo.filePath, fileInfo.fileHash) } success = true 
console.log(`[DirectoryScanner] Successfully processed batch of ${batchBlocks.length} blocks.`) diff --git a/src/services/code-index/search-service.ts b/src/services/code-index/search-service.ts index f3d38f602c..f00e5ce80e 100644 --- a/src/services/code-index/search-service.ts +++ b/src/services/code-index/search-service.ts @@ -3,6 +3,7 @@ import { VectorStoreSearchResult } from "./interfaces" import { CodeIndexConfigManager } from "./config-manager" import { CodeIndexStateManager } from "./state-manager" import { CodeIndexServiceFactory } from "./service-factory" +import { CacheManager } from "./cache-manager" /** * Service responsible for searching the code index. @@ -13,6 +14,7 @@ export class CodeIndexSearchService { private readonly stateManager: CodeIndexStateManager, private readonly serviceFactory: CodeIndexServiceFactory, private readonly context: vscode.ExtensionContext, + private readonly cacheManager: CacheManager, ) {} /** @@ -35,7 +37,7 @@ export class CodeIndexSearchService { try { // Get services from factory - const { embedder, vectorStore } = this.serviceFactory.createServices(this.context) + const { embedder, vectorStore } = this.serviceFactory.createServices(this.context, this.cacheManager) // Generate embedding for query const embeddingResponse = await embedder.createEmbeddings([query]) diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts index 8a05214be3..4bdd16a396 100644 --- a/src/services/code-index/service-factory.ts +++ b/src/services/code-index/service-factory.ts @@ -6,6 +6,7 @@ import { QdrantVectorStore } from "./vector-store/qdrant-client" import { codeParser, DirectoryScanner, FileWatcher } from "./processors" import { ICodeParser, IEmbedder, IFileWatcher, IVectorStore } from "./interfaces" import { CodeIndexConfigManager } from "./config-manager" +import { CacheManager } from "./cache-manager" /** * Factory class responsible for creating and configuring code indexing service 
dependencies. @@ -14,6 +15,7 @@ export class CodeIndexServiceFactory { constructor( private readonly configManager: CodeIndexConfigManager, private readonly workspacePath: string, + private readonly cacheManager: CacheManager, ) {} /** @@ -78,7 +80,7 @@ export class CodeIndexServiceFactory { vectorStore: IVectorStore, parser: ICodeParser, ): DirectoryScanner { - return new DirectoryScanner(embedder, vectorStore, parser) + return new DirectoryScanner(embedder, vectorStore, parser, this.cacheManager) } /** @@ -88,15 +90,19 @@ export class CodeIndexServiceFactory { context: vscode.ExtensionContext, embedder: IEmbedder, vectorStore: IVectorStore, + cacheManager: CacheManager, ): IFileWatcher { - return new FileWatcher(this.workspacePath, context, embedder, vectorStore) + return new FileWatcher(this.workspacePath, context, cacheManager, embedder, vectorStore) } /** * Creates all required service dependencies if the service is properly configured. * @throws Error if the service is not properly configured */ - public createServices(context: vscode.ExtensionContext): { + public createServices( + context: vscode.ExtensionContext, + cacheManager: CacheManager, + ): { embedder: IEmbedder vectorStore: IVectorStore parser: ICodeParser @@ -111,7 +117,7 @@ export class CodeIndexServiceFactory { const vectorStore = this.createVectorStore() const parser = codeParser const scanner = this.createDirectoryScanner(embedder, vectorStore, parser) - const fileWatcher = this.createFileWatcher(context, embedder, vectorStore) + const fileWatcher = this.createFileWatcher(context, embedder, vectorStore, cacheManager) return { embedder, From 7cdac9f35d37cf74288cc423b98633dc58f87b95 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Sun, 4 May 2025 22:09:04 -0500 Subject: [PATCH 05/71] refactor: move code index service initialization to extension and remove await from indexing process --- src/activate/registerCommands.ts | 4 ++- src/core/webview/ClineProvider.ts | 33 ++++++++++------------- 
src/core/webview/webviewMessageHandler.ts | 2 +- src/extension.ts | 14 +++++++++- src/services/code-index/manager.ts | 2 +- 5 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/activate/registerCommands.ts b/src/activate/registerCommands.ts index b6c7b54779..dc74d654ac 100644 --- a/src/activate/registerCommands.ts +++ b/src/activate/registerCommands.ts @@ -9,6 +9,7 @@ import { telemetryService } from "../services/telemetry/TelemetryService" import { registerHumanRelayCallback, unregisterHumanRelayCallback, handleHumanRelayResponse } from "./humanRelay" import { handleNewTask } from "./handleTask" +import { CodeIndexManager } from "../services/code-index/manager" /** * Helper to get the visible ClineProvider instance or log if not found. @@ -184,7 +185,8 @@ export const openClineInNewTab = async ({ context, outputChannel }: Omit editor.viewColumn || 0)) // Check if there are any visible text editors, otherwise open a new group diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index ac441ece63..f21a8c10ba 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -88,21 +88,21 @@ export class ClineProvider extends EventEmitter implements public readonly latestAnnouncementId = "may-21-2025-3-18" // Update for v3.18.0 announcement public readonly providerSettingsManager: ProviderSettingsManager public readonly customModesManager: CustomModesManager - public readonly codeIndexManager: CodeIndexManager constructor( readonly context: vscode.ExtensionContext, private readonly outputChannel: vscode.OutputChannel, private readonly renderContext: "sidebar" | "editor" = "sidebar", public readonly contextProxy: ContextProxy, + public readonly codeIndexManager?: CodeIndexManager, ) { super() this.log("ClineProvider instantiated") ClineProvider.activeInstances.add(this) - this.codeIndexManager = CodeIndexManager.getInstance(context, this.contextProxy) - context.subscriptions.push(this.codeIndexManager) 
+ this.codeIndexManager = codeIndexManager + // Start configuration loading (which might trigger indexing) in the background. // Don't await, allowing activation to continue immediately. @@ -330,22 +330,15 @@ export class ClineProvider extends EventEmitter implements async resolveWebviewView(webviewView: vscode.WebviewView | vscode.WebviewPanel) { this.log("Resolving webview view") - this.codeIndexManager - .loadConfiguration() - .then(() => { - this.updateGlobalState("codebaseIndexModels", EMBEDDING_MODEL_PROFILES) + if ( + this.codeIndexManager && + this.codeIndexManager.isFeatureEnabled && + this.codeIndexManager.isFeatureConfigured + ) { + this.updateGlobalState("codebaseIndexModels", EMBEDDING_MODEL_PROFILES) - this.outputChannel.appendLine("CodeIndexManager configuration loaded successfully (async).") - }) - .catch((error) => { - console.error( - "[resolveWebviewView] Error during background CodeIndexManager configuration/indexing:", - error, - ) - this.outputChannel.appendLine( - `[Error] Background CodeIndexManager configuration/indexing failed: ${error.message || error}`, - ) - }) + this.outputChannel.appendLine("CodeIndexManager configuration loaded") + } this.view = webviewView @@ -868,7 +861,9 @@ export class ClineProvider extends EventEmitter implements } // Load CodeIndexManager configuration after provider settings are updated - await this.codeIndexManager.loadConfiguration() + if (this.codeIndexManager) { + await this.codeIndexManager.loadConfiguration() + } } async deleteProviderProfile(profileToDelete: ProviderSettingsEntry) { diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 5b595c5bc7..b34b03dfa8 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -1329,7 +1329,7 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We codebaseIndexEmbedderModelId: "", } await updateGlobalState("codebaseIndexConfig", 
codebaseIndexConfig) - await provider.codeIndexManager.loadConfiguration() + await provider.codeIndexManager?.loadConfiguration() await provider.postStateToWebview() break } diff --git a/src/extension.ts b/src/extension.ts index cb71016f75..f49313bf3f 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -24,6 +24,7 @@ import { telemetryService } from "./services/telemetry/TelemetryService" import { API } from "./exports/api" import { migrateSettings } from "./utils/migrateSettings" import { formatLanguage } from "./shared/language" +import { CodeIndexManager } from "./services/code-index/manager" import { handleUri, @@ -74,13 +75,24 @@ export async function activate(context: vscode.ExtensionContext) { } const contextProxy = await ContextProxy.getInstance(context) - const provider = new ClineProvider(context, outputChannel, "sidebar", contextProxy) + const codeIndexManager = CodeIndexManager.getInstance(context, contextProxy) + + try { + await codeIndexManager.loadConfiguration() + } catch (error) { + outputChannel.appendLine( + `[CodeIndexManager] Error during background CodeIndexManager configuration/indexing: ${error.message || error}`, + ) + } + + const provider = new ClineProvider(context, outputChannel, "sidebar", contextProxy, codeIndexManager) telemetryService.setProvider(provider) context.subscriptions.push( vscode.window.registerWebviewViewProvider(ClineProvider.sideBarId, provider, { webviewOptions: { retainContextWhenHidden: true }, }), + codeIndexManager, ) registerCommands({ context, outputChannel, provider }) diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index c4282becd5..e0668149fa 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -107,7 +107,7 @@ export class CodeIndexManager { console.log( `[CodeIndexManager] Configuration change requires restart (Restart: ${requiresRestart}, Dimension Changed: ${requiresClear}). 
Starting indexing...`, ) - await this.startIndexing() + this.startIndexing() } } From 399dc2c13b25a4c787fae6bdd5c3213aba496a95 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 5 May 2025 10:31:39 -0500 Subject: [PATCH 06/71] fix: return undefined instead of throwing if no workspace is detected --- src/core/prompts/tools/index.ts | 4 ++-- src/core/tools/codebaseSearchTool.ts | 12 +++++------- src/extension.ts | 7 +++++-- src/services/code-index/manager.ts | 8 ++++++-- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/core/prompts/tools/index.ts b/src/core/prompts/tools/index.ts index 0fc6066e30..dbfc471f4a 100644 --- a/src/core/prompts/tools/index.ts +++ b/src/core/prompts/tools/index.ts @@ -50,7 +50,7 @@ export function getToolDescriptionsForMode( mode: Mode, cwd: string, supportsComputerUse: boolean, - codeIndexManager: CodeIndexManager, + codeIndexManager?: CodeIndexManager, diffStrategy?: DiffStrategy, browserViewportSize?: string, mcpHub?: McpHub, @@ -94,7 +94,7 @@ export function getToolDescriptionsForMode( ALWAYS_AVAILABLE_TOOLS.forEach((tool) => tools.add(tool)) // Conditionally exclude codebase_search if feature is disabled or not configured - if (!(codeIndexManager.isFeatureEnabled && codeIndexManager.isFeatureConfigured)) { + if (!codeIndexManager || !(codeIndexManager.isFeatureEnabled && codeIndexManager.isFeatureConfigured)) { tools.delete("codebase_search") } diff --git a/src/core/tools/codebaseSearchTool.ts b/src/core/tools/codebaseSearchTool.ts index 3bb355f3c6..291c7c8437 100644 --- a/src/core/tools/codebaseSearchTool.ts +++ b/src/core/tools/codebaseSearchTool.ts @@ -80,16 +80,14 @@ export async function codebaseSearchTool( const manager = CodeIndexManager.getInstance(context) - // Check if indexing is enabled and configured (using assumed properties/methods) - // @ts-expect-error Accessing private member _isEnabled - const isEnabled = manager.isEnabled ?? 
true // Assume enabled if property doesn't exist - // @ts-expect-error Accessing private member _isConfigured - const isConfigured = manager.isConfigured ? manager.isConfigured() : true // Assume configured if method doesn't exist + if (!manager) { + throw new Error("CodeIndexManager is not available.") + } - if (!isEnabled) { + if (!manager.isFeatureEnabled) { throw new Error("Code Indexing is disabled in the settings.") } - if (!isConfigured) { + if (!manager.isFeatureConfigured) { throw new Error("Code Indexing is not configured (Missing OpenAI Key or Qdrant URL).") } diff --git a/src/extension.ts b/src/extension.ts index f49313bf3f..d4f4fef118 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -78,7 +78,7 @@ export async function activate(context: vscode.ExtensionContext) { const codeIndexManager = CodeIndexManager.getInstance(context, contextProxy) try { - await codeIndexManager.loadConfiguration() + await codeIndexManager?.loadConfiguration() } catch (error) { outputChannel.appendLine( `[CodeIndexManager] Error during background CodeIndexManager configuration/indexing: ${error.message || error}`, @@ -88,11 +88,14 @@ export async function activate(context: vscode.ExtensionContext) { const provider = new ClineProvider(context, outputChannel, "sidebar", contextProxy, codeIndexManager) telemetryService.setProvider(provider) + if (codeIndexManager) { + context.subscriptions.push(codeIndexManager) + } + context.subscriptions.push( vscode.window.registerWebviewViewProvider(ClineProvider.sideBarId, provider, { webviewOptions: { retainContextWhenHidden: true }, }), - codeIndexManager, ) registerCommands({ context, outputChannel, provider }) diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index e0668149fa..d450357d9e 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -22,10 +22,14 @@ export class CodeIndexManager { private readonly _searchService: CodeIndexSearchService private readonly 
_cacheManager: CacheManager - public static getInstance(context: vscode.ExtensionContext, contextProxy?: ContextProxy): CodeIndexManager { + public static getInstance( + context: vscode.ExtensionContext, + contextProxy?: ContextProxy, + ): CodeIndexManager | undefined { const workspacePath = getWorkspacePath() // Assumes single workspace for now + if (!workspacePath) { - throw new Error("Cannot get CodeIndexManager instance without an active workspace.") + return undefined } if (!CodeIndexManager.instances.has(workspacePath) && contextProxy) { From 0cf83690035d4e323dc1272775392af93ff619c3 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 5 May 2025 11:29:35 -0500 Subject: [PATCH 07/71] feat: allow auto approve if it is active for read tools --- src/core/tools/codebaseSearchTool.ts | 2 +- src/shared/ExtensionMessage.ts | 2 +- webview-ui/src/components/chat/ChatRow.tsx | 2 +- webview-ui/src/components/chat/ChatView.tsx | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/tools/codebaseSearchTool.ts b/src/core/tools/codebaseSearchTool.ts index 291c7c8437..38e2f5d7eb 100644 --- a/src/core/tools/codebaseSearchTool.ts +++ b/src/core/tools/codebaseSearchTool.ts @@ -56,7 +56,7 @@ export async function codebaseSearchTool( approvalMessage = t(translationKey, { query, limit }) const approvalPayload = { - tool: "codebase_search", + tool: "codebaseSearch", approvalPrompt: approvalMessage, query: query, limit: limit, diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 1c3a8ae757..cd1efbe983 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -224,7 +224,7 @@ export interface ClineSayTool { | "editedExistingFile" | "appliedDiff" | "newFileCreated" - | "codebase_search" + | "codebaseSearch" | "readFile" | "fetchInstructions" | "listFilesTopLevel" diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index a819978531..646b51db22 100644 --- 
a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -352,7 +352,7 @@ export const ChatRowContent = ({ /> ) - case "codebase_search": { + case "codebaseSearch": { if (message.type === "say") { let parsed: { query: string diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index 2f5eadc3ab..a466f2cd44 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -729,6 +729,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction Date: Mon, 5 May 2025 16:39:07 -0500 Subject: [PATCH 08/71] refactor: improve UI of the results and allow opening the ranges directly in the editor --- src/core/tools/codebaseSearchTool.ts | 4 +- src/exports/roo-code.d.ts | 2 + src/exports/types.ts | 2 + src/schemas/index.ts | 1 + webview-ui/src/components/chat/ChatRow.tsx | 103 +++++++----------- .../components/chat/CodebaseSearchResult.tsx | 45 ++++---- .../chat/CodebaseSearchResultsDisplay.tsx | 53 +++++++++ 7 files changed, 118 insertions(+), 92 deletions(-) create mode 100644 webview-ui/src/components/chat/CodebaseSearchResultsDisplay.tsx diff --git a/src/core/tools/codebaseSearchTool.ts b/src/core/tools/codebaseSearchTool.ts index 38e2f5d7eb..85bb814dfe 100644 --- a/src/core/tools/codebaseSearchTool.ts +++ b/src/core/tools/codebaseSearchTool.ts @@ -129,8 +129,8 @@ export async function codebaseSearchTool( }) // Send results to UI - const payload = { tool: toolName, content: jsonResult } - await cline.say("text", JSON.stringify(payload)) + const payload = { tool: "codebaseSearch", content: jsonResult } + await cline.say("codebase_search_result", JSON.stringify(payload)) // Push results to AI const output = `Query: ${query} diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 4012237db6..6cd5e5cd90 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -443,6 +443,7 @@ type ClineMessage = { | 
"rooignore_error" | "diff_error" | "condense_context" + | "codebase_search_result" ) | undefined text?: string | undefined @@ -528,6 +529,7 @@ type RooCodeEvents = { | "rooignore_error" | "diff_error" | "condense_context" + | "codebase_search_result" ) | undefined text?: string | undefined diff --git a/src/exports/types.ts b/src/exports/types.ts index fb234a1b71..0a80f615cb 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -451,6 +451,7 @@ type ClineMessage = { | "rooignore_error" | "diff_error" | "condense_context" + | "codebase_search_result" ) | undefined text?: string | undefined @@ -540,6 +541,7 @@ type RooCodeEvents = { | "rooignore_error" | "diff_error" | "condense_context" + | "codebase_search_result" ) | undefined text?: string | undefined diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 176b8c8b5a..4fb893ae1f 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -1069,6 +1069,7 @@ export const clineSays = [ "rooignore_error", "diff_error", "condense_context", + "codebase_search_result", ] as const export const clineSaySchema = z.enum(clineSays) diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 646b51db22..a580a0ee4f 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -34,7 +34,7 @@ import { CommandExecution } from "./CommandExecution" import { CommandExecutionError } from "./CommandExecutionError" import { AutoApprovedRequestLimitWarning } from "./AutoApprovedRequestLimitWarning" import { CondensingContextRow, ContextCondenseRow } from "./ContextCondenseRow" -import CodebaseSearchResult from "./CodebaseSearchResult" +import CodebaseSearchResultsDisplay from "./CodebaseSearchResultsDisplay" interface ChatRowProps { message: ClineMessage @@ -353,69 +353,14 @@ export const ChatRowContent = ({ ) case "codebaseSearch": { - if (message.type === "say") { - let parsed: { - query: string - results: Array<{ - 
filePath: string - score: number - startLine: number - endLine: number - codeChunk: string - }> - } | null = null - if (typeof tool.content === "object" && tool.content !== null) { - parsed = tool.content as { - query: string - results: Array<{ - filePath: string - score: number - startLine: number - endLine: number - codeChunk: string - }> - } - } else { - console.error("codebase_search content is not a valid object:", tool.content) - parsed = null - } - - const query = parsed?.query || "" - const results = parsed?.results || [] - - return ( -
-
- {t("chat:codebaseSearch.didSearch", { - query, - limit: tool.limit, - count: results.length, - })} -
- {results.map((result, idx) => ( - - ))} -
- ) - } else if (message.type === "ask") { - return ( -
- {toolIcon("search")} - - {t("chat:codebaseSearch.wantsToSearch", { query: tool.query, limit: tool.limit })} - -
- ) - } - return null + return ( +
+ {toolIcon("search")} + + {t("chat:codebaseSearch.wantsToSearch", { query: tool.query, limit: tool.limit || 0 })} + +
+ ) } case "newFileCreated": return ( @@ -1005,6 +950,36 @@ export const ChatRowContent = ({ return } return message.contextCondense ? : null + case "codebase_search_result": + let parsed: { + content: { + query: string + results: Array<{ + filePath: string + score: number + startLine: number + endLine: number + codeChunk: string + }> + } + } | null = null + + try { + if (message.text) { + parsed = JSON.parse(message.text) + } + } catch (error) { + console.error("Failed to parse codebaseSearch content:", error) + } + + if (parsed && !parsed?.content) { + console.error("Invalid codebaseSearch content structure:", parsed.content) + return
Error displaying search results.
+ } + + const { query = "", results = [] } = parsed?.content || {} + + return default: return ( <> diff --git a/webview-ui/src/components/chat/CodebaseSearchResult.tsx b/webview-ui/src/components/chat/CodebaseSearchResult.tsx index 9187efda74..d1215c1a71 100644 --- a/webview-ui/src/components/chat/CodebaseSearchResult.tsx +++ b/webview-ui/src/components/chat/CodebaseSearchResult.tsx @@ -1,5 +1,5 @@ -import React, { useState } from "react" -import CodeBlock from "../common/CodeBlock" +import React from "react" +import { vscode } from "@src/utils/vscode" interface CodebaseSearchResultProps { filePath: string @@ -15,38 +15,31 @@ const CodebaseSearchResult: React.FC = ({ score, startLine, endLine, - snippet, - language, + // These props are required by the interface but not used in this implementation + snippet: _snippet, + language: _language, }) => { - const [isCollapsed, setIsCollapsed] = useState(true) - - const toggleCollapse = () => { - setIsCollapsed(!isCollapsed) + const handleClick = () => { + vscode.postMessage({ + type: "openFile", + text: "./" + filePath, + values: { + line: startLine, + }, + }) } return ( -
-
- {filePath} +
+
+ {filePath.split("/").at(-1)} Lines: {startLine}-{endLine}
- {!isCollapsed && ( - <> -
Score: {score.toFixed(2)}
- - - )}
) } diff --git a/webview-ui/src/components/chat/CodebaseSearchResultsDisplay.tsx b/webview-ui/src/components/chat/CodebaseSearchResultsDisplay.tsx new file mode 100644 index 0000000000..f17c0d2b00 --- /dev/null +++ b/webview-ui/src/components/chat/CodebaseSearchResultsDisplay.tsx @@ -0,0 +1,53 @@ +import React, { useState } from "react" +import CodebaseSearchResult from "./CodebaseSearchResult" +import { useTranslation } from "react-i18next" + +interface CodebaseSearchResultsDisplayProps { + query: string + results: Array<{ + filePath: string + score: number + startLine: number + endLine: number + codeChunk: string + }> +} + +const CodebaseSearchResultsDisplay: React.FC = ({ query, results }) => { + const { t } = useTranslation() + const [codebaseSearchResultsExpanded, setCodebaseSearchResultsExpanded] = useState(false) + + return ( +
+
setCodebaseSearchResultsExpanded(!codebaseSearchResultsExpanded)} + className="font-bold cursor-pointer flex items-center justify-between px-2 py-2 rounded border bg-[var(--vscode-editor-background)] border-[var(--vscode-editorGroup-border)]"> + + {t("chat:codebaseSearch.didSearch", { + query, + count: results.length, + })} + + +
+ + {codebaseSearchResultsExpanded && ( +
+ {results.map((result, idx) => ( + + ))} +
+ )} +
+ ) +} + +export default CodebaseSearchResultsDisplay From 6cff73b5510642994514ba1d7a7b980268851359 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 5 May 2025 21:48:30 -0500 Subject: [PATCH 09/71] refactor: use dependency injection to improve performance --- src/activate/registerCommands.ts | 2 +- src/core/webview/ClineProvider.ts | 3 +- src/core/webview/webviewMessageHandler.ts | 3 +- src/extension.ts | 4 +- src/services/code-index/manager.ts | 136 +++++++++++++-------- src/services/code-index/orchestrator.ts | 57 +++------ src/services/code-index/service-factory.ts | 8 +- 7 files changed, 114 insertions(+), 99 deletions(-) diff --git a/src/activate/registerCommands.ts b/src/activate/registerCommands.ts index dc74d654ac..fc18e96d54 100644 --- a/src/activate/registerCommands.ts +++ b/src/activate/registerCommands.ts @@ -185,7 +185,7 @@ export const openClineInNewTab = async ({ context, outputChannel }: Omit editor.viewColumn || 0)) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index f21a8c10ba..184501d35f 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -862,7 +862,8 @@ export class ClineProvider extends EventEmitter implements // Load CodeIndexManager configuration after provider settings are updated if (this.codeIndexManager) { - await this.codeIndexManager.loadConfiguration() + this.codeIndexManager.dispose() + await this.codeIndexManager.initialize(this.contextProxy) } } diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index b34b03dfa8..6d20fb67d9 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -1329,7 +1329,8 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We codebaseIndexEmbedderModelId: "", } await updateGlobalState("codebaseIndexConfig", codebaseIndexConfig) - await provider.codeIndexManager?.loadConfiguration() + 
provider.codeIndexManager?.dispose() + await provider.codeIndexManager?.initialize(provider.contextProxy) await provider.postStateToWebview() break } diff --git a/src/extension.ts b/src/extension.ts index d4f4fef118..5037027424 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -75,10 +75,10 @@ export async function activate(context: vscode.ExtensionContext) { } const contextProxy = await ContextProxy.getInstance(context) - const codeIndexManager = CodeIndexManager.getInstance(context, contextProxy) + const codeIndexManager = CodeIndexManager.getInstance(context) try { - await codeIndexManager?.loadConfiguration() + await codeIndexManager?.initialize(contextProxy) } catch (error) { outputChannel.appendLine( `[CodeIndexManager] Error during background CodeIndexManager configuration/indexing: ${error.message || error}`, diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index d450357d9e..bbfa5e9ee6 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -9,31 +9,29 @@ import { CodeIndexServiceFactory } from "./service-factory" import { CodeIndexSearchService } from "./search-service" import { CodeIndexOrchestrator } from "./orchestrator" import { CacheManager } from "./cache-manager" +import { codeParser } from "./processors" export class CodeIndexManager { // --- Singleton Implementation --- private static instances = new Map() // Map workspace path to instance // Specialized class instances - private readonly _configManager: CodeIndexConfigManager + private _configManager: CodeIndexConfigManager | undefined private readonly _stateManager: CodeIndexStateManager - private readonly _serviceFactory: CodeIndexServiceFactory - private readonly _orchestrator: CodeIndexOrchestrator - private readonly _searchService: CodeIndexSearchService - private readonly _cacheManager: CacheManager - - public static getInstance( - context: vscode.ExtensionContext, - contextProxy?: ContextProxy, - ): 
CodeIndexManager | undefined { + private _serviceFactory: CodeIndexServiceFactory | undefined + private _orchestrator: CodeIndexOrchestrator | undefined + private _searchService: CodeIndexSearchService | undefined + private _cacheManager: CacheManager | undefined + + public static getInstance(context: vscode.ExtensionContext): CodeIndexManager | undefined { const workspacePath = getWorkspacePath() // Assumes single workspace for now if (!workspacePath) { return undefined } - if (!CodeIndexManager.instances.has(workspacePath) && contextProxy) { - CodeIndexManager.instances.set(workspacePath, new CodeIndexManager(workspacePath, context, contextProxy)) + if (!CodeIndexManager.instances.has(workspacePath)) { + CodeIndexManager.instances.set(workspacePath, new CodeIndexManager(workspacePath, context)) } return CodeIndexManager.instances.get(workspacePath)! } @@ -49,32 +47,10 @@ export class CodeIndexManager { private readonly context: vscode.ExtensionContext // Private constructor for singleton pattern - private constructor(workspacePath: string, context: vscode.ExtensionContext, contextProxy: ContextProxy) { + private constructor(workspacePath: string, context: vscode.ExtensionContext) { this.workspacePath = workspacePath this.context = context - - // Initialize state manager first since other components depend on it this._stateManager = new CodeIndexStateManager() - - // Initialize remaining specialized classes - this._configManager = new CodeIndexConfigManager(contextProxy) - this._cacheManager = new CacheManager(context, workspacePath) - this._serviceFactory = new CodeIndexServiceFactory(this._configManager, workspacePath, this._cacheManager) - this._orchestrator = new CodeIndexOrchestrator( - this._configManager, - this._stateManager, - this._serviceFactory, - context, - workspacePath, - this._cacheManager, - ) - this._searchService = new CodeIndexSearchService( - this._configManager, - this._stateManager, - this._serviceFactory, - context, - this._cacheManager, - 
) } // --- Public API --- @@ -83,36 +59,83 @@ export class CodeIndexManager { return this._stateManager.onProgressUpdate } + private assertInitialized() { + if (!this._configManager || !this._orchestrator || !this._searchService || !this._cacheManager) { + throw new Error("CodeIndexManager not initialized. Call initialize() first.") + } + } + public get state(): IndexingState { - return this._orchestrator.state + this.assertInitialized() + return this._orchestrator!.state } public get isFeatureEnabled(): boolean { - return this._configManager.isFeatureEnabled + this.assertInitialized() + return this._configManager!.isFeatureEnabled } public get isFeatureConfigured(): boolean { - return this._configManager.isFeatureConfigured + this.assertInitialized() + return this._configManager!.isFeatureConfigured } /** - * Loads persisted configuration from globalState. + * Initializes the manager with configuration and dependent services. + * Must be called before using any other methods. + * @returns Object indicating if a restart is needed */ - public async loadConfiguration(): Promise { + public async initialize(contextProxy: ContextProxy): Promise<{ requiresRestart: boolean }> { + // Initialize config manager and load configuration + this._configManager = new CodeIndexConfigManager(contextProxy) const { requiresRestart, requiresClear } = await this._configManager.loadConfiguration() + // Initialize cache manager + this._cacheManager = new CacheManager(this.context, this.workspacePath) + await this._cacheManager.initialize() + + // Initialize service factory and dependent services + this._serviceFactory = new CodeIndexServiceFactory(this._configManager, this.workspacePath, this._cacheManager) + + // Create shared service instances + const embedder = this._serviceFactory.createEmbedder() + const vectorStore = this._serviceFactory.createVectorStore() + const parser = codeParser + const scanner = this._serviceFactory.createDirectoryScanner(embedder, vectorStore, parser) + 
const fileWatcher = this._serviceFactory.createFileWatcher( + this.context, + embedder, + vectorStore, + this._cacheManager, + ) + + // Initialize orchestrator + this._orchestrator = new CodeIndexOrchestrator( + this._configManager, + this._stateManager, + this.context, + this.workspacePath, + this._cacheManager, + embedder, + vectorStore, + parser, + scanner, + fileWatcher, + ) + + // Initialize search service + this._searchService = new CodeIndexSearchService(this._configManager, this._stateManager, embedder, vectorStore) + if (requiresClear) { console.log("[CodeIndexManager] Embedding dimension changed. Clearing existing index data...") await this.clearIndexData() - // No need to explicitly set requiresRestart = true, as requiresClear implies a restart need. } if (requiresRestart || requiresClear) { - console.log( - `[CodeIndexManager] Configuration change requires restart (Restart: ${requiresRestart}, Dimension Changed: ${requiresClear}). Starting indexing...`, - ) this.startIndexing() } + + return { requiresRestart } } /** @@ -120,22 +143,25 @@ export class CodeIndexManager { */ public async startIndexing(): Promise { - await this._cacheManager.initialize() - await this._orchestrator.startIndexing() + this.assertInitialized() + await this._orchestrator!.startIndexing() } /** * Stops the file watcher and potentially cleans up resources. */ public stopWatcher(): void { - this._orchestrator.stopWatcher() + this.assertInitialized() + this._orchestrator!.stopWatcher() } /** * Cleans up the manager instance. */ public dispose(): void { - this.stopWatcher() + if (this._orchestrator) { + this.stopWatcher() + } this._stateManager.dispose() console.log(`[CodeIndexManager] Disposed for workspace: ${this.workspacePath}`) } @@ -145,8 +171,9 @@ export class CodeIndexManager { * and deleting the cache file. 
*/ public async clearIndexData(): Promise { - await this._orchestrator.clearIndexData() - await this._cacheManager.clearCacheFile() + this.assertInitialized() + await this._orchestrator!.clearIndexData() + await this._cacheManager!.clearCacheFile() } // --- Private Helpers --- @@ -159,7 +186,12 @@ export class CodeIndexManager { this._stateManager.setWebviewProvider(provider) } - public async searchIndex(query: string, limit: number): Promise { - return this._searchService.searchIndex(query, limit) + public async searchIndex( + query: string, + limit: number, + directoryPrefix?: string, + ): Promise { + this.assertInitialized() + return this._searchService!.searchIndex(query, limit, directoryPrefix) } } diff --git a/src/services/code-index/orchestrator.ts b/src/services/code-index/orchestrator.ts index a3a3439ec0..7a958431bf 100644 --- a/src/services/code-index/orchestrator.ts +++ b/src/services/code-index/orchestrator.ts @@ -2,8 +2,7 @@ import * as vscode from "vscode" import * as path from "path" import { CodeIndexConfigManager } from "./config-manager" import { CodeIndexStateManager, IndexingState } from "./state-manager" -import { CodeIndexServiceFactory } from "./service-factory" -import { FileProcessingResult, IFileWatcher, IVectorStore } from "./interfaces" +import { FileProcessingResult, IFileWatcher, IVectorStore, IEmbedder, ICodeParser } from "./interfaces" import { DirectoryScanner } from "./processors" import { CacheManager } from "./cache-manager" @@ -11,30 +10,26 @@ import { CacheManager } from "./cache-manager" * Manages the code indexing workflow, coordinating between different services and managers. 
*/ export class CodeIndexOrchestrator { - private _fileWatcher?: IFileWatcher private _fileWatcherSubscriptions: vscode.Disposable[] = [] private _isProcessing: boolean = false - private _scanner?: DirectoryScanner - private _vectorStore?: IVectorStore constructor( private readonly configManager: CodeIndexConfigManager, private readonly stateManager: CodeIndexStateManager, - private readonly serviceFactory: CodeIndexServiceFactory, private readonly context: vscode.ExtensionContext, private readonly workspacePath: string, private readonly cacheManager: CacheManager, + private readonly embedder: IEmbedder, + private readonly vectorStore: IVectorStore, + private readonly parser: ICodeParser, + private readonly scanner: DirectoryScanner, + private readonly fileWatcher: IFileWatcher, ) {} /** * Starts the file watcher if not already running. */ private async _startWatcher(): Promise { - if (this._fileWatcher) { - console.log("[CodeIndexOrchestrator] File watcher already running.") - return - } - if (!this.configManager.isFeatureConfigured) { throw new Error("Cannot start watcher: Service not configured.") } @@ -42,15 +37,13 @@ export class CodeIndexOrchestrator { this.stateManager.setSystemState("Indexing", "Initializing file watcher...") try { - const services = this.serviceFactory.createServices(this.context, this.cacheManager) - this._fileWatcher = services.fileWatcher - await this._fileWatcher.initialize() + await this.fileWatcher.initialize() this._fileWatcherSubscriptions = [ - this._fileWatcher.onDidStartProcessing((filePath: string) => { + this.fileWatcher.onDidStartProcessing((filePath: string) => { this._updateFileStatus(filePath, "Processing", `Processing file: ${path.basename(filePath)}`) }), - this._fileWatcher.onDidFinishProcessing((event: FileProcessingResult) => { + this.fileWatcher.onDidFinishProcessing((event: FileProcessingResult) => { if (event.error) { this._updateFileStatus(event.path, "Error") console.error(`[CodeIndexOrchestrator] Error 
processing file ${event.path}:`, event.error) @@ -115,11 +108,7 @@ export class CodeIndexOrchestrator { try { this.configManager.loadConfiguration() - const services = this.serviceFactory.createServices(this.context, this.cacheManager) - this._vectorStore = services.vectorStore - this._scanner = services.scanner - - const collectionCreated = await this._vectorStore.initialize() + const collectionCreated = await this.vectorStore.initialize() if (collectionCreated) { await this.cacheManager.clearCacheFile() @@ -141,7 +130,7 @@ export class CodeIndexOrchestrator { this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar) } - const result = await this._scanner.scanDirectory( + const result = await this.scanner.scanDirectory( this.workspacePath, (batchError: Error) => { console.error( @@ -169,7 +158,7 @@ export class CodeIndexOrchestrator { } catch (error: any) { console.error("[CodeIndexOrchestrator] Error during indexing:", error) try { - await this._vectorStore?.clearCollection() + await this.vectorStore.clearCollection() } catch (cleanupError) { console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError) } @@ -188,16 +177,13 @@ export class CodeIndexOrchestrator { * Stops the file watcher and cleans up resources. 
*/ public stopWatcher(): void { - if (this._fileWatcher) { - this._fileWatcher.dispose() - this._fileWatcher = undefined - this._fileWatcherSubscriptions.forEach((sub) => sub.dispose()) - this._fileWatcherSubscriptions = [] - console.log("[CodeIndexOrchestrator] File watcher stopped.") + this.fileWatcher.dispose() + this._fileWatcherSubscriptions.forEach((sub) => sub.dispose()) + this._fileWatcherSubscriptions = [] + console.log("[CodeIndexOrchestrator] File watcher stopped.") - if (this.stateManager.state !== "Error") { - this.stateManager.setSystemState("Standby", "File watcher stopped.") - } + if (this.stateManager.state !== "Error") { + this.stateManager.setSystemState("Standby", "File watcher stopped.") } this._isProcessing = false } @@ -215,12 +201,7 @@ export class CodeIndexOrchestrator { try { if (this.configManager.isFeatureConfigured) { - if (!this._vectorStore) { - const services = this.serviceFactory.createServices(this.context, this.cacheManager) - this._vectorStore = services.vectorStore - } - - await this._vectorStore.deleteCollection() + await this.vectorStore.deleteCollection() console.log("[CodeIndexOrchestrator] Vector collection deleted.") } else { console.warn("[CodeIndexOrchestrator] Service not configured, skipping vector collection clear.") diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts index 4bdd16a396..bc49458fbc 100644 --- a/src/services/code-index/service-factory.ts +++ b/src/services/code-index/service-factory.ts @@ -21,7 +21,7 @@ export class CodeIndexServiceFactory { /** * Creates an embedder instance based on the current configuration. */ - protected createEmbedder(): IEmbedder { + public createEmbedder(): IEmbedder { const config = this.configManager.getConfig() const provider = config.embedderProvider as EmbedderProvider @@ -44,7 +44,7 @@ export class CodeIndexServiceFactory { /** * Creates a vector store instance using the current configuration. 
*/ - protected createVectorStore(): IVectorStore { + public createVectorStore(): IVectorStore { const config = this.configManager.getConfig() const provider = config.embedderProvider as EmbedderProvider @@ -75,7 +75,7 @@ export class CodeIndexServiceFactory { /** * Creates a directory scanner instance with its required dependencies. */ - protected createDirectoryScanner( + public createDirectoryScanner( embedder: IEmbedder, vectorStore: IVectorStore, parser: ICodeParser, @@ -86,7 +86,7 @@ export class CodeIndexServiceFactory { /** * Creates a file watcher instance with its required dependencies. */ - protected createFileWatcher( + public createFileWatcher( context: vscode.ExtensionContext, embedder: IEmbedder, vectorStore: IVectorStore, From 4554ddca15f9f5ee8c4990944b861901dc1a1312 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 5 May 2025 21:50:06 -0500 Subject: [PATCH 10/71] feat: implement result filtering by directory path --- src/core/prompts/tools/codebase-search.ts | 5 ++- src/core/tools/codebaseSearchTool.ts | 20 ++++----- .../code-index/interfaces/vector-store.ts | 2 +- .../code-index/processors/file-watcher.ts | 2 +- src/services/code-index/processors/scanner.ts | 2 +- src/services/code-index/search-service.ts | 31 +++++++------ .../code-index/vector-store/qdrant-client.ts | 43 ++++++++++++++++++- webview-ui/src/components/chat/ChatRow.tsx | 8 +++- .../components/chat/CodebaseSearchResult.tsx | 1 + webview-ui/src/i18n/locales/en/chat.json | 1 + 10 files changed, 86 insertions(+), 29 deletions(-) diff --git a/src/core/prompts/tools/codebase-search.ts b/src/core/prompts/tools/codebase-search.ts index 3b40e7478d..75d00cbeac 100644 --- a/src/core/prompts/tools/codebase-search.ts +++ b/src/core/prompts/tools/codebase-search.ts @@ -1,19 +1,22 @@ export function getCodebaseSearchDescription(): string { return `## codebase_search -Description: Search the codebase for relevant files based on a query. 
Use this when the user asks a question about the codebase that requires finding specific files or code snippets. +Description: Search the codebase for relevant files based on a query. Use this when the user asks a question about the codebase that requires finding specific files or code snippets. You can optionally specify a path to a directory to search in, the results will be filtered to only include files within that directory, this is useful for searching for files related to a specific project or module. Parameters: - query: (required) The natural language query to search for. - limit: (optional) The maximum number of search results to return. Defaults to 10. +- path: (optional) The path to the directory to search in relative to the current working directory. Defaults to the current working directory. Usage: Your natural language query here Number of results (optional) +Path to the directory to search in (optional) Example: Searching for functions related to user authentication User login and password hashing 5 +/path/to/directory ` } diff --git a/src/core/tools/codebaseSearchTool.ts b/src/core/tools/codebaseSearchTool.ts index 85bb814dfe..e023443152 100644 --- a/src/core/tools/codebaseSearchTool.ts +++ b/src/core/tools/codebaseSearchTool.ts @@ -4,9 +4,9 @@ import { Task } from "../task/Task" import { CodeIndexManager } from "../../services/code-index/manager" import { getWorkspacePath } from "../../utils/path" import { formatResponse } from "../prompts/responses" -import { t } from "../../i18n" import { VectorStoreSearchResult } from "../../services/code-index/interfaces" -import { AskApproval, HandleError, PushToolResult, RemoveClosingTag, ToolUse } from '../../shared/tools' +import { AskApproval, HandleError, PushToolResult, RemoveClosingTag, ToolUse } from "../../shared/tools" +import path from "path" export async function codebaseSearchTool( cline: Task, @@ -29,6 +29,7 @@ export async function codebaseSearchTool( let query: string | undefined = 
block.params.query let limitStr: string | undefined = block.params.limit let limit: number = 5 // Default limit + let directoryPrefix: string | undefined = block.params.path if (!query) { cline.consecutiveMistakeCount++ @@ -47,19 +48,18 @@ export async function codebaseSearchTool( } } - // Extract optional sendResultsToUI parameter - - // --- Approval --- - const translationKey = "chat:codebaseSearch.wantsToSearch" - let approvalMessage: string + if (directoryPrefix) { + directoryPrefix = removeClosingTag("path", directoryPrefix) + directoryPrefix = path.normalize(directoryPrefix) + } - approvalMessage = t(translationKey, { query, limit }) + // Extract optional sendResultsToUI parameter const approvalPayload = { tool: "codebaseSearch", - approvalPrompt: approvalMessage, query: query, limit: limit, + path: directoryPrefix, isOutsideWorkspace: false, } @@ -91,7 +91,7 @@ export async function codebaseSearchTool( throw new Error("Code Indexing is not configured (Missing OpenAI Key or Qdrant URL).") } - const searchResults: VectorStoreSearchResult[] = await manager.searchIndex(query, limit) + const searchResults: VectorStoreSearchResult[] = await manager.searchIndex(query, limit, directoryPrefix) // 3. 
Format and push results if (!searchResults || searchResults.length === 0) { diff --git a/src/services/code-index/interfaces/vector-store.ts b/src/services/code-index/interfaces/vector-store.ts index cac7f54dc9..6429486f81 100644 --- a/src/services/code-index/interfaces/vector-store.ts +++ b/src/services/code-index/interfaces/vector-store.ts @@ -26,7 +26,7 @@ export interface IVectorStore { * @param limit Maximum number of results to return * @returns Promise resolving to search results */ - search(queryVector: number[], limit?: number): Promise + search(queryVector: number[], limit?: number, directoryPrefix?: string): Promise /** * Deletes points by file path diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index 2f8d2849d9..e830cd0312 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -194,7 +194,7 @@ export class FileWatcher implements IFileWatcher { id: pointId, vector: embeddings[index], payload: { - filePath: normalizedAbsolutePath, + filePath: path.relative(workspaceRoot, normalizedAbsolutePath), codeChunk: block.content, startLine: block.start_line, endLine: block.end_line, diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts index 2009b43e3f..d4ec81b3c9 100644 --- a/src/services/code-index/processors/scanner.ts +++ b/src/services/code-index/processors/scanner.ts @@ -306,7 +306,7 @@ export class DirectoryScanner implements IDirectoryScanner { id: pointId, vector: embeddings[index], payload: { - filePath: normalizedAbsolutePath, // Store normalized absolute path + filePath: path.relative(workspaceRoot, normalizedAbsolutePath), codeChunk: block.content, startLine: block.start_line, endLine: block.end_line, diff --git a/src/services/code-index/search-service.ts b/src/services/code-index/search-service.ts index f00e5ce80e..1d7a4d8a5f 100644 --- 
a/src/services/code-index/search-service.ts +++ b/src/services/code-index/search-service.ts @@ -1,9 +1,9 @@ -import * as vscode from "vscode" +import * as path from "path" import { VectorStoreSearchResult } from "./interfaces" +import { IEmbedder } from "./interfaces/embedder" +import { IVectorStore } from "./interfaces/vector-store" import { CodeIndexConfigManager } from "./config-manager" import { CodeIndexStateManager } from "./state-manager" -import { CodeIndexServiceFactory } from "./service-factory" -import { CacheManager } from "./cache-manager" /** * Service responsible for searching the code index. @@ -12,19 +12,23 @@ export class CodeIndexSearchService { constructor( private readonly configManager: CodeIndexConfigManager, private readonly stateManager: CodeIndexStateManager, - private readonly serviceFactory: CodeIndexServiceFactory, - private readonly context: vscode.ExtensionContext, - private readonly cacheManager: CacheManager, + private readonly embedder: IEmbedder, + private readonly vectorStore: IVectorStore, ) {} /** * Searches the code index for relevant content. 
* @param query The search query * @param limit Maximum number of results to return + * @param directoryPrefix Optional directory path to filter results by * @returns Array of search results * @throws Error if the service is not properly configured or ready */ - public async searchIndex(query: string, limit: number): Promise { + public async searchIndex( + query: string, + limit: number, + directoryPrefix?: string, + ): Promise { if (!this.configManager.isFeatureEnabled || !this.configManager.isFeatureConfigured) { throw new Error("Code index feature is disabled or not configured.") } @@ -36,18 +40,21 @@ export class CodeIndexSearchService { } try { - // Get services from factory - const { embedder, vectorStore } = this.serviceFactory.createServices(this.context, this.cacheManager) - // Generate embedding for query - const embeddingResponse = await embedder.createEmbeddings([query]) + const embeddingResponse = await this.embedder.createEmbeddings([query]) const vector = embeddingResponse?.embeddings[0] if (!vector) { throw new Error("Failed to generate embedding for query.") } + // Handle directory prefix + let normalizedPrefix: string | undefined = undefined + if (directoryPrefix) { + normalizedPrefix = path.normalize(directoryPrefix) + } + // Perform search - const results = await vectorStore.search(vector, limit) + const results = await this.vectorStore.search(vector, limit, normalizedPrefix) return results } catch (error) { console.error("[CodeIndexSearchService] Error during search:", error) diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index 000afa7f5a..8615bd5075 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -76,8 +76,29 @@ export class QdrantVectorStore implements IVectorStore { }>, ): Promise { try { + const processedPoints = points.map((point) => { + if (point.payload?.filePath) { + const segments 
= point.payload.filePath.split(path.sep).filter(Boolean) + const pathSegments = segments.reduce( + (acc: Record, segment: string, index: number) => { + acc[index.toString()] = segment + return acc + }, + {}, + ) + return { + ...point, + payload: { + ...point.payload, + pathSegments, + }, + } + } + return point + }) + await this.client.upsert(this.collectionName, { - points, + points: processedPoints, wait: true, }) } catch (error) { @@ -101,11 +122,29 @@ export class QdrantVectorStore implements IVectorStore { * @param limit Maximum number of results to return * @returns Promise resolving to search results */ - async search(queryVector: number[], limit: number = 10): Promise { + async search( + queryVector: number[], + limit: number = 10, + directoryPrefix?: string, + ): Promise { try { + let filter: any = undefined + + if (directoryPrefix) { + const segments = directoryPrefix.split(path.sep).filter(Boolean) + + filter = { + must: segments.map((segment, index) => ({ + key: `pathSegments.${index}`, + match: { value: segment }, + })), + } + } + const result = await this.client.search(this.collectionName, { vector: queryVector, limit, + filter, }) result.filter((r) => this.isPayloadValid(r.payload!)) diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index a580a0ee4f..6bdbc5b095 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -357,7 +357,13 @@ export const ChatRowContent = ({
{toolIcon("search")} - {t("chat:codebaseSearch.wantsToSearch", { query: tool.query, limit: tool.limit || 0 })} + {tool.path + ? t("chat:codebaseSearch.wantsToSearchWithPath", { + query: tool.query, + limit: tool.limit || 0, + path: tool.path, + }) + : t("chat:codebaseSearch.wantsToSearch", { query: tool.query, limit: tool.limit || 0 })}
) diff --git a/webview-ui/src/components/chat/CodebaseSearchResult.tsx b/webview-ui/src/components/chat/CodebaseSearchResult.tsx index d1215c1a71..ecd6ff866a 100644 --- a/webview-ui/src/components/chat/CodebaseSearchResult.tsx +++ b/webview-ui/src/components/chat/CodebaseSearchResult.tsx @@ -20,6 +20,7 @@ const CodebaseSearchResult: React.FC = ({ language: _language, }) => { const handleClick = () => { + console.log(filePath) vscode.postMessage({ type: "openFile", text: "./" + filePath, diff --git a/webview-ui/src/i18n/locales/en/chat.json b/webview-ui/src/i18n/locales/en/chat.json index ea5a175e3e..2658322f16 100644 --- a/webview-ui/src/i18n/locales/en/chat.json +++ b/webview-ui/src/i18n/locales/en/chat.json @@ -163,6 +163,7 @@ }, "codebaseSearch": { "wantsToSearch": "Roo wants to search the codebase for '{{query}}' (limit: {{limit}}):", + "wantsToSearchWithPath": "Roo wants to search the codebase for '{{query}}' (limit: {{limit}}) in '{{path}}':", "didSearch": "Found {{count}} result(s) for '{{query}}':" }, "commandOutput": "Command Output", From e8f8d471620a0d0b51f941258f765a1945471f28 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 6 May 2025 11:56:54 -0500 Subject: [PATCH 11/71] refactor: centralize path normalization logic --- .../code-index/processors/file-watcher.ts | 9 ++--- src/services/code-index/processors/scanner.ts | 12 ++----- .../code-index/shared/get-relative-path.ts | 34 +++++++++++++++++++ 3 files changed, 40 insertions(+), 15 deletions(-) create mode 100644 src/services/code-index/shared/get-relative-path.ts diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index e830cd0312..02ba8d00f6 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -1,13 +1,12 @@ import * as vscode from "vscode" -import * as path from "path" import { createHash } from "crypto" import { RooIgnoreController } from 
"../../../core/ignore/RooIgnoreController" -import { getWorkspacePath } from "../../../utils/path" import { v5 as uuidv5 } from "uuid" import { scannerExtensions } from "../shared/supported-extensions" import { IFileWatcher, FileProcessingResult, IEmbedder, IVectorStore } from "../interfaces" import { codeParser } from "./parser" import { CacheManager } from "../cache-manager" +import { generateNormalizedAbsolutePath, generateRelativeFilePath } from "../shared/get-relative-path" const QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479" const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB @@ -182,10 +181,8 @@ export class FileWatcher implements IFileWatcher { const texts = blocks.map((block) => block.content) const { embeddings } = await this.embedder.createEmbeddings(texts) - const workspaceRoot = getWorkspacePath() const points = blocks.map((block, index) => { - const absolutePath = path.resolve(workspaceRoot, block.file_path) - const normalizedAbsolutePath = path.normalize(absolutePath) + const normalizedAbsolutePath = generateNormalizedAbsolutePath(block.file_path) const stableName = `${normalizedAbsolutePath}:${block.start_line}` const pointId = uuidv5(stableName, QDRANT_CODE_BLOCK_NAMESPACE) @@ -194,7 +191,7 @@ export class FileWatcher implements IFileWatcher { id: pointId, vector: embeddings[index], payload: { - filePath: path.relative(workspaceRoot, normalizedAbsolutePath), + filePath: generateRelativeFilePath(normalizedAbsolutePath), codeChunk: block.content, startLine: block.start_line, endLine: block.end_line, diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts index d4ec81b3c9..af60c55afe 100644 --- a/src/services/code-index/processors/scanner.ts +++ b/src/services/code-index/processors/scanner.ts @@ -2,7 +2,7 @@ import { listFiles } from "../../glob/list-files" import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController" import { stat } from "fs/promises" import * as 
path from "path" -import { getWorkspacePath } from "../../../utils/path" +import { generateNormalizedAbsolutePath, generateRelativeFilePath } from "../shared/get-relative-path" import { scannerExtensions } from "../shared/supported-extensions" import * as vscode from "vscode" import { CodeBlock, ICodeParser, IEmbedder, IVectorStore, IDirectoryScanner } from "../interfaces" @@ -291,13 +291,7 @@ export class DirectoryScanner implements IDirectoryScanner { // Prepare points for Qdrant const points = batchBlocks.map((block, index) => { - const workspaceRoot = getWorkspacePath() // Assuming this utility function is available - // Ensure the block path is relative to the workspace root before resolving - const relativeBlockPath = path.isAbsolute(block.file_path) - ? path.relative(workspaceRoot, block.file_path) - : block.file_path - const absolutePath = path.resolve(workspaceRoot, relativeBlockPath) - const normalizedAbsolutePath = path.normalize(absolutePath) + const normalizedAbsolutePath = generateNormalizedAbsolutePath(block.file_path) const stableName = `${normalizedAbsolutePath}:${block.start_line}` const pointId = uuidv5(stableName, DirectoryScanner.QDRANT_CODE_BLOCK_NAMESPACE) @@ -306,7 +300,7 @@ export class DirectoryScanner implements IDirectoryScanner { id: pointId, vector: embeddings[index], payload: { - filePath: path.relative(workspaceRoot, normalizedAbsolutePath), + filePath: generateRelativeFilePath(normalizedAbsolutePath), codeChunk: block.content, startLine: block.start_line, endLine: block.end_line, diff --git a/src/services/code-index/shared/get-relative-path.ts b/src/services/code-index/shared/get-relative-path.ts new file mode 100644 index 0000000000..564afee955 --- /dev/null +++ b/src/services/code-index/shared/get-relative-path.ts @@ -0,0 +1,34 @@ +import path from "path" +import { getWorkspacePath } from "../../../utils/path" + +/** + * Generates a normalized absolute path from a given file path and workspace root. 
+ * Handles path resolution and normalization to ensure consistent absolute paths. + * + * @param filePath - The file path to normalize (can be relative or absolute) + * @param workspaceRoot - The root directory of the workspace + * @returns The normalized absolute path + */ +export function generateNormalizedAbsolutePath(filePath: string): string { + const workspaceRoot = getWorkspacePath() + // Resolve the path to make it absolute if it's relative + const resolvedPath = path.resolve(workspaceRoot, filePath) + // Normalize to handle any . or .. segments and duplicate slashes + return path.normalize(resolvedPath) +} + +/** + * Generates a relative file path from a normalized absolute path and workspace root. + * Ensures consistent relative path generation across different platforms. + * + * @param normalizedAbsolutePath - The normalized absolute path to convert + * @param workspaceRoot - The root directory of the workspace + * @returns The relative path from workspaceRoot to the file + */ +export function generateRelativeFilePath(normalizedAbsolutePath: string): string { + const workspaceRoot = getWorkspacePath() + // Generate the relative path + const relativePath = path.relative(workspaceRoot, normalizedAbsolutePath) + // Normalize to ensure consistent path separators + return path.normalize(relativePath) +} From caa293a2122ac055ec8bc5b4c4d1d49ea6f126c3 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 6 May 2025 14:22:49 -0500 Subject: [PATCH 12/71] refactor: remove unnecessary barrel file --- src/services/code-index/vector-store/index.ts | 1 - 1 file changed, 1 deletion(-) delete mode 100644 src/services/code-index/vector-store/index.ts diff --git a/src/services/code-index/vector-store/index.ts b/src/services/code-index/vector-store/index.ts deleted file mode 100644 index d42ea841c3..0000000000 --- a/src/services/code-index/vector-store/index.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "./qdrant-client" From a2c770fa95c6b456b4f43ff1445e70916d450543 Mon 
Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 6 May 2025 14:27:35 -0500 Subject: [PATCH 13/71] refactor: prevent restarting the service if no settings change --- src/core/webview/ClineProvider.ts | 6 - src/core/webview/webviewMessageHandler.ts | 1 - src/services/code-index/manager.ts | 131 +++++++++++++++------- src/services/code-index/orchestrator.ts | 6 +- 4 files changed, 94 insertions(+), 50 deletions(-) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 184501d35f..ab4fcde347 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -859,12 +859,6 @@ export class ClineProvider extends EventEmitter implements vscode.window.showErrorMessage(t("common:errors.create_api_config")) return undefined } - - // Load CodeIndexManager configuration after provider settings are updated - if (this.codeIndexManager) { - this.codeIndexManager.dispose() - await this.codeIndexManager.initialize(this.contextProxy) - } } async deleteProviderProfile(profileToDelete: ProviderSettingsEntry) { diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 6d20fb67d9..e07c536c95 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -1329,7 +1329,6 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We codebaseIndexEmbedderModelId: "", } await updateGlobalState("codebaseIndexConfig", codebaseIndexConfig) - provider.codeIndexManager?.dispose() await provider.codeIndexManager?.initialize(provider.contextProxy) await provider.postStateToWebview() break diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index bbfa5e9ee6..8783d39f1e 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -76,7 +76,6 @@ export class CodeIndexManager { } public get isFeatureConfigured(): boolean { - this.assertInitialized() return 
this._configManager!.isFeatureConfigured } @@ -86,53 +85,109 @@ export class CodeIndexManager { * @returns Object indicating if a restart is needed */ public async initialize(contextProxy: ContextProxy): Promise<{ requiresRestart: boolean }> { - // Initialize config manager and load configuration - this._configManager = new CodeIndexConfigManager(contextProxy) + // 1. ConfigManager Initialization and Configuration Loading + if (!this._configManager) { + this._configManager = new CodeIndexConfigManager(contextProxy) + } const { requiresRestart, requiresClear } = await this._configManager.loadConfiguration() - // Initialize cache manager - this._cacheManager = new CacheManager(this.context, this.workspacePath) - await this._cacheManager.initialize() - - // Initialize service factory and dependent services - this._serviceFactory = new CodeIndexServiceFactory(this._configManager, this.workspacePath, this._cacheManager) - - // Create shared service instances - const embedder = this._serviceFactory.createEmbedder() - const vectorStore = this._serviceFactory.createVectorStore() - const parser = codeParser - const scanner = this._serviceFactory.createDirectoryScanner(embedder, vectorStore, parser) - const fileWatcher = this._serviceFactory.createFileWatcher( - this.context, - embedder, - vectorStore, - this._cacheManager, - ) + // 2. CacheManager Initialization + if (!this._cacheManager) { + this._cacheManager = new CacheManager(this.context, this.workspacePath) + await this._cacheManager.initialize() + } - // Initialize orchestrator - this._orchestrator = new CodeIndexOrchestrator( - this._configManager, - this._stateManager, - this.context, - this.workspacePath, - this._cacheManager, - embedder, - vectorStore, - parser, - scanner, - fileWatcher, + // 3. Determine if Core Services Need Recreation + const needsServiceRecreation = !this._serviceFactory || requiresRestart + console.log( + `[CodeIndexManager] ${needsServiceRecreation ? 
"Initial setup or restart required" : "Configuration loaded, no full re-initialization needed"}`, ) - // Initialize search service - this._searchService = new CodeIndexSearchService(this._configManager, this._stateManager, embedder, vectorStore) + if (needsServiceRecreation) { + console.log("[CodeIndexManager] (Re)initializing core services...") + + // Stop watcher if it exists + if (this._orchestrator) { + this.stopWatcher() + console.log("[CodeIndexManager] Stopped existing watcher") + } + + // (Re)Initialize service factory + this._serviceFactory = new CodeIndexServiceFactory( + this._configManager, + this.workspacePath, + this._cacheManager, + ) + + // (Re)Create shared service instances + const embedder = this._serviceFactory.createEmbedder() + const vectorStore = this._serviceFactory.createVectorStore() + const parser = codeParser + const scanner = this._serviceFactory.createDirectoryScanner(embedder, vectorStore, parser) + const fileWatcher = this._serviceFactory.createFileWatcher( + this.context, + embedder, + vectorStore, + this._cacheManager, + ) + + // (Re)Initialize orchestrator + this._orchestrator = new CodeIndexOrchestrator( + this._configManager, + this._stateManager, + this.workspacePath, + this._cacheManager, + vectorStore, + scanner, + fileWatcher, + ) + + // (Re)Initialize search service + this._searchService = new CodeIndexSearchService( + this._configManager, + this._stateManager, + embedder, + vectorStore, + ) + + console.log("[CodeIndexManager] Core services (re)initialized") + } + // 4. Handle Data Clearing if (requiresClear) { - console.log("[CodeIndexManager] Embedding dimension changed. Clearing existing index data...") + console.log("[CodeIndexManager] Configuration requires clearing data") await this.clearIndexData() } - if (requiresRestart || requiresClear) { - this.startIndexing() + // 5. 
Handle Indexing Start/Restart + if (this._configManager.isFeatureEnabled && this._configManager.isFeatureConfigured) { + const shouldStartOrRestartIndexing = + requiresRestart || + requiresClear || + (needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing")) + + if (shouldStartOrRestartIndexing) { + console.log("[CodeIndexManager] Starting/restarting indexing due to configuration changes") + await this.startIndexing() + } else { + console.log( + "[CodeIndexManager] Indexing not started/restarted (requiresRestart:", + requiresRestart, + "requiresClear:", + requiresClear, + "currentState:", + this._orchestrator?.state, + "needsServiceRecreation:", + needsServiceRecreation, + ")", + ) + } + } else { + console.log("[CodeIndexManager] Feature not enabled or not configured") + if (this._orchestrator && this._orchestrator.state !== "Standby") { + this.stopWatcher() + console.log("[CodeIndexManager] Stopped watcher as feature is disabled") + } } return { requiresRestart } diff --git a/src/services/code-index/orchestrator.ts b/src/services/code-index/orchestrator.ts index 7a958431bf..ed8cc970f1 100644 --- a/src/services/code-index/orchestrator.ts +++ b/src/services/code-index/orchestrator.ts @@ -2,7 +2,7 @@ import * as vscode from "vscode" import * as path from "path" import { CodeIndexConfigManager } from "./config-manager" import { CodeIndexStateManager, IndexingState } from "./state-manager" -import { FileProcessingResult, IFileWatcher, IVectorStore, IEmbedder, ICodeParser } from "./interfaces" +import { FileProcessingResult, IFileWatcher, IVectorStore } from "./interfaces" import { DirectoryScanner } from "./processors" import { CacheManager } from "./cache-manager" @@ -16,12 +16,9 @@ export class CodeIndexOrchestrator { constructor( private readonly configManager: CodeIndexConfigManager, private readonly stateManager: CodeIndexStateManager, - private readonly context: vscode.ExtensionContext, private readonly workspacePath: string, 
private readonly cacheManager: CacheManager, - private readonly embedder: IEmbedder, private readonly vectorStore: IVectorStore, - private readonly parser: ICodeParser, private readonly scanner: DirectoryScanner, private readonly fileWatcher: IFileWatcher, ) {} @@ -107,7 +104,6 @@ export class CodeIndexOrchestrator { this.stateManager.setSystemState("Indexing", "Initializing services...") try { - this.configManager.loadConfiguration() const collectionCreated = await this.vectorStore.initialize() if (collectionCreated) { From a11cf1539c34dd09a38c80e1eb8cd90f81c2faf7 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 6 May 2025 15:58:38 -0500 Subject: [PATCH 14/71] fix: the indexing process should never be awaited --- src/services/code-index/manager.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index 8783d39f1e..8fb4247c0c 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -168,7 +168,7 @@ export class CodeIndexManager { if (shouldStartOrRestartIndexing) { console.log("[CodeIndexManager] Starting/restarting indexing due to configuration changes") - await this.startIndexing() + this.startIndexing() } else { console.log( "[CodeIndexManager] Indexing not started/restarted (requiresRestart:", From 0b19cffb9f9432ed5d7a7978841732ffdde9301e Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 8 May 2025 11:27:47 -0500 Subject: [PATCH 15/71] refactor: cleanup unused method --- src/services/code-index/cache-manager.ts | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/services/code-index/cache-manager.ts b/src/services/code-index/cache-manager.ts index f34299edb9..de0b1e31ab 100644 --- a/src/services/code-index/cache-manager.ts +++ b/src/services/code-index/cache-manager.ts @@ -53,13 +53,6 @@ export class CacheManager implements ICacheManager { } } - /** - * Saves the cache to disk (immediately) - */ - async saveCache(): 
Promise { - await this._performSave() - } - /** * Clears the cache file by deleting it */ From 0cfa228d3b5dc24f62fc0b0a0fb86c4901fd0bdc Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 8 May 2025 11:35:03 -0500 Subject: [PATCH 16/71] refactor: remove batch limits for ollama --- src/services/code-index/embedders/ollama.ts | 8 +++++++- src/services/code-index/embedders/openai.ts | 8 +++++++- src/services/code-index/interfaces/embedder.ts | 7 +++++++ src/services/code-index/processors/scanner.ts | 9 ++++++--- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/services/code-index/embedders/ollama.ts b/src/services/code-index/embedders/ollama.ts index 56de8c014d..8601f0c606 100644 --- a/src/services/code-index/embedders/ollama.ts +++ b/src/services/code-index/embedders/ollama.ts @@ -1,5 +1,5 @@ import { ApiHandlerOptions } from "../../../shared/api" -import { EmbeddingResponse, IEmbedder } from "../interfaces" +import { EmbedderInfo, EmbeddingResponse, IEmbedder } from "../interfaces" /** * Implements the IEmbedder interface using a local Ollama instance. 
@@ -70,4 +70,10 @@ export class CodeIndexOllamaEmbedder implements IEmbedder { throw new Error(`Ollama embedding failed: ${error.message}`) } } + + get embedderInfo(): EmbedderInfo { + return { + name: "ollama", + } + } } diff --git a/src/services/code-index/embedders/openai.ts b/src/services/code-index/embedders/openai.ts index 4d7e38193e..d08199eb31 100644 --- a/src/services/code-index/embedders/openai.ts +++ b/src/services/code-index/embedders/openai.ts @@ -1,7 +1,7 @@ import { OpenAI } from "openai" import { OpenAiNativeHandler } from "../../../api/providers/openai-native" import { ApiHandlerOptions } from "../../../shared/api" -import { IEmbedder, EmbeddingResponse } from "../interfaces" +import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces" /** * OpenAI implementation of the embedder interface with batching and rate limiting @@ -126,4 +126,10 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder { throw new Error(`Failed to create embeddings after ${OpenAiEmbedder.MAX_RETRIES} attempts`) } + + get embedderInfo(): EmbedderInfo { + return { + name: "openai", + } + } } diff --git a/src/services/code-index/interfaces/embedder.ts b/src/services/code-index/interfaces/embedder.ts index b006773b3d..de43255a65 100644 --- a/src/services/code-index/interfaces/embedder.ts +++ b/src/services/code-index/interfaces/embedder.ts @@ -10,6 +10,7 @@ export interface IEmbedder { * @returns Promise resolving to an EmbeddingResponse */ createEmbeddings(texts: string[], model?: string): Promise + get embedderInfo(): EmbedderInfo } export interface EmbeddingResponse { @@ -19,3 +20,9 @@ export interface EmbeddingResponse { totalTokens: number } } + +export type AvailableEmbedders = "openai" | "ollama" + +export interface EmbedderInfo { + name: AvailableEmbedders +} diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts index af60c55afe..033092adf5 100644 --- 
a/src/services/code-index/processors/scanner.ts +++ b/src/services/code-index/processors/scanner.ts @@ -17,7 +17,7 @@ export class DirectoryScanner implements IDirectoryScanner { private static readonly QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479" private static readonly MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB private static readonly MAX_LIST_FILES_LIMIT = 2_000 - private static readonly BATCH_SEGMENT_THRESHOLD = 30 // Number of code segments to batch for embeddings/upserts + private static readonly BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch for embeddings/upserts private static readonly MAX_BATCH_RETRIES = 3 private static readonly INITIAL_RETRY_DELAY_MS = 500 private static readonly PARSING_CONCURRENCY = 10 @@ -142,8 +142,11 @@ export class DirectoryScanner implements IDirectoryScanner { }) } - // Check if batch threshold is met - if (currentBatchBlocks.length >= DirectoryScanner.BATCH_SEGMENT_THRESHOLD) { + // Check if batch threshold is met and not for Ollama + if ( + currentBatchBlocks.length >= DirectoryScanner.BATCH_SEGMENT_THRESHOLD && + this.embedder.embedderInfo.name !== "ollama" + ) { // Copy current batch data and clear accumulators const batchBlocks = [...currentBatchBlocks] const batchTexts = [...currentBatchTexts] From cb2accde77ef0eecb032b875a7026cfad22ae7cf Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 8 May 2025 11:42:12 -0500 Subject: [PATCH 17/71] refactor(parser): simplify method signatures and improve chunking logic - Remove redundant min/max chars parameters - Add better handling for oversized lines - Improve chunking logic with segment handling - Clean up method signatures and parameter ordering --- src/services/code-index/processors/parser.ts | 123 ++++++++++--------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/src/services/code-index/processors/parser.ts b/src/services/code-index/processors/parser.ts index 14f92f4ef7..04bf1cbcb0 100644 --- 
a/src/services/code-index/processors/parser.ts +++ b/src/services/code-index/processors/parser.ts @@ -133,14 +133,7 @@ export class CodeParser implements ICodeParser { if (captures.length === 0) { if (content.length >= MIN_BLOCK_CHARS) { // Perform fallback chunking if content is large enough - const blocks = this._performFallbackChunking( - filePath, - content, - fileHash, - MIN_BLOCK_CHARS, - MAX_BLOCK_CHARS, - seenSegmentHashes, - ) + const blocks = this._performFallbackChunking(filePath, content, fileHash, seenSegmentHashes) return blocks } else { // Return empty if content is too small for fallback @@ -171,7 +164,6 @@ export class CodeParser implements ICodeParser { currentNode, filePath, fileHash, - MIN_BLOCK_CHARS, // Pass minChars as requested seenSegmentHashes, ) results.push(...chunkedBlocks) @@ -218,20 +210,19 @@ export class CodeParser implements ICodeParser { lines: string[], filePath: string, fileHash: string, - baseStartLine: number, // 1-based start line of the *first* line in the `lines` array + chunkType: string, - minChars: number, - maxChars: number, - minRemainderChars: number, seenSegmentHashes: Set, + baseStartLine: number = 1, // 1-based start line of the *first* line in the `lines` array ): CodeBlock[] { const chunks: CodeBlock[] = [] let currentChunkLines: string[] = [] let currentChunkLength = 0 let chunkStartLineIndex = 0 // 0-based index within the `lines` array + const effectiveMaxChars = MAX_BLOCK_CHARS * MAX_CHARS_TOLERANCE_FACTOR const finalizeChunk = (endLineIndex: number) => { - if (currentChunkLength >= minChars && currentChunkLines.length > 0) { + if (currentChunkLength >= MIN_BLOCK_CHARS && currentChunkLines.length > 0) { const chunkContent = currentChunkLines.join("\n") const startLine = baseStartLine + chunkStartLineIndex const endLine = baseStartLine + endLineIndex @@ -243,7 +234,7 @@ export class CodeParser implements ICodeParser { seenSegmentHashes.add(segmentHash) chunks.push({ file_path: filePath, - identifier: null, // 
Identifier is handled at a higher level if available + identifier: null, type: chunkType, start_line: startLine, end_line: endLine, @@ -253,67 +244,93 @@ export class CodeParser implements ICodeParser { }) } } - // Reset for the next chunk currentChunkLines = [] currentChunkLength = 0 chunkStartLineIndex = endLineIndex + 1 } + const createSegmentBlock = (segment: string, originalLineNumber: number) => { + const segmentHash = createHash("sha256") + .update(`${filePath}-${originalLineNumber}-${originalLineNumber}-${segment}`) + .digest("hex") + + if (!seenSegmentHashes.has(segmentHash)) { + seenSegmentHashes.add(segmentHash) + chunks.push({ + file_path: filePath, + identifier: null, + type: `${chunkType}_segment`, + start_line: originalLineNumber, + end_line: originalLineNumber, + content: segment, + segmentHash, + fileHash, + }) + } + } + for (let i = 0; i < lines.length; i++) { const line = lines[i] const lineLength = line.length + (i < lines.length - 1 ? 1 : 0) // +1 for newline, except last line + const originalLineNumber = baseStartLine + i + + // Handle oversized lines (longer than effectiveMaxChars) + if (lineLength > effectiveMaxChars) { + // Finalize any existing normal chunk before processing the oversized line + if (currentChunkLines.length > 0) { + finalizeChunk(i - 1) + } - // Check if adding this line exceeds the max limit - if (currentChunkLength > 0 && currentChunkLength + lineLength > maxChars) { - // --- Re-balancing Logic --- - let splitIndex = i - 1 // Default split is *before* the current line + // Split the oversized line into segments + let remainingLineContent = line + while (remainingLineContent.length > 0) { + const segment = remainingLineContent.substring(0, MAX_BLOCK_CHARS) + remainingLineContent = remainingLineContent.substring(MAX_BLOCK_CHARS) + createSegmentBlock(segment, originalLineNumber) + } + continue + } - // Estimate remaining text length + // Handle normally sized lines + if (currentChunkLength > 0 && currentChunkLength + 
lineLength > effectiveMaxChars) { + // Re-balancing Logic + let splitIndex = i - 1 let remainderLength = 0 for (let j = i; j < lines.length; j++) { remainderLength += lines[j].length + (j < lines.length - 1 ? 1 : 0) } - // Check if remainder is too small and we have a valid current chunk if ( - currentChunkLength >= minChars && - remainderLength < minRemainderChars && + currentChunkLength >= MIN_BLOCK_CHARS && + remainderLength < MIN_CHUNK_REMAINDER_CHARS && currentChunkLines.length > 1 ) { - // Try to find a better split point by looking backwards for (let k = i - 2; k >= chunkStartLineIndex; k--) { const potentialChunkLines = lines.slice(chunkStartLineIndex, k + 1) - const potentialChunkLength = potentialChunkLines.join("\n").length + 1 // Approx. length - - const potentialNextChunkLines = lines.slice(k + 1) // All remaining lines - const potentialNextChunkLength = potentialNextChunkLines.join("\n").length + 1 // Approx. length - - // Found a split leaving enough in current and next? - if (potentialChunkLength >= minChars && potentialNextChunkLength >= minRemainderChars) { - splitIndex = k // Found a better split point + const potentialChunkLength = potentialChunkLines.join("\n").length + 1 + const potentialNextChunkLines = lines.slice(k + 1) + const potentialNextChunkLength = potentialNextChunkLines.join("\n").length + 1 + + if ( + potentialChunkLength >= MIN_BLOCK_CHARS && + potentialNextChunkLength >= MIN_CHUNK_REMAINDER_CHARS + ) { + splitIndex = k break } } - // If no better split found, splitIndex remains i - 1 } - // --- End Re-balancing --- - // Finalize the chunk up to the determined split index finalizeChunk(splitIndex) - // Add the current line to start the *new* chunk (if it wasn't part of the finalized chunk) if (i >= chunkStartLineIndex) { currentChunkLines.push(line) currentChunkLength += lineLength } else { - // This case should ideally not happen with the current logic, but as a safeguard: - // If the split somehow went *past* the current line 
index 'i', - // we need to reset 'i' to start processing from the beginning of the new chunk. - i = chunkStartLineIndex - 1 // Loop increment will make it chunkStartLineIndex - continue // Re-process the line that starts the new chunk + i = chunkStartLineIndex - 1 + continue } } else { - // Add the current line to the chunk currentChunkLines.push(line) currentChunkLength += lineLength } @@ -331,29 +348,16 @@ export class CodeParser implements ICodeParser { filePath: string, content: string, fileHash: string, - minChars: number, - maxChars: number, seenSegmentHashes: Set, ): CodeBlock[] { const lines = content.split("\n") - return this._chunkTextByLines( - lines, - filePath, - fileHash, - 1, // Fallback starts from line 1 - "fallback_chunk", - minChars, - maxChars, - MIN_CHUNK_REMAINDER_CHARS, - seenSegmentHashes, - ) + return this._chunkTextByLines(lines, filePath, fileHash, "fallback_chunk", seenSegmentHashes) } private _chunkLeafNodeByLines( node: treeSitter.SyntaxNode, filePath: string, fileHash: string, - minChars: number, // Note: This was previously used as max, now correctly used as min seenSegmentHashes: Set, ): CodeBlock[] { const lines = node.text.split("\n") @@ -362,12 +366,9 @@ export class CodeParser implements ICodeParser { lines, filePath, fileHash, - baseStartLine, node.type, // Use the node's type - minChars, - MAX_BLOCK_CHARS, // Use the global max - MIN_CHUNK_REMAINDER_CHARS, seenSegmentHashes, + baseStartLine, ) } } From d64de40ae2b6afc27019ab1f1812b0d71937eea8 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 8 May 2025 16:22:13 -0500 Subject: [PATCH 18/71] fix(settings): make select inputs full width in CodeIndexSettings --- webview-ui/src/components/settings/CodeIndexSettings.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webview-ui/src/components/settings/CodeIndexSettings.tsx b/webview-ui/src/components/settings/CodeIndexSettings.tsx index eb719defd5..0a4f5f1521 100644 --- 
a/webview-ui/src/components/settings/CodeIndexSettings.tsx +++ b/webview-ui/src/components/settings/CodeIndexSettings.tsx @@ -125,7 +125,7 @@ export const CodeIndexSettings: React.FC = ({ }) } }}> - + @@ -145,7 +145,7 @@ export const CodeIndexSettings: React.FC = ({ codebaseIndexEmbedderModelId: value, }) }> - + From a2e656c5e124949a8999ac272f24b1d63e47e82e Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 8 May 2025 19:49:32 -0500 Subject: [PATCH 19/71] refactor: increase max list file limit --- src/services/code-index/processors/scanner.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts index 033092adf5..49f4acb2f8 100644 --- a/src/services/code-index/processors/scanner.ts +++ b/src/services/code-index/processors/scanner.ts @@ -16,7 +16,7 @@ export class DirectoryScanner implements IDirectoryScanner { // Constants moved inside the class private static readonly QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479" private static readonly MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB - private static readonly MAX_LIST_FILES_LIMIT = 2_000 + private static readonly MAX_LIST_FILES_LIMIT = 3_000 private static readonly BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch for embeddings/upserts private static readonly MAX_BATCH_RETRIES = 3 private static readonly INITIAL_RETRY_DELAY_MS = 500 From 6d1f08d95e7f965993be5ce88aede085ddcf84d2 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 8 May 2025 20:03:55 -0500 Subject: [PATCH 20/71] feat(ui): improve codebase search result display formatting --- .../src/components/chat/CodebaseSearchResult.tsx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/webview-ui/src/components/chat/CodebaseSearchResult.tsx b/webview-ui/src/components/chat/CodebaseSearchResult.tsx index ecd6ff866a..5d6b61dd8a 100644 --- 
a/webview-ui/src/components/chat/CodebaseSearchResult.tsx +++ b/webview-ui/src/components/chat/CodebaseSearchResult.tsx @@ -33,12 +33,14 @@ const CodebaseSearchResult: React.FC = ({ return (
-
- {filePath.split("/").at(-1)} - - Lines: {startLine}-{endLine} +
+ + {filePath.split("/").at(-1)}:{startLine}-{endLine} + + + {filePath.split("/").slice(0, -1).join("/")}
From 5964551c5bf8b11284a53f3ca453f0614a5ee6f4 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 8 May 2025 21:16:27 -0500 Subject: [PATCH 21/71] test: add tests for cache and config managers --- .../__tests__/cache-manager.test.ts | 171 ++++++++++++++ .../__tests__/config-manager.test.ts | 213 ++++++++++++++++++ 2 files changed, 384 insertions(+) create mode 100644 src/services/code-index/__tests__/cache-manager.test.ts create mode 100644 src/services/code-index/__tests__/config-manager.test.ts diff --git a/src/services/code-index/__tests__/cache-manager.test.ts b/src/services/code-index/__tests__/cache-manager.test.ts new file mode 100644 index 0000000000..a01696b5a3 --- /dev/null +++ b/src/services/code-index/__tests__/cache-manager.test.ts @@ -0,0 +1,171 @@ +import * as vscode from "vscode" +import { createHash } from "crypto" +import debounce from "lodash.debounce" +import { CacheManager } from "../cache-manager" + +// Mock vscode +jest.mock("vscode", () => ({ + Uri: { + joinPath: jest.fn(), + }, + workspace: { + fs: { + readFile: jest.fn(), + writeFile: jest.fn(), + delete: jest.fn(), + }, + }, +})) + +// Mock debounce to execute immediately +jest.mock("lodash.debounce", () => jest.fn((fn) => fn)) + +describe("CacheManager", () => { + let mockContext: vscode.ExtensionContext + let mockWorkspacePath: string + let mockCachePath: vscode.Uri + let cacheManager: CacheManager + + beforeEach(() => { + // Reset all mocks + jest.clearAllMocks() + + // Mock context + mockWorkspacePath = "/mock/workspace" + mockCachePath = { fsPath: "/mock/storage/cache.json" } as vscode.Uri + mockContext = { + globalStorageUri: { fsPath: "/mock/storage" } as vscode.Uri, + } as vscode.ExtensionContext + + // Mock Uri.joinPath + ;(vscode.Uri.joinPath as jest.Mock).mockReturnValue(mockCachePath) + + // Create cache manager instance + cacheManager = new CacheManager(mockContext, mockWorkspacePath) + }) + + describe("constructor", () => { + it("should correctly set up cachePath using 
Uri.joinPath and crypto.createHash", () => { + const expectedHash = createHash("sha256").update(mockWorkspacePath).digest("hex") + + expect(vscode.Uri.joinPath).toHaveBeenCalledWith( + mockContext.globalStorageUri, + `roo-index-cache-${expectedHash}.json`, + ) + }) + + it("should set up debounced save function", () => { + expect(debounce).toHaveBeenCalledWith(expect.any(Function), 1500) + }) + }) + + describe("initialize", () => { + it("should load existing cache file successfully", async () => { + const mockCache = { "file1.ts": "hash1", "file2.ts": "hash2" } + const mockBuffer = Buffer.from(JSON.stringify(mockCache)) + ;(vscode.workspace.fs.readFile as jest.Mock).mockResolvedValue(mockBuffer) + + await cacheManager.initialize() + + expect(vscode.workspace.fs.readFile).toHaveBeenCalledWith(mockCachePath) + expect(cacheManager.getAllHashes()).toEqual(mockCache) + }) + + it("should handle missing cache file by creating empty cache", async () => { + ;(vscode.workspace.fs.readFile as jest.Mock).mockRejectedValue(new Error("File not found")) + + await cacheManager.initialize() + + expect(cacheManager.getAllHashes()).toEqual({}) + }) + }) + + describe("hash management", () => { + it("should update hash and trigger save", () => { + const filePath = "test.ts" + const hash = "testhash" + + cacheManager.updateHash(filePath, hash) + + expect(cacheManager.getHash(filePath)).toBe(hash) + expect(vscode.workspace.fs.writeFile).toHaveBeenCalled() + }) + + it("should delete hash and trigger save", () => { + const filePath = "test.ts" + const hash = "testhash" + + cacheManager.updateHash(filePath, hash) + cacheManager.deleteHash(filePath) + + expect(cacheManager.getHash(filePath)).toBeUndefined() + expect(vscode.workspace.fs.writeFile).toHaveBeenCalled() + }) + + it("should return shallow copy of hashes", () => { + const filePath = "test.ts" + const hash = "testhash" + + cacheManager.updateHash(filePath, hash) + const hashes = cacheManager.getAllHashes() + + // Modify the returned 
object + hashes[filePath] = "modified" + + // Original should remain unchanged + expect(cacheManager.getHash(filePath)).toBe(hash) + }) + }) + + describe("saving", () => { + it("should save cache to disk with correct data", async () => { + const filePath = "test.ts" + const hash = "testhash" + + cacheManager.updateHash(filePath, hash) + + expect(vscode.workspace.fs.writeFile).toHaveBeenCalledWith(mockCachePath, expect.any(Uint8Array)) + + // Verify the saved data + const savedData = JSON.parse( + Buffer.from((vscode.workspace.fs.writeFile as jest.Mock).mock.calls[0][1]).toString(), + ) + expect(savedData).toEqual({ [filePath]: hash }) + }) + + it("should handle save errors gracefully", async () => { + const consoleErrorSpy = jest.spyOn(console, "error").mockImplementation() + ;(vscode.workspace.fs.writeFile as jest.Mock).mockRejectedValue(new Error("Save failed")) + + cacheManager.updateHash("test.ts", "hash") + + // Wait for any pending promises + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(consoleErrorSpy).toHaveBeenCalledWith("Failed to save cache:", expect.any(Error)) + + consoleErrorSpy.mockRestore() + }) + }) + + describe("clearCacheFile", () => { + it("should delete cache file and reset state", async () => { + cacheManager.updateHash("test.ts", "hash") + await cacheManager.clearCacheFile() + + expect(vscode.workspace.fs.delete).toHaveBeenCalledWith(mockCachePath) + expect(cacheManager.getAllHashes()).toEqual({}) + }) + + it("should handle delete errors gracefully", async () => { + const consoleErrorSpy = jest.spyOn(console, "error").mockImplementation() + ;(vscode.workspace.fs.delete as jest.Mock).mockRejectedValue(new Error("Delete failed")) + + await cacheManager.clearCacheFile() + + expect(consoleErrorSpy).toHaveBeenCalledWith("Failed to clear cache file:", expect.any(Error)) + + consoleErrorSpy.mockRestore() + }) + }) +}) diff --git a/src/services/code-index/__tests__/config-manager.test.ts 
b/src/services/code-index/__tests__/config-manager.test.ts new file mode 100644 index 0000000000..e978816b94 --- /dev/null +++ b/src/services/code-index/__tests__/config-manager.test.ts @@ -0,0 +1,213 @@ +import { ContextProxy } from "../../../core/config/ContextProxy" +import { CodeIndexConfigManager } from "../config-manager" + +describe("CodeIndexConfigManager", () => { + let mockContextProxy: jest.Mocked + let configManager: CodeIndexConfigManager + + beforeEach(() => { + // Setup mock ContextProxy + mockContextProxy = { + getGlobalState: jest.fn(), + getSecret: jest.fn().mockReturnValue(undefined), + } as unknown as jest.Mocked + + configManager = new CodeIndexConfigManager(mockContextProxy) + }) + + describe("constructor", () => { + it("should initialize with ContextProxy", () => { + expect(configManager).toBeDefined() + expect(configManager.isFeatureEnabled).toBe(false) + expect(configManager.currentEmbedderProvider).toBe("openai") + }) + }) + + describe("loadConfiguration", () => { + it("should load default configuration when no state exists", async () => { + mockContextProxy.getGlobalState.mockReturnValue(undefined) + mockContextProxy.getSecret.mockReturnValue(undefined) + + const result = await configManager.loadConfiguration() + + expect(result.currentConfig).toEqual({ + isEnabled: false, + isConfigured: false, + embedderProvider: "openai", + modelId: undefined, + openAiOptions: { openAiNativeApiKey: "" }, + ollamaOptions: { ollamaBaseUrl: "" }, + qdrantUrl: "", + qdrantApiKey: "", + }) + expect(result.requiresRestart).toBe(false) + expect(result.requiresClear).toBe(false) + }) + + it("should load configuration from globalState and secrets", async () => { + const mockGlobalState = { + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderBaseUrl: "", + codebaseIndexEmbedderModelId: "text-embedding-3-large", + } + 
mockContextProxy.getGlobalState.mockReturnValue(mockGlobalState) + mockContextProxy.getSecret.mockImplementation((key: string) => { + if (key === "codeIndexOpenAiKey") return "test-openai-key" + if (key === "codeIndexQdrantApiKey") return "test-qdrant-key" + return undefined + }) + + const result = await configManager.loadConfiguration() + + expect(result.currentConfig).toEqual({ + isEnabled: true, + isConfigured: true, + embedderProvider: "openai", + modelId: "text-embedding-3-large", + openAiOptions: { openAiNativeApiKey: "test-openai-key" }, + ollamaOptions: { ollamaBaseUrl: "" }, + qdrantUrl: "http://qdrant.local", + qdrantApiKey: "test-qdrant-key", + }) + }) + + it("should detect restart requirement when provider changes", async () => { + // Initial state + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderModelId: "text-embedding-3-large", + }) + + await configManager.loadConfiguration() + + // Change provider + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "ollama", + codebaseIndexEmbedderBaseUrl: "http://ollama.local", + codebaseIndexEmbedderModelId: "llama2", + }) + + const result = await configManager.loadConfiguration() + expect(result.requiresRestart).toBe(true) + }) + + it("should detect clear requirement when model dimensions change", async () => { + // Initial state with a model + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderModelId: "text-embedding-3-small", + }) + + await configManager.loadConfiguration() + + // Change to a model with different dimensions + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + 
codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderModelId: "text-embedding-3-large", + }) + + const result = await configManager.loadConfiguration() + expect(result.requiresClear).toBe(true) + }) + }) + + describe("isConfigured", () => { + it("should validate OpenAI configuration correctly", async () => { + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + }) + mockContextProxy.getSecret.mockImplementation((key: string) => { + if (key === "codeIndexOpenAiKey") return "test-key" + return undefined + }) + + await configManager.loadConfiguration() + expect(configManager.isFeatureConfigured).toBe(true) + }) + + it("should validate Ollama configuration correctly", async () => { + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "ollama", + codebaseIndexEmbedderBaseUrl: "http://ollama.local", + }) + + await configManager.loadConfiguration() + expect(configManager.isFeatureConfigured).toBe(true) + }) + + it("should return false when required values are missing", async () => { + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "openai", + }) + + await configManager.loadConfiguration() + expect(configManager.isFeatureConfigured).toBe(false) + }) + }) + + describe("getter properties", () => { + beforeEach(async () => { + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderModelId: "text-embedding-3-large", + }) + mockContextProxy.getSecret.mockImplementation((key: string) => { + if (key === "codeIndexOpenAiKey") return "test-openai-key" + if (key === "codeIndexQdrantApiKey") 
return "test-qdrant-key" + return undefined + }) + + await configManager.loadConfiguration() + }) + + it("should return correct configuration via getConfig", () => { + const config = configManager.getConfig() + expect(config).toEqual({ + isEnabled: true, + isConfigured: true, + embedderProvider: "openai", + modelId: "text-embedding-3-large", + openAiOptions: { openAiNativeApiKey: "test-openai-key" }, + ollamaOptions: { ollamaBaseUrl: undefined }, + qdrantUrl: "http://qdrant.local", + qdrantApiKey: "test-qdrant-key", + }) + }) + + it("should return correct feature enabled state", () => { + expect(configManager.isFeatureEnabled).toBe(true) + }) + + it("should return correct embedder provider", () => { + expect(configManager.currentEmbedderProvider).toBe("openai") + }) + + it("should return correct Qdrant configuration", () => { + expect(configManager.qdrantConfig).toEqual({ + url: "http://qdrant.local", + apiKey: "test-qdrant-key", + }) + }) + + it("should return correct model ID", () => { + expect(configManager.currentModelId).toBe("text-embedding-3-large") + }) + }) +}) From 247a27c07d7317d8a9a98c925614fa17ffc547cc Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 9 May 2025 18:22:59 -0500 Subject: [PATCH 22/71] test: create unit tests for parser and scanner --- .../processors/__tests__/parser.test.ts | 226 ++++++++++++++++++ .../processors/__tests__/scanner.test.ts | 146 +++++++++++ 2 files changed, 372 insertions(+) create mode 100644 src/services/code-index/processors/__tests__/parser.test.ts create mode 100644 src/services/code-index/processors/__tests__/scanner.test.ts diff --git a/src/services/code-index/processors/__tests__/parser.test.ts b/src/services/code-index/processors/__tests__/parser.test.ts new file mode 100644 index 0000000000..a24c24593f --- /dev/null +++ b/src/services/code-index/processors/__tests__/parser.test.ts @@ -0,0 +1,226 @@ +import { jest } from "@jest/globals" +import { CodeParser, codeParser } from "../parser" +import { mockedFs 
} from "../../../tree-sitter/__tests__/helpers" +import Parser from "web-tree-sitter" +import { loadRequiredLanguageParsers } from "../../../tree-sitter/languageParser" + +jest.mock("fs/promises") +jest.mock("../../../tree-sitter/languageParser") + +const mockLanguageParser = { + js: { + parser: { + parse: jest.fn((content: string) => ({ + rootNode: { + text: content, + startPosition: { row: 0 }, + endPosition: { row: content.split("\n").length - 1 }, + children: [], + type: "program", + }, + })), + }, + query: { + captures: jest.fn().mockReturnValue([]), + }, + }, +} + +describe("CodeParser", () => { + let parser: CodeParser + + beforeEach(() => { + jest.clearAllMocks() + parser = new CodeParser() + ;(loadRequiredLanguageParsers as jest.MockedFunction).mockResolvedValue( + mockLanguageParser as any, + ) + }) + + describe("parseFile", () => { + it("should return empty array for unsupported extensions", async () => { + const result = await parser.parseFile("test.unsupported") + expect(result).toEqual([]) + }) + + it("should use provided content instead of reading file when options.content is provided", async () => { + const content = `/* This is a long test content string that exceeds 100 characters to properly test the parser's behavior with large inputs. + It includes multiple lines and various JavaScript constructs to simulate real-world code. + const a = 1; + const b = 2; + function test() { return a + b; } + class Example { constructor() { this.value = 42; } } + // More comments to pad the length to ensure we hit the minimum character requirement */` + const result = await parser.parseFile("test.js", { content }) + expect(mockedFs.readFile).not.toHaveBeenCalled() + expect(result.length).toBeGreaterThan(0) + }) + + it("should read file when no content is provided", async () => { + mockedFs.readFile + .mockResolvedValue(`/* This is a long test content string that exceeds 100 characters to properly test file reading behavior. 
+ It includes multiple lines and various JavaScript constructs to simulate real-world code. + const x = 10; + const y = 20; + function calculate() { return x * y; } + class Calculator { + constructor() { this.history = []; } + add(a, b) { return a + b; } + } + // More comments to pad the length to ensure we hit the minimum character requirement */`) + const result = await parser.parseFile("test.js") + expect(mockedFs.readFile).toHaveBeenCalledWith("test.js", "utf8") + expect(result.length).toBeGreaterThan(0) + }) + + it("should handle file read errors gracefully", async () => { + mockedFs.readFile.mockRejectedValue(new Error("File not found")) + const result = await parser.parseFile("test.js") + expect(result).toEqual([]) + }) + + it("should use provided fileHash when available", async () => { + const content = `/* This is a long test content string that exceeds 100 characters to test fileHash behavior. + It includes multiple lines and various JavaScript constructs to simulate real-world code. 
+ const items = [1, 2, 3]; + const sum = items.reduce((a, b) => a + b, 0); + function processItems(items) { + return items.map(item => item * 2); + } + // More comments to pad the length to ensure we hit the minimum character requirement */` + const fileHash = "test-hash" + const result = await parser.parseFile("test.js", { content, fileHash }) + expect(result[0].fileHash).toBe(fileHash) + }) + }) + + describe("isSupportedLanguage", () => { + it("should return true for supported extensions", () => { + expect(parser["isSupportedLanguage"](".js")).toBe(true) + }) + + it("should return false for unsupported extensions", () => { + expect(parser["isSupportedLanguage"](".unsupported")).toBe(false) + }) + }) + + describe("createFileHash", () => { + it("should generate consistent hashes for same content", () => { + const content = "test content" + const hash1 = parser["createFileHash"](content) + const hash2 = parser["createFileHash"](content) + expect(hash1).toBe(hash2) + expect(hash1).toMatch(/^[a-f0-9]{64}$/) // SHA-256 hex format + }) + + it("should generate different hashes for different content", () => { + const hash1 = parser["createFileHash"]("content1") + const hash2 = parser["createFileHash"]("content2") + expect(hash1).not.toBe(hash2) + }) + }) + + describe("parseContent", () => { + it("should wait for pending parser loads", async () => { + const pendingLoad = new Promise((resolve) => setTimeout(() => resolve(mockLanguageParser), 100)) + parser["pendingLoads"].set(".js", pendingLoad as Promise) + + const result = await parser["parseContent"]("test.js", "const test = 123", "hash") + expect(result).toBeDefined() + }) + + it("should handle parser load errors", async () => { + ;(loadRequiredLanguageParsers as jest.MockedFunction).mockRejectedValue( + new Error("Load failed"), + ) + const result = await parser["parseContent"]("test.js", "const test = 123", "hash") + expect(result).toEqual([]) + }) + + it("should return empty array when no parser is available", async 
() => { + ;(loadRequiredLanguageParsers as jest.MockedFunction).mockResolvedValue( + {} as any, + ) + const result = await parser["parseContent"]("test.js", "const test = 123", "hash") + expect(result).toEqual([]) + }) + }) + + describe("_performFallbackChunking", () => { + it("should chunk content when no captures are found", async () => { + const content = `/* This is a long test content string that exceeds 100 characters to test fallback chunking behavior. + It includes multiple lines and various JavaScript constructs to simulate real-world code. + line1: const a = 1; + line2: const b = 2; + line3: function sum() { return a + b; } + line4: class Adder { constructor(x, y) { this.x = x; this.y = y; } } + line5: const instance = new Adder(1, 2); + line6: console.log(instance.x + instance.y); + line7: // More comments to pad the length to ensure we hit the minimum character requirement */` + const result = await parser["_performFallbackChunking"]("test.js", content, "hash", new Set()) + expect(result.length).toBeGreaterThan(0) + expect(result[0].type).toBe("fallback_chunk") + }) + + it("should respect MIN_BLOCK_CHARS for fallback chunks", async () => { + const shortContent = "short" + const result = await parser["_performFallbackChunking"]("test.js", shortContent, "hash", new Set()) + expect(result).toEqual([]) + }) + }) + + describe("_chunkLeafNodeByLines", () => { + it("should chunk leaf nodes by lines", async () => { + const mockNode = { + text: `/* This is a long test content string that exceeds 100 characters to test line chunking behavior. 
+ line1: const a = 1; + line2: const b = 2; + line3: function sum() { return a + b; } + line4: class Multiplier { constructor(x, y) { this.x = x; this.y = y; } } + line5: const instance = new Multiplier(3, 4); + line6: console.log(instance.x * instance.y); + line7: // More comments to pad the length to ensure we hit the minimum character requirement */`, + startPosition: { row: 10 }, + endPosition: { row: 12 }, + type: "function", + } as unknown as Parser.SyntaxNode + + const result = await parser["_chunkLeafNodeByLines"](mockNode, "test.js", "hash", new Set()) + expect(result.length).toBeGreaterThan(0) + expect(result[0].type).toBe("function") + expect(result[0].start_line).toBe(11) // 1-based + }) + }) + + describe("_chunkTextByLines", () => { + it("should handle oversized lines by splitting them", async () => { + const longLine = "a".repeat(2000) + const lines = ["normal", longLine, "normal"] + const result = await parser["_chunkTextByLines"](lines, "test.js", "hash", "test_type", new Set()) + + const segments = result.filter((r) => r.type === "test_type_segment") + expect(segments.length).toBeGreaterThan(1) + }) + + it("should re-balance chunks when remainder is too small", async () => { + const lines = Array(100) + .fill("line with 10 chars") + .map((_, i) => `${i}: line`) + const result = await parser["_chunkTextByLines"](lines, "test.js", "hash", "test_type", new Set()) + + result.forEach((chunk) => { + expect(chunk.content.length).toBeGreaterThanOrEqual(100) + expect(chunk.content.length).toBeLessThanOrEqual(1150) + }) + }) + }) + + describe("singleton instance", () => { + it("should maintain parser state across calls", async () => { + const result1 = await codeParser.parseFile("test.js", { content: "const a = 1" }) + const result2 = await codeParser.parseFile("test.js", { content: "const b = 2" }) + expect(result1).toBeDefined() + expect(result2).toBeDefined() + }) + }) +}) diff --git a/src/services/code-index/processors/__tests__/scanner.test.ts 
b/src/services/code-index/processors/__tests__/scanner.test.ts new file mode 100644 index 0000000000..c093cfca6a --- /dev/null +++ b/src/services/code-index/processors/__tests__/scanner.test.ts @@ -0,0 +1,146 @@ +// @ts-nocheck +import { DirectoryScanner } from "../scanner" +import { stat } from "fs/promises" +import { IEmbedder, IVectorStore, CodeBlock } from "../../../../core/interfaces" +jest.mock("fs/promises", () => ({ + stat: jest.fn(), +})) + +// Create a simple mock for vscode since we can't access the real one +jest.mock("vscode", () => ({ + workspace: { + workspaceFolders: [ + { + uri: { + fsPath: "/mock/workspace", + }, + }, + ], + getWorkspaceFolder: jest.fn().mockReturnValue({ + uri: { + fsPath: "/mock/workspace", + }, + }), + fs: { + readFile: jest.fn().mockResolvedValue(Buffer.from("test content")), + }, + }, + Uri: { + file: jest.fn().mockImplementation((path) => path), + }, + window: { + activeTextEditor: { + document: { + uri: { + fsPath: "/mock/workspace", + }, + }, + }, + }, +})) + +jest.mock("fs/promises") +jest.mock("../../../glob/list-files") +jest.mock("../../../../core/ignore/RooIgnoreController") + +describe("DirectoryScanner", () => { + let scanner: DirectoryScanner + let mockEmbedder: IEmbedder + let mockVectorStore: IVectorStore + let mockCodeParser: ICodeParser + let mockCacheManager: CacheManager + + beforeEach(() => { + mockEmbedder = { + createEmbeddings: jest.fn().mockResolvedValue({ embeddings: [[0.1, 0.2, 0.3]] }), + embedderInfo: { name: "mock-embedder", dimensions: 384 }, + } + mockVectorStore = { + upsertPoints: jest.fn().mockResolvedValue(undefined), + deletePointsByFilePath: jest.fn().mockResolvedValue(undefined), + deletePointsByMultipleFilePaths: jest.fn().mockResolvedValue(undefined), + initialize: jest.fn().mockResolvedValue(true), + search: jest.fn().mockResolvedValue([]), + clearCollection: jest.fn().mockResolvedValue(undefined), + deleteCollection: jest.fn().mockResolvedValue(undefined), + collectionExists: 
jest.fn().mockResolvedValue(true), + } + mockCodeParser = { + parseFile: jest.fn().mockResolvedValue([]), + } + mockCacheManager = { + getHash: jest.fn().mockReturnValue(undefined), + getAllHashes: jest.fn().mockReturnValue({}), + updateHash: jest.fn().mockResolvedValue(undefined), + deleteHash: jest.fn().mockResolvedValue(undefined), + initialize: jest.fn().mockResolvedValue(undefined), + clearCacheFile: jest.fn().mockResolvedValue(undefined), + } + + scanner = new DirectoryScanner(mockEmbedder, mockVectorStore, mockCodeParser, mockCacheManager) + + // Mock default implementations + ;(stat as unknown as jest.Mock).mockResolvedValue({ size: 1024 }) + require("../../../glob/list-files").listFiles.mockResolvedValue([["test/file1.js", "test/file2.js"], []]) + }) + + describe("scanDirectory", () => { + it("should skip files larger than MAX_FILE_SIZE_BYTES", async () => { + require("../../../glob/list-files").listFiles.mockResolvedValue([["test/file1.js"], []]) + ;(stat as jest.Mock).mockResolvedValueOnce({ size: 2 * 1024 * 1024 }) // 2MB > 1MB limit + + const result = await scanner.scanDirectory("/test") + expect(result.stats.skipped).toBe(1) + expect(mockCodeParser.parseFile).not.toHaveBeenCalled() + }) + + it("should parse changed files and return code blocks", async () => { + require("../../../glob/list-files").listFiles.mockResolvedValue([["test/file1.js"], []]) + const mockBlocks: CodeBlock[] = [ + { + file_path: "test/file1.js", + content: "test content", + start_line: 1, + end_line: 5, + identifier: "test", + type: "function", + fileHash: "hash", + segmentHash: "segment-hash", + }, + ] + ;(mockCodeParser.parseFile as jest.Mock).mockResolvedValue(mockBlocks) + + const result = await scanner.scanDirectory("/test") + expect(result.codeBlocks).toEqual(mockBlocks) + expect(result.stats.processed).toBe(1) + }) + + it("should process embeddings for new/changed files", async () => { + const mockBlocks: CodeBlock[] = [ + { + file_path: "test/file1.js", + content: "test 
content", + start_line: 1, + end_line: 5, + identifier: "test", + type: "function", + fileHash: "hash", + segmentHash: "segment-hash", + }, + ] + ;(mockCodeParser.parseFile as jest.Mock).mockResolvedValue(mockBlocks) + + await scanner.scanDirectory("/test") + expect(mockEmbedder.createEmbeddings).toHaveBeenCalled() + expect(mockVectorStore.upsertPoints).toHaveBeenCalled() + }) + + it("should delete points for removed files", async () => { + ;(mockCacheManager.getAllHashes as jest.Mock).mockReturnValue({ "old/file.js": "old-hash" }) + + await scanner.scanDirectory("/test") + expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalledWith("old/file.js") + expect(mockCacheManager.deleteHash).toHaveBeenCalledWith("old/file.js") + }) + }) +}) From f2757fe18bb1b6f8592b0f8c71e636a67da490fb Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 9 May 2025 18:25:59 -0500 Subject: [PATCH 23/71] feat(parser): improve segment hash uniqueness - Added startCharIndex to segment hash calculation in _chunkTextByLines - Track character position when splitting oversized lines - Ensures unique identification of segments from same line --- src/services/code-index/processors/parser.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/services/code-index/processors/parser.ts b/src/services/code-index/processors/parser.ts index 04bf1cbcb0..5d8f5b5488 100644 --- a/src/services/code-index/processors/parser.ts +++ b/src/services/code-index/processors/parser.ts @@ -249,9 +249,9 @@ export class CodeParser implements ICodeParser { chunkStartLineIndex = endLineIndex + 1 } - const createSegmentBlock = (segment: string, originalLineNumber: number) => { + const createSegmentBlock = (segment: string, originalLineNumber: number, startCharIndex: number) => { const segmentHash = createHash("sha256") - .update(`${filePath}-${originalLineNumber}-${originalLineNumber}-${segment}`) + 
.update(`${filePath}-${originalLineNumber}-${originalLineNumber}-${startCharIndex}-${segment}`) .digest("hex") if (!seenSegmentHashes.has(segmentHash)) { @@ -283,10 +283,12 @@ export class CodeParser implements ICodeParser { // Split the oversized line into segments let remainingLineContent = line + let currentSegmentStartChar = 0 while (remainingLineContent.length > 0) { const segment = remainingLineContent.substring(0, MAX_BLOCK_CHARS) remainingLineContent = remainingLineContent.substring(MAX_BLOCK_CHARS) - createSegmentBlock(segment, originalLineNumber) + createSegmentBlock(segment, originalLineNumber, currentSegmentStartChar) + currentSegmentStartChar += MAX_BLOCK_CHARS } continue } From 28adab558040a145c804ebe58f6a96aa152b2308 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 9 May 2025 23:58:29 -0500 Subject: [PATCH 24/71] feat(file-watcher): add error logging and optional ignoreController injection --- .../processors/__tests__/file-watcher.test.ts | 262 ++++++++++++++++++ .../code-index/processors/file-watcher.ts | 4 +- 2 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 src/services/code-index/processors/__tests__/file-watcher.test.ts diff --git a/src/services/code-index/processors/__tests__/file-watcher.test.ts b/src/services/code-index/processors/__tests__/file-watcher.test.ts new file mode 100644 index 0000000000..14f7461b63 --- /dev/null +++ b/src/services/code-index/processors/__tests__/file-watcher.test.ts @@ -0,0 +1,262 @@ +// @ts-nocheck +import { FileWatcher } from "../file-watcher" +import { IEmbedder, IVectorStore } from "../../../../core/interfaces" +import { createHash } from "crypto" + +jest.mock("vscode", () => ({ + EventEmitter: jest.fn().mockImplementation(() => ({ + event: jest.fn(), + fire: jest.fn(), + dispose: jest.fn(), + })), + RelativePattern: jest.fn().mockImplementation((base, pattern) => ({ + base, + pattern, + })), + Uri: { + file: jest.fn().mockImplementation((path) => ({ fsPath: path })), + }, + window: { 
+ activeTextEditor: undefined, + }, + workspace: { + createFileSystemWatcher: jest.fn().mockReturnValue({ + onDidCreate: jest.fn(), + onDidChange: jest.fn(), + onDidDelete: jest.fn(), + dispose: jest.fn(), + }), + fs: { + stat: jest.fn(), + readFile: jest.fn(), + }, + workspaceFolders: [{ uri: { fsPath: "/mock/workspace" } }], + getWorkspaceFolder: jest.fn((uri) => { + if (uri && uri.fsPath && uri.fsPath.startsWith("/mock/workspace")) { + return { uri: { fsPath: "/mock/workspace" } } + } + return undefined + }), + }, +})) + +const vscode = require("vscode") +jest.mock("crypto") +jest.mock("uuid", () => ({ + ...jest.requireActual("uuid"), + v5: jest.fn().mockReturnValue("mocked-uuid-v5-for-testing"), +})) +jest.mock("../../../../core/ignore/RooIgnoreController", () => ({ + RooIgnoreController: jest.fn().mockImplementation(() => ({ + validateAccess: jest.fn(), + })), + mockValidateAccess: jest.fn(), +})) +jest.mock("../../cache-manager") +jest.mock("../parser", () => ({ codeParser: { parseFile: jest.fn() } })) + +describe("FileWatcher", () => { + let fileWatcher: FileWatcher + let mockEmbedder: IEmbedder + let mockVectorStore: IVectorStore + let mockCacheManager: any + let mockContext: any + let mockRooIgnoreController: any + + beforeEach(() => { + mockEmbedder = { + createEmbeddings: jest.fn().mockResolvedValue({ embeddings: [[0.1, 0.2, 0.3]] }), + embedderInfo: { name: "mock-embedder", dimensions: 384 }, + } + mockVectorStore = { + upsertPoints: jest.fn().mockResolvedValue(undefined), + deletePointsByFilePath: jest.fn().mockResolvedValue(undefined), + } + mockCacheManager = { + getHash: jest.fn(), + updateHash: jest.fn(), + deleteHash: jest.fn(), + } + mockContext = { + subscriptions: [], + } + + const { RooIgnoreController, mockValidateAccess } = require("../../../../core/ignore/RooIgnoreController") + mockRooIgnoreController = new RooIgnoreController() + mockRooIgnoreController.validateAccess = mockValidateAccess.mockReturnValue(true) + + fileWatcher = new 
FileWatcher( + "/mock/workspace", + mockContext, + mockCacheManager, + mockEmbedder, + mockVectorStore, + mockRooIgnoreController, + ) + }) + + describe("constructor", () => { + it("should initialize with correct properties", () => { + expect(fileWatcher).toBeDefined() + // Push mock event emitters to subscriptions array + mockContext.subscriptions.push({ dispose: jest.fn() }, { dispose: jest.fn() }) + expect(mockContext.subscriptions).toHaveLength(2) // onDidStartProcessing and onDidFinishProcessing + }) + }) + + describe("initialize", () => { + it("should create file watcher with correct pattern", async () => { + await fileWatcher.initialize() + expect(vscode.workspace.createFileSystemWatcher).toHaveBeenCalled() + expect(vscode.workspace.createFileSystemWatcher.mock.calls[0][0].pattern).toMatch( + /\{tla,js,jsx,ts,vue,tsx,py,rs,go,c,h,cpp,hpp,cs,rb,java,php,swift,sol,kt,kts,ex,exs,el,html,htm,json,css,rdl,ml,mli,lua,scala,toml,zig,elm,ejs,erb\}/, + ) + }) + + it("should register event handlers", async () => { + await fileWatcher.initialize() + const watcher = vscode.workspace.createFileSystemWatcher.mock.results[0].value + expect(watcher.onDidCreate).toHaveBeenCalled() + expect(watcher.onDidChange).toHaveBeenCalled() + expect(watcher.onDidDelete).toHaveBeenCalled() + }) + }) + + describe("dispose", () => { + it("should dispose all resources", async () => { + await fileWatcher.initialize() // Initialize first to create watcher + fileWatcher.dispose() + const watcher = vscode.workspace.createFileSystemWatcher.mock.results[0].value + expect(watcher.dispose).toHaveBeenCalled() + }) + }) + + describe("handleFileCreated", () => { + it("should call processFile with correct path", async () => { + const mockUri = { fsPath: "/mock/workspace/test.js" } + const processFileSpy = jest.spyOn(fileWatcher, "processFile").mockResolvedValue({ status: "success" }) + + await fileWatcher.handleFileCreated(mockUri) + expect(processFileSpy).toHaveBeenCalledWith(mockUri.fsPath) + }) + }) 
+ + describe("handleFileChanged", () => { + it("should call processFile with correct path", async () => { + const mockUri = { fsPath: "/mock/workspace/test.js" } + const processFileSpy = jest.spyOn(fileWatcher, "processFile").mockResolvedValue({ status: "success" }) + + await fileWatcher.handleFileChanged(mockUri) + expect(processFileSpy).toHaveBeenCalledWith(mockUri.fsPath) + }) + }) + + describe("handleFileDeleted", () => { + it("should delete from cache and vector store", async () => { + const mockUri = { fsPath: "/mock/workspace/test.js" } + + await fileWatcher.handleFileDeleted(mockUri) + expect(mockCacheManager.deleteHash).toHaveBeenCalledWith(mockUri.fsPath) + expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalledWith(mockUri.fsPath) + }) + }) + + describe("processFile", () => { + it("should skip ignored files", async () => { + mockRooIgnoreController.validateAccess.mockImplementation((path) => { + if (path === "/mock/workspace/ignored.js") return false + return true + }) + const filePath = "/mock/workspace/ignored.js" + vscode.Uri.file.mockImplementation((path) => ({ fsPath: path })) + const result = await fileWatcher.processFile(filePath) + + expect(result.status).toBe("skipped") + expect(result.reason).toBe("File is ignored by .rooignore") + expect(mockCacheManager.updateHash).not.toHaveBeenCalled() + expect(vscode.workspace.fs.stat).not.toHaveBeenCalled() + expect(vscode.workspace.fs.readFile).not.toHaveBeenCalled() + expect(mockCacheManager.updateHash).not.toHaveBeenCalled() + expect(vscode.workspace.fs.stat).not.toHaveBeenCalled() + expect(vscode.workspace.fs.readFile).not.toHaveBeenCalled() + }) + + it("should skip files larger than MAX_FILE_SIZE_BYTES", async () => { + vscode.workspace.fs.stat.mockResolvedValue({ size: 2 * 1024 * 1024 }) // 2MB > 1MB limit + vscode.workspace.fs.readFile.mockResolvedValue(Buffer.from("large file content")) + mockRooIgnoreController.validateAccess.mockReturnValue(true) // Ensure file isn't ignored + const 
result = await fileWatcher.processFile("/mock/workspace/large.js") + expect(vscode.Uri.file).toHaveBeenCalledWith("/mock/workspace/large.js") + + expect(result.status).toBe("skipped") + expect(result.reason).toBe("File is too large") + expect(mockCacheManager.updateHash).not.toHaveBeenCalled() + }) + + it("should skip unchanged files", async () => { + vscode.workspace.fs.stat.mockResolvedValue({ size: 1024, mtime: Date.now() }) + vscode.workspace.fs.readFile.mockResolvedValue(Buffer.from("test content")) + mockCacheManager.getHash.mockReturnValue("hash") + mockRooIgnoreController.validateAccess.mockReturnValue(true) // Ensure file isn't ignored + ;(createHash as jest.Mock).mockReturnValue({ + update: jest.fn().mockReturnThis(), + digest: jest.fn().mockReturnValue("hash"), + }) + + const result = await fileWatcher.processFile("/mock/workspace/unchanged.js") + + expect(result.status).toBe("skipped") + expect(result.reason).toBe("File has not changed") + expect(mockCacheManager.updateHash).not.toHaveBeenCalled() + }) + + it("should process changed files", async () => { + vscode.Uri.file.mockImplementation((path) => ({ fsPath: path })) + vscode.workspace.fs.stat.mockResolvedValue({ size: 1024, mtime: Date.now() }) + vscode.workspace.fs.readFile.mockResolvedValue(Buffer.from("test content")) + mockCacheManager.getHash.mockReturnValue("old-hash") + mockRooIgnoreController.validateAccess.mockReturnValue(true) + ;(createHash as jest.Mock).mockReturnValue({ + update: jest.fn().mockReturnThis(), + digest: jest.fn().mockReturnValue("new-hash"), + }) + + const { codeParser: mockCodeParser } = require("../parser") + mockCodeParser.parseFile.mockResolvedValue([ + { + file_path: "/mock/workspace/test.js", + content: "test content", + start_line: 1, + end_line: 5, + identifier: "test", + type: "function", + fileHash: "new-hash", + segmentHash: "segment-hash", + }, + ]) + + mockEmbedder.createEmbeddings.mockResolvedValue({ + embeddings: [[0.1, 0.2, 0.3]], + }) + + const result = 
await fileWatcher.processFile("/mock/workspace/test.js") + + expect(result.status).toBe("success") + expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalled() + expect(mockCodeParser.parseFile).toHaveBeenCalled() + expect(mockEmbedder.createEmbeddings).toHaveBeenCalled() + expect(mockVectorStore.upsertPoints).toHaveBeenCalled() + expect(mockCacheManager.updateHash).toHaveBeenCalledWith("/mock/workspace/test.js", "new-hash") + }) + + it("should handle processing errors", async () => { + vscode.workspace.fs.stat.mockResolvedValue({ size: 1024 }) + vscode.workspace.fs.readFile.mockRejectedValue(new Error("Read error")) + + const result = await fileWatcher.processFile("/mock/workspace/error.js") + + expect(result.status).toBe("error") + expect(result.error).toBeDefined() + }) + }) +}) diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index 02ba8d00f6..7141f166f9 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -45,8 +45,9 @@ export class FileWatcher implements IFileWatcher { private readonly cacheManager: CacheManager, private embedder?: IEmbedder, private vectorStore?: IVectorStore, + ignoreController?: RooIgnoreController, ) { - this.ignoreController = new RooIgnoreController(workspacePath) + this.ignoreController = ignoreController || new RooIgnoreController(workspacePath) } /** @@ -212,6 +213,7 @@ export class FileWatcher implements IFileWatcher { this._onDidFinishProcessing.fire(result) return result } catch (error) { + console.error("[FileWatcher] processFile error in test:", error) const result = { path: filePath, status: "error" as const, From bdf7309bf7e1b6a2a3914c2551e463ae770722af Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Sat, 10 May 2025 18:45:12 -0500 Subject: [PATCH 25/71] fix: allow getting the state if the service is disabled --- src/services/code-index/manager.ts | 93 
+++++++++++++++++------------- 1 file changed, 52 insertions(+), 41 deletions(-) diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index 8fb4247c0c..47b1c3a718 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -66,17 +66,19 @@ export class CodeIndexManager { } public get state(): IndexingState { + if (!this.isFeatureEnabled) { + return "Standby" + } this.assertInitialized() return this._orchestrator!.state } public get isFeatureEnabled(): boolean { - this.assertInitialized() - return this._configManager!.isFeatureEnabled + return this._configManager?.isFeatureEnabled ?? false } public get isFeatureConfigured(): boolean { - return this._configManager!.isFeatureConfigured + return this._configManager?.isFeatureConfigured ?? false } /** @@ -86,18 +88,25 @@ export class CodeIndexManager { */ public async initialize(contextProxy: ContextProxy): Promise<{ requiresRestart: boolean }> { // 1. ConfigManager Initialization and Configuration Loading - if (!this._configManager) { - this._configManager = new CodeIndexConfigManager(contextProxy) - } + this._configManager = new CodeIndexConfigManager(contextProxy) const { requiresRestart, requiresClear } = await this._configManager.loadConfiguration() - // 2. CacheManager Initialization + // 2. Check if feature is enabled + if (!this.isFeatureEnabled) { + console.log("[CodeIndexManager] Feature disabled - skipping service initialization") + if (this._orchestrator) { + this._orchestrator.stopWatcher() + } + return { requiresRestart } + } + + // 3. CacheManager Initialization if (!this._cacheManager) { this._cacheManager = new CacheManager(this.context, this.workspacePath) await this._cacheManager.initialize() } - // 3. Determine if Core Services Need Recreation + // 4. Determine if Core Services Need Recreation const needsServiceRecreation = !this._serviceFactory || requiresRestart console.log( `[CodeIndexManager] ${needsServiceRecreation ? 
"Initial setup or restart required" : "Configuration loaded, no full re-initialization needed"}`, @@ -153,43 +162,28 @@ export class CodeIndexManager { console.log("[CodeIndexManager] Core services (re)initialized") } - // 4. Handle Data Clearing + // 5. Handle Data Clearing if (requiresClear) { console.log("[CodeIndexManager] Configuration requires clearing data") - await this.clearIndexData() - } - - // 5. Handle Indexing Start/Restart - if (this._configManager.isFeatureEnabled && this._configManager.isFeatureConfigured) { - const shouldStartOrRestartIndexing = - requiresRestart || - requiresClear || - (needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing")) - - if (shouldStartOrRestartIndexing) { - console.log("[CodeIndexManager] Starting/restarting indexing due to configuration changes") - this.startIndexing() - } else { - console.log( - "[CodeIndexManager] Indexing not started/restarted (requiresRestart:", - requiresRestart, - "requiresClear:", - requiresClear, - "currentState:", - this._orchestrator?.state, - "needsServiceRecreation:", - needsServiceRecreation, - ")", - ) + if (this._orchestrator) { + await this._orchestrator.clearIndexData() } - } else { - console.log("[CodeIndexManager] Feature not enabled or not configured") - if (this._orchestrator && this._orchestrator.state !== "Standby") { - this.stopWatcher() - console.log("[CodeIndexManager] Stopped watcher as feature is disabled") + if (this._cacheManager) { + await this._cacheManager.clearCacheFile() } } + // Handle Indexing Start/Restart + const shouldStartOrRestartIndexing = + requiresRestart || + requiresClear || + (needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing")) + + if (shouldStartOrRestartIndexing) { + console.log("[CodeIndexManager] Starting/restarting indexing due to configuration changes") + await this._orchestrator?.startIndexing() + } + return { requiresRestart } } @@ -198,6 +192,10 @@ export class 
CodeIndexManager { */ public async startIndexing(): Promise { + if (!this.isFeatureEnabled) { + console.log("[CodeIndexManager] Feature disabled - skipping startIndexing") + return + } this.assertInitialized() await this._orchestrator!.startIndexing() } @@ -206,8 +204,13 @@ export class CodeIndexManager { * Stops the file watcher and potentially cleans up resources. */ public stopWatcher(): void { - this.assertInitialized() - this._orchestrator!.stopWatcher() + if (!this.isFeatureEnabled) { + console.log("[CodeIndexManager] Feature disabled - skipping stopWatcher") + return + } + if (this._orchestrator) { + this._orchestrator.stopWatcher() + } } /** @@ -226,6 +229,10 @@ export class CodeIndexManager { * and deleting the cache file. */ public async clearIndexData(): Promise { + if (!this.isFeatureEnabled) { + console.log("[CodeIndexManager] Feature disabled - skipping clearIndexData") + return + } this.assertInitialized() await this._orchestrator!.clearIndexData() await this._cacheManager!.clearCacheFile() @@ -246,6 +253,10 @@ export class CodeIndexManager { limit: number, directoryPrefix?: string, ): Promise { + if (!this.isFeatureEnabled) { + console.log("[CodeIndexManager] Feature disabled - returning empty search results") + return [] + } this.assertInitialized() return this._searchService!.searchIndex(query, limit, directoryPrefix) } From c49bd954783950f06a06be6fd78942cb174b0955 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 13 May 2025 19:09:02 -0500 Subject: [PATCH 26/71] fix: set the embedding models when cline provider is initialized --- src/core/webview/ClineProvider.ts | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index ab4fcde347..8d230e7e5b 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -76,7 +76,7 @@ export class ClineProvider extends EventEmitter implements private disposables: vscode.Disposable[] 
= [] private view?: vscode.WebviewView | vscode.WebviewPanel private clineStack: Task[] = [] - private codeIndexStatusSubscription?: vscode.Disposable + private codeIndexStatusSubscription?: vscode.Disposable private _workspaceTracker?: WorkspaceTracker // workSpaceTracker read-only for access outside this class public get workspaceTracker(): WorkspaceTracker | undefined { return this._workspaceTracker @@ -102,6 +102,7 @@ export class ClineProvider extends EventEmitter implements ClineProvider.activeInstances.add(this) this.codeIndexManager = codeIndexManager + this.updateGlobalState("codebaseIndexModels", EMBEDDING_MODEL_PROFILES) // Start configuration loading (which might trigger indexing) in the background. // Don't await, allowing activation to continue immediately. @@ -330,16 +331,6 @@ export class ClineProvider extends EventEmitter implements async resolveWebviewView(webviewView: vscode.WebviewView | vscode.WebviewPanel) { this.log("Resolving webview view") - if ( - this.codeIndexManager && - this.codeIndexManager.isFeatureEnabled && - this.codeIndexManager.isFeatureConfigured - ) { - this.updateGlobalState("codebaseIndexModels", EMBEDDING_MODEL_PROFILES) - - this.outputChannel.appendLine("CodeIndexManager configuration loaded") - } - this.view = webviewView // Set panel reference according to webview type From d9188176f95276488b3167f47a7b87f717b07f7e Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 13 May 2025 19:33:14 -0500 Subject: [PATCH 27/71] feat: use zod to validate form --- .../components/settings/CodeIndexSettings.tsx | 65 ++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/webview-ui/src/components/settings/CodeIndexSettings.tsx b/webview-ui/src/components/settings/CodeIndexSettings.tsx index 0a4f5f1521..01f6df204a 100644 --- a/webview-ui/src/components/settings/CodeIndexSettings.tsx +++ b/webview-ui/src/components/settings/CodeIndexSettings.tsx @@ -22,6 +22,8 @@ import { ExtensionStateContextType } from 
"@/context/ExtensionStateContext" import { ApiConfiguration } from "../../../../src/shared/api" import { CodebaseIndexConfig, CodebaseIndexModels } from "../../../../src/schemas" import { EmbedderProvider } from "../../../../src/shared/embeddingModels" +import { z } from "zod" + interface CodeIndexSettingsProps { codebaseIndexModels: CodebaseIndexModels | undefined codebaseIndexConfig: CodebaseIndexConfig | undefined @@ -85,6 +87,40 @@ export const CodeIndexSettings: React.FC = ({ window.removeEventListener("message", handleMessage) } }, [codebaseIndexConfig, codebaseIndexModels]) + + function validateIndexingConfig(config: CodebaseIndexConfig | undefined, apiConfig: ApiConfiguration): boolean { + if (!config) return false + + const baseSchema = z.object({ + codebaseIndexQdrantUrl: z.string().min(1, "Qdrant URL is required"), + codebaseIndexEmbedderModelId: z.string().min(1, "Model ID is required"), + }) + + const providerSchemas = { + openai: baseSchema.extend({ + codebaseIndexEmbedderProvider: z.literal("openai"), + codeIndexOpenAiKey: z.string().min(1, "OpenAI key is required"), + }), + ollama: baseSchema.extend({ + codebaseIndexEmbedderProvider: z.literal("ollama"), + codebaseIndexEmbedderBaseUrl: z.string().min(1, "Ollama URL is required"), + }), + } + + try { + const schema = + config.codebaseIndexEmbedderProvider === "openai" ? providerSchemas.openai : providerSchemas.ollama + + schema.parse({ + ...config, + codeIndexOpenAiKey: apiConfig.codeIndexOpenAiKey, + }) + return true + } catch { + return false + } + } + return ( <> @@ -163,9 +199,8 @@ export const CodeIndexSettings: React.FC = ({ - setApiConfigurationField("codeIndexOpenAiKey", e.target.value) - }> + onInput={(e: any) => setApiConfigurationField("codeIndexOpenAiKey", e.target.value)} + style={{ width: "100%" }}> OpenAI Key:
@@ -181,7 +216,8 @@ export const CodeIndexSettings: React.FC = ({ ...codebaseIndexConfig, codebaseIndexEmbedderBaseUrl: e.target.value, }) - }> + } + style={{ width: "100%" }}> Ollama URL:
@@ -196,7 +232,8 @@ export const CodeIndexSettings: React.FC = ({ ...codebaseIndexConfig, codebaseIndexQdrantUrl: e.target.value, }) - }> + } + style={{ width: "100%" }}> Qdrant URL
@@ -205,7 +242,8 @@ export const CodeIndexSettings: React.FC = ({ setApiConfigurationField("codeIndexQdrantApiKey", e.target.value)}> + onInput={(e: any) => setApiConfigurationField("codeIndexQdrantApiKey", e.target.value)} + style={{ width: "100%" }}> Qdrant Key: @@ -270,15 +308,10 @@ export const CodeIndexSettings: React.FC = ({
vscode.postMessage({ type: "startIndexing" })} // Added onClick + onClick={() => vscode.postMessage({ type: "startIndexing" })} disabled={ - (codebaseIndexConfig?.codebaseIndexEmbedderProvider === "openai" && - !apiConfiguration.codeIndexOpenAiKey) || - (codebaseIndexConfig?.codebaseIndexEmbedderProvider === "ollama" && - (!codebaseIndexConfig.codebaseIndexEmbedderBaseUrl || - !codebaseIndexConfig.codebaseIndexEmbedderModelId)) || - !codebaseIndexConfig.codebaseIndexQdrantUrl || - indexingStatus.systemStatus === "Indexing" + indexingStatus.systemStatus === "Indexing" || + !validateIndexingConfig(codebaseIndexConfig, apiConfiguration) }> Start Indexing @@ -297,9 +330,7 @@ export const CodeIndexSettings: React.FC = ({ Cancel vscode.postMessage({ type: "clearIndexData" })} // Added onClick - > + onClick={() => vscode.postMessage({ type: "clearIndexData" })}> Clear Data From 8fb9766882eb7f562b1c607108a82fa7b67ef5c3 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 14 May 2025 01:18:03 -0500 Subject: [PATCH 28/71] feat(file-watcher): enhance file watcher for batched deletions and improved vector store interactions Improve file watcher to handle file deletions in batches and optimize vector store operations. 
--- .../processors/__tests__/file-watcher.test.ts | 16 ++- .../code-index/processors/file-watcher.ts | 123 ++++++++++++++++-- 2 files changed, 129 insertions(+), 10 deletions(-) diff --git a/src/services/code-index/processors/__tests__/file-watcher.test.ts b/src/services/code-index/processors/__tests__/file-watcher.test.ts index 14f7461b63..0ee4d2b5a3 100644 --- a/src/services/code-index/processors/__tests__/file-watcher.test.ts +++ b/src/services/code-index/processors/__tests__/file-watcher.test.ts @@ -71,6 +71,7 @@ describe("FileWatcher", () => { mockVectorStore = { upsertPoints: jest.fn().mockResolvedValue(undefined), deletePointsByFilePath: jest.fn().mockResolvedValue(undefined), + deletePointsByMultipleFilePaths: jest.fn().mockResolvedValue(undefined), } mockCacheManager = { getHash: jest.fn(), @@ -152,12 +153,25 @@ describe("FileWatcher", () => { }) describe("handleFileDeleted", () => { + beforeEach(() => { + jest.useFakeTimers() + }) + + afterEach(() => { + jest.useRealTimers() + }) + it("should delete from cache and vector store", async () => { const mockUri = { fsPath: "/mock/workspace/test.js" } await fileWatcher.handleFileDeleted(mockUri) expect(mockCacheManager.deleteHash).toHaveBeenCalledWith(mockUri.fsPath) - expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalledWith(mockUri.fsPath) + + // Advance timers to trigger the batched deletion + await jest.advanceTimersByTime(500) + + // Verify the batched deletion call + expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalledWith([mockUri.fsPath]) }) }) diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index 7141f166f9..89a9ed5553 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -17,6 +17,11 @@ const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB export class FileWatcher implements IFileWatcher { private fileWatcher?: vscode.FileSystemWatcher 
private ignoreController: RooIgnoreController + private eventQueue: { uri: vscode.Uri; type: "create" | "change" | "delete" }[] = [] + private processingMap: Map> = new Map() + private isProcessing = false + private deletedFilesBuffer: string[] = [] + private deleteTimer: NodeJS.Timeout | undefined private readonly _onDidStartProcessing = new vscode.EventEmitter() private readonly _onDidFinishProcessing = new vscode.EventEmitter() @@ -74,6 +79,9 @@ export class FileWatcher implements IFileWatcher { this.fileWatcher?.dispose() this._onDidStartProcessing.dispose() this._onDidFinishProcessing.dispose() + this.processingMap.clear() + this.eventQueue = [] + clearTimeout(this.deleteTimer) } /** @@ -81,7 +89,8 @@ export class FileWatcher implements IFileWatcher { * @param uri URI of the created file */ private async handleFileCreated(uri: vscode.Uri): Promise { - await this.processFile(uri.fsPath) + this.eventQueue.push({ uri, type: "create" }) + this.startProcessing() } /** @@ -89,7 +98,8 @@ export class FileWatcher implements IFileWatcher { * @param uri URI of the changed file */ private async handleFileChanged(uri: vscode.Uri): Promise { - await this.processFile(uri.fsPath) + this.eventQueue.push({ uri, type: "change" }) + this.startProcessing() } /** @@ -97,18 +107,114 @@ export class FileWatcher implements IFileWatcher { * @param uri URI of the deleted file */ private async handleFileDeleted(uri: vscode.Uri): Promise { - const filePath = uri.fsPath + this.eventQueue.push({ uri, type: "delete" }) + this.startProcessing() + } + + /** + * Starts the processing loop if not already running + */ + private startProcessing(): void { + if (!this.isProcessing) { + this.isProcessing = true + this.processQueue() + } + } + + /** + * Processes events from the queue + */ + private async processQueue(): Promise { + try { + while (this.eventQueue.length > 0) { + const event = this.eventQueue.shift()! 
+ const filePath = event.uri.fsPath + + // Ensure sequential processing for the same file path + const existingPromise = this.processingMap.get(filePath) + const newPromise = (existingPromise || Promise.resolve()) + .then(() => this.processEvent(event)) + .finally(() => this.processingMap.delete(filePath)) + this.processingMap.set(filePath, newPromise) + await newPromise + } + } finally { + this.isProcessing = false + } + } + + /** + * Processes a single file system event + * @param event The file system event to process + */ + private async processEvent(event: { uri: vscode.Uri; type: "create" | "change" | "delete" }): Promise { + const filePath = event.uri.fsPath + + // For delete operations, process immediately + if (event.type === "delete") { + await this.processFileDeletion(filePath) + return + } + + // For create/change operations, check if the file is in the deletion buffer + const bufferIndex = this.deletedFilesBuffer.indexOf(filePath) + if (bufferIndex !== -1) { + // Remove from buffer and delete immediately before processing the new version + this.deletedFilesBuffer.splice(bufferIndex, 1) + if (this.vectorStore) { + await this.vectorStore.deletePointsByFilePath(filePath) + } + } + + // Also check if there's a pending delete in the queue + const hasPendingDelete = this.eventQueue.some((e) => e.type === "delete" && e.uri.fsPath === filePath) + + if (hasPendingDelete) { + // Wait for delete to be processed first + return + } + + await this.processFile(filePath) + } + + /** + * Processes a file deletion + * @param filePath Path of the file to delete + */ + private async processFileDeletion(filePath: string): Promise { // Delete from cache this.cacheManager.deleteHash(filePath) - // Delete from vector store - if (this.vectorStore) { + // Add to deletion buffer instead of deleting immediately + this.deletedFilesBuffer.push(filePath) + + // Clear any existing timer + if (this.deleteTimer) { + clearTimeout(this.deleteTimer) + } + + // Set a new timer to flush the 
buffer after a delay + this.deleteTimer = setTimeout(() => { + this.flushDeletedFiles() + }, 500) + } + + /** + * Processes the batch deletion of files from the buffer + */ + private async flushDeletedFiles(): Promise { + if (this.deletedFilesBuffer.length > 0 && this.vectorStore) { + const filesToDelete = [...this.deletedFilesBuffer] + try { - await this.vectorStore.deletePointsByFilePath(filePath) - console.log(`[FileWatcher] Deleted points for removed file: ${filePath}`) + await this.vectorStore.deletePointsByMultipleFilePaths(filesToDelete) + console.log(`[FileWatcher] Batch deleted points for ${filesToDelete.length} files`) } catch (error) { - console.error(`[FileWatcher] Failed to delete points for ${filePath}:`, error) + console.error(`[FileWatcher] Failed to batch delete points:`, error) + } finally { + // Clear the buffer + this.deletedFilesBuffer = [] } } } @@ -213,7 +319,6 @@ export class FileWatcher implements IFileWatcher { this._onDidFinishProcessing.fire(result) return result } catch (error) { - console.error("[FileWatcher] processFile error in test:", error) const result = { path: filePath, status: "error" as const, From 57413380277a533d718a6f865997e0e2912dbdad Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 14 May 2025 15:25:15 -0500 Subject: [PATCH 29/71] feat(CodeIndexSettings): move OpenAI key input to a conditional rendering block --- .../components/settings/CodeIndexSettings.tsx | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/webview-ui/src/components/settings/CodeIndexSettings.tsx b/webview-ui/src/components/settings/CodeIndexSettings.tsx index 01f6df204a..3ea6c82c17 100644 --- a/webview-ui/src/components/settings/CodeIndexSettings.tsx +++ b/webview-ui/src/components/settings/CodeIndexSettings.tsx @@ -172,6 +172,18 @@ export const CodeIndexSettings: React.FC = ({
Model:
+ {codebaseIndexConfig?.codebaseIndexEmbedderProvider === "openai" && ( +
+ setApiConfigurationField("codeIndexOpenAiKey", e.target.value)} + style={{ width: "100%" }}> + OpenAI Key: + +
+ )} +
- {codebaseIndexConfig?.codebaseIndexEmbedderProvider === "openai" && ( -
- setApiConfigurationField("codeIndexOpenAiKey", e.target.value)} - style={{ width: "100%" }}> - OpenAI Key: - -
- )} - {codebaseIndexConfig?.codebaseIndexEmbedderProvider === "ollama" && ( <>
From b180ae81d3b750062eb0b2c0b422ab9968ee4442 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 14 May 2025 15:31:00 -0500 Subject: [PATCH 30/71] feat(CodeIndexSettings): update button visibility based on indexing status --- .../components/settings/CodeIndexSettings.tsx | 61 ++++++++++--------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/webview-ui/src/components/settings/CodeIndexSettings.tsx b/webview-ui/src/components/settings/CodeIndexSettings.tsx index 3ea6c82c17..11ae1ac32f 100644 --- a/webview-ui/src/components/settings/CodeIndexSettings.tsx +++ b/webview-ui/src/components/settings/CodeIndexSettings.tsx @@ -226,7 +226,7 @@ export const CodeIndexSettings: React.FC = ({
setCachedStateField("codebaseIndexConfig", { ...codebaseIndexConfig, @@ -307,35 +307,36 @@ export const CodeIndexSettings: React.FC = ({ )}
- vscode.postMessage({ type: "startIndexing" })} - disabled={ - indexingStatus.systemStatus === "Indexing" || - !validateIndexingConfig(codebaseIndexConfig, apiConfiguration) - }> - Start Indexing - - - - Clear Index Data - - - - Are you sure? - - This action cannot be undone. This will permanently delete your codebase - index data. - - - - Cancel - vscode.postMessage({ type: "clearIndexData" })}> - Clear Data - - - - + {(indexingStatus.systemStatus === "Error" || indexingStatus.systemStatus === "Standby") && ( + vscode.postMessage({ type: "startIndexing" })} + disabled={!validateIndexingConfig(codebaseIndexConfig, apiConfiguration)}> + Start Indexing + + )} + {(indexingStatus.systemStatus === "Indexed" || indexingStatus.systemStatus === "Error") && ( + + + Clear Index Data + + + + Are you sure? + + This action cannot be undone. This will permanently delete your codebase + index data. + + + + Cancel + vscode.postMessage({ type: "clearIndexData" })}> + Clear Data + + + + + )}
)} From 7cbde5f3d8c13419f70f6bf292c6bfaf2b91ef60 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 14 May 2025 19:53:15 -0500 Subject: [PATCH 31/71] feat(file-watcher): refactor vscode mock and enhance file watcher tests --- .../processors/__tests__/file-watcher.test.ts | 256 ++++++++++++++---- 1 file changed, 200 insertions(+), 56 deletions(-) diff --git a/src/services/code-index/processors/__tests__/file-watcher.test.ts b/src/services/code-index/processors/__tests__/file-watcher.test.ts index 0ee4d2b5a3..19c82c79f5 100644 --- a/src/services/code-index/processors/__tests__/file-watcher.test.ts +++ b/src/services/code-index/processors/__tests__/file-watcher.test.ts @@ -1,44 +1,75 @@ -// @ts-nocheck +import { IEmbedder } from "../../interfaces/embedder" +import { IVectorStore } from "../../interfaces/vector-store" +import { FileProcessingResult } from "../../interfaces/file-processor" import { FileWatcher } from "../file-watcher" -import { IEmbedder, IVectorStore } from "../../../../core/interfaces" + import { createHash } from "crypto" -jest.mock("vscode", () => ({ - EventEmitter: jest.fn().mockImplementation(() => ({ - event: jest.fn(), - fire: jest.fn(), - dispose: jest.fn(), - })), - RelativePattern: jest.fn().mockImplementation((base, pattern) => ({ - base, - pattern, - })), - Uri: { - file: jest.fn().mockImplementation((path) => ({ fsPath: path })), - }, - window: { - activeTextEditor: undefined, - }, - workspace: { - createFileSystemWatcher: jest.fn().mockReturnValue({ - onDidCreate: jest.fn(), - onDidChange: jest.fn(), - onDidDelete: jest.fn(), - dispose: jest.fn(), - }), - fs: { - stat: jest.fn(), - readFile: jest.fn(), - }, - workspaceFolders: [{ uri: { fsPath: "/mock/workspace" } }], - getWorkspaceFolder: jest.fn((uri) => { - if (uri && uri.fsPath && uri.fsPath.startsWith("/mock/workspace")) { - return { uri: { fsPath: "/mock/workspace" } } +jest.mock("vscode", () => { + type Disposable = { dispose: () => void } + + type _Event = (listener: (e: T) 
=> any, thisArgs?: any, disposables?: Disposable[]) => Disposable + + const MOCK_EMITTER_REGISTRY = new Map any>>() + + return { + EventEmitter: jest.fn().mockImplementation(() => { + const emitterInstanceKey = {} + MOCK_EMITTER_REGISTRY.set(emitterInstanceKey, new Set()) + + return { + event: function (listener: (e: T) => any): Disposable { + const listeners = MOCK_EMITTER_REGISTRY.get(emitterInstanceKey) + listeners!.add(listener as any) + return { + dispose: () => { + listeners!.delete(listener as any) + }, + } + }, + + fire: function (data: T): void { + const listeners = MOCK_EMITTER_REGISTRY.get(emitterInstanceKey) + listeners!.forEach((fn) => fn(data)) + }, + + dispose: () => { + MOCK_EMITTER_REGISTRY.get(emitterInstanceKey)!.clear() + MOCK_EMITTER_REGISTRY.delete(emitterInstanceKey) + }, } - return undefined }), - }, -})) + RelativePattern: jest.fn().mockImplementation((base, pattern) => ({ + base, + pattern, + })), + Uri: { + file: jest.fn().mockImplementation((path) => ({ fsPath: path })), + }, + window: { + activeTextEditor: undefined, + }, + workspace: { + createFileSystemWatcher: jest.fn().mockReturnValue({ + onDidCreate: jest.fn(), + onDidChange: jest.fn(), + onDidDelete: jest.fn(), + dispose: jest.fn(), + }), + fs: { + stat: jest.fn(), + readFile: jest.fn(), + }, + workspaceFolders: [{ uri: { fsPath: "/mock/workspace" } }], + getWorkspaceFolder: jest.fn((uri) => { + if (uri && uri.fsPath && uri.fsPath.startsWith("/mock/workspace")) { + return { uri: { fsPath: "/mock/workspace" } } + } + return undefined + }), + }, + } +}) const vscode = require("vscode") jest.mock("crypto") @@ -66,12 +97,17 @@ describe("FileWatcher", () => { beforeEach(() => { mockEmbedder = { createEmbeddings: jest.fn().mockResolvedValue({ embeddings: [[0.1, 0.2, 0.3]] }), - embedderInfo: { name: "mock-embedder", dimensions: 384 }, + embedderInfo: { name: "openai" }, } mockVectorStore = { upsertPoints: jest.fn().mockResolvedValue(undefined), deletePointsByFilePath: 
jest.fn().mockResolvedValue(undefined), deletePointsByMultipleFilePaths: jest.fn().mockResolvedValue(undefined), + initialize: jest.fn().mockResolvedValue(true), + search: jest.fn().mockResolvedValue([]), + clearCollection: jest.fn().mockResolvedValue(undefined), + deleteCollection: jest.fn().mockResolvedValue(undefined), + collectionExists: jest.fn().mockResolvedValue(true), } mockCacheManager = { getHash: jest.fn(), @@ -99,9 +135,9 @@ describe("FileWatcher", () => { describe("constructor", () => { it("should initialize with correct properties", () => { expect(fileWatcher).toBeDefined() - // Push mock event emitters to subscriptions array + mockContext.subscriptions.push({ dispose: jest.fn() }, { dispose: jest.fn() }) - expect(mockContext.subscriptions).toHaveLength(2) // onDidStartProcessing and onDidFinishProcessing + expect(mockContext.subscriptions).toHaveLength(2) }) }) @@ -125,7 +161,7 @@ describe("FileWatcher", () => { describe("dispose", () => { it("should dispose all resources", async () => { - await fileWatcher.initialize() // Initialize first to create watcher + await fileWatcher.initialize() fileWatcher.dispose() const watcher = vscode.workspace.createFileSystemWatcher.mock.results[0].value expect(watcher.dispose).toHaveBeenCalled() @@ -135,9 +171,13 @@ describe("FileWatcher", () => { describe("handleFileCreated", () => { it("should call processFile with correct path", async () => { const mockUri = { fsPath: "/mock/workspace/test.js" } - const processFileSpy = jest.spyOn(fileWatcher, "processFile").mockResolvedValue({ status: "success" }) + const processFileSpy = jest.spyOn(fileWatcher, "processFile").mockResolvedValue({ + path: mockUri.fsPath, + status: "success", + } as FileProcessingResult) - await fileWatcher.handleFileCreated(mockUri) + // Access private method using type assertion + await (fileWatcher as any).handleFileCreated(mockUri) expect(processFileSpy).toHaveBeenCalledWith(mockUri.fsPath) }) }) @@ -145,9 +185,13 @@ describe("FileWatcher", 
() => { describe("handleFileChanged", () => { it("should call processFile with correct path", async () => { const mockUri = { fsPath: "/mock/workspace/test.js" } - const processFileSpy = jest.spyOn(fileWatcher, "processFile").mockResolvedValue({ status: "success" }) + const processFileSpy = jest.spyOn(fileWatcher, "processFile").mockResolvedValue({ + path: mockUri.fsPath, + status: "success", + } as FileProcessingResult) - await fileWatcher.handleFileChanged(mockUri) + // Access private method using type assertion + await (fileWatcher as any).handleFileChanged(mockUri) expect(processFileSpy).toHaveBeenCalledWith(mockUri.fsPath) }) }) @@ -164,25 +208,24 @@ describe("FileWatcher", () => { it("should delete from cache and vector store", async () => { const mockUri = { fsPath: "/mock/workspace/test.js" } - await fileWatcher.handleFileDeleted(mockUri) + // Access private method using type assertion + await (fileWatcher as any).handleFileDeleted(mockUri) expect(mockCacheManager.deleteHash).toHaveBeenCalledWith(mockUri.fsPath) - // Advance timers to trigger the batched deletion await jest.advanceTimersByTime(500) - // Verify the batched deletion call expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalledWith([mockUri.fsPath]) }) }) describe("processFile", () => { it("should skip ignored files", async () => { - mockRooIgnoreController.validateAccess.mockImplementation((path) => { + mockRooIgnoreController.validateAccess.mockImplementation((path: string) => { if (path === "/mock/workspace/ignored.js") return false return true }) const filePath = "/mock/workspace/ignored.js" - vscode.Uri.file.mockImplementation((path) => ({ fsPath: path })) + vscode.Uri.file.mockImplementation((path: string) => ({ fsPath: path })) const result = await fileWatcher.processFile(filePath) expect(result.status).toBe("skipped") @@ -196,9 +239,9 @@ describe("FileWatcher", () => { }) it("should skip files larger than MAX_FILE_SIZE_BYTES", async () => { - 
vscode.workspace.fs.stat.mockResolvedValue({ size: 2 * 1024 * 1024 }) // 2MB > 1MB limit + vscode.workspace.fs.stat.mockResolvedValue({ size: 2 * 1024 * 1024 }) vscode.workspace.fs.readFile.mockResolvedValue(Buffer.from("large file content")) - mockRooIgnoreController.validateAccess.mockReturnValue(true) // Ensure file isn't ignored + mockRooIgnoreController.validateAccess.mockReturnValue(true) const result = await fileWatcher.processFile("/mock/workspace/large.js") expect(vscode.Uri.file).toHaveBeenCalledWith("/mock/workspace/large.js") @@ -211,7 +254,7 @@ describe("FileWatcher", () => { vscode.workspace.fs.stat.mockResolvedValue({ size: 1024, mtime: Date.now() }) vscode.workspace.fs.readFile.mockResolvedValue(Buffer.from("test content")) mockCacheManager.getHash.mockReturnValue("hash") - mockRooIgnoreController.validateAccess.mockReturnValue(true) // Ensure file isn't ignored + mockRooIgnoreController.validateAccess.mockReturnValue(true) ;(createHash as jest.Mock).mockReturnValue({ update: jest.fn().mockReturnThis(), digest: jest.fn().mockReturnValue("hash"), @@ -225,7 +268,7 @@ describe("FileWatcher", () => { }) it("should process changed files", async () => { - vscode.Uri.file.mockImplementation((path) => ({ fsPath: path })) + vscode.Uri.file.mockImplementation((path: string) => ({ fsPath: path })) vscode.workspace.fs.stat.mockResolvedValue({ size: 1024, mtime: Date.now() }) vscode.workspace.fs.readFile.mockResolvedValue(Buffer.from("test content")) mockCacheManager.getHash.mockReturnValue("old-hash") @@ -249,9 +292,7 @@ describe("FileWatcher", () => { }, ]) - mockEmbedder.createEmbeddings.mockResolvedValue({ - embeddings: [[0.1, 0.2, 0.3]], - }) + // No need to mock again, it's already mocked in the setup const result = await fileWatcher.processFile("/mock/workspace/test.js") @@ -273,4 +314,107 @@ describe("FileWatcher", () => { expect(result.error).toBeDefined() }) }) + + describe("delete then create race condition", () => { + let onDidDeleteCallback: (uri: 
any) => void + let onDidCreateCallback: (uri: any) => void + let mockUri: { fsPath: string } + + beforeEach(() => { + jest.useFakeTimers() + + mockCacheManager.deleteHash.mockClear() + ;(mockVectorStore.deletePointsByFilePath as jest.Mock).mockClear() + ;(mockVectorStore.upsertPoints as jest.Mock).mockClear() + ;(mockVectorStore.deletePointsByMultipleFilePaths as jest.Mock).mockClear() + + vscode.workspace.createFileSystemWatcher.mockReturnValue({ + onDidCreate: jest.fn((callback) => { + onDidCreateCallback = callback + return { dispose: jest.fn() } + }), + onDidChange: jest.fn().mockReturnValue({ dispose: jest.fn() }), + onDidDelete: jest.fn((callback) => { + onDidDeleteCallback = callback + return { dispose: jest.fn() } + }), + dispose: jest.fn(), + }) + + fileWatcher.initialize() + + mockUri = { fsPath: "/mock/workspace/test-race.js" } + }) + + afterEach(() => { + jest.useRealTimers() + }) + + const waitForFileProcessingToFinish = (fw: FileWatcher, filePath: string) => { + return new Promise((resolve) => { + const listener = fw.onDidFinishProcessing((result) => { + if (result.path === filePath) { + listener.dispose() + resolve() + } + }) + }) + } + + it("should handle rapid delete-then-create sequence correctly", async () => { + vscode.workspace.fs.stat.mockResolvedValue({ size: 100 }) + vscode.workspace.fs.readFile.mockResolvedValue(Buffer.from("new content")) + mockCacheManager.getHash.mockReturnValue("old-hash") + ;(createHash as jest.Mock).mockReturnValue({ + update: jest.fn().mockReturnThis(), + digest: jest.fn().mockReturnValue("new-hash"), + }) + + const { codeParser: mockCodeParser } = require("../parser") + mockCodeParser.parseFile.mockResolvedValue([ + { + file_path: mockUri.fsPath, + content: "new content", + start_line: 1, + end_line: 5, + fileHash: "new-hash", + }, + ]) + + onDidDeleteCallback(mockUri) + + await jest.runAllTicks() + + expect(mockCacheManager.deleteHash).toHaveBeenCalledWith(mockUri.fsPath) + expect((fileWatcher as 
any).deletedFilesBuffer).toContain(mockUri.fsPath) + + const processingPromise = waitForFileProcessingToFinish(fileWatcher, mockUri.fsPath) + + onDidCreateCallback(mockUri) + + await jest.runAllTicks() + + await processingPromise + + expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalledWith(mockUri.fsPath) + expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalledTimes(2) + + expect(mockVectorStore.upsertPoints).toHaveBeenCalled() + + expect((fileWatcher as any).deletedFilesBuffer).not.toContain(mockUri.fsPath) + + const otherFilePath = "/mock/workspace/other-file.js" + ;(fileWatcher as any).deletedFilesBuffer.push(otherFilePath) + + await jest.advanceTimersByTimeAsync(500) + await jest.runAllTicks() + + expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalled() + const deletedPaths = (mockVectorStore.deletePointsByMultipleFilePaths as jest.Mock).mock.calls[0][0] + expect(deletedPaths).toContain(otherFilePath) + expect(deletedPaths).not.toContain(mockUri.fsPath) + + expect(mockCacheManager.updateHash).toHaveBeenCalledWith(mockUri.fsPath, "new-hash") + }) + }) }) From 453af1c21612f4353b3592b743eefbe1c11cdcdf Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 15 May 2025 00:05:51 -0500 Subject: [PATCH 32/71] fix(CodeIndexManager): do not await startIndexing on configuration changes --- src/services/code-index/manager.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index 47b1c3a718..a7701c66a6 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -181,7 +181,7 @@ export class CodeIndexManager { if (shouldStartOrRestartIndexing) { console.log("[CodeIndexManager] Starting/restarting indexing due to configuration changes") - await this._orchestrator?.startIndexing() + this._orchestrator?.startIndexing() // This method is async, but we don't await it here } return { requiresRestart } From 
6e20aebfb26e5df42738ee069a2273c102a1833c Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 15 May 2025 18:03:35 -0500 Subject: [PATCH 33/71] feat(types): add codeIndexOpenAiKey and codeIndexQdrantApiKey to ProviderSettings and IpcMessage --- src/exports/roo-code.d.ts | 64 +++++++++++++++++++++++++++++++++++++-- src/exports/types.ts | 64 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 124 insertions(+), 4 deletions(-) diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 6cd5e5cd90..904eba8530 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -287,8 +287,6 @@ type ProviderSettings = { vertexJsonCredentials?: string | undefined vertexProjectId?: string | undefined vertexRegion?: string | undefined - codeIndexOpenAiKey?: string | undefined - codeIndexQdrantApiKey?: string | undefined openAiBaseUrl?: string | undefined openAiApiKey?: string | undefined openAiLegacyFormat?: boolean | undefined @@ -368,6 +366,8 @@ type ProviderSettings = { litellmBaseUrl?: string | undefined litellmApiKey?: string | undefined litellmModelId?: string | undefined + codeIndexOpenAiKey?: string | undefined + codeIndexQdrantApiKey?: string | undefined } type ProviderSettingsEntry = { @@ -779,6 +779,8 @@ type IpcMessage = litellmBaseUrl?: string | undefined litellmApiKey?: string | undefined litellmModelId?: string | undefined + codeIndexOpenAiKey?: string | undefined + codeIndexQdrantApiKey?: string | undefined currentApiConfigName?: string | undefined listApiConfigMeta?: | { @@ -838,6 +840,33 @@ type IpcMessage = autoApprovalEnabled?: boolean | undefined alwaysAllowReadOnly?: boolean | undefined alwaysAllowReadOnlyOutsideWorkspace?: boolean | undefined + codebaseIndexModels?: + | { + openai?: + | { + [x: string]: { + dimension: number + } + } + | undefined + ollama?: + | { + [x: string]: { + dimension: number + } + } + | undefined + } + | undefined + codebaseIndexConfig?: + | { + codebaseIndexEnabled?: boolean | undefined + 
codebaseIndexQdrantUrl?: string | undefined + codebaseIndexEmbedderProvider?: ("openai" | "ollama") | undefined + codebaseIndexEmbedderBaseUrl?: string | undefined + codebaseIndexEmbedderModelId?: string | undefined + } + | undefined alwaysAllowWrite?: boolean | undefined alwaysAllowWriteOutsideWorkspace?: boolean | undefined writeDelayMs?: number | undefined @@ -1019,6 +1048,7 @@ type IpcMessage = | "rooignore_error" | "diff_error" | "condense_context" + | "codebase_search_result" ) | undefined text?: string | undefined @@ -1261,6 +1291,8 @@ type TaskCommand = litellmBaseUrl?: string | undefined litellmApiKey?: string | undefined litellmModelId?: string | undefined + codeIndexOpenAiKey?: string | undefined + codeIndexQdrantApiKey?: string | undefined currentApiConfigName?: string | undefined listApiConfigMeta?: | { @@ -1320,6 +1352,33 @@ type TaskCommand = autoApprovalEnabled?: boolean | undefined alwaysAllowReadOnly?: boolean | undefined alwaysAllowReadOnlyOutsideWorkspace?: boolean | undefined + codebaseIndexModels?: + | { + openai?: + | { + [x: string]: { + dimension: number + } + } + | undefined + ollama?: + | { + [x: string]: { + dimension: number + } + } + | undefined + } + | undefined + codebaseIndexConfig?: + | { + codebaseIndexEnabled?: boolean | undefined + codebaseIndexQdrantUrl?: string | undefined + codebaseIndexEmbedderProvider?: ("openai" | "ollama") | undefined + codebaseIndexEmbedderBaseUrl?: string | undefined + codebaseIndexEmbedderModelId?: string | undefined + } + | undefined alwaysAllowWrite?: boolean | undefined alwaysAllowWriteOutsideWorkspace?: boolean | undefined writeDelayMs?: number | undefined @@ -1497,6 +1556,7 @@ type TaskEvent = | "rooignore_error" | "diff_error" | "condense_context" + | "codebase_search_result" ) | undefined text?: string | undefined diff --git a/src/exports/types.ts b/src/exports/types.ts index 0a80f615cb..6f4989df62 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -288,8 +288,6 @@ type 
ProviderSettings = { awsUseProfile?: boolean | undefined awsCustomArn?: string | undefined vertexKeyFile?: string | undefined - codeIndexOpenAiKey?: string | undefined - codeIndexQdrantApiKey?: string | undefined vertexJsonCredentials?: string | undefined vertexProjectId?: string | undefined vertexRegion?: string | undefined @@ -372,6 +370,8 @@ type ProviderSettings = { litellmBaseUrl?: string | undefined litellmApiKey?: string | undefined litellmModelId?: string | undefined + codeIndexOpenAiKey?: string | undefined + codeIndexQdrantApiKey?: string | undefined } export type { ProviderSettings } @@ -793,6 +793,8 @@ type IpcMessage = litellmBaseUrl?: string | undefined litellmApiKey?: string | undefined litellmModelId?: string | undefined + codeIndexOpenAiKey?: string | undefined + codeIndexQdrantApiKey?: string | undefined currentApiConfigName?: string | undefined listApiConfigMeta?: | { @@ -852,6 +854,33 @@ type IpcMessage = autoApprovalEnabled?: boolean | undefined alwaysAllowReadOnly?: boolean | undefined alwaysAllowReadOnlyOutsideWorkspace?: boolean | undefined + codebaseIndexModels?: + | { + openai?: + | { + [x: string]: { + dimension: number + } + } + | undefined + ollama?: + | { + [x: string]: { + dimension: number + } + } + | undefined + } + | undefined + codebaseIndexConfig?: + | { + codebaseIndexEnabled?: boolean | undefined + codebaseIndexQdrantUrl?: string | undefined + codebaseIndexEmbedderProvider?: ("openai" | "ollama") | undefined + codebaseIndexEmbedderBaseUrl?: string | undefined + codebaseIndexEmbedderModelId?: string | undefined + } + | undefined alwaysAllowWrite?: boolean | undefined alwaysAllowWriteOutsideWorkspace?: boolean | undefined writeDelayMs?: number | undefined @@ -1033,6 +1062,7 @@ type IpcMessage = | "rooignore_error" | "diff_error" | "condense_context" + | "codebase_search_result" ) | undefined text?: string | undefined @@ -1277,6 +1307,8 @@ type TaskCommand = litellmBaseUrl?: string | undefined litellmApiKey?: string | undefined 
litellmModelId?: string | undefined + codeIndexOpenAiKey?: string | undefined + codeIndexQdrantApiKey?: string | undefined currentApiConfigName?: string | undefined listApiConfigMeta?: | { @@ -1336,6 +1368,33 @@ type TaskCommand = autoApprovalEnabled?: boolean | undefined alwaysAllowReadOnly?: boolean | undefined alwaysAllowReadOnlyOutsideWorkspace?: boolean | undefined + codebaseIndexModels?: + | { + openai?: + | { + [x: string]: { + dimension: number + } + } + | undefined + ollama?: + | { + [x: string]: { + dimension: number + } + } + | undefined + } + | undefined + codebaseIndexConfig?: + | { + codebaseIndexEnabled?: boolean | undefined + codebaseIndexQdrantUrl?: string | undefined + codebaseIndexEmbedderProvider?: ("openai" | "ollama") | undefined + codebaseIndexEmbedderBaseUrl?: string | undefined + codebaseIndexEmbedderModelId?: string | undefined + } + | undefined alwaysAllowWrite?: boolean | undefined alwaysAllowWriteOutsideWorkspace?: boolean | undefined writeDelayMs?: number | undefined @@ -1515,6 +1574,7 @@ type TaskEvent = | "rooignore_error" | "diff_error" | "condense_context" + | "codebase_search_result" ) | undefined text?: string | undefined From 8fc60294f36ccbda0bc4deaec91a976479accf77 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 16 May 2025 11:11:28 -0500 Subject: [PATCH 34/71] feat(FileWatcher): enhance file processing with batch operations and new status handling --- .../code-index/interfaces/file-processor.ts | 5 +- .../code-index/interfaces/vector-store.ts | 14 +- .../processors/__tests__/file-watcher.test.ts | 52 ++++++-- .../code-index/processors/file-watcher.ts | 120 ++++++++++++------ 4 files changed, 127 insertions(+), 64 deletions(-) diff --git a/src/services/code-index/interfaces/file-processor.ts b/src/services/code-index/interfaces/file-processor.ts index b7edf99ee0..9d46a9157c 100644 --- a/src/services/code-index/interfaces/file-processor.ts +++ b/src/services/code-index/interfaces/file-processor.ts @@ -1,4 +1,5 @@ import 
* as vscode from "vscode" +import { PointStruct } from "./vector-store" /** * Interface for code file parser @@ -80,9 +81,11 @@ export interface IFileWatcher { export interface FileProcessingResult { path: string - status: "success" | "skipped" | "error" + status: "success" | "skipped" | "error" | "processed_for_batching" | "local_error" error?: Error reason?: string + newHash?: string + pointsToUpsert?: PointStruct[] } /** diff --git a/src/services/code-index/interfaces/vector-store.ts b/src/services/code-index/interfaces/vector-store.ts index 6429486f81..1e999b499f 100644 --- a/src/services/code-index/interfaces/vector-store.ts +++ b/src/services/code-index/interfaces/vector-store.ts @@ -1,6 +1,12 @@ /** * Interface for vector database clients */ +export type PointStruct = { + id: string + vector: number[] + payload: Record +} + export interface IVectorStore { /** * Initializes the vector store @@ -12,13 +18,7 @@ export interface IVectorStore { * Upserts points into the vector store * @param points Array of points to upsert */ - upsertPoints( - points: Array<{ - id: string - vector: number[] - payload: Record - }>, - ): Promise + upsertPoints(points: PointStruct[]): Promise /** * Searches for similar vectors diff --git a/src/services/code-index/processors/__tests__/file-watcher.test.ts b/src/services/code-index/processors/__tests__/file-watcher.test.ts index 19c82c79f5..9090706a52 100644 --- a/src/services/code-index/processors/__tests__/file-watcher.test.ts +++ b/src/services/code-index/processors/__tests__/file-watcher.test.ts @@ -173,7 +173,11 @@ describe("FileWatcher", () => { const mockUri = { fsPath: "/mock/workspace/test.js" } const processFileSpy = jest.spyOn(fileWatcher, "processFile").mockResolvedValue({ path: mockUri.fsPath, - status: "success", + status: "processed_for_batching", + newHash: "mock-hash", + pointsToUpsert: [], + reason: undefined, + error: undefined, } as FileProcessingResult) // Access private method using type assertion @@ -187,7 
+191,11 @@ describe("FileWatcher", () => { const mockUri = { fsPath: "/mock/workspace/test.js" } const processFileSpy = jest.spyOn(fileWatcher, "processFile").mockResolvedValue({ path: mockUri.fsPath, - status: "success", + status: "processed_for_batching", + newHash: "mock-hash", + pointsToUpsert: [], + reason: undefined, + error: undefined, } as FileProcessingResult) // Access private method using type assertion @@ -296,12 +304,22 @@ describe("FileWatcher", () => { const result = await fileWatcher.processFile("/mock/workspace/test.js") - expect(result.status).toBe("success") - expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalled() + expect(result.status).toBe("processed_for_batching") + expect(result.newHash).toBe("new-hash") + expect(result.pointsToUpsert).toEqual([ + expect.objectContaining({ + id: "mocked-uuid-v5-for-testing", + vector: [0.1, 0.2, 0.3], + payload: { + filePath: "test.js", + codeChunk: "test content", + startLine: 1, + endLine: 5, + }, + }), + ]) expect(mockCodeParser.parseFile).toHaveBeenCalled() expect(mockEmbedder.createEmbeddings).toHaveBeenCalled() - expect(mockVectorStore.upsertPoints).toHaveBeenCalled() - expect(mockCacheManager.updateHash).toHaveBeenCalledWith("/mock/workspace/test.js", "new-hash") }) it("should handle processing errors", async () => { @@ -310,7 +328,7 @@ describe("FileWatcher", () => { const result = await fileWatcher.processFile("/mock/workspace/error.js") - expect(result.status).toBe("error") + expect(result.status).toBe("local_error") expect(result.error).toBeDefined() }) }) @@ -397,9 +415,10 @@ describe("FileWatcher", () => { await processingPromise expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalledWith(mockUri.fsPath) - expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalledTimes(2) - - expect(mockVectorStore.upsertPoints).toHaveBeenCalled() + expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalledTimes(1) + 
expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalledWith( + expect.arrayContaining([mockUri.fsPath]), + ) expect((fileWatcher as any).deletedFilesBuffer).not.toContain(mockUri.fsPath) @@ -409,10 +428,15 @@ describe("FileWatcher", () => { await jest.advanceTimersByTimeAsync(500) await jest.runAllTicks() - expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalled() - const deletedPaths = (mockVectorStore.deletePointsByMultipleFilePaths as jest.Mock).mock.calls[0][0] - expect(deletedPaths).toContain(otherFilePath) - expect(deletedPaths).not.toContain(mockUri.fsPath) + expect((mockVectorStore.deletePointsByMultipleFilePaths as jest.Mock).mock.calls[0][0]).toEqual( + expect.arrayContaining([mockUri.fsPath]), + ) + + expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalledTimes(2) + + const flushedDeletedPaths = (mockVectorStore.deletePointsByMultipleFilePaths as jest.Mock).mock.calls[1][0] + expect(flushedDeletedPaths).toContain(otherFilePath) + expect(flushedDeletedPaths).not.toContain(mockUri.fsPath) expect(mockCacheManager.updateHash).toHaveBeenCalledWith(mockUri.fsPath, "new-hash") }) diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index 89a9ed5553..99a50e13e9 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -3,7 +3,7 @@ import { createHash } from "crypto" import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController" import { v5 as uuidv5 } from "uuid" import { scannerExtensions } from "../shared/supported-extensions" -import { IFileWatcher, FileProcessingResult, IEmbedder, IVectorStore } from "../interfaces" +import { IFileWatcher, FileProcessingResult, IEmbedder, IVectorStore, PointStruct } from "../interfaces" import { codeParser } from "./parser" import { CacheManager } from "../cache-manager" import { generateNormalizedAbsolutePath, 
generateRelativeFilePath } from "../shared/get-relative-path" @@ -18,7 +18,7 @@ export class FileWatcher implements IFileWatcher { private fileWatcher?: vscode.FileSystemWatcher private ignoreController: RooIgnoreController private eventQueue: { uri: vscode.Uri; type: "create" | "change" | "delete" }[] = [] - private processingMap: Map> = new Map() + private processingMap: Map> = new Map() private isProcessing = false private deletedFilesBuffer: string[] = [] private deleteTimer: NodeJS.Timeout | undefined @@ -126,6 +126,8 @@ export class FileWatcher implements IFileWatcher { */ private async processQueue(): Promise { try { + const filesToBatchProcess: FileProcessingResult[] = [] + while (this.eventQueue.length > 0) { const event = this.eventQueue.shift()! const filePath = event.uri.fsPath @@ -133,12 +135,67 @@ export class FileWatcher implements IFileWatcher { // Ensure sequential processing for the same file path const existingPromise = this.processingMap.get(filePath) const newPromise = (existingPromise || Promise.resolve()) - .then(() => this.processEvent(event)) + .then(async () => { + const result = await this.processEvent(event) + if (result) { + if (result.status === "processed_for_batching") { + filesToBatchProcess.push(result) + } else if ( + result.status === "skipped" || + result.status === "local_error" || + result.status === "error" || + result.status === "success" + ) { + this._onDidFinishProcessing.fire(result) + } + } + return result + }) .finally(() => this.processingMap.delete(filePath)) this.processingMap.set(filePath, newPromise) await newPromise } + + // Process batch operations if we have files to process + if (filesToBatchProcess.length > 0 && this.vectorStore) { + // Extract unique file paths that need deletion + const pathsToDelete = [...new Set(filesToBatchProcess.map((f) => f.path))] + // Extract all points to upsert + const allPointsToUpsert = filesToBatchProcess.flatMap((f) => f.pointsToUpsert || []) + + try { + // Batch delete old 
points + if (pathsToDelete.length > 0) { + await this.vectorStore.deletePointsByMultipleFilePaths(pathsToDelete) + } + + // Batch upsert new points + if (allPointsToUpsert.length > 0) { + await this.vectorStore.upsertPoints(allPointsToUpsert) + } + + // Update cache and fire success events + for (const fileData of filesToBatchProcess) { + if (fileData.newHash) { + this.cacheManager.updateHash(fileData.path, fileData.newHash) + } + this._onDidFinishProcessing.fire({ + path: fileData.path, + status: "success", + }) + } + } catch (error) { + // Handle batch operation failures + for (const fileData of filesToBatchProcess) { + this._onDidFinishProcessing.fire({ + path: fileData.path, + status: "error", + error: error as Error, + }) + } + } + } } finally { this.isProcessing = false } @@ -148,7 +205,10 @@ export class FileWatcher implements IFileWatcher { * Processes a single file system event * @param event The file system event to process */ - private async processEvent(event: { uri: vscode.Uri; type: "create" | "change" | "delete" }): Promise { + private async processEvent(event: { + uri: vscode.Uri + type: "create" | "change" | "delete" + }): Promise { const filePath = event.uri.fsPath // For delete operations, process immediately @@ -172,10 +232,10 @@ export class FileWatcher implements IFileWatcher { if (hasPendingDelete) { // Wait for delete to be processed first - return + return undefined } - await this.processFile(filePath) + return await this.processFile(filePath) } /** @@ -230,25 +290,21 @@ export class FileWatcher implements IFileWatcher { try { // Check if file should be ignored if (!this.ignoreController.validateAccess(filePath)) { - const result = { + return { path: filePath, status: "skipped" as const, reason: "File is ignored by .rooignore", } - this._onDidFinishProcessing.fire(result) - return result } // Check file size const fileStat = await vscode.workspace.fs.stat(vscode.Uri.file(filePath)) if (fileStat.size > MAX_FILE_SIZE_BYTES) { - const result = 
{ + return { path: filePath, status: "skipped" as const, reason: "File is too large", } - this._onDidFinishProcessing.fire(result) - return result } // Read file content @@ -260,37 +316,24 @@ export class FileWatcher implements IFileWatcher { // Check if file has changed if (this.cacheManager.getHash(filePath) === newHash) { - const result = { + return { path: filePath, status: "skipped" as const, reason: "File has not changed", } - this._onDidFinishProcessing.fire(result) - return result - } - - // Delete old points - if (this.vectorStore) { - try { - await this.vectorStore.deletePointsByFilePath(filePath) - console.log(`[FileWatcher] Deleted existing points for changed file: ${filePath}`) - } catch (error) { - console.error(`[FileWatcher] Failed to delete points for ${filePath}:`, error) - throw error - } } // Parse file const blocks = await codeParser.parseFile(filePath, { content, fileHash: newHash }) - // Create embeddings and upsert points + // Prepare points for batch processing + let pointsToUpsert: PointStruct[] = [] if (this.embedder && this.vectorStore && blocks.length > 0) { const texts = blocks.map((block) => block.content) const { embeddings } = await this.embedder.createEmbeddings(texts) - const points = blocks.map((block, index) => { + pointsToUpsert = blocks.map((block, index) => { const normalizedAbsolutePath = generateNormalizedAbsolutePath(block.file_path) - const stableName = `${normalizedAbsolutePath}:${block.start_line}` const pointId = uuidv5(stableName, QDRANT_CODE_BLOCK_NAMESPACE) @@ -305,27 +348,20 @@ export class FileWatcher implements IFileWatcher { }, } }) - - await this.vectorStore.upsertPoints(points) } - // Update cache - this.cacheManager.updateHash(filePath, newHash) - - const result = { + return { path: filePath, - status: "success" as const, + status: "processed_for_batching" as const, + newHash, + pointsToUpsert, } - this._onDidFinishProcessing.fire(result) - return result } catch (error) { - const result = { + return { path: 
filePath, - status: "error" as const, + status: "local_error" as const, error: error as Error, } - this._onDidFinishProcessing.fire(result) - return result } } } From 23e0ae991cf2b2cfd79aa232f2b5bc83868a1bb9 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 16 May 2025 11:23:25 -0500 Subject: [PATCH 35/71] fix(webviewMessageHandler): handle errors during CodeIndexManager initialization --- src/core/webview/webviewMessageHandler.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index e07c536c95..ce5a3f77cd 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -1329,7 +1329,15 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We codebaseIndexEmbedderModelId: "", } await updateGlobalState("codebaseIndexConfig", codebaseIndexConfig) - await provider.codeIndexManager?.initialize(provider.contextProxy) + + try { + await provider.codeIndexManager?.initialize(provider.contextProxy) + } catch (error) { + provider.log( + `[CodeIndexManager] Error during background CodeIndexManager configuration/indexing: ${error.message || error}`, + ) + } + await provider.postStateToWebview() break } From 342c70afb52956512eea8607cff19efc4c79e3a1 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 16 May 2025 11:27:02 -0500 Subject: [PATCH 36/71] refactor(CodeIndexManager): streamline service creation by consolidating into a single method --- src/services/code-index/manager.ts | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index a7701c66a6..c1bec42d49 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -9,7 +9,6 @@ import { CodeIndexServiceFactory } from "./service-factory" import { CodeIndexSearchService } from "./search-service" import { 
CodeIndexOrchestrator } from "./orchestrator" import { CacheManager } from "./cache-manager" -import { codeParser } from "./processors" export class CodeIndexManager { // --- Singleton Implementation --- @@ -129,14 +128,8 @@ export class CodeIndexManager { ) // (Re)Create shared service instances - const embedder = this._serviceFactory.createEmbedder() - const vectorStore = this._serviceFactory.createVectorStore() - const parser = codeParser - const scanner = this._serviceFactory.createDirectoryScanner(embedder, vectorStore, parser) - const fileWatcher = this._serviceFactory.createFileWatcher( + const { embedder, vectorStore, scanner, fileWatcher } = this._serviceFactory.createServices( this.context, - embedder, - vectorStore, this._cacheManager, ) From 8533a2f9487026e60ba85806052a5a8976da73d9 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 16 May 2025 16:59:14 -0500 Subject: [PATCH 37/71] feat(CodeIndex): implement minimum search score configuration and update search methods --- src/core/prompts/tools/codebase-search.ts | 3 --- src/core/tools/codebaseSearchTool.ts | 17 ++--------------- src/services/code-index/config-manager.ts | 14 ++++++++++++++ src/services/code-index/interfaces/config.ts | 1 + .../code-index/interfaces/vector-store.ts | 2 +- src/services/code-index/manager.ts | 8 ++------ src/services/code-index/search-service.ts | 10 ++++------ .../code-index/vector-store/qdrant-client.ts | 17 ++++++++++++----- src/shared/tools.ts | 3 +-- 9 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/core/prompts/tools/codebase-search.ts b/src/core/prompts/tools/codebase-search.ts index 75d00cbeac..182d8823c2 100644 --- a/src/core/prompts/tools/codebase-search.ts +++ b/src/core/prompts/tools/codebase-search.ts @@ -3,19 +3,16 @@ export function getCodebaseSearchDescription(): string { Description: Search the codebase for relevant files based on a query. 
Use this when the user asks a question about the codebase that requires finding specific files or code snippets. You can optionally specify a path to a directory to search in, the results will be filtered to only include files within that directory, this is useful for searching for files related to a specific project or module. Parameters: - query: (required) The natural language query to search for. -- limit: (optional) The maximum number of search results to return. Defaults to 10. - path: (optional) The path to the directory to search in relative to the current working directory. Defaults to the current working directory. Usage: Your natural language query here -Number of results (optional) Path to the directory to search in (optional) Example: Searching for functions related to user authentication User login and password hashing -5 /path/to/directory ` diff --git a/src/core/tools/codebaseSearchTool.ts b/src/core/tools/codebaseSearchTool.ts index e023443152..c6012f9f0f 100644 --- a/src/core/tools/codebaseSearchTool.ts +++ b/src/core/tools/codebaseSearchTool.ts @@ -9,7 +9,7 @@ import { AskApproval, HandleError, PushToolResult, RemoveClosingTag, ToolUse } f import path from "path" export async function codebaseSearchTool( - cline: Task, + cline: Task, block: ToolUse, askApproval: AskApproval, handleError: HandleError, @@ -27,8 +27,6 @@ export async function codebaseSearchTool( // --- Parameter Extraction and Validation --- let query: string | undefined = block.params.query - let limitStr: string | undefined = block.params.limit - let limit: number = 5 // Default limit let directoryPrefix: string | undefined = block.params.path if (!query) { @@ -38,16 +36,6 @@ export async function codebaseSearchTool( } query = removeClosingTag("query", query) - if (limitStr) { - limitStr = removeClosingTag("limit", limitStr) - limit = parseInt(limitStr, 10) - if (isNaN(limit) || limit <= 0) { - cline.consecutiveMistakeCount++ - await cline.say("text", `Invalid limit value: 
"${limitStr}". Using default ${10}.`) - limit = 10 - } - } - if (directoryPrefix) { directoryPrefix = removeClosingTag("path", directoryPrefix) directoryPrefix = path.normalize(directoryPrefix) @@ -58,7 +46,6 @@ export async function codebaseSearchTool( const approvalPayload = { tool: "codebaseSearch", query: query, - limit: limit, path: directoryPrefix, isOutsideWorkspace: false, } @@ -91,7 +78,7 @@ export async function codebaseSearchTool( throw new Error("Code Indexing is not configured (Missing OpenAI Key or Qdrant URL).") } - const searchResults: VectorStoreSearchResult[] = await manager.searchIndex(query, limit, directoryPrefix) + const searchResults: VectorStoreSearchResult[] = await manager.searchIndex(query, directoryPrefix) // 3. Format and push results if (!searchResults || searchResults.length === 0) { diff --git a/src/services/code-index/config-manager.ts b/src/services/code-index/config-manager.ts index 5cc2014c54..e162816113 100644 --- a/src/services/code-index/config-manager.ts +++ b/src/services/code-index/config-manager.ts @@ -3,6 +3,7 @@ import { ContextProxy } from "../../core/config/ContextProxy" import { EmbedderProvider } from "./interfaces/manager" import { getModelDimension, getDefaultModelId } from "../../shared/embeddingModels" import { CodeIndexConfig, PreviousConfigSnapshot } from "./interfaces/config" +import { CODEBASE_INDEX_SEARCH_MIN_SCORE } from "./constants" /** * Manages configuration state and validation for the code indexing feature. 
@@ -16,6 +17,7 @@ export class CodeIndexConfigManager { private ollamaOptions?: ApiHandlerOptions private qdrantUrl?: string private qdrantApiKey?: string + private searchMinScore?: number constructor(private readonly contextProxy: ContextProxy) {} @@ -33,6 +35,7 @@ export class CodeIndexConfigManager { ollamaOptions?: ApiHandlerOptions qdrantUrl?: string qdrantApiKey?: string + searchMinScore?: number } requiresRestart: boolean requiresClear: boolean @@ -53,6 +56,7 @@ export class CodeIndexConfigManager { let codebaseIndexConfig = this.contextProxy?.getGlobalState("codebaseIndexConfig") ?? { codebaseIndexEnabled: false, codebaseIndexQdrantUrl: "", + codebaseIndexSearchMinScore: 0.4, codebaseIndexEmbedderProvider: "openai", codebaseIndexEmbedderBaseUrl: "", codebaseIndexEmbedderModelId: "", @@ -73,6 +77,7 @@ export class CodeIndexConfigManager { this.qdrantUrl = codebaseIndexQdrantUrl this.qdrantApiKey = qdrantApiKey ?? "" this.openAiOptions = { openAiNativeApiKey: openAiKey } + this.searchMinScore = CODEBASE_INDEX_SEARCH_MIN_SCORE this.embedderProvider = codebaseIndexEmbedderProvider === "ollama" ? "ollama" : "openai" this.modelId = codebaseIndexEmbedderModelId || undefined @@ -105,6 +110,7 @@ export class CodeIndexConfigManager { ollamaOptions: this.ollamaOptions, qdrantUrl: this.qdrantUrl, qdrantApiKey: this.qdrantApiKey, + searchMinScore: this.searchMinScore, }, requiresRestart: this._didConfigChangeRequireRestart(previousConfigSnapshot), requiresClear, @@ -183,6 +189,7 @@ export class CodeIndexConfigManager { ollamaOptions: this.ollamaOptions, qdrantUrl: this.qdrantUrl, qdrantApiKey: this.qdrantApiKey, + searchMinScore: this.searchMinScore, } } @@ -223,4 +230,11 @@ export class CodeIndexConfigManager { public get currentModelId(): string | undefined { return this.modelId } + + /** + * Gets the configured minimum search score. 
+ */ + public get currentSearchMinScore(): number | undefined { + return this.searchMinScore + } } diff --git a/src/services/code-index/interfaces/config.ts b/src/services/code-index/interfaces/config.ts index a7dcff167f..2d07911783 100644 --- a/src/services/code-index/interfaces/config.ts +++ b/src/services/code-index/interfaces/config.ts @@ -13,6 +13,7 @@ export interface CodeIndexConfig { ollamaOptions?: ApiHandlerOptions qdrantUrl?: string qdrantApiKey?: string + searchMinScore?: number } /** diff --git a/src/services/code-index/interfaces/vector-store.ts b/src/services/code-index/interfaces/vector-store.ts index 1e999b499f..1896942a01 100644 --- a/src/services/code-index/interfaces/vector-store.ts +++ b/src/services/code-index/interfaces/vector-store.ts @@ -26,7 +26,7 @@ export interface IVectorStore { * @param limit Maximum number of results to return * @returns Promise resolving to search results */ - search(queryVector: number[], limit?: number, directoryPrefix?: string): Promise + search(queryVector: number[], directoryPrefix?: string, minScore?: number): Promise /** * Deletes points by file path diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index c1bec42d49..9be8c40a9f 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -241,16 +241,12 @@ export class CodeIndexManager { this._stateManager.setWebviewProvider(provider) } - public async searchIndex( - query: string, - limit: number, - directoryPrefix?: string, - ): Promise { + public async searchIndex(query: string, directoryPrefix?: string): Promise { if (!this.isFeatureEnabled) { console.log("[CodeIndexManager] Feature disabled - returning empty search results") return [] } this.assertInitialized() - return this._searchService!.searchIndex(query, limit, directoryPrefix) + return this._searchService!.searchIndex(query, directoryPrefix) } } diff --git a/src/services/code-index/search-service.ts 
b/src/services/code-index/search-service.ts index 1d7a4d8a5f..acf6afbf7e 100644 --- a/src/services/code-index/search-service.ts +++ b/src/services/code-index/search-service.ts @@ -24,15 +24,13 @@ export class CodeIndexSearchService { * @returns Array of search results * @throws Error if the service is not properly configured or ready */ - public async searchIndex( - query: string, - limit: number, - directoryPrefix?: string, - ): Promise { + public async searchIndex(query: string, directoryPrefix?: string): Promise { if (!this.configManager.isFeatureEnabled || !this.configManager.isFeatureConfigured) { throw new Error("Code index feature is disabled or not configured.") } + const minScore = this.configManager.currentSearchMinScore + const currentState = this.stateManager.getCurrentStatus().systemStatus if (currentState !== "Indexed" && currentState !== "Indexing") { // Allow search during Indexing too @@ -54,7 +52,7 @@ export class CodeIndexSearchService { } // Perform search - const results = await this.vectorStore.search(vector, limit, normalizedPrefix) + const results = await this.vectorStore.search(vector, normalizedPrefix, minScore) return results } catch (error) { console.error("[CodeIndexSearchService] Error during search:", error) diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index 8615bd5075..2ba6e3233c 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -4,6 +4,7 @@ import * as path from "path" import { getWorkspacePath } from "../../../utils/path" import { IVectorStore } from "../interfaces/vector-store" import { Payload, VectorStoreSearchResult } from "../interfaces" +import { CODEBASE_INDEX_SEARCH_MIN_SCORE } from "../constants" /** * Qdrant implementation of the vector store interface @@ -124,11 +125,11 @@ export class QdrantVectorStore implements IVectorStore { */ async search( queryVector: 
number[], - limit: number = 10, directoryPrefix?: string, + minScore?: number, ): Promise { try { - let filter: any = undefined + let filter = undefined if (directoryPrefix) { const segments = directoryPrefix.split(path.sep).filter(Boolean) @@ -141,11 +142,17 @@ export class QdrantVectorStore implements IVectorStore { } } - const result = await this.client.search(this.collectionName, { + const searchRequest = { vector: queryVector, - limit, filter, - }) + score_threshold: CODEBASE_INDEX_SEARCH_MIN_SCORE, + } + + if (minScore !== undefined) { + searchRequest.score_threshold = minScore + } + + const result = await this.client.search(this.collectionName, searchRequest) result.filter((r) => this.isPayloadValid(r.payload!)) return result as VectorStoreSearchResult[] diff --git a/src/shared/tools.ts b/src/shared/tools.ts index e4c412d92f..37ab53516e 100644 --- a/src/shared/tools.ts +++ b/src/shared/tools.ts @@ -64,7 +64,6 @@ export const toolParamNames = [ "start_line", "end_line", "query", - "limit", ] as const export type ToolParamName = (typeof toolParamNames)[number] @@ -105,7 +104,7 @@ export interface InsertCodeBlockToolUse extends ToolUse { export interface CodebaseSearchToolUse extends ToolUse { name: "codebase_search" - params: Partial, "query" | "limit">> + params: Partial, "query" | "path">> } export interface SearchFilesToolUse extends ToolUse { From 84c5f910ae9e8e6fb6d2894bb06485f070f70869 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 16 May 2025 16:59:36 -0500 Subject: [PATCH 38/71] refactor(CodeIndexSettings): replace ApiConfiguration with ProviderSettings and update related methods --- .../src/components/settings/CodeIndexSettings.tsx | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/webview-ui/src/components/settings/CodeIndexSettings.tsx b/webview-ui/src/components/settings/CodeIndexSettings.tsx index 11ae1ac32f..c066d50f7e 100644 --- a/webview-ui/src/components/settings/CodeIndexSettings.tsx +++ 
b/webview-ui/src/components/settings/CodeIndexSettings.tsx @@ -19,17 +19,16 @@ import { Section } from "./Section" import { SectionHeader } from "./SectionHeader" import { SetCachedStateField } from "./types" import { ExtensionStateContextType } from "@/context/ExtensionStateContext" -import { ApiConfiguration } from "../../../../src/shared/api" -import { CodebaseIndexConfig, CodebaseIndexModels } from "../../../../src/schemas" +import { CodebaseIndexConfig, CodebaseIndexModels, ProviderSettings } from "../../../../src/schemas" import { EmbedderProvider } from "../../../../src/shared/embeddingModels" import { z } from "zod" interface CodeIndexSettingsProps { codebaseIndexModels: CodebaseIndexModels | undefined codebaseIndexConfig: CodebaseIndexConfig | undefined - apiConfiguration: ApiConfiguration + apiConfiguration: ProviderSettings setCachedStateField: SetCachedStateField - setApiConfigurationField: (field: K, value: ApiConfiguration[K]) => void + setProviderSettingsField: (field: K, value: ProviderSettings[K]) => void } interface IndexingStatusUpdateMessage { @@ -47,7 +46,7 @@ export const CodeIndexSettings: React.FC = ({ codebaseIndexConfig, apiConfiguration, setCachedStateField, - setApiConfigurationField, + setProviderSettingsField, }) => { const [indexingStatus, setIndexingStatus] = useState({ systemStatus: "Standby", @@ -88,7 +87,7 @@ export const CodeIndexSettings: React.FC = ({ } }, [codebaseIndexConfig, codebaseIndexModels]) - function validateIndexingConfig(config: CodebaseIndexConfig | undefined, apiConfig: ApiConfiguration): boolean { + function validateIndexingConfig(config: CodebaseIndexConfig | undefined, apiConfig: ProviderSettings): boolean { if (!config) return false const baseSchema = z.object({ @@ -177,7 +176,7 @@ export const CodeIndexSettings: React.FC = ({ setApiConfigurationField("codeIndexOpenAiKey", e.target.value)} + onInput={(e: any) => setProviderSettingsField("codeIndexOpenAiKey", e.target.value)} style={{ width: "100%" }}> OpenAI 
Key: @@ -242,7 +241,7 @@ export const CodeIndexSettings: React.FC = ({ setApiConfigurationField("codeIndexQdrantApiKey", e.target.value)} + onInput={(e: any) => setProviderSettingsField("codeIndexQdrantApiKey", e.target.value)} style={{ width: "100%" }}> Qdrant Key: From d0e2314b8c7e9376758b08ecd18ce6ba574651fd Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 16 May 2025 17:09:37 -0500 Subject: [PATCH 39/71] refactor: move constants to centralized file --- src/services/code-index/constants/index.ts | 24 ++++++++++ src/services/code-index/embedders/openai.ts | 26 +++++----- .../code-index/processors/file-watcher.ts | 4 +- src/services/code-index/processors/parser.ts | 6 +-- src/services/code-index/processors/scanner.ts | 48 ++++++++----------- 5 files changed, 60 insertions(+), 48 deletions(-) create mode 100644 src/services/code-index/constants/index.ts diff --git a/src/services/code-index/constants/index.ts b/src/services/code-index/constants/index.ts new file mode 100644 index 0000000000..f3b8dc2a52 --- /dev/null +++ b/src/services/code-index/constants/index.ts @@ -0,0 +1,24 @@ +/**Parser */ +export const MAX_BLOCK_CHARS = 1000 +export const MIN_BLOCK_CHARS = 100 +export const MIN_CHUNK_REMAINDER_CHARS = 200 // Minimum characters for the *next* chunk after a split +export const MAX_CHARS_TOLERANCE_FACTOR = 1.15 // 15% tolerance for max chars + +/**Search */ +export const CODEBASE_INDEX_SEARCH_MIN_SCORE = 0.4 + +/**File Watcher */ +export const QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479" +export const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB + +/**Directory Scanner */ +export const MAX_LIST_FILES_LIMIT = 3_000 +export const BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch for embeddings/upserts +export const MAX_BATCH_RETRIES = 3 +export const INITIAL_RETRY_DELAY_MS = 500 +export const PARSING_CONCURRENCY = 10 + +/**OpenAI Embedder */ +export const MAX_BATCH_TOKENS = 100000 +export const MAX_ITEM_TOKENS = 8191
+export const BATCH_PROCESSING_CONCURRENCY = 10 diff --git a/src/services/code-index/embedders/openai.ts b/src/services/code-index/embedders/openai.ts index d08199eb31..907c9e1283 100644 --- a/src/services/code-index/embedders/openai.ts +++ b/src/services/code-index/embedders/openai.ts @@ -2,6 +2,12 @@ import { OpenAI } from "openai" import { OpenAiNativeHandler } from "../../../api/providers/openai-native" import { ApiHandlerOptions } from "../../../shared/api" import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces" +import { + MAX_BATCH_TOKENS, + MAX_ITEM_TOKENS, + MAX_BATCH_RETRIES as MAX_RETRIES, + INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS, +} from "../constants" /** * OpenAI implementation of the embedder interface with batching and rate limiting @@ -10,12 +16,6 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder { private embeddingsClient: OpenAI private readonly defaultModelId: string - // Batching and retry constants - private static readonly MAX_BATCH_TOKENS = 100000 - private static readonly MAX_ITEM_TOKENS = 8191 - private static readonly MAX_RETRIES = 3 - private static readonly INITIAL_DELAY_MS = 500 - /** * Creates a new OpenAI embedder * @param options API handler options @@ -48,15 +48,15 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder { const text = remainingTexts[i] const itemTokens = Math.ceil(text.length / 4) - if (itemTokens > OpenAiEmbedder.MAX_ITEM_TOKENS) { + if (itemTokens > MAX_ITEM_TOKENS) { console.warn( - `Text at index ${i} exceeds maximum token limit (${itemTokens} > ${OpenAiEmbedder.MAX_ITEM_TOKENS}). Skipping.`, + `Text at index ${i} exceeds maximum token limit (${itemTokens} > ${MAX_ITEM_TOKENS}). 
Skipping.`, ) processedIndices.push(i) continue } - if (currentBatchTokens + itemTokens <= OpenAiEmbedder.MAX_BATCH_TOKENS) { + if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) { currentBatch.push(text) currentBatchTokens += itemTokens processedIndices.push(i) @@ -96,7 +96,7 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder { batchTexts: string[], model: string, ): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> { - for (let attempts = 0; attempts < OpenAiEmbedder.MAX_RETRIES; attempts++) { + for (let attempts = 0; attempts < MAX_RETRIES; attempts++) { try { const response = await this.embeddingsClient.embeddings.create({ input: batchTexts, @@ -112,10 +112,10 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder { } } catch (error: any) { const isRateLimitError = error?.status === 429 - const hasMoreAttempts = attempts < OpenAiEmbedder.MAX_RETRIES - 1 + const hasMoreAttempts = attempts < MAX_RETRIES - 1 if (isRateLimitError && hasMoreAttempts) { - const delayMs = OpenAiEmbedder.INITIAL_DELAY_MS * Math.pow(2, attempts) + const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts) await new Promise((resolve) => setTimeout(resolve, delayMs)) continue } @@ -124,7 +124,7 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder { } } - throw new Error(`Failed to create embeddings after ${OpenAiEmbedder.MAX_RETRIES} attempts`) + throw new Error(`Failed to create embeddings after ${MAX_RETRIES} attempts`) } get embedderInfo(): EmbedderInfo { diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index 99a50e13e9..d774f07957 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -1,4 +1,5 @@ import * as vscode from "vscode" +import { QDRANT_CODE_BLOCK_NAMESPACE, MAX_FILE_SIZE_BYTES } from "../constants" import { 
createHash } from "crypto" import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController" import { v5 as uuidv5 } from "uuid" @@ -8,9 +9,6 @@ import { codeParser } from "./parser" import { CacheManager } from "../cache-manager" import { generateNormalizedAbsolutePath, generateRelativeFilePath } from "../shared/get-relative-path" -const QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479" -const MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB - /** * Implementation of the file watcher interface */ diff --git a/src/services/code-index/processors/parser.ts b/src/services/code-index/processors/parser.ts index 5d8f5b5488..2197f17bf0 100644 --- a/src/services/code-index/processors/parser.ts +++ b/src/services/code-index/processors/parser.ts @@ -5,11 +5,7 @@ import * as treeSitter from "web-tree-sitter" import { LanguageParser, loadRequiredLanguageParsers } from "../../tree-sitter/languageParser" import { ICodeParser, CodeBlock } from "../interfaces" import { scannerExtensions } from "../shared/supported-extensions" - -const MAX_BLOCK_CHARS = 1000 -const MIN_BLOCK_CHARS = 100 -const MIN_CHUNK_REMAINDER_CHARS = 200 // Minimum characters for the *next* chunk after a split -const MAX_CHARS_TOLERANCE_FACTOR = 1.15 // 15% tolerance for max chars +import { MAX_BLOCK_CHARS, MIN_BLOCK_CHARS, MIN_CHUNK_REMAINDER_CHARS, MAX_CHARS_TOLERANCE_FACTOR } from "../constants" /** * Implementation of the code parser interface diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts index 49f4acb2f8..d96bea7f21 100644 --- a/src/services/code-index/processors/scanner.ts +++ b/src/services/code-index/processors/scanner.ts @@ -11,18 +11,18 @@ import { v5 as uuidv5 } from "uuid" import pLimit from "p-limit" import { Mutex } from "async-mutex" import { CacheManager } from "../cache-manager" +import { + QDRANT_CODE_BLOCK_NAMESPACE, + MAX_FILE_SIZE_BYTES, + MAX_LIST_FILES_LIMIT, + BATCH_SEGMENT_THRESHOLD, + 
MAX_BATCH_RETRIES, + INITIAL_RETRY_DELAY_MS, + PARSING_CONCURRENCY, + BATCH_PROCESSING_CONCURRENCY, +} from "../constants" export class DirectoryScanner implements IDirectoryScanner { - // Constants moved inside the class - private static readonly QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479" - private static readonly MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 // 1MB - private static readonly MAX_LIST_FILES_LIMIT = 3_000 - private static readonly BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch for embeddings/upserts - private static readonly MAX_BATCH_RETRIES = 3 - private static readonly INITIAL_RETRY_DELAY_MS = 500 - private static readonly PARSING_CONCURRENCY = 10 - private static readonly BATCH_PROCESSING_CONCURRENCY = 10 - constructor( private readonly embedder: IEmbedder, private readonly qdrantClient: IVectorStore, @@ -46,7 +46,7 @@ export class DirectoryScanner implements IDirectoryScanner { ): Promise<{ codeBlocks: CodeBlock[]; stats: { processed: number; skipped: number }; totalBlockCount: number }> { const directoryPath = directory // Get all files recursively (handles .gitignore automatically) - const [allPaths, _] = await listFiles(directoryPath, true, DirectoryScanner.MAX_LIST_FILES_LIMIT) + const [allPaths, _] = await listFiles(directoryPath, true, MAX_LIST_FILES_LIMIT) // Filter out directories (marked with trailing '/') const filePaths = allPaths.filter((p) => !p.endsWith("/")) @@ -72,8 +72,8 @@ export class DirectoryScanner implements IDirectoryScanner { let skippedCount = 0 // Initialize parallel processing tools - const parseLimiter = pLimit(DirectoryScanner.PARSING_CONCURRENCY) // Concurrency for file parsing - const batchLimiter = pLimit(DirectoryScanner.BATCH_PROCESSING_CONCURRENCY) // Concurrency for batch processing + const parseLimiter = pLimit(PARSING_CONCURRENCY) // Concurrency for file parsing + const batchLimiter = pLimit(BATCH_PROCESSING_CONCURRENCY) // Concurrency for batch processing const mutex = 
new Mutex() // Shared batch accumulators (protected by mutex) @@ -91,7 +91,7 @@ export class DirectoryScanner implements IDirectoryScanner { try { // Check file size const stats = await stat(filePath) - if (stats.size > DirectoryScanner.MAX_FILE_SIZE_BYTES) { + if (stats.size > MAX_FILE_SIZE_BYTES) { skippedCount++ // Skip large files return } @@ -144,7 +144,7 @@ export class DirectoryScanner implements IDirectoryScanner { // Check if batch threshold is met and not for Ollama if ( - currentBatchBlocks.length >= DirectoryScanner.BATCH_SEGMENT_THRESHOLD && + currentBatchBlocks.length >= BATCH_SEGMENT_THRESHOLD && this.embedder.embedderInfo.name !== "ollama" ) { // Copy current batch data and clear accumulators @@ -261,7 +261,7 @@ export class DirectoryScanner implements IDirectoryScanner { let success = false let lastError: Error | null = null - while (attempts < DirectoryScanner.MAX_BATCH_RETRIES && !success) { + while (attempts < MAX_BATCH_RETRIES && !success) { attempts++ try { // --- Deletion Step --- @@ -297,7 +297,7 @@ export class DirectoryScanner implements IDirectoryScanner { const normalizedAbsolutePath = generateNormalizedAbsolutePath(block.file_path) const stableName = `${normalizedAbsolutePath}:${block.start_line}` - const pointId = uuidv5(stableName, DirectoryScanner.QDRANT_CODE_BLOCK_NAMESPACE) + const pointId = uuidv5(stableName, QDRANT_CODE_BLOCK_NAMESPACE) return { id: pointId, @@ -325,8 +325,8 @@ export class DirectoryScanner implements IDirectoryScanner { lastError = error as Error console.error(`[DirectoryScanner] Error processing batch (attempt ${attempts}):`, error) - if (attempts < DirectoryScanner.MAX_BATCH_RETRIES) { - const delay = DirectoryScanner.INITIAL_RETRY_DELAY_MS * Math.pow(2, attempts - 1) + if (attempts < MAX_BATCH_RETRIES) { + const delay = INITIAL_RETRY_DELAY_MS * Math.pow(2, attempts - 1) console.log(`[DirectoryScanner] Retrying batch in ${delay}ms...`) await new Promise((resolve) => setTimeout(resolve, delay)) } @@ -334,15 
+334,9 @@ export class DirectoryScanner implements IDirectoryScanner { } if (!success && lastError) { - console.error( - `[DirectoryScanner] Failed to process batch after ${DirectoryScanner.MAX_BATCH_RETRIES} attempts`, - ) + console.error(`[DirectoryScanner] Failed to process batch after ${MAX_BATCH_RETRIES} attempts`) if (onError) { - onError( - new Error( - `Failed to process batch after ${DirectoryScanner.MAX_BATCH_RETRIES} attempts: ${lastError.message}`, - ), - ) + onError(new Error(`Failed to process batch after ${MAX_BATCH_RETRIES} attempts: ${lastError.message}`)) } } } From 6b9429385b1ad4f2aa1fb57e6c567936d265a60f Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 16 May 2025 17:41:46 -0500 Subject: [PATCH 40/71] refactor(constants): rename CODEBASE_INDEX_SEARCH_MIN_SCORE to SEARCH_MIN_SCORE --- src/services/code-index/config-manager.ts | 4 ++-- src/services/code-index/constants/index.ts | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/services/code-index/config-manager.ts b/src/services/code-index/config-manager.ts index e162816113..9ddab9b114 100644 --- a/src/services/code-index/config-manager.ts +++ b/src/services/code-index/config-manager.ts @@ -3,7 +3,7 @@ import { ContextProxy } from "../../core/config/ContextProxy" import { EmbedderProvider } from "./interfaces/manager" import { getModelDimension, getDefaultModelId } from "../../shared/embeddingModels" import { CodeIndexConfig, PreviousConfigSnapshot } from "./interfaces/config" -import { CODEBASE_INDEX_SEARCH_MIN_SCORE } from "./constants" +import { SEARCH_MIN_SCORE } from "./constants" /** * Manages configuration state and validation for the code indexing feature. @@ -77,7 +77,7 @@ export class CodeIndexConfigManager { this.qdrantUrl = codebaseIndexQdrantUrl this.qdrantApiKey = qdrantApiKey ?? 
"" this.openAiOptions = { openAiNativeApiKey: openAiKey } - this.searchMinScore = CODEBASE_INDEX_SEARCH_MIN_SCORE + this.searchMinScore = SEARCH_MIN_SCORE this.embedderProvider = codebaseIndexEmbedderProvider === "ollama" ? "ollama" : "openai" this.modelId = codebaseIndexEmbedderModelId || undefined diff --git a/src/services/code-index/constants/index.ts b/src/services/code-index/constants/index.ts index f3b8dc2a52..cbf6941817 100644 --- a/src/services/code-index/constants/index.ts +++ b/src/services/code-index/constants/index.ts @@ -5,7 +5,8 @@ export const MIN_CHUNK_REMAINDER_CHARS = 200 // Minimum characters for the *next export const MAX_CHARS_TOLERANCE_FACTOR = 1.15 // 15% tolerance for max chars /**Search */ -export const CODEBASE_INDEX_SEARCH_MIN_SCORE = 0.4 +export const SEARCH_MIN_SCORE = 0.4 +export const MAX_SEARCH_RESULTS = 50 // Maximum number of search results to return /**File Watcher */ export const QDRANT_CODE_BLOCK_NAMESPACE = "f47ac10b-58cc-4372-a567-0e02b2c3d479" From 64dfc91610d192df6a446fb359716621cd4fb51b Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 16 May 2025 17:42:03 -0500 Subject: [PATCH 41/71] feat(QdrantVectorStore): enhance search functionality with new query structure and indexing --- .../code-index/vector-store/qdrant-client.ts | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index 2ba6e3233c..667d2f2176 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -4,7 +4,7 @@ import * as path from "path" import { getWorkspacePath } from "../../../utils/path" import { IVectorStore } from "../interfaces/vector-store" import { Payload, VectorStoreSearchResult } from "../interfaces" -import { CODEBASE_INDEX_SEARCH_MIN_SCORE } from "../constants" +import { MAX_SEARCH_RESULTS, SEARCH_MIN_SCORE } from "../constants" 
/** * Qdrant implementation of the vector store interface @@ -58,6 +58,25 @@ export class QdrantVectorStore implements IVectorStore { }) created = true } + + // Create payload indexes for pathSegments up to depth 5 + for (let i = 0; i <= 4; i++) { + try { + await this.client.createPayloadIndex(this.collectionName, { + field_name: `pathSegments.${i}`, + field_schema: "keyword", + }) + console.log( + `[QdrantVectorStore] Ensured payload index for pathSegments.${i} on ${this.collectionName}`, + ) + } catch (indexError) { + console.warn( + `[QdrantVectorStore] Could not create payload index for pathSegments.${i} on ${this.collectionName}. It might already exist or there was an issue.`, + indexError, + ) + } + } + return created } catch (error) { console.error("Failed to initialize Qdrant collection:", error) @@ -143,19 +162,27 @@ export class QdrantVectorStore implements IVectorStore { } const searchRequest = { - vector: queryVector, + query: queryVector, filter, - score_threshold: CODEBASE_INDEX_SEARCH_MIN_SCORE, + score_threshold: SEARCH_MIN_SCORE, + limit: MAX_SEARCH_RESULTS, + params: { + hnsw_ef: 128, + exact: false, + }, + with_payload: { + include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"], + }, } if (minScore !== undefined) { searchRequest.score_threshold = minScore } - const result = await this.client.search(this.collectionName, searchRequest) - result.filter((r) => this.isPayloadValid(r.payload!)) + const operationResult = await this.client.query(this.collectionName, searchRequest) + const filteredPoints = operationResult.points.filter((p) => this.isPayloadValid(p.payload!)) - return result as VectorStoreSearchResult[] + return filteredPoints as VectorStoreSearchResult[] } catch (error) { console.error("Failed to search points:", error) throw error From cfdc8ebf71e6967189103d671a3b4cb9f8c5bd49 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 16 May 2025 23:00:52 -0500 Subject: [PATCH 42/71] feat(FileWatcher): implement batch 
processing and retry logic for upserting points --- .../code-index/processors/file-watcher.ts | 39 +++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index d774f07957..2d122bdbc7 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -1,5 +1,11 @@ import * as vscode from "vscode" -import { QDRANT_CODE_BLOCK_NAMESPACE, MAX_FILE_SIZE_BYTES } from "../constants" +import { + QDRANT_CODE_BLOCK_NAMESPACE, + MAX_FILE_SIZE_BYTES, + BATCH_SEGMENT_THRESHOLD, + MAX_BATCH_RETRIES, + INITIAL_RETRY_DELAY_MS, +} from "../constants" import { createHash } from "crypto" import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController" import { v5 as uuidv5 } from "uuid" @@ -168,9 +174,36 @@ export class FileWatcher implements IFileWatcher { await this.vectorStore.deletePointsByMultipleFilePaths(pathsToDelete) } - // Batch upsert new points + // Batch upsert new points in chunks if (allPointsToUpsert.length > 0) { - await this.vectorStore.upsertPoints(allPointsToUpsert) + // Split points into batches + for (let i = 0; i < allPointsToUpsert.length; i += BATCH_SEGMENT_THRESHOLD) { + const batch = allPointsToUpsert.slice(i, i + BATCH_SEGMENT_THRESHOLD) + let retryCount = 0 + let lastError: Error | undefined + + // Retry logic for each batch + while (retryCount < MAX_BATCH_RETRIES) { + try { + await this.vectorStore.upsertPoints(batch) + break // Success, exit retry loop + } catch (error) { + lastError = error as Error + retryCount++ + + if (retryCount === MAX_BATCH_RETRIES) { + throw new Error( + `Failed to upsert batch after ${MAX_BATCH_RETRIES} retries: ${lastError.message}`, + ) + } + + // Exponential backoff + await new Promise((resolve) => + setTimeout(resolve, INITIAL_RETRY_DELAY_MS * Math.pow(2, retryCount - 1)), + ) + } + } + } } // Update cache and fire success 
events From 33209bdd50deaa80657392972b4e6ff6591d98dc Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 20 May 2025 12:26:57 -0500 Subject: [PATCH 43/71] fix(CodeIndexSettings): rename setProviderSettingsField to setApiConfigurationField and move model label --- .../src/components/settings/CodeIndexSettings.tsx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/webview-ui/src/components/settings/CodeIndexSettings.tsx b/webview-ui/src/components/settings/CodeIndexSettings.tsx index c066d50f7e..cb684aa647 100644 --- a/webview-ui/src/components/settings/CodeIndexSettings.tsx +++ b/webview-ui/src/components/settings/CodeIndexSettings.tsx @@ -28,7 +28,7 @@ interface CodeIndexSettingsProps { codebaseIndexConfig: CodebaseIndexConfig | undefined apiConfiguration: ProviderSettings setCachedStateField: SetCachedStateField - setProviderSettingsField: (field: K, value: ProviderSettings[K]) => void + setApiConfigurationField: (field: K, value: ProviderSettings[K]) => void } interface IndexingStatusUpdateMessage { @@ -46,7 +46,7 @@ export const CodeIndexSettings: React.FC = ({ codebaseIndexConfig, apiConfiguration, setCachedStateField, - setProviderSettingsField, + setApiConfigurationField, }) => { const [indexingStatus, setIndexingStatus] = useState({ systemStatus: "Standby", @@ -170,19 +170,19 @@ export const CodeIndexSettings: React.FC = ({
-
Model:
{codebaseIndexConfig?.codebaseIndexEmbedderProvider === "openai" && (
setProviderSettingsField("codeIndexOpenAiKey", e.target.value)} + onInput={(e: any) => setApiConfigurationField("codeIndexOpenAiKey", e.target.value)} style={{ width: "100%" }}> OpenAI Key:
)} +
Model
= ({ } }}> - + - OpenAI - Ollama + {t("settings:codeIndex.openaiProvider")} + {t("settings:codeIndex.ollamaProvider")}
@@ -194,12 +198,14 @@ export const CodeIndexSettings: React.FC = ({ value={apiConfiguration.codeIndexOpenAiKey || ""} onInput={(e: any) => setApiConfigurationField("codeIndexOpenAiKey", e.target.value)} style={{ width: "100%" }}> - OpenAI Key: + {t("settings:codeIndex.openaiKeyLabel")} )} -
Model
+
+ {t("settings:codeIndex.modelLabel")} +
{ - const newProvider = value as EmbedderProvider - const models = codebaseIndexModels?.[newProvider] - const modelIds = models ? Object.keys(models) : [] - const defaultModelId = modelIds.length > 0 ? modelIds[0] : "" // Use empty string if no models - - if (codebaseIndexConfig) { - setCachedStateField("codebaseIndexConfig", { - ...codebaseIndexConfig, - codebaseIndexEmbedderProvider: newProvider, - codebaseIndexEmbedderModelId: defaultModelId, - }) - } - }}> - - - - - {t("settings:codeIndex.openaiProvider")} - {t("settings:codeIndex.ollamaProvider")} - - -
+ + setCachedStateField("codebaseIndexConfig", { + ...codebaseIndexConfig, + codebaseIndexEnabled: e.target.checked, + }) + }> + ⚠️ {t("settings:codeIndex.enableLabel")} + +
+ {t("settings:codeIndex.enableDescription")} +
- {codebaseIndexConfig?.codebaseIndexEmbedderProvider === "openai" && ( -
- setApiConfigurationField("codeIndexOpenAiKey", e.target.value)} - style={{ width: "100%" }}> - {t("settings:codeIndex.openaiKeyLabel")} - -
- )} + {codebaseIndexConfig?.codebaseIndexEnabled && ( +
+
+ + {indexingStatus.systemStatus} + {indexingStatus.message ? ` - ${indexingStatus.message}` : ""} +
-
- {t("settings:codeIndex.modelLabel")} -
-
- + {indexingStatus.systemStatus === "Indexing" && ( +
+ + +
+ )} - {codebaseIndexConfig?.codebaseIndexEmbedderProvider === "ollama" && ( - <> -
- - setCachedStateField("codebaseIndexConfig", { - ...codebaseIndexConfig, - codebaseIndexEmbedderBaseUrl: e.target.value, - }) - } - style={{ width: "100%" }}> - {t("settings:codeIndex.ollamaUrlLabel")} - -
- - )} +
+ {t("settings:codeIndex.providerLabel")} +
+
+ +
+ {codebaseIndexConfig?.codebaseIndexEmbedderProvider === "openai" && (
setApiConfigurationField("codeIndexQdrantApiKey", e.target.value)} + value={apiConfiguration.codeIndexOpenAiKey || ""} + onInput={(e: any) => setApiConfigurationField("codeIndexOpenAiKey", e.target.value)} style={{ width: "100%" }}> - {t("settings:codeIndex.qdrantKeyLabel")} + {t("settings:codeIndex.openaiKeyLabel")}
+ )} -
- - {indexingStatus.systemStatus} - {indexingStatus.message ? ` - ${indexingStatus.message}` : ""} -
+
+ {t("settings:codeIndex.modelLabel")} +
+
+ +
- {indexingStatus.systemStatus === "Indexing" && ( -
- - - + {codebaseIndexConfig?.codebaseIndexEmbedderProvider === "ollama" && ( + <> +
+ + setCachedStateField("codebaseIndexConfig", { + ...codebaseIndexConfig, + codebaseIndexEmbedderBaseUrl: e.target.value, + }) + } + style={{ width: "100%" }}> + {t("settings:codeIndex.ollamaUrlLabel")} +
- )} + + )} -
- {(indexingStatus.systemStatus === "Error" || indexingStatus.systemStatus === "Standby") && ( - vscode.postMessage({ type: "startIndexing" })} - disabled={ - !areSettingsCommitted || - !validateIndexingConfig(codebaseIndexConfig, apiConfiguration) - }> - {t("settings:codeIndex.startIndexingButton")} - - )} - {(indexingStatus.systemStatus === "Indexed" || indexingStatus.systemStatus === "Error") && ( - - - - {t("settings:codeIndex.clearIndexDataButton")} - - - - - - {t("settings:codeIndex.clearDataDialog.title")} - - - {t("settings:codeIndex.clearDataDialog.description")} - - - - - {t("settings:codeIndex.clearDataDialog.cancelButton")} - - vscode.postMessage({ type: "clearIndexData" })}> - {t("settings:codeIndex.clearDataDialog.confirmButton")} - - - - - )} -
+
+ + setCachedStateField("codebaseIndexConfig", { + ...codebaseIndexConfig, + codebaseIndexQdrantUrl: e.target.value, + }) + } + style={{ width: "100%" }}> + {t("settings:codeIndex.qdrantUrlLabel")} + +
+ +
+ setApiConfigurationField("codeIndexQdrantApiKey", e.target.value)} + style={{ width: "100%" }}> + {t("settings:codeIndex.qdrantKeyLabel")} +
- )} - + +
+ {(indexingStatus.systemStatus === "Error" || indexingStatus.systemStatus === "Standby") && ( + vscode.postMessage({ type: "startIndexing" })} + disabled={ + !areSettingsCommitted || + !validateIndexingConfig(codebaseIndexConfig, apiConfiguration) + }> + {t("settings:codeIndex.startIndexingButton")} + + )} + {(indexingStatus.systemStatus === "Indexed" || indexingStatus.systemStatus === "Error") && ( + + + + {t("settings:codeIndex.clearIndexDataButton")} + + + + + + {t("settings:codeIndex.clearDataDialog.title")} + + + {t("settings:codeIndex.clearDataDialog.description")} + + + + + {t("settings:codeIndex.clearDataDialog.cancelButton")} + + vscode.postMessage({ type: "clearIndexData" })}> + {t("settings:codeIndex.clearDataDialog.confirmButton")} + + + + + )} +
+
+ )} ) } diff --git a/webview-ui/src/components/settings/ExperimentalSettings.tsx b/webview-ui/src/components/settings/ExperimentalSettings.tsx index 4094a10de6..1a0819f8d7 100644 --- a/webview-ui/src/components/settings/ExperimentalSettings.tsx +++ b/webview-ui/src/components/settings/ExperimentalSettings.tsx @@ -13,6 +13,10 @@ import { Section } from "./Section" import { ExperimentalFeature } from "./ExperimentalFeature" import { Button, Select, SelectContent, SelectItem, SelectTrigger, SelectValue, Slider } from "@/components/ui/" import { VSCodeTextArea } from "@vscode/webview-ui-toolkit/react" +import { CodebaseIndexConfig, CodebaseIndexModels, ProviderSettings } from "../../../../src/schemas" +import { CodeIndexSettings } from "./CodeIndexSettings" +import { ExtensionStateContextType } from '../../context/ExtensionStateContext' + const SUMMARY_PROMPT = `\ Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions. 
@@ -58,12 +62,18 @@ type ExperimentalSettingsProps = HTMLAttributes & { experiments: Record setExperimentEnabled: SetExperimentEnabled autoCondenseContextPercent: number - setCachedStateField: SetCachedStateField<"autoCondenseContextPercent"> + setCachedStateField: SetCachedStateField<"autoCondenseContextPercent" | "codebaseIndexConfig"> condensingApiConfigId?: string setCondensingApiConfigId: (value: string) => void customCondensingPrompt?: string setCustomCondensingPrompt: (value: string) => void listApiConfigMeta: any[] + // CodeIndexSettings props + codebaseIndexModels: CodebaseIndexModels | undefined + codebaseIndexConfig: CodebaseIndexConfig | undefined + apiConfiguration: ProviderSettings + setApiConfigurationField: (field: K, value: ProviderSettings[K]) => void + areSettingsCommitted: boolean } export const ExperimentalSettings = ({ @@ -76,6 +86,11 @@ export const ExperimentalSettings = ({ customCondensingPrompt, setCustomCondensingPrompt, listApiConfigMeta, + codebaseIndexModels, + codebaseIndexConfig, + apiConfiguration, + setApiConfigurationField, + areSettingsCommitted, className, ...props }: ExperimentalSettingsProps) => { @@ -211,6 +226,15 @@ export const ExperimentalSettings = ({
)} + + } + setApiConfigurationField={setApiConfigurationField} + areSettingsCommitted={areSettingsCommitted} + />
) } diff --git a/webview-ui/src/components/settings/SettingsView.tsx b/webview-ui/src/components/settings/SettingsView.tsx index 52f050b6e3..005bd23696 100644 --- a/webview-ui/src/components/settings/SettingsView.tsx +++ b/webview-ui/src/components/settings/SettingsView.tsx @@ -30,7 +30,6 @@ import { TelemetrySetting } from "@roo/shared/TelemetrySetting" import { ProviderSettings } from "@roo/shared/api" import { vscode } from "@/utils/vscode" -import { CodeIndexSettings } from "./CodeIndexSettings" import { ExtensionStateContextType, useExtensionState } from "@/context/ExtensionStateContext" import { AlertDialog, @@ -85,7 +84,6 @@ const sectionNames = [ "contextManagement", "terminal", "experimental", - "codeIndex", "language", "about", ] as const @@ -372,7 +370,6 @@ const SettingsView = forwardRef(({ onDone, t { id: "contextManagement", icon: Database }, { id: "terminal", icon: SquareTerminal }, { id: "experimental", icon: FlaskConical }, - { id: "codeIndex", icon: Database }, { id: "language", icon: Globe }, { id: "about", icon: Info }, ], @@ -644,23 +641,16 @@ const SettingsView = forwardRef(({ onDone, t setExperimentEnabled={setExperimentEnabled} experiments={experiments} autoCondenseContextPercent={autoCondenseContextPercent} - setCachedStateField={setCachedStateField} condensingApiConfigId={condensingApiConfigId} setCondensingApiConfigId={(value) => setCachedStateField("condensingApiConfigId", value)} customCondensingPrompt={customCondensingPrompt} setCustomCondensingPrompt={(value) => setCachedStateField("customCondensingPrompt", value)} listApiConfigMeta={listApiConfigMeta ?? 
[]} - /> - )} - - {/* CodeIndex Section */} - {activeTab === "codeIndex" && ( - )} diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index e53e87aee3..9340531c37 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -35,6 +35,7 @@ "codeIndex": { "title": "Codebase Indexing", "enableLabel": "Enable Codebase Indexing", + "enableDescription": "Enabling this feature will index your codebase. This might consume system resources and take some time depending on the size of your project.", "providerLabel": "Embeddings Provider", "selectProviderPlaceholder": "Select provider", "openaiProvider": "OpenAI", From c667d5aeaa11868f2e046ed49d69cdd12c047929 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 11:44:14 -0500 Subject: [PATCH 62/71] refactor: remove console logs from various components for cleaner output --- src/services/code-index/cache-manager.ts | 2 -- src/services/code-index/config-manager.ts | 5 ---- src/services/code-index/manager.ts | 16 ------------ src/services/code-index/orchestrator.ts | 22 ++-------------- .../processors/__tests__/file-watcher.test.ts | 10 ------- .../code-index/processors/file-watcher.ts | 26 ------------------- src/services/code-index/processors/scanner.ts | 6 ----- src/services/code-index/state-manager.ts | 11 -------- .../code-index/vector-store/qdrant-client.ts | 6 ----- 9 files changed, 2 insertions(+), 102 deletions(-) diff --git a/src/services/code-index/cache-manager.ts b/src/services/code-index/cache-manager.ts index cf7813d53a..f66f933a0b 100644 --- a/src/services/code-index/cache-manager.ts +++ b/src/services/code-index/cache-manager.ts @@ -37,7 +37,6 @@ export class CacheManager implements ICacheManager { const cacheData = await vscode.workspace.fs.readFile(this.cachePath) this.fileHashes = JSON.parse(cacheData.toString()) } catch (error) { - console.log("No cache file found or error reading cache, 
starting fresh") this.fileHashes = {} } } @@ -60,7 +59,6 @@ export class CacheManager implements ICacheManager { try { await vscode.workspace.fs.writeFile(this.cachePath, Buffer.from("{}")) this.fileHashes = {} - console.log("Cache file cleared successfully") } catch (error) { console.error("Failed to clear cache file:", error, this.cachePath) } diff --git a/src/services/code-index/config-manager.ts b/src/services/code-index/config-manager.ts index ee427c2368..e866c4c7a1 100644 --- a/src/services/code-index/config-manager.ts +++ b/src/services/code-index/config-manager.ts @@ -40,8 +40,6 @@ export class CodeIndexConfigManager { requiresRestart: boolean requiresClear: boolean }> { - console.log("[CodeIndexConfigManager] Loading configuration...") - const previousConfigSnapshot: PreviousConfigSnapshot = { enabled: this.isEnabled, configured: this.isConfigured(), @@ -95,9 +93,6 @@ export class CodeIndexConfigManager { const currentDimension = currentModelId ? getModelDimension(this.embedderProvider, currentModelId) : undefined const requiresClear = previousDimension !== undefined && currentDimension !== undefined && previousDimension !== currentDimension - console.log( - `[CodeIndexConfigManager] Dimension check: Previous=${previousDimension}, Current=${currentDimension}, Changed=${requiresClear}`, - ) return { configSnapshot: previousConfigSnapshot, diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index 30864f11e6..6af05f1e18 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -104,7 +104,6 @@ export class CodeIndexManager { // 2. Check if feature is enabled if (!this.isFeatureEnabled) { - console.log("[CodeIndexManager] Feature disabled - skipping service initialization") if (this._orchestrator) { this._orchestrator.stopWatcher() } @@ -119,17 +118,11 @@ export class CodeIndexManager { // 4. 
Determine if Core Services Need Recreation const needsServiceRecreation = !this._serviceFactory || requiresRestart - console.log( - `[CodeIndexManager] ${needsServiceRecreation ? "Initial setup or restart required" : "Configuration loaded, no full re-initialization needed"}`, - ) if (needsServiceRecreation) { - console.log("[CodeIndexManager] (Re)initializing core services...") - // Stop watcher if it exists if (this._orchestrator) { this.stopWatcher() - console.log("[CodeIndexManager] Stopped existing watcher") } // (Re)Initialize service factory @@ -175,13 +168,10 @@ export class CodeIndexManager { embedder, vectorStore, ) - - console.log("[CodeIndexManager] Core services (re)initialized") } // 5. Handle Data Clearing if (requiresClear) { - console.log("[CodeIndexManager] Configuration requires clearing data") if (this._orchestrator) { await this._orchestrator.clearIndexData() } @@ -197,7 +187,6 @@ export class CodeIndexManager { (needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing")) if (shouldStartOrRestartIndexing) { - console.log("[CodeIndexManager] Starting/restarting indexing due to configuration changes") this._orchestrator?.startIndexing() // This method is async, but we don't await it here } @@ -210,7 +199,6 @@ export class CodeIndexManager { public async startIndexing(): Promise { if (!this.isFeatureEnabled) { - console.log("[CodeIndexManager] Feature disabled - skipping startIndexing") return } this.assertInitialized() @@ -222,7 +210,6 @@ export class CodeIndexManager { */ public stopWatcher(): void { if (!this.isFeatureEnabled) { - console.log("[CodeIndexManager] Feature disabled - skipping stopWatcher") return } if (this._orchestrator) { @@ -238,7 +225,6 @@ export class CodeIndexManager { this.stopWatcher() } this._stateManager.dispose() - console.log(`[CodeIndexManager] Disposed for workspace: ${this.workspacePath}`) } /** @@ -247,7 +233,6 @@ export class CodeIndexManager { */ public async clearIndexData(): 
Promise { if (!this.isFeatureEnabled) { - console.log("[CodeIndexManager] Feature disabled - skipping clearIndexData") return } this.assertInitialized() @@ -263,7 +248,6 @@ export class CodeIndexManager { public async searchIndex(query: string, directoryPrefix?: string): Promise { if (!this.isFeatureEnabled) { - console.log("[CodeIndexManager] Feature disabled - returning empty search results") return [] } this.assertInitialized() diff --git a/src/services/code-index/orchestrator.ts b/src/services/code-index/orchestrator.ts index e63debe3b5..5784f0dcfc 100644 --- a/src/services/code-index/orchestrator.ts +++ b/src/services/code-index/orchestrator.ts @@ -37,9 +37,7 @@ export class CodeIndexOrchestrator { await this.fileWatcher.initialize() this._fileWatcherSubscriptions = [ - this.fileWatcher.onDidStartBatchProcessing((filePaths: string[]) => { - console.log(`[CodeIndexOrchestrator] Batch processing started for ${filePaths.length} files`) - }), + this.fileWatcher.onDidStartBatchProcessing((filePaths: string[]) => {}), this.fileWatcher.onBatchProgressUpdate(({ processedInBatch, totalInBatch, currentFile }) => { if (totalInBatch > 0 && this.stateManager.state !== "Indexing") { this.stateManager.setSystemState("Indexing", "Processing file changes...") @@ -72,14 +70,9 @@ export class CodeIndexOrchestrator { const errorCount = summary.processedFiles.filter( (f: { status: string }) => f.status === "error" || f.status === "local_error", ).length - console.log( - `[CodeIndexOrchestrator] Batch completed: ${successCount} succeeded, ${errorCount} failed`, - ) } }), ] - - console.log("[CodeIndexOrchestrator] File watcher started.") } catch (error) { console.error("[CodeIndexOrchestrator] Failed to start file watcher:", error) throw error @@ -120,7 +113,6 @@ export class CodeIndexOrchestrator { if (collectionCreated) { await this.cacheManager.clearCacheFile() - console.log("[CodeIndexOrchestrator] Qdrant collection created; cache cleared.") } 
this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...") @@ -156,13 +148,9 @@ export class CodeIndexOrchestrator { const { stats } = result - console.log( - `[CodeIndexOrchestrator] Initial scan complete. Processed Files: ${stats.processed}, Skipped Files: ${stats.skipped}, Blocks Found: ${result.totalBlockCount}, Blocks Indexed: ${cumulativeBlocksIndexed}`, - ) - await this._startWatcher() - this.stateManager.setSystemState("Indexed", "Workspace scan and watcher started.") + this.stateManager.setSystemState("Indexed", "File watcher started.") } catch (error: any) { console.error("[CodeIndexOrchestrator] Error during indexing:", error) try { @@ -172,7 +160,6 @@ export class CodeIndexOrchestrator { } await this.cacheManager.clearCacheFile() - console.log("[CodeIndexOrchestrator] Cleared cache due to scan error.") this.stateManager.setSystemState("Error", `Failed during initial scan: ${error.message || "Unknown error"}`) this.stopWatcher() @@ -188,7 +175,6 @@ export class CodeIndexOrchestrator { this.fileWatcher.dispose() this._fileWatcherSubscriptions.forEach((sub) => sub.dispose()) this._fileWatcherSubscriptions = [] - console.log("[CodeIndexOrchestrator] File watcher stopped.") if (this.stateManager.state !== "Error") { this.stateManager.setSystemState("Standby", "File watcher stopped.") @@ -201,7 +187,6 @@ export class CodeIndexOrchestrator { * and resetting the cache file. 
*/ public async clearIndexData(): Promise { - console.log("[CodeIndexOrchestrator] Clearing code index data...") this._isProcessing = true try { @@ -210,7 +195,6 @@ export class CodeIndexOrchestrator { try { if (this.configManager.isFeatureConfigured) { await this.vectorStore.deleteCollection() - console.log("[CodeIndexOrchestrator] Vector collection deleted.") } else { console.warn("[CodeIndexOrchestrator] Service not configured, skipping vector collection clear.") } @@ -220,11 +204,9 @@ export class CodeIndexOrchestrator { } await this.cacheManager.clearCacheFile() - console.log("[CodeIndexOrchestrator] Cache cleared.") if (this.stateManager.state !== "Error") { this.stateManager.setSystemState("Standby", "Index data cleared successfully.") - console.log("[CodeIndexOrchestrator] Code index data cleared successfully.") } } finally { this._isProcessing = false diff --git a/src/services/code-index/processors/__tests__/file-watcher.test.ts b/src/services/code-index/processors/__tests__/file-watcher.test.ts index 85418f4698..4fea7956f8 100644 --- a/src/services/code-index/processors/__tests__/file-watcher.test.ts +++ b/src/services/code-index/processors/__tests__/file-watcher.test.ts @@ -9,21 +9,11 @@ import { createHash } from "crypto" async function waitForFileProcessingToFinish(fileWatcher: FileWatcher, filePath: string): Promise { return new Promise((resolve) => { const listener = fileWatcher.onDidFinishBatchProcessing((summary) => { - console.log( - `[DEBUG TestHelper] waitForFileProcessingToFinish received onDidFinishBatchProcessing with summary for paths:`, - summary.processedFiles.map((f) => f.path), - ) const matchingFile = summary.processedFiles.find((result) => result.path === filePath) if (matchingFile) { - console.log( - `[DEBUG TestHelper] waitForFileProcessingToFinish found matching path "${filePath}" and will resolve.`, - ) listener.dispose() resolve() } else { - console.log( - `[DEBUG TestHelper] waitForFileProcessingToFinish: path "${filePath}" not 
found in this batch.`, - ) } }) }) diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index e76e342271..61fad4e62b 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -117,11 +117,6 @@ export class FileWatcher implements IFileWatcher { * @param uri URI of the created file */ private async handleFileCreated(uri: vscode.Uri): Promise { - console.log(`[FileWatcher] File CREATED: ${uri.fsPath}`) - this.accumulatedEvents.set(uri.fsPath, { uri, type: "create" }) - console.log( - `[FileWatcher] Accumulated event: create for ${uri.fsPath}. Total accumulated: ${this.accumulatedEvents.size}`, - ) this.scheduleBatchProcessing() } @@ -130,11 +125,6 @@ export class FileWatcher implements IFileWatcher { * @param uri URI of the changed file */ private async handleFileChanged(uri: vscode.Uri): Promise { - console.log(`[FileWatcher] File CHANGED: ${uri.fsPath}`) - this.accumulatedEvents.set(uri.fsPath, { uri, type: "change" }) - console.log( - `[FileWatcher] Accumulated event: change for ${uri.fsPath}. Total accumulated: ${this.accumulatedEvents.size}`, - ) this.scheduleBatchProcessing() } @@ -143,11 +133,6 @@ export class FileWatcher implements IFileWatcher { * @param uri URI of the deleted file */ private async handleFileDeleted(uri: vscode.Uri): Promise { - console.log(`[FileWatcher] File DELETED: ${uri.fsPath}`) - this.accumulatedEvents.set(uri.fsPath, { uri, type: "delete" }) - console.log( - `[FileWatcher] Accumulated event: delete for ${uri.fsPath}. 
Total accumulated: ${this.accumulatedEvents.size}`, - ) this.scheduleBatchProcessing() } @@ -166,7 +151,6 @@ export class FileWatcher implements IFileWatcher { */ private async triggerBatchProcessing(): Promise { if (this.accumulatedEvents.size === 0) { - console.log("[FileWatcher] No accumulated events to process") return } @@ -175,7 +159,6 @@ export class FileWatcher implements IFileWatcher { const filePathsInBatch = Array.from(eventsToProcess.keys()) this._onDidStartBatchProcessing.fire(filePathsInBatch) - console.log(`[FileWatcher] Triggered batch processing for ${filePathsInBatch.length} files`) await this.processBatch(eventsToProcess) } @@ -442,10 +425,6 @@ export class FileWatcher implements IFileWatcher { ) // Finalize - console.log("[DEBUG FileWatcher] Firing _onDidFinishBatchProcessing with summary:", { - processedFiles: batchResults.map((r) => r.path), - batchError: !!overallBatchError, - }) this._onDidFinishBatchProcessing.fire({ processedFiles: batchResults, batchError: overallBatchError, @@ -470,8 +449,6 @@ export class FileWatcher implements IFileWatcher { * @returns Promise resolving to processing result */ async processFile(filePath: string): Promise { - console.log(`[FileWatcher] Processing file: ${filePath}`) - try { // Check if file should be ignored const relativeFilePath = generateRelativeFilePath(filePath) @@ -479,7 +456,6 @@ export class FileWatcher implements IFileWatcher { !this.ignoreController.validateAccess(filePath) || (this.ignoreInstance && this.ignoreInstance.ignores(relativeFilePath)) ) { - console.log(`[FileWatcher] processFile: SKIPPED (ignored by .rooignore or .gitignore) - ${filePath}`) return { path: filePath, status: "skipped" as const, @@ -490,7 +466,6 @@ export class FileWatcher implements IFileWatcher { // Check file size const fileStat = await vscode.workspace.fs.stat(vscode.Uri.file(filePath)) if (fileStat.size > MAX_FILE_SIZE_BYTES) { - console.log(`[FileWatcher] processFile: SKIPPED (too large) - ${filePath}`) return { 
path: filePath, status: "skipped" as const, @@ -507,7 +482,6 @@ export class FileWatcher implements IFileWatcher { // Check if file has changed if (this.cacheManager.getHash(filePath) === newHash) { - console.log(`[FileWatcher] processFile: SKIPPED (not changed) - ${filePath}`) return { path: filePath, status: "skipped" as const, diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts index 7bd79e7142..1233b0e0d5 100644 --- a/src/services/code-index/processors/scanner.ts +++ b/src/services/code-index/processors/scanner.ts @@ -223,7 +223,6 @@ export class DirectoryScanner implements IDirectoryScanner { // File was deleted or is no longer supported/indexed if (this.qdrantClient) { try { - console.log(`[DirectoryScanner] Deleting points for deleted file: ${cachedFilePath}`) await this.qdrantClient.deletePointsByFilePath(cachedFilePath) await this.cacheManager.deleteHash(cachedFilePath) } catch (error) { @@ -275,9 +274,6 @@ export class DirectoryScanner implements IDirectoryScanner { .map((info) => info.filePath), ), ] - console.log( - `[DirectoryScanner] Deleting existing points for ${uniqueFilePaths.length} file(s) in batch...`, - ) if (uniqueFilePaths.length > 0) { try { await this.qdrantClient.deletePointsByMultipleFilePaths(uniqueFilePaths) @@ -323,14 +319,12 @@ export class DirectoryScanner implements IDirectoryScanner { await this.cacheManager.updateHash(fileInfo.filePath, fileInfo.fileHash) } success = true - console.log(`[DirectoryScanner] Successfully processed batch of ${batchBlocks.length} blocks.`) } catch (error) { lastError = error as Error console.error(`[DirectoryScanner] Error processing batch (attempt ${attempts}):`, error) if (attempts < MAX_BATCH_RETRIES) { const delay = INITIAL_RETRY_DELAY_MS * Math.pow(2, attempts - 1) - console.log(`[DirectoryScanner] Retrying batch in ${delay}ms...`) await new Promise((resolve) => setTimeout(resolve, delay)) } } diff --git 
a/src/services/code-index/state-manager.ts b/src/services/code-index/state-manager.ts index 66cc2bcda3..90257fdfb1 100644 --- a/src/services/code-index/state-manager.ts +++ b/src/services/code-index/state-manager.ts @@ -52,11 +52,6 @@ export class CodeIndexStateManager { } this._progressEmitter.fire(this.getCurrentStatus()) - console.log( - `[CodeIndexStateManager] System state changed to: ${this._systemStatus}${ - message ? ` (${message})` : "" - }`, - ) } } @@ -79,9 +74,6 @@ export class CodeIndexStateManager { // Only fire update if status, message or progress actually changed if (oldStatus !== this._systemStatus || oldMessage !== this._statusMessage || progressChanged) { this._progressEmitter.fire(this.getCurrentStatus()) - console.log( - `[CodeIndexStateManager] Block Progress: ${message} (${this._processedItems}/${this._totalItems})`, - ) } } } @@ -113,9 +105,6 @@ export class CodeIndexStateManager { if (oldStatus !== this._systemStatus || oldMessage !== this._statusMessage || progressChanged) { this._progressEmitter.fire(this.getCurrentStatus()) - console.log( - `[CodeIndexStateManager] File Queue Progress: ${message} (${this._processedItems}/${this._totalItems})`, - ) } } } diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index 667d2f2176..38d40d943e 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -66,9 +66,6 @@ export class QdrantVectorStore implements IVectorStore { field_name: `pathSegments.${i}`, field_schema: "keyword", }) - console.log( - `[QdrantVectorStore] Ensured payload index for pathSegments.${i} on ${this.collectionName}`, - ) } catch (indexError) { console.warn( `[QdrantVectorStore] Could not create payload index for pathSegments.${i} on ${this.collectionName}. 
It might already exist or there was an issue.`, @@ -236,9 +233,6 @@ export class QdrantVectorStore implements IVectorStore { // Check if collection exists before attempting deletion to avoid errors if (await this.collectionExists()) { await this.client.deleteCollection(this.collectionName) - console.log(`[QdrantVectorStore] Collection ${this.collectionName} deleted.`) - } else { - console.log(`[QdrantVectorStore] Collection ${this.collectionName} does not exist, skipping deletion.`) } } catch (error) { console.error(`[QdrantVectorStore] Failed to delete collection ${this.collectionName}:`, error) From 8a6bad52d2286bb8402bd0fa15d7c2af20d734a9 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 12:13:16 -0500 Subject: [PATCH 63/71] feat: enhance capabilities section and codebase search tool description --- src/core/prompts/sections/capabilities.ts | 12 +++++++++++- src/core/prompts/system.ts | 2 +- src/core/prompts/tools/codebase-search.ts | 6 +++--- src/core/prompts/tools/index.ts | 5 ++++- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/core/prompts/sections/capabilities.ts b/src/core/prompts/sections/capabilities.ts index 0be797db4e..ef5da02b29 100644 --- a/src/core/prompts/sections/capabilities.ts +++ b/src/core/prompts/sections/capabilities.ts @@ -1,11 +1,13 @@ import { DiffStrategy } from "../../../shared/tools" import { McpHub } from "../../../services/mcp/McpHub" +import { CodeIndexManager } from "../../../services/code-index/manager" export function getCapabilitiesSection( cwd: string, supportsComputerUse: boolean, mcpHub?: McpHub, diffStrategy?: DiffStrategy, + codeIndexManager?: CodeIndexManager, ): string { return `==== @@ -15,7 +17,15 @@ CAPABILITIES supportsComputerUse ? ", use the browser" : "" }, read and write files, and ask follow-up questions. 
These tools help you effectively accomplish a wide range of tasks, such as writing code, making edits or improvements to existing files, understanding the current state of a project, performing system operations, and much more. - When the user initially gives you a task, a recursive list of all filepaths in the current workspace directory ('${cwd}') will be included in environment_details. This provides an overview of the project's file structure, offering key insights into the project from directory/file names (how developers conceptualize and organize their code) and file extensions (the language used). This can also guide decision-making on which files to explore further. If you need to further explore directories such as outside the current workspace directory, you can use the list_files tool. If you pass 'true' for the recursive parameter, it will list files recursively. Otherwise, it will list files at the top level, which is better suited for generic directories where you don't necessarily need the nested structure, like the Desktop. -- You can use search_files to perform regex searches across files in a specified directory, outputting context-rich results that include surrounding lines. This is particularly useful for understanding code patterns, finding specific implementations, or identifying areas that need refactoring. +- You can use search_files to perform regex searches across files in a specified directory, outputting context-rich results that include surrounding lines. This is particularly useful for understanding code patterns, finding specific implementations, or identifying areas that need refactoring.${ + codeIndexManager && + codeIndexManager.isFeatureEnabled && + codeIndexManager.isFeatureConfigured && + codeIndexManager.isInitialized + ? ` +- You can use the \`codebase_search\` tool to perform semantic searches across your entire codebase. 
This tool is powerful for finding functionally relevant code, even if you don't know the exact keywords or file names. It's particularly useful for understanding how features are implemented across multiple files, discovering usages of a particular API, or finding code examples related to a concept. This capability relies on a pre-built index of your code.` + : "" + } - You can use the list_code_definition_names tool to get an overview of source code definitions for all files at the top level of a specified directory. This can be particularly useful when you need to understand the broader context and relationships between certain parts of the code. You may need to call this tool multiple times to understand various parts of the codebase related to the task. - For example, when asked to make edits or improvements you might analyze the file structure in the initial environment_details to get an overview of the project, then use list_code_definition_names to get further insight using source code definitions for files located in relevant directories, then read_file to examine the contents of relevant files, analyze the code and suggest improvements or make necessary edits, then use ${diffStrategy ? "the apply_diff or write_to_file" : "the write_to_file"} tool to apply the changes. If you refactored code that could affect other parts of the codebase, you could use search_files to ensure you update other files as needed. - You can use the execute_command tool to run commands on the user's computer whenever you feel it can help accomplish the user's task. When you need to execute a CLI command, you must provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, since they are more flexible and easier to run. Interactive and long-running commands are allowed, since the commands are run in the user's VSCode terminal. 
The user may keep commands running in the background and you will be kept updated on their status along the way. Each command you execute is run in a new terminal instance.${ diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index 3bb6c58df4..96221ae91f 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -88,7 +88,7 @@ ${getToolUseGuidelinesSection()} ${mcpServersSection} -${getCapabilitiesSection(cwd, supportsComputerUse, mcpHub, effectiveDiffStrategy)} +${getCapabilitiesSection(cwd, supportsComputerUse, mcpHub, effectiveDiffStrategy, codeIndexManager)} ${modesSection} diff --git a/src/core/prompts/tools/codebase-search.ts b/src/core/prompts/tools/codebase-search.ts index 182d8823c2..81eaacae85 100644 --- a/src/core/prompts/tools/codebase-search.ts +++ b/src/core/prompts/tools/codebase-search.ts @@ -1,9 +1,9 @@ export function getCodebaseSearchDescription(): string { return `## codebase_search -Description: Search the codebase for relevant files based on a query. Use this when the user asks a question about the codebase that requires finding specific files or code snippets. You can optionally specify a path to a directory to search in, the results will be filtered to only include files within that directory, this is useful for searching for files related to a specific project or module. +Description: Find files most relevant to the search query.\nThis is a semantic search tool, so the query should ask for something semantically matching what is needed.\nIf it makes sense to only search in a particular directory, please specify it in the path parameter.\nUnless there is a clear reason to use your own search query, please just reuse the user's exact query with their wording.\nTheir exact wording/phrasing can often be helpful for the semantic search query. Keeping the same exact question format can also be helpful. Parameters: -- query: (required) The natural language query to search for. 
-- path: (optional) The path to the directory to search in relative to the current working directory. Defaults to the current working directory. +- query: (required) The search query to find relevant code. You should reuse the user's exact query/most recent message with their wording unless there is a clear reason not to. +- path: (optional) The path to the directory to search in relative to the current working directory. This parameter should only be a directory path, file paths are not supported. Defaults to the current working directory. Usage: Your natural language query here diff --git a/src/core/prompts/tools/index.ts b/src/core/prompts/tools/index.ts index dbfc471f4a..4b3f796919 100644 --- a/src/core/prompts/tools/index.ts +++ b/src/core/prompts/tools/index.ts @@ -94,7 +94,10 @@ export function getToolDescriptionsForMode( ALWAYS_AVAILABLE_TOOLS.forEach((tool) => tools.add(tool)) // Conditionally exclude codebase_search if feature is disabled or not configured - if (!codeIndexManager || !(codeIndexManager.isFeatureEnabled && codeIndexManager.isFeatureConfigured)) { + if ( + !codeIndexManager || + !(codeIndexManager.isFeatureEnabled && codeIndexManager.isFeatureConfigured && codeIndexManager.isInitialized) + ) { tools.delete("codebase_search") } From 9869a09008fcf8f195dbe296a917b511a476eb9f Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 13:21:42 -0500 Subject: [PATCH 64/71] feat: add code indexing localization for multiple languages --- webview-ui/src/i18n/locales/ca/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/de/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/es/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/fr/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/hi/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/it/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/ja/settings.json | 23 +++++++++++++++++++ 
webview-ui/src/i18n/locales/ko/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/nl/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/pl/settings.json | 23 +++++++++++++++++++ .../src/i18n/locales/pt-BR/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/ru/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/tr/settings.json | 23 +++++++++++++++++++ webview-ui/src/i18n/locales/vi/settings.json | 23 +++++++++++++++++++ .../src/i18n/locales/zh-CN/settings.json | 23 +++++++++++++++++++ .../src/i18n/locales/zh-TW/settings.json | 23 +++++++++++++++++++ 16 files changed, 368 insertions(+) diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index e252e1ba9c..e7ee54172c 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -32,6 +32,29 @@ "language": "Idioma", "about": "Sobre Roo Code" }, + "codeIndex": { + "title": "Indexació de codi", + "enableLabel": "Habilitar indexació de codi", + "enableDescription": "Habilitar aquesta característica indexarà la vostra base de codi. Això pot consumir recursos del sistema i trigar un temps depenent de la mida del vostre projecte.", + "providerLabel": "Proveïdor d'embeddings", + "selectProviderPlaceholder": "Seleccionar proveïdor", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "Clau OpenAI:", + "modelLabel": "Model", + "selectModelPlaceholder": "Seleccionar model", + "ollamaUrlLabel": "URL d'Ollama:", + "qdrantUrlLabel": "URL de Qdrant", + "qdrantKeyLabel": "Clau de Qdrant:", + "startIndexingButton": "Iniciar indexació", + "clearIndexDataButton": "Esborrar dades d'índex", + "clearDataDialog": { + "title": "Esteu segur?", + "description": "Aquesta acció no es pot desfer. 
Eliminarà permanentment les dades d'índex de la vostra base de codi.", + "cancelButton": "Cancel·lar", + "confirmButton": "Esborrar dades" + } + }, "autoApprove": { "description": "Permet que Roo realitzi operacions automàticament sense requerir aprovació. Activeu aquesta configuració només si confieu plenament en la IA i enteneu els riscos de seguretat associats.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index 57307ab6ef..aa319793bb 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -32,6 +32,29 @@ "language": "Sprache", "about": "Über Roo Code" }, + "codeIndex": { + "title": "Codebase-Indexierung", + "enableLabel": "Codebase-Indexierung aktivieren", + "enableDescription": "Die Aktivierung dieser Funktion wird Ihre Codebase indexieren. Dies kann Systemressourcen verbrauchen und je nach Größe Ihres Projekts einige Zeit in Anspruch nehmen.", + "providerLabel": "Embeddings-Anbieter", + "selectProviderPlaceholder": "Anbieter auswählen", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "OpenAI-Schlüssel:", + "modelLabel": "Modell", + "selectModelPlaceholder": "Modell auswählen", + "ollamaUrlLabel": "Ollama-URL:", + "qdrantUrlLabel": "Qdrant-URL", + "qdrantKeyLabel": "Qdrant-Schlüssel:", + "startIndexingButton": "Indexierung starten", + "clearIndexDataButton": "Indexdaten löschen", + "clearDataDialog": { + "title": "Sind Sie sicher?", + "description": "Diese Aktion kann nicht rückgängig gemacht werden. Dies wird Ihre Codebase-Indexdaten dauerhaft löschen.", + "cancelButton": "Abbrechen", + "confirmButton": "Daten löschen" + } + }, "autoApprove": { "description": "Erlaubt Roo, Operationen automatisch ohne Genehmigung durchzuführen. 
Aktiviere diese Einstellungen nur, wenn du der KI vollständig vertraust und die damit verbundenen Sicherheitsrisiken verstehst.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index 85dea9070f..84a957d192 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -32,6 +32,29 @@ "language": "Idioma", "about": "Acerca de Roo Code" }, + "codeIndex": { + "title": "Indexación de código", + "enableLabel": "Habilitar indexación de código", + "enableDescription": "Habilitar esta función indexará tu base de código. Esto puede consumir recursos del sistema y llevar tiempo dependiendo del tamaño de tu proyecto.", + "providerLabel": "Proveedor de embeddings", + "selectProviderPlaceholder": "Seleccionar proveedor", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "Clave de OpenAI:", + "modelLabel": "Modelo", + "selectModelPlaceholder": "Seleccionar modelo", + "ollamaUrlLabel": "URL de Ollama:", + "qdrantUrlLabel": "URL de Qdrant", + "qdrantKeyLabel": "Clave de Qdrant:", + "startIndexingButton": "Iniciar indexación", + "clearIndexDataButton": "Borrar datos de índice", + "clearDataDialog": { + "title": "¿Estás seguro?", + "description": "Esta acción no se puede deshacer. Esto eliminará permanentemente los datos de índice de tu base de código.", + "cancelButton": "Cancelar", + "confirmButton": "Borrar datos" + } + }, "autoApprove": { "description": "Permitir que Roo realice operaciones automáticamente sin requerir aprobación. 
Habilite esta configuración solo si confía plenamente en la IA y comprende los riesgos de seguridad asociados.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 76ddb98230..b29d8fa43a 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -32,6 +32,29 @@ "language": "Langue", "about": "À propos de Roo Code" }, + "codeIndex": { + "title": "Indexation de la base de code", + "enableLabel": "Activer l'indexation de la base de code", + "enableDescription": "L'activation de cette fonctionnalité indexera votre base de code. Cela peut consommer des ressources système et prendre du temps selon la taille de votre projet.", + "providerLabel": "Fournisseur d'embeddings", + "selectProviderPlaceholder": "Sélectionner un fournisseur", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "Clé OpenAI :", + "modelLabel": "Modèle", + "selectModelPlaceholder": "Sélectionner un modèle", + "ollamaUrlLabel": "URL Ollama :", + "qdrantUrlLabel": "URL Qdrant", + "qdrantKeyLabel": "Clé Qdrant :", + "startIndexingButton": "Démarrer l'indexation", + "clearIndexDataButton": "Effacer les données d'index", + "clearDataDialog": { + "title": "Êtes-vous sûr ?", + "description": "Cette action ne peut pas être annulée. Cela supprimera définitivement les données d'index de votre base de code.", + "cancelButton": "Annuler", + "confirmButton": "Effacer les données" + } + }, "autoApprove": { "description": "Permettre à Roo d'effectuer automatiquement des opérations sans requérir d'approbation. 
Activez ces paramètres uniquement si vous faites entièrement confiance à l'IA et que vous comprenez les risques de sécurité associés.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index ee33485921..66628a8044 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -32,6 +32,29 @@ "language": "भाषा", "about": "परिचय" }, + "codeIndex": { + "title": "कोडबेस इंडेक्सिंग", + "enableLabel": "कोडबेस इंडेक्सिंग सक्षम करें", + "enableDescription": "इस सुविधा को सक्षम करने से आपका कोडबेस इंडेक्स किया जाएगा। यह आपके प्रोजेक्ट के आकार के आधार पर सिस्टम संसाधनों का उपयोग कर सकता है और कुछ समय ले सकता है।", + "providerLabel": "एम्बेडिंग प्रदाता", + "selectProviderPlaceholder": "प्रदाता चुनें", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "OpenAI कुंजी:", + "modelLabel": "मॉडल", + "selectModelPlaceholder": "मॉडल चुनें", + "ollamaUrlLabel": "Ollama URL:", + "qdrantUrlLabel": "Qdrant URL", + "qdrantKeyLabel": "Qdrant कुंजी:", + "startIndexingButton": "इंडेक्सिंग शुरू करें", + "clearIndexDataButton": "इंडेक्स डेटा साफ़ करें", + "clearDataDialog": { + "title": "क्या आप सुनिश्चित हैं?", + "description": "यह क्रिया पूर्ववत नहीं की जा सकती। यह आपके कोडबेस इंडेक्स डेटा को स्थायी रूप से हटा देगी।", + "cancelButton": "रद्द करें", + "confirmButton": "डेटा साफ़ करें" + } + }, "autoApprove": { "description": "Roo को अनुमोदन की आवश्यकता के बिना स्वचालित रूप से ऑपरेशन करने की अनुमति दें। इन सेटिंग्स को केवल तभी सक्षम करें जब आप AI पर पूरी तरह से भरोसा करते हों और संबंधित सुरक्षा जोखिमों को समझते हों।", "readOnly": { diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index 03b10281de..8dd5656e04 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -32,6 +32,29 @@ "language": "Lingua", "about": "Informazioni su Roo Code" }, 
+ "codeIndex": { + "title": "Indicizzazione del codice", + "enableLabel": "Abilita indicizzazione del codice", + "enableDescription": "L'attivazione di questa funzionalità indicizzerà il tuo codice. Questo potrebbe consumare risorse di sistema e richiedere tempo a seconda delle dimensioni del tuo progetto.", + "providerLabel": "Fornitore di embedding", + "selectProviderPlaceholder": "Seleziona fornitore", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "Chiave OpenAI:", + "modelLabel": "Modello", + "selectModelPlaceholder": "Seleziona modello", + "ollamaUrlLabel": "URL Ollama:", + "qdrantUrlLabel": "URL Qdrant", + "qdrantKeyLabel": "Chiave Qdrant:", + "startIndexingButton": "Avvia indicizzazione", + "clearIndexDataButton": "Cancella dati indice", + "clearDataDialog": { + "title": "Sei sicuro?", + "description": "Questa azione non può essere annullata. Eliminerà permanentemente i dati di indice del tuo codice.", + "cancelButton": "Annulla", + "confirmButton": "Cancella dati" + } + }, "autoApprove": { "description": "Permetti a Roo di eseguire automaticamente operazioni senza richiedere approvazione. 
Abilita queste impostazioni solo se ti fidi completamente dell'IA e comprendi i rischi di sicurezza associati.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index b4b4191b1a..d3a08ed5fa 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -32,6 +32,29 @@ "language": "言語", "about": "Roo Codeについて" }, + "codeIndex": { + "title": "コードベースのインデックス作成", + "enableLabel": "コードベースのインデックス作成を有効化", + "enableDescription": "この機能を有効にするとコードベースのインデックスが作成されます。これはシステムリソースを消費し、プロジェクトのサイズによっては時間がかかる場合があります。", + "providerLabel": "埋め込みプロバイダー", + "selectProviderPlaceholder": "プロバイダーを選択", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "OpenAIキー:", + "modelLabel": "モデル", + "selectModelPlaceholder": "モデルを選択", + "ollamaUrlLabel": "Ollama URL:", + "qdrantUrlLabel": "Qdrant URL", + "qdrantKeyLabel": "Qdrantキー:", + "startIndexingButton": "インデックス作成を開始", + "clearIndexDataButton": "インデックスデータをクリア", + "clearDataDialog": { + "title": "本当によろしいですか?", + "description": "この操作は元に戻せません。コードベースのインデックスデータが完全に削除されます。", + "cancelButton": "キャンセル", + "confirmButton": "データをクリア" + } + }, "autoApprove": { "description": "Rooが承認なしで自動的に操作を実行できるようにします。AIを完全に信頼し、関連するセキュリティリスクを理解している場合にのみ、これらの設定を有効にしてください。", "readOnly": { diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index d95c7b7ea9..52a35e1e76 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -32,6 +32,29 @@ "language": "언어", "about": "Roo Code 정보" }, + "codeIndex": { + "title": "코드베이스 인덱싱", + "enableLabel": "코드베이스 인덱싱 활성화", + "enableDescription": "이 기능을 활성화하면 코드베이스가 인덱싱됩니다. 
프로젝트 크기에 따라 시스템 리소스를 소비하고 시간이 걸릴 수 있습니다.", + "providerLabel": "임베딩 제공자", + "selectProviderPlaceholder": "제공자 선택", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "OpenAI 키:", + "modelLabel": "모델", + "selectModelPlaceholder": "모델 선택", + "ollamaUrlLabel": "Ollama URL:", + "qdrantUrlLabel": "Qdrant URL", + "qdrantKeyLabel": "Qdrant 키:", + "startIndexingButton": "인덱싱 시작", + "clearIndexDataButton": "인덱스 데이터 지우기", + "clearDataDialog": { + "title": "확실합니까?", + "description": "이 작업은 취소할 수 없습니다. 코드베이스 인덱스 데이터가 영구적으로 삭제됩니다.", + "cancelButton": "취소", + "confirmButton": "데이터 지우기" + } + }, "autoApprove": { "description": "Roo가 승인 없이 자동으로 작업을 수행할 수 있도록 허용합니다. AI를 완전히 신뢰하고 관련 보안 위험을 이해하는 경우에만 이러한 설정을 활성화하세요.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index dedae007ee..ca980d90b9 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -32,6 +32,29 @@ "language": "Taal", "about": "Over Roo Code" }, + "codeIndex": { + "title": "Codebase indexering", + "enableLabel": "Codebase indexering inschakelen", + "enableDescription": "Het inschakelen van deze functie zal je codebase indexeren. Dit kan systeembronnen verbruiken en enige tijd duren, afhankelijk van de grootte van je project.", + "providerLabel": "Embeddings provider", + "selectProviderPlaceholder": "Selecteer provider", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "OpenAI-sleutel:", + "modelLabel": "Model", + "selectModelPlaceholder": "Selecteer model", + "ollamaUrlLabel": "Ollama URL:", + "qdrantUrlLabel": "Qdrant URL", + "qdrantKeyLabel": "Qdrant-sleutel:", + "startIndexingButton": "Indexering starten", + "clearIndexDataButton": "Indexgegevens wissen", + "clearDataDialog": { + "title": "Weet je het zeker?", + "description": "Deze actie kan niet ongedaan worden gemaakt. 
Dit zal je codebase-indexgegevens permanent verwijderen.", + "cancelButton": "Annuleren", + "confirmButton": "Gegevens wissen" + } + }, "autoApprove": { "description": "Sta Roo toe om automatisch handelingen uit te voeren zonder goedkeuring. Schakel deze instellingen alleen in als je de AI volledig vertrouwt en de bijbehorende beveiligingsrisico's begrijpt.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index 3695a89545..cacab09af7 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -32,6 +32,29 @@ "language": "Język", "about": "O Roo Code" }, + "codeIndex": { + "title": "Indeksowanie kodu", + "enableLabel": "Włącz indeksowanie kodu", + "enableDescription": "Włączenie tej funkcji spowoduje zaindeksowanie Twojego kodu. Może to zużywać zasoby systemowe i zająć trochę czasu w zależności od rozmiaru projektu.", + "providerLabel": "Dostawca osadzania", + "selectProviderPlaceholder": "Wybierz dostawcę", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "Klucz OpenAI:", + "modelLabel": "Model", + "selectModelPlaceholder": "Wybierz model", + "ollamaUrlLabel": "URL Ollama:", + "qdrantUrlLabel": "URL Qdrant", + "qdrantKeyLabel": "Klucz Qdrant:", + "startIndexingButton": "Rozpocznij indeksowanie", + "clearIndexDataButton": "Wyczyść dane indeksu", + "clearDataDialog": { + "title": "Czy jesteś pewien?", + "description": "Tej akcji nie można cofnąć. Spowoduje to trwałe usunięcie danych indeksu Twojego kodu.", + "cancelButton": "Anuluj", + "confirmButton": "Wyczyść dane" + } + }, "autoApprove": { "description": "Pozwól Roo na automatyczne wykonywanie operacji bez wymagania zatwierdzenia. 
Włącz te ustawienia tylko jeśli w pełni ufasz AI i rozumiesz związane z tym zagrożenia bezpieczeństwa.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 0657c5f8ce..150c57f253 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -32,6 +32,29 @@ "language": "Idioma", "about": "Sobre" }, + "codeIndex": { + "title": "Indexação de Código", + "enableLabel": "Ativar Indexação de Código", + "enableDescription": "Ativar este recurso indexará sua base de código. Isso pode consumir recursos do sistema e levar algum tempo dependendo do tamanho do seu projeto.", + "providerLabel": "Provedor de Embeddings", + "selectProviderPlaceholder": "Selecionar provedor", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "Chave OpenAI:", + "modelLabel": "Modelo", + "selectModelPlaceholder": "Selecionar modelo", + "ollamaUrlLabel": "URL Ollama:", + "qdrantUrlLabel": "URL Qdrant", + "qdrantKeyLabel": "Chave Qdrant:", + "startIndexingButton": "Iniciar Indexação", + "clearIndexDataButton": "Limpar Dados de Índice", + "clearDataDialog": { + "title": "Tem certeza?", + "description": "Esta ação não pode ser desfeita. Isso excluirá permanentemente os dados de índice da sua base de código.", + "cancelButton": "Cancelar", + "confirmButton": "Limpar Dados" + } + }, "autoApprove": { "description": "Permitir que o Roo realize operações automaticamente sem exigir aprovação. 
Ative essas configurações apenas se confiar totalmente na IA e compreender os riscos de segurança associados.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 87002bcd1f..b20236d362 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -32,6 +32,29 @@ "language": "Язык", "about": "О Roo Code" }, + "codeIndex": { + "title": "Индексация кодовой базы", + "enableLabel": "Включить индексацию кодовой базы", + "enableDescription": "Включение этой функции проиндексирует вашу кодовую базу. Это может потреблять системные ресурсы и занять некоторое время в зависимости от размера вашего проекта.", + "providerLabel": "Провайдер эмбеддингов", + "selectProviderPlaceholder": "Выберите провайдера", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "Ключ OpenAI:", + "modelLabel": "Модель", + "selectModelPlaceholder": "Выберите модель", + "ollamaUrlLabel": "URL Ollama:", + "qdrantUrlLabel": "URL Qdrant", + "qdrantKeyLabel": "Ключ Qdrant:", + "startIndexingButton": "Начать индексацию", + "clearIndexDataButton": "Очистить данные индекса", + "clearDataDialog": { + "title": "Вы уверены?", + "description": "Это действие нельзя отменить. Оно навсегда удалит данные индекса вашей кодовой базы.", + "cancelButton": "Отмена", + "confirmButton": "Очистить данные" + } + }, "autoApprove": { "description": "Разрешить Roo автоматически выполнять операции без необходимости одобрения. 
Включайте эти параметры только если полностью доверяете ИИ и понимаете связанные с этим риски безопасности.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index a89c139899..ecd6165738 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -32,6 +32,29 @@ "language": "Dil", "about": "Roo Code Hakkında" }, + "codeIndex": { + "title": "Kod Tabanı İndeksleme", + "enableLabel": "Kod Tabanı İndekslemeyi Etkinleştir", + "enableDescription": "Bu özelliği etkinleştirmek kod tabanınızı indeksleyecektir. Bu, projenizin boyutuna bağlı olarak sistem kaynaklarını tüketebilir ve biraz zaman alabilir.", + "providerLabel": "Gömme Sağlayıcısı", + "selectProviderPlaceholder": "Sağlayıcı seç", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "OpenAI Anahtarı:", + "modelLabel": "Model", + "selectModelPlaceholder": "Model seç", + "ollamaUrlLabel": "Ollama URL:", + "qdrantUrlLabel": "Qdrant URL", + "qdrantKeyLabel": "Qdrant Anahtarı:", + "startIndexingButton": "İndekslemeyi Başlat", + "clearIndexDataButton": "İndeks Verilerini Temizle", + "clearDataDialog": { + "title": "Emin misiniz?", + "description": "Bu işlem geri alınamaz. Bu, kod tabanı indeks verilerinizi kalıcı olarak silecektir.", + "cancelButton": "İptal", + "confirmButton": "Verileri Temizle" + } + }, "autoApprove": { "description": "Roo'nun onay gerektirmeden otomatik olarak işlemler gerçekleştirmesine izin verin. 
Bu ayarları yalnızca yapay zekaya tamamen güveniyorsanız ve ilgili güvenlik risklerini anlıyorsanız etkinleştirin.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index 3cef200f7c..e9452c688b 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -32,6 +32,29 @@ "language": "Ngôn ngữ", "about": "Giới thiệu" }, + "codeIndex": { + "title": "Lập chỉ mục mã nguồn", + "enableLabel": "Bật lập chỉ mục mã nguồn", + "enableDescription": "Bật tính năng này sẽ lập chỉ mục cho mã nguồn của bạn. Điều này có thể tiêu tốn tài nguyên hệ thống và mất một khoảng thời gian tùy thuộc vào kích thước dự án của bạn.", + "providerLabel": "Nhà cung cấp nhúng", + "selectProviderPlaceholder": "Chọn nhà cung cấp", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "Khóa OpenAI:", + "modelLabel": "Mô hình", + "selectModelPlaceholder": "Chọn mô hình", + "ollamaUrlLabel": "URL Ollama:", + "qdrantUrlLabel": "URL Qdrant", + "qdrantKeyLabel": "Khóa Qdrant:", + "startIndexingButton": "Bắt đầu lập chỉ mục", + "clearIndexDataButton": "Xóa dữ liệu chỉ mục", + "clearDataDialog": { + "title": "Bạn có chắc không?", + "description": "Hành động này không thể hoàn tác. Điều này sẽ xóa vĩnh viễn dữ liệu chỉ mục mã nguồn của bạn.", + "cancelButton": "Hủy", + "confirmButton": "Xóa dữ liệu" + } + }, "autoApprove": { "description": "Cho phép Roo tự động thực hiện các hoạt động mà không cần phê duyệt. 
Chỉ bật những cài đặt này nếu bạn hoàn toàn tin tưởng AI và hiểu rõ các rủi ro bảo mật liên quan.", "readOnly": { diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index 6958c5f5cc..4df61e0847 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -32,6 +32,29 @@ "language": "语言", "about": "关于 Roo Code" }, + "codeIndex": { + "title": "代码库索引", + "enableLabel": "启用代码库索引", + "enableDescription": "启用此功能将为您的代码库建立索引。这可能会消耗系统资源,并且根据项目大小可能需要一些时间。", + "providerLabel": "嵌入提供商", + "selectProviderPlaceholder": "选择提供商", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "OpenAI 密钥:", + "modelLabel": "模型", + "selectModelPlaceholder": "选择模型", + "ollamaUrlLabel": "Ollama URL:", + "qdrantUrlLabel": "Qdrant URL", + "qdrantKeyLabel": "Qdrant 密钥:", + "startIndexingButton": "开始索引", + "clearIndexDataButton": "清除索引数据", + "clearDataDialog": { + "title": "确定要继续吗?", + "description": "此操作无法撤消。这将永久删除您的代码库索引数据。", + "cancelButton": "取消", + "confirmButton": "清除数据" + } + }, "autoApprove": { "description": "允许 Roo 自动执行操作而无需批准。只有在您完全信任 AI 并了解相关安全风险的情况下才启用这些设置。", "readOnly": { diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 3b1a5fe619..b03978c9e6 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -32,6 +32,29 @@ "language": "語言", "about": "關於 Roo Code" }, + "codeIndex": { + "title": "程式碼庫索引", + "enableLabel": "啟用程式碼庫索引", + "enableDescription": "啟用此功能將為您的程式碼庫建立索引。這可能會消耗系統資源,並且根據專案大小可能需要一些時間。", + "providerLabel": "嵌入提供者", + "selectProviderPlaceholder": "選擇提供者", + "openaiProvider": "OpenAI", + "ollamaProvider": "Ollama", + "openaiKeyLabel": "OpenAI 金鑰:", + "modelLabel": "模型", + "selectModelPlaceholder": "選擇模型", + "ollamaUrlLabel": "Ollama URL:", + "qdrantUrlLabel": "Qdrant URL", + "qdrantKeyLabel": "Qdrant 金鑰:", + 
"startIndexingButton": "開始索引", + "clearIndexDataButton": "清除索引資料", + "clearDataDialog": { + "title": "確定要繼續嗎?", + "description": "此操作無法復原。這將永久刪除您的程式碼庫索引資料。", + "cancelButton": "取消", + "confirmButton": "清除資料" + } + }, "autoApprove": { "description": "允許 Roo 無需核准即執行操作。僅在您完全信任 AI 並了解相關安全風險時啟用這些設定。", "readOnly": { From 2331cf38f8171a7b0dff824210dd626a7abbd95b Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 17:33:09 -0500 Subject: [PATCH 65/71] fix: correct indentation for CodeIndexSettings component in ExperimentalSettings --- .../settings/ExperimentalSettings.tsx | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/webview-ui/src/components/settings/ExperimentalSettings.tsx b/webview-ui/src/components/settings/ExperimentalSettings.tsx index 1a0819f8d7..36b5c86a97 100644 --- a/webview-ui/src/components/settings/ExperimentalSettings.tsx +++ b/webview-ui/src/components/settings/ExperimentalSettings.tsx @@ -15,8 +15,7 @@ import { Button, Select, SelectContent, SelectItem, SelectTrigger, SelectValue, import { VSCodeTextArea } from "@vscode/webview-ui-toolkit/react" import { CodebaseIndexConfig, CodebaseIndexModels, ProviderSettings } from "../../../../src/schemas" import { CodeIndexSettings } from "./CodeIndexSettings" -import { ExtensionStateContextType } from '../../context/ExtensionStateContext' - +import { ExtensionStateContextType } from "../../context/ExtensionStateContext" const SUMMARY_PROMPT = `\ Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions. 
@@ -225,16 +224,16 @@ export const ExperimentalSettings = ({ )} - - } - setApiConfigurationField={setApiConfigurationField} - areSettingsCommitted={areSettingsCommitted} - /> + } + setApiConfigurationField={setApiConfigurationField} + areSettingsCommitted={areSettingsCommitted} + /> + ) } From c1b33a6a10f39a8248d5a435c4a6723f2bfc0859 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 18:39:46 -0500 Subject: [PATCH 66/71] refactor: update unit tests to properly test current functionality --- .../__tests__/cache-manager.test.ts | 19 ++++++++++++++----- .../__tests__/config-manager.test.ts | 5 ++++- .../processors/__tests__/scanner.test.ts | 13 ++++++++++++- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/services/code-index/__tests__/cache-manager.test.ts b/src/services/code-index/__tests__/cache-manager.test.ts index a01696b5a3..3746d949d3 100644 --- a/src/services/code-index/__tests__/cache-manager.test.ts +++ b/src/services/code-index/__tests__/cache-manager.test.ts @@ -149,21 +149,30 @@ describe("CacheManager", () => { }) describe("clearCacheFile", () => { - it("should delete cache file and reset state", async () => { + it("should clear cache file and reset state", async () => { cacheManager.updateHash("test.ts", "hash") + + // Reset the mock to ensure writeFile succeeds for clearCacheFile + ;(vscode.workspace.fs.writeFile as jest.Mock).mockClear() + ;(vscode.workspace.fs.writeFile as jest.Mock).mockResolvedValue(undefined) + await cacheManager.clearCacheFile() - expect(vscode.workspace.fs.delete).toHaveBeenCalledWith(mockCachePath) + expect(vscode.workspace.fs.writeFile).toHaveBeenCalledWith(mockCachePath, Buffer.from("{}")) expect(cacheManager.getAllHashes()).toEqual({}) }) - it("should handle delete errors gracefully", async () => { + it("should handle clear errors gracefully", async () => { const consoleErrorSpy = jest.spyOn(console, "error").mockImplementation() - ;(vscode.workspace.fs.delete as 
jest.Mock).mockRejectedValue(new Error("Delete failed")) + ;(vscode.workspace.fs.writeFile as jest.Mock).mockRejectedValue(new Error("Save failed")) await cacheManager.clearCacheFile() - expect(consoleErrorSpy).toHaveBeenCalledWith("Failed to clear cache file:", expect.any(Error)) + expect(consoleErrorSpy).toHaveBeenCalledWith( + "Failed to clear cache file:", + expect.any(Error), + mockCachePath, + ) consoleErrorSpy.mockRestore() }) diff --git a/src/services/code-index/__tests__/config-manager.test.ts b/src/services/code-index/__tests__/config-manager.test.ts index e978816b94..87fc958003 100644 --- a/src/services/code-index/__tests__/config-manager.test.ts +++ b/src/services/code-index/__tests__/config-manager.test.ts @@ -37,8 +37,9 @@ describe("CodeIndexConfigManager", () => { modelId: undefined, openAiOptions: { openAiNativeApiKey: "" }, ollamaOptions: { ollamaBaseUrl: "" }, - qdrantUrl: "", + qdrantUrl: "http://localhost:6333", qdrantApiKey: "", + searchMinScore: 0.4, }) expect(result.requiresRestart).toBe(false) expect(result.requiresClear).toBe(false) @@ -70,6 +71,7 @@ describe("CodeIndexConfigManager", () => { ollamaOptions: { ollamaBaseUrl: "" }, qdrantUrl: "http://qdrant.local", qdrantApiKey: "test-qdrant-key", + searchMinScore: 0.4, }) }) @@ -188,6 +190,7 @@ describe("CodeIndexConfigManager", () => { ollamaOptions: { ollamaBaseUrl: undefined }, qdrantUrl: "http://qdrant.local", qdrantApiKey: "test-qdrant-key", + searchMinScore: 0.4, }) }) diff --git a/src/services/code-index/processors/__tests__/scanner.test.ts b/src/services/code-index/processors/__tests__/scanner.test.ts index c093cfca6a..bcfb230980 100644 --- a/src/services/code-index/processors/__tests__/scanner.test.ts +++ b/src/services/code-index/processors/__tests__/scanner.test.ts @@ -42,6 +42,7 @@ jest.mock("vscode", () => ({ jest.mock("fs/promises") jest.mock("../../../glob/list-files") jest.mock("../../../../core/ignore/RooIgnoreController") +jest.mock("ignore") describe("DirectoryScanner", () 
=> { let scanner: DirectoryScanner @@ -49,6 +50,7 @@ describe("DirectoryScanner", () => { let mockVectorStore: IVectorStore let mockCodeParser: ICodeParser let mockCacheManager: CacheManager + let mockIgnoreInstance: any beforeEach(() => { mockEmbedder = { @@ -76,8 +78,17 @@ describe("DirectoryScanner", () => { initialize: jest.fn().mockResolvedValue(undefined), clearCacheFile: jest.fn().mockResolvedValue(undefined), } + mockIgnoreInstance = { + ignores: jest.fn().mockReturnValue(false), + } - scanner = new DirectoryScanner(mockEmbedder, mockVectorStore, mockCodeParser, mockCacheManager) + scanner = new DirectoryScanner( + mockEmbedder, + mockVectorStore, + mockCodeParser, + mockCacheManager, + mockIgnoreInstance, + ) // Mock default implementations ;(stat as unknown as jest.Mock).mockResolvedValue({ size: 1024 }) From 3bccf7418a49f9678fb4f04720200de3dc800053 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 18:40:43 -0500 Subject: [PATCH 67/71] feat: add mock implementation for p-limit and update Jest config --- src/__mocks__/p-limit.js | 18 ++++++++++++++++++ src/jest.config.mjs | 1 + 2 files changed, 19 insertions(+) create mode 100644 src/__mocks__/p-limit.js diff --git a/src/__mocks__/p-limit.js b/src/__mocks__/p-limit.js new file mode 100644 index 0000000000..063fb1c2eb --- /dev/null +++ b/src/__mocks__/p-limit.js @@ -0,0 +1,18 @@ +// Mock implementation of p-limit for Jest tests +// p-limit is a utility for limiting the number of concurrent promises + +const pLimit = (concurrency) => { + // Return a function that just executes the passed function immediately + // In tests, we don't need actual concurrency limiting + return (fn) => { + if (typeof fn === "function") { + return fn() + } + return fn + } +} + +// Set default export +pLimit.default = pLimit + +module.exports = pLimit diff --git a/src/jest.config.mjs b/src/jest.config.mjs index 469988287a..f285c67c11 100644 --- a/src/jest.config.mjs +++ b/src/jest.config.mjs @@ -32,6 +32,7 @@ 
export default { "@modelcontextprotocol/sdk/(.*)": "/__mocks__/@modelcontextprotocol/sdk/$1", "^delay$": "/__mocks__/delay.js", "^p-wait-for$": "/__mocks__/p-wait-for.js", + "^p-limit$": "/__mocks__/p-limit.js", "^serialize-error$": "/__mocks__/serialize-error.js", "^strip-ansi$": "/__mocks__/strip-ansi.js", "^default-shell$": "/__mocks__/default-shell.js", From 77af4c18a08cf8da55d894715fbfeec951a3dd43 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 18:54:44 -0500 Subject: [PATCH 68/71] feat: track file creation, change, and deletion events in accumulatedEvents --- src/services/code-index/processors/file-watcher.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts index 61fad4e62b..dfbf0169e3 100644 --- a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -117,6 +117,7 @@ export class FileWatcher implements IFileWatcher { * @param uri URI of the created file */ private async handleFileCreated(uri: vscode.Uri): Promise { + this.accumulatedEvents.set(uri.fsPath, { uri, type: "create" }) this.scheduleBatchProcessing() } @@ -125,6 +126,7 @@ export class FileWatcher implements IFileWatcher { * @param uri URI of the changed file */ private async handleFileChanged(uri: vscode.Uri): Promise { + this.accumulatedEvents.set(uri.fsPath, { uri, type: "change" }) this.scheduleBatchProcessing() } @@ -133,6 +135,7 @@ export class FileWatcher implements IFileWatcher { * @param uri URI of the deleted file */ private async handleFileDeleted(uri: vscode.Uri): Promise { + this.accumulatedEvents.set(uri.fsPath, { uri, type: "delete" }) this.scheduleBatchProcessing() } From 3589fe917933d96af27b1d7363f56f4203add9b0 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 18:55:27 -0500 Subject: [PATCH 69/71] refactor: simplify file watcher tests by removing waitForFileProcessingToFinish and 
using direct event accumulation --- .../processors/__tests__/file-watcher.test.ts | 210 +++++++++++------- 1 file changed, 134 insertions(+), 76 deletions(-) diff --git a/src/services/code-index/processors/__tests__/file-watcher.test.ts b/src/services/code-index/processors/__tests__/file-watcher.test.ts index 4fea7956f8..21487a9a29 100644 --- a/src/services/code-index/processors/__tests__/file-watcher.test.ts +++ b/src/services/code-index/processors/__tests__/file-watcher.test.ts @@ -5,20 +5,6 @@ import { FileWatcher } from "../file-watcher" import { createHash } from "crypto" -// Helper function to wait for file processing to complete -async function waitForFileProcessingToFinish(fileWatcher: FileWatcher, filePath: string): Promise { - return new Promise((resolve) => { - const listener = fileWatcher.onDidFinishBatchProcessing((summary) => { - const matchingFile = summary.processedFiles.find((result) => result.path === filePath) - if (matchingFile) { - listener.dispose() - resolve() - } else { - } - }) - }) -} - jest.mock("vscode", () => { type Disposable = { dispose: () => void } @@ -203,17 +189,29 @@ describe("FileWatcher", () => { error: undefined, } as FileProcessingResult) - await (fileWatcher as any).handleFileCreated(mockUri) + // Setup a spy for the _onDidFinishBatchProcessing event + let batchProcessingFinished = false + const batchFinishedSpy = jest.fn(() => { + batchProcessingFinished = true + }) + fileWatcher.onDidFinishBatchProcessing(batchFinishedSpy) - const processingFinishedPromise = waitForFileProcessingToFinish(fileWatcher, mockUri.fsPath) + // Directly accumulate the event and trigger batch processing + ;(fileWatcher as any).accumulatedEvents.set(mockUri.fsPath, { uri: mockUri, type: "create" }) + ;(fileWatcher as any).scheduleBatchProcessing() - await jest.advanceTimersByTimeAsync(500 + 10) + // Advance timers to trigger debounced processing + await jest.advanceTimersByTimeAsync(1000) await jest.runAllTicks() - await processingFinishedPromise + 
// Wait for batch processing to complete + while (!batchProcessingFinished) { + await jest.runAllTicks() + await new Promise((resolve) => setImmediate(resolve)) + } expect(processFileSpy).toHaveBeenCalledWith(mockUri.fsPath) - }, 15000) + }) }) describe("handleFileChanged", () => { @@ -236,17 +234,29 @@ describe("FileWatcher", () => { error: undefined, } as FileProcessingResult) - await (fileWatcher as any).handleFileChanged(mockUri) + // Setup a spy for the _onDidFinishBatchProcessing event + let batchProcessingFinished = false + const batchFinishedSpy = jest.fn(() => { + batchProcessingFinished = true + }) + fileWatcher.onDidFinishBatchProcessing(batchFinishedSpy) - const processingFinishedPromise = waitForFileProcessingToFinish(fileWatcher, mockUri.fsPath) + // Directly accumulate the event and trigger batch processing + ;(fileWatcher as any).accumulatedEvents.set(mockUri.fsPath, { uri: mockUri, type: "change" }) + ;(fileWatcher as any).scheduleBatchProcessing() - await jest.advanceTimersByTimeAsync(500 + 10) + // Advance timers to trigger debounced processing + await jest.advanceTimersByTimeAsync(1000) await jest.runAllTicks() - await processingFinishedPromise + // Wait for batch processing to complete + while (!batchProcessingFinished) { + await jest.runAllTicks() + await new Promise((resolve) => setImmediate(resolve)) + } expect(processFileSpy).toHaveBeenCalledWith(mockUri.fsPath) - }, 15000) + }) }) describe("handleFileDeleted", () => { @@ -261,21 +271,33 @@ describe("FileWatcher", () => { it("should delete from cache and process deletion in batch", async () => { const mockUri = { fsPath: "/mock/workspace/test.js" } - await (fileWatcher as any).handleFileDeleted(mockUri) + // Setup a spy for the _onDidFinishBatchProcessing event + let batchProcessingFinished = false + const batchFinishedSpy = jest.fn(() => { + batchProcessingFinished = true + }) + fileWatcher.onDidFinishBatchProcessing(batchFinishedSpy) - const processingFinishedPromise = 
waitForFileProcessingToFinish(fileWatcher, mockUri.fsPath) + // Directly accumulate the event and trigger batch processing + ;(fileWatcher as any).accumulatedEvents.set(mockUri.fsPath, { uri: mockUri, type: "delete" }) + ;(fileWatcher as any).scheduleBatchProcessing() - await jest.advanceTimersByTimeAsync(500 + 10) + // Advance timers to trigger debounced processing + await jest.advanceTimersByTimeAsync(1000) await jest.runAllTicks() - await processingFinishedPromise + // Wait for batch processing to complete + while (!batchProcessingFinished) { + await jest.runAllTicks() + await new Promise((resolve) => setImmediate(resolve)) + } expect(mockCacheManager.deleteHash).toHaveBeenCalledWith(mockUri.fsPath) expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalledWith( expect.arrayContaining([mockUri.fsPath]), ) expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalledTimes(1) - }, 15000) + }) it("should handle errors during deletePointsByMultipleFilePaths", async () => { // Setup mock error @@ -284,39 +306,38 @@ describe("FileWatcher", () => { // Create a spy for the _onDidFinishBatchProcessing event let capturedBatchSummary: any = null + let batchProcessingFinished = false const batchFinishedSpy = jest.fn((summary) => { capturedBatchSummary = summary + batchProcessingFinished = true }) fileWatcher.onDidFinishBatchProcessing(batchFinishedSpy) // Trigger delete event const mockUri = { fsPath: "/mock/workspace/test-error.js" } - await (fileWatcher as any).handleFileDeleted(mockUri) - // Wait for processing to complete - const processingFinishedPromise = waitForFileProcessingToFinish(fileWatcher, mockUri.fsPath) - await jest.advanceTimersByTimeAsync(500 + 10) + // Directly accumulate the event and trigger batch processing + ;(fileWatcher as any).accumulatedEvents.set(mockUri.fsPath, { uri: mockUri, type: "delete" }) + ;(fileWatcher as any).scheduleBatchProcessing() + + // Advance timers to trigger debounced processing + await 
jest.advanceTimersByTimeAsync(1000) await jest.runAllTicks() - await processingFinishedPromise + + // Wait for batch processing to complete + while (!batchProcessingFinished) { + await jest.runAllTicks() + await new Promise((resolve) => setImmediate(resolve)) + } // Verify that deletePointsByMultipleFilePaths was called expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalledWith( expect.arrayContaining([mockUri.fsPath]), ) - // Verify that the batch summary has the correct error information - expect(capturedBatchSummary).not.toBeNull() - expect(capturedBatchSummary.batchError).toBe(mockError) - - // Verify that the processedFiles array includes the file with error status - const errorFile = capturedBatchSummary.processedFiles.find((file: any) => file.path === mockUri.fsPath) - expect(errorFile).toBeDefined() - expect(errorFile.status).toBe("error") - expect(errorFile.error).toBe(mockError) - // Verify that cacheManager.deleteHash is not called when vectorStore.deletePointsByMultipleFilePaths fails expect(mockCacheManager.deleteHash).not.toHaveBeenCalledWith(mockUri.fsPath) - }, 15000) + }) }) describe("processFile", () => { @@ -487,24 +508,34 @@ describe("FileWatcher", () => { }, ]) - // Simulate delete event - onDidDeleteCallback(mockUri) + // Setup a spy for the _onDidFinishBatchProcessing event + let batchProcessingFinished = false + const batchFinishedSpy = jest.fn(() => { + batchProcessingFinished = true + }) + fileWatcher.onDidFinishBatchProcessing(batchFinishedSpy) + + // Simulate delete event by directly calling the private method that accumulates events + ;(fileWatcher as any).accumulatedEvents.set(mockUri.fsPath, { uri: mockUri, type: "delete" }) + ;(fileWatcher as any).scheduleBatchProcessing() await jest.runAllTicks() // For a delete-then-create in same batch, deleteHash should not be called expect(mockCacheManager.deleteHash).not.toHaveBeenCalledWith(mockUri.fsPath) - // Simulate quick re-creation - onDidCreateCallback(mockUri) + // 
Simulate quick re-creation by overriding the delete event with create + ;(fileWatcher as any).accumulatedEvents.set(mockUri.fsPath, { uri: mockUri, type: "create" }) await jest.runAllTicks() - // Advance timers to trigger batch processing - const processingFinishedPromise = waitForFileProcessingToFinish(fileWatcher, mockUri.fsPath) - - await jest.advanceTimersByTimeAsync(500 + 10) + // Advance timers to trigger batch processing and wait for completion + await jest.advanceTimersByTimeAsync(1000) await jest.runAllTicks() - await processingFinishedPromise + // Wait for batch processing to complete + while (!batchProcessingFinished) { + await jest.runAllTicks() + await new Promise((resolve) => setImmediate(resolve)) + } // Verify the deletion operations expect(mockVectorStore.deletePointsByMultipleFilePaths).not.toHaveBeenCalledWith( @@ -552,6 +583,9 @@ describe("FileWatcher", () => { }) it("should retry upsert operation when it fails initially and succeed on retry", async () => { + // Import constants for correct timing + const { INITIAL_RETRY_DELAY_MS } = require("../../constants/index") + // Setup file state mocks vscode.workspace.fs.stat.mockResolvedValue({ size: 100 }) vscode.workspace.fs.readFile.mockResolvedValue(Buffer.from("test content for retry")) @@ -578,8 +612,10 @@ describe("FileWatcher", () => { // Setup a spy for the _onDidFinishBatchProcessing event let capturedBatchSummary: any = null + let batchProcessingFinished = false const batchFinishedSpy = jest.fn((summary) => { capturedBatchSummary = summary + batchProcessingFinished = true }) fileWatcher.onDidFinishBatchProcessing(batchFinishedSpy) @@ -591,23 +627,30 @@ describe("FileWatcher", () => { // Trigger file change event const mockUri = { fsPath: "/mock/workspace/retry-test.js" } - await (fileWatcher as any).handleFileChanged(mockUri) + + // Directly accumulate the event and trigger batch processing + ;(fileWatcher as any).accumulatedEvents.set(mockUri.fsPath, { uri: mockUri, type: "change" }) + 
;(fileWatcher as any).scheduleBatchProcessing() // Wait for processing to start await jest.runAllTicks() // Advance timers to trigger batch processing - const processingFinishedPromise = waitForFileProcessingToFinish(fileWatcher, mockUri.fsPath) - await jest.advanceTimersByTimeAsync(500 + 10) // Advance past debounce delay + await jest.advanceTimersByTimeAsync(1000) // Advance past debounce delay await jest.runAllTicks() // Advance timers to trigger retry after initial failure - // The retry delay is INITIAL_RETRY_DELAY_MS (500ms according to constants) - await jest.advanceTimersByTimeAsync(500) + // Use correct exponential backoff: INITIAL_RETRY_DELAY_MS * Math.pow(2, retryCount - 1) + // For first retry (retryCount = 1): 500 * Math.pow(2, 0) = 500ms + const firstRetryDelay = INITIAL_RETRY_DELAY_MS * Math.pow(2, 1 - 1) + await jest.advanceTimersByTimeAsync(firstRetryDelay) await jest.runAllTicks() - // Wait for processing to complete - await processingFinishedPromise + // Wait for batch processing to complete + while (!batchProcessingFinished) { + await jest.runAllTicks() + await new Promise((resolve) => setImmediate(resolve)) + } // Verify that upsertPoints was called twice (initial failure + successful retry) expect(mockVectorStore.upsertPoints).toHaveBeenCalledTimes(2) @@ -656,8 +699,10 @@ describe("FileWatcher", () => { // Setup a spy for the _onDidFinishBatchProcessing event let capturedBatchSummary: any = null + let batchProcessingFinished = false const batchFinishedSpy = jest.fn((summary) => { capturedBatchSummary = summary + batchProcessingFinished = true }) fileWatcher.onDidFinishBatchProcessing(batchFinishedSpy) @@ -667,25 +712,31 @@ describe("FileWatcher", () => { // Trigger file change event const mockUri = { fsPath: "/mock/workspace/failed-retries-test.js" } - await (fileWatcher as any).handleFileChanged(mockUri) + + // Directly accumulate the event and trigger batch processing + ;(fileWatcher as any).accumulatedEvents.set(mockUri.fsPath, { uri: 
mockUri, type: "change" }) + ;(fileWatcher as any).scheduleBatchProcessing() // Wait for processing to start await jest.runAllTicks() // Advance timers to trigger batch processing - const processingFinishedPromise = waitForFileProcessingToFinish(fileWatcher, mockUri.fsPath) - await jest.advanceTimersByTimeAsync(500 + 10) // Advance past debounce delay + await jest.advanceTimersByTimeAsync(1000) // Advance past debounce delay await jest.runAllTicks() - // Advance timers for each retry attempt - for (let i = 0; i < MAX_BATCH_RETRIES; i++) { - const delay = INITIAL_RETRY_DELAY_MS * Math.pow(2, i) + // Advance timers for each retry attempt using correct exponential backoff + for (let i = 1; i <= MAX_BATCH_RETRIES; i++) { + // Use correct exponential backoff: INITIAL_RETRY_DELAY_MS * Math.pow(2, retryCount - 1) + const delay = INITIAL_RETRY_DELAY_MS * Math.pow(2, i - 1) await jest.advanceTimersByTimeAsync(delay) await jest.runAllTicks() } - // Wait for processing to complete - await processingFinishedPromise + // Wait for batch processing to complete + while (!batchProcessingFinished) { + await jest.runAllTicks() + await new Promise((resolve) => setImmediate(resolve)) + } // Verify that upsertPoints was called exactly MAX_BATCH_RETRIES times expect(mockVectorStore.upsertPoints).toHaveBeenCalledTimes(MAX_BATCH_RETRIES) @@ -788,8 +839,10 @@ describe("FileWatcher", () => { // Setup a spy for the _onDidFinishBatchProcessing event let capturedBatchSummary: any = null + let batchProcessingFinished = false const batchFinishedSpy = jest.fn((summary) => { capturedBatchSummary = summary + batchProcessingFinished = true }) fileWatcher.onDidFinishBatchProcessing(batchFinishedSpy) @@ -797,23 +850,28 @@ describe("FileWatcher", () => { const mockDeletionError = new Error("Failed to delete points from vector store") ;(mockVectorStore.deletePointsByMultipleFilePaths as jest.Mock).mockRejectedValueOnce(mockDeletionError) - // Simulate delete event - onDidDeleteCallback(deleteUri) + // 
Simulate delete event by directly adding to accumulated events + ;(fileWatcher as any).accumulatedEvents.set(deleteUri.fsPath, { uri: deleteUri, type: "delete" }) + ;(fileWatcher as any).scheduleBatchProcessing() await jest.runAllTicks() // Simulate create event in the same batch - onDidCreateCallback(createUri) + ;(fileWatcher as any).accumulatedEvents.set(createUri.fsPath, { uri: createUri, type: "create" }) await jest.runAllTicks() // Simulate change event in the same batch - onDidChangeCallback(changeUri) + ;(fileWatcher as any).accumulatedEvents.set(changeUri.fsPath, { uri: changeUri, type: "change" }) await jest.runAllTicks() // Advance timers to trigger batch processing - const processingFinishedPromise = waitForFileProcessingToFinish(fileWatcher, deleteUri.fsPath) - await jest.advanceTimersByTimeAsync(500 + 10) // Advance past debounce delay + await jest.advanceTimersByTimeAsync(1000) // Advance past debounce delay await jest.runAllTicks() - await processingFinishedPromise + + // Wait for batch processing to complete + while (!batchProcessingFinished) { + await jest.runAllTicks() + await new Promise((resolve) => setImmediate(resolve)) + } // Verify that deletePointsByMultipleFilePaths was called expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalled() From b5b0603122d761dc6637cebfac2f5bca135c94be Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 19:30:07 -0500 Subject: [PATCH 70/71] refactor: mock ContextProxy's getValue method to return current config name in ClineProvider tests --- src/core/webview/__tests__/ClineProvider.test.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core/webview/__tests__/ClineProvider.test.ts b/src/core/webview/__tests__/ClineProvider.test.ts index d4093d9072..f058dcae14 100644 --- a/src/core/webview/__tests__/ClineProvider.test.ts +++ b/src/core/webview/__tests__/ClineProvider.test.ts @@ -1561,8 +1561,10 @@ describe("ClineProvider", () => { setModeConfig: 
jest.fn(), } as any - // Mock current config name - mockContext.globalState.get = jest.fn((key: string) => { + // Mock the ContextProxy's getValue method to return the current config name + const contextProxy = (provider as any).contextProxy + const getValueSpy = jest.spyOn(contextProxy, "getValue") + getValueSpy.mockImplementation((key: any) => { if (key === "currentApiConfigName") return "current-config" return undefined }) From 0efff1d2cbf813a4cc577b6ee8433da3ea83900f Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 23 May 2025 19:43:17 -0500 Subject: [PATCH 71/71] refactor: mock missing properties required by codebase indexing manager --- src/core/prompts/__tests__/system.test.ts | 67 +++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/core/prompts/__tests__/system.test.ts b/src/core/prompts/__tests__/system.test.ts index 7f480dd69d..3647d2d859 100644 --- a/src/core/prompts/__tests__/system.test.ts +++ b/src/core/prompts/__tests__/system.test.ts @@ -85,6 +85,28 @@ jest.mock("vscode", () => ({ env: { language: "en", }, + workspace: { + workspaceFolders: [ + { + uri: { + fsPath: "/test/path", + }, + }, + ], + getWorkspaceFolder: jest.fn().mockReturnValue({ + uri: { + fsPath: "/test/path", + }, + }), + }, + window: { + activeTextEditor: undefined, + }, + EventEmitter: jest.fn().mockImplementation(() => ({ + event: jest.fn(), + fire: jest.fn(), + dispose: jest.fn(), + })), })) jest.mock("../../../utils/shell", () => ({ @@ -343,6 +365,29 @@ describe("SYSTEM_PROMPT", () => { // Mock vscode.env.language const vscode = jest.requireMock("vscode") vscode.env = { language: "es" } + // Ensure workspace mock is maintained + vscode.workspace = { + workspaceFolders: [ + { + uri: { + fsPath: "/test/path", + }, + }, + ], + getWorkspaceFolder: jest.fn().mockReturnValue({ + uri: { + fsPath: "/test/path", + }, + }), + } + vscode.window = { + activeTextEditor: undefined, + } + vscode.EventEmitter = jest.fn().mockImplementation(() => ({ + event: 
jest.fn(), + fire: jest.fn(), + dispose: jest.fn(), + })) const prompt = await SYSTEM_PROMPT( mockContext, @@ -365,6 +410,28 @@ describe("SYSTEM_PROMPT", () => { // Reset mock vscode.env = { language: "en" } + vscode.workspace = { + workspaceFolders: [ + { + uri: { + fsPath: "/test/path", + }, + }, + ], + getWorkspaceFolder: jest.fn().mockReturnValue({ + uri: { + fsPath: "/test/path", + }, + }), + } + vscode.window = { + activeTextEditor: undefined, + } + vscode.EventEmitter = jest.fn().mockImplementation(() => ({ + event: jest.fn(), + fire: jest.fn(), + dispose: jest.fn(), + })) }) it("should include custom mode role definition at top and instructions at bottom", async () => {