RooCodeInc · roomote · Aug 10, 2025 · roomote · Aug 10, 2025 · roomote
@@ -903,6 +903,48 @@ export class McpHub {
 			(conn) => conn.server.name === serverName && (conn.server.source === "global" || !conn.server.source),
 		)
 	}
+	/**
+	 * Wait until a server connection is fully ready (client connected and status updated).
+	 * Guards against race conditions where a tool/resource call arrives before initial connect completes.
+	 */
+	private async waitForServerReady(
+		serverName: string,
+		source?: "global" | "project",
+		timeoutMs: number = 10000,
+	): Promise<ConnectedMcpConnection> {
+		// Immediately error if there's no connected-type connection to wait on (preserves legacy error for tests/UX)
+		const initial = this.findConnection(serverName, source)
+		const notFoundMsg = `No connection found for server: ${serverName}${source ? ` with source ${source}` : ""}`
+		if (!initial || initial.type !== "connected") {
+			throw new Error(notFoundMsg)
+		}
+
+		// If we have a connected-type connection but it's still "connecting", wait briefly for readiness.
+		const start = Date.now()
+		while (Date.now() - start < timeoutMs) {
+			const connection = this.findConnection(serverName, source)
+			if (connection && connection.type === "connected" && connection.server.status === "connected") {
+				return connection
+			}
+			await delay(50)
+		}
+		const conn = this.findConnection(serverName, source)
+		const status = conn?.type === "connected" ? conn.server.status : "disconnected"
+		throw new Error(`MCP server "${serverName}" is not ready (status: ${status}). Try again in a moment.`)
+	}
+
+	/**
+	 * Detect transient connection-closed errors from MCP transport to allow a single retry.
+	 */
+	private isTransientClosedError(error: unknown): boolean {
+		const code = (error as any)?.code
+		const msg = error instanceof Error ? error.message : String(error)
+		if (typeof code === "number" && code === -32000) return true
+		if (typeof code === "string" && /-?32000/.test(code)) return true
+		return /connection closed|closed before|socket hang up|ECONNRESET|EPIPE|ERR_STREAM_PREMATURE_CLOSE/i.test(
+			msg || "",
+		)
+	}
 
 	private async fetchToolsList(serverName: string, source?: "global" | "project"): Promise<McpTool[]> {
 		try {
@@ -1559,22 +1601,31 @@ export class McpHub {
 	}
 
 	async readResource(serverName: string, uri: string, source?: "global" | "project"): Promise<McpResourceResponse> {
-		const connection = this.findConnection(serverName, source)
-		if (!connection || connection.type !== "connected") {
-			throw new Error(`No connection found for server: ${serverName}${source ? ` with source ${source}` : ""}`)
-		}
+		let connection = await this.waitForServerReady(serverName, source)
 		if (connection.server.disabled) {
 			throw new Error(`Server "${serverName}" is disabled`)
 		}
-		return await connection.client.request(
-			{
-				method: "resources/read",
-				params: {
-					uri,
+
+		const doRequest = async () =>
+			connection.client.request(
+				{
+					method: "resources/read",
+					params: { uri },
 				},
-			},
-			ReadResourceResultSchema,
-		)
+				ReadResourceResultSchema,
+			)
+
+		try {
+			return await doRequest()
+		} catch (error) {
+			// Handle initial transient "Connection closed" on first invocation after install/start
+			if (this.isTransientClosedError(error)) {
+				await delay(200)
+				connection = await this.waitForServerReady(serverName, source)
+				return await doRequest()
+			}
+			throw error
+		}
 	}
 
 	async callTool(
@@ -1583,12 +1634,7 @@ export class McpHub {
 		toolArguments?: Record<string, unknown>,
 		source?: "global" | "project",
 	): Promise<McpToolCallResponse> {
-		const connection = this.findConnection(serverName, source)
-		if (!connection || connection.type !== "connected") {
-			throw new Error(
-				`No connection found for server: ${serverName}${source ? ` with source ${source}` : ""}. Please make sure to use MCP servers available under 'Connected MCP Servers'.`,
-			)
-		}
+		let connection = await this.waitForServerReady(serverName, source)
 		if (connection.server.disabled) {
 			throw new Error(`Server "${serverName}" is disabled and cannot be used`)
 		}
@@ -1599,23 +1645,33 @@ export class McpHub {
 			timeout = (parsedConfig.timeout ?? 60) * 1000
 		} catch (error) {
 			console.error("Failed to parse server config for timeout:", error)
-			// Default to 60 seconds if parsing fails
 			timeout = 60 * 1000
 		}
 
-		return await connection.client.request(
-			{
-				method: "tools/call",
-				params: {
-					name: toolName,
-					arguments: toolArguments,
+		const doRequest = async () =>
+			connection.client.request(
+				{
+					method: "tools/call",
+					params: {
+						name: toolName,
+						arguments: toolArguments,
+					},
 				},
-			},
-			CallToolResultSchema,
-			{
-				timeout,
-			},
-		)
+				CallToolResultSchema,
+				{ timeout },
+			)
+
+		try {
+			return await doRequest()
+		} catch (error) {
+			// Handle initial transient "Connection closed" that can happen on the very first call
+			if (this.isTransientClosedError(error)) {
+				await delay(200)
+				connection = await this.waitForServerReady(serverName, source)
+				return await doRequest()
+			}
+			throw error
+		}
 	}
 
 	/**

@@ -0,0 +1,180 @@
+import type { ClineProvider } from "../../../core/webview/ClineProvider"
+import { McpHub } from "../McpHub"
+import { vi, describe, it, expect, beforeEach } from "vitest"
+import fs from "fs/promises"
+
+// Minimal VSCode and FS mocks to keep constructor side-effects inert
+vi.mock("vscode", () => ({
+	workspace: {
+		createFileSystemWatcher: vi.fn().mockReturnValue({
+			onDidChange: vi.fn(),
+			onDidCreate: vi.fn(),
+			onDidDelete: vi.fn(),
+			dispose: vi.fn(),
+		}),
+		onDidChangeWorkspaceFolders: vi.fn(),
+		workspaceFolders: [],
+	},
+	window: {
+		showErrorMessage: vi.fn(),
+		showInformationMessage: vi.fn(),
+		showWarningMessage: vi.fn(),
+	},
+	Disposable: {
+		from: vi.fn(),
+	},
+}))
+
+vi.mock("fs/promises", () => ({
+	default: {
+		access: vi.fn().mockResolvedValue(undefined),
+		writeFile: vi.fn().mockResolvedValue(undefined),
+		readFile: vi.fn().mockResolvedValue("{}"),
+		unlink: vi.fn().mockResolvedValue(undefined),
+		rename: vi.fn().mockResolvedValue(undefined),
+		lstat: vi.fn().mockResolvedValue({ isDirectory: () => true }),
+		mkdir: vi.fn().mockResolvedValue(undefined),
+	},
+	access: vi.fn().mockResolvedValue(undefined),
+	writeFile: vi.fn().mockResolvedValue(undefined),
+	readFile: vi.fn().mockResolvedValue("{}"),
+	unlink: vi.fn().mockResolvedValue(undefined),
+	rename: vi.fn().mockResolvedValue(undefined),
+	lstat: vi.fn().mockResolvedValue({ isDirectory: () => true }),
+	mkdir: vi.fn().mockResolvedValue(undefined),
+}))
+
+describe("McpHub transient-retry and readiness", () => {
+	let mcpHub: McpHub
+	let mockProvider: Partial<ClineProvider>
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+
+		// Prevent constructor from kicking off async initialization that races our injected connections
+		vi.spyOn(McpHub.prototype as any, "initializeGlobalMcpServers").mockResolvedValue(undefined as any)
+		vi.spyOn(McpHub.prototype as any, "initializeProjectMcpServers").mockResolvedValue(undefined as any)
+
+		mockProvider = {
+			ensureSettingsDirectoryExists: vi.fn().mockResolvedValue("/mock/settings/path"),
+			ensureMcpServersDirectoryExists: vi.fn().mockResolvedValue("/mock/settings/path"),
+			postMessageToWebview: vi.fn().mockResolvedValue(undefined),
+			getState: vi.fn().mockResolvedValue({ mcpEnabled: true }),
+			context: {
+				subscriptions: [],
+				workspaceState: {} as any,
+				globalState: {} as any,
+				secrets: {} as any,
+				extensionUri: { fsPath: "/test/path" } as any,
+				extensionPath: "/test/path",
+				asAbsolutePath: (p: string) => p,
+				globalStorageUri: { fsPath: "/test/global" } as any,
+				globalStoragePath: "/test/global",
+				extension: {
+					packageJSON: { version: "1.0.0" },
+				} as any,
+			} as any,
+		}
+
+		// Keep global config empty so constructor does not attempt to connect anything
+		vi.mocked(fs.readFile).mockResolvedValue("{}")
+
+		mcpHub = new McpHub(mockProvider as ClineProvider)
+	})
+
+	it("retries once on transient -32000 'Connection closed' for tools/call", async () => {
+		// Prepare a connected connection
+		const firstError: any = new Error("Connection closed")
+		firstError.code = -32000
+
+		const request = vi
+			.fn()
+			// First call: throw transient
+			.mockRejectedValueOnce(firstError)
+			// Second call: succeed
+			.mockResolvedValueOnce({ content: [{ type: "text", text: "ok" }] })
+
+		// Inject a connected connection directly
+		;(mcpHub as any).connections = [
+			{
+				type: "connected",
+				server: {
+					name: "retry-server",
+					config: JSON.stringify({ type: "stdio", command: "node", timeout: 60 }),
+					status: "connected",
+					source: "global",
+					errorHistory: [],
+				},
+				client: { request } as any,
+				transport: {} as any,
+			},
+		]
+
+		const result = await mcpHub.callTool("retry-server", "do_something", { a: 1 })
+		expect(result).toBeTruthy()
+		expect(request).toHaveBeenCalledTimes(2)
+	})
+
+	it("retries once on transient -32000 'Connection closed' for resources/read", async () => {
+		const firstError: any = new Error("Connection closed before response")
+		firstError.code = -32000
+
+		const request = vi
+			.fn()
+			.mockRejectedValueOnce(firstError)
+			.mockResolvedValueOnce({ contents: [{ type: "text", text: "resource" }] })
+
+		;(mcpHub as any).connections = [
+			{
+				type: "connected",
+				server: {
+					name: "retry-server",
+					config: JSON.stringify({ type: "stdio", command: "node", timeout: 60 }),
+					status: "connected",
+					source: "global",
+					errorHistory: [],
+				},
+				client: { request } as any,
+				transport: {} as any,
+			},
+		]
+
+		const result = await mcpHub.readResource("retry-server", "file:///tmp/x.txt")
+		expect(result).toBeTruthy()
+		expect(request).toHaveBeenCalledTimes(2)
+	})
+
+	it("waits for server to become connected before first tool call", async () => {
+		// Start 'connecting' and then flip to 'connected' shortly after
+		const request = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "ok" }] })
+
+		const connection: any = {
+			type: "connected",
+			server: {
+				name: "slow-server",
+				config: JSON.stringify({ type: "stdio", command: "node", timeout: 60 }),
+				status: "connecting",
+				source: "global",
+				errorHistory: [],
+			},
+			client: { request },
+			transport: {},
+		}
+
+		;(mcpHub as any).connections = [connection]
+
+		// Flip status to 'connected' after 100ms
+		setTimeout(() => {
+			connection.server.status = "connected"
+		}, 100)
+
+		const start = Date.now()
+		const result = await mcpHub.callTool("slow-server", "ping", {})
+		const elapsed = Date.now() - start
+
+		expect(result).toBeTruthy()
+		expect(request).toHaveBeenCalledTimes(1)
+		// Should have waited at least ~100ms before making the request
+		expect(elapsed).toBeGreaterThanOrEqual(90)
+	})
+})