-
Notifications
You must be signed in to change notification settings - Fork 2.6k
MCP: wait-for-ready + transient retry on first call; add tests #6894
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -903,6 +903,48 @@ export class McpHub { | |
| (conn) => conn.server.name === serverName && (conn.server.source === "global" || !conn.server.source), | ||
| ) | ||
| } | ||
| /** | ||
| * Wait until a server connection is fully ready (client connected and status updated). | ||
| * Guards against race conditions where a tool/resource call arrives before initial connect completes. | ||
| */ | ||
| private async waitForServerReady( | ||
| serverName: string, | ||
| source?: "global" | "project", | ||
| timeoutMs: number = 10000, | ||
| ): Promise<ConnectedMcpConnection> { | ||
| // Immediately error if there's no connected-type connection to wait on (preserves legacy error for tests/UX) | ||
| const initial = this.findConnection(serverName, source) | ||
| const notFoundMsg = `No connection found for server: ${serverName}${source ? ` with source ${source}` : ""}` | ||
| if (!initial || initial.type !== "connected") { | ||
| throw new Error(notFoundMsg) | ||
| } | ||
|
|
||
| // If we have a connected-type connection but it's still "connecting", wait briefly for readiness. | ||
| const start = Date.now() | ||
| while (Date.now() - start < timeoutMs) { | ||
| const connection = this.findConnection(serverName, source) | ||
| if (connection && connection.type === "connected" && connection.server.status === "connected") { | ||
| return connection | ||
| } | ||
| await delay(50) | ||
| } | ||
| const conn = this.findConnection(serverName, source) | ||
| const status = conn?.type === "connected" ? conn.server.status : "disconnected" | ||
| throw new Error(`MCP server "${serverName}" is not ready (status: ${status}). Try again in a moment.`) | ||
| } | ||
|
|
||
| /** | ||
| * Detect transient connection-closed errors from MCP transport to allow a single retry. | ||
| */ | ||
| private isTransientClosedError(error: unknown): boolean { | ||
| const code = (error as any)?.code | ||
| const msg = error instanceof Error ? error.message : String(error) | ||
| if (typeof code === "number" && code === -32000) return true | ||
| if (typeof code === "string" && /-?32000/.test(code)) return true | ||
| return /connection closed|closed before|socket hang up|ECONNRESET|EPIPE|ERR_STREAM_PREMATURE_CLOSE/i.test( | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This regex pattern is quite broad and might catch unrelated errors. Could we be more specific about which error patterns should trigger a retry? For example, we might want to exclude certain EPIPE errors that indicate permanent failures rather than transient issues. |
||
| msg || "", | ||
| ) | ||
| } | ||
|
|
||
| private async fetchToolsList(serverName: string, source?: "global" | "project"): Promise<McpTool[]> { | ||
| try { | ||
|
|
@@ -1559,22 +1601,31 @@ export class McpHub { | |
| } | ||
|
|
||
| async readResource(serverName: string, uri: string, source?: "global" | "project"): Promise<McpResourceResponse> { | ||
| const connection = this.findConnection(serverName, source) | ||
| if (!connection || connection.type !== "connected") { | ||
| throw new Error(`No connection found for server: ${serverName}${source ? ` with source ${source}` : ""}`) | ||
| } | ||
| let connection = await this.waitForServerReady(serverName, source) | ||
| if (connection.server.disabled) { | ||
| throw new Error(`Server "${serverName}" is disabled`) | ||
| } | ||
| return await connection.client.request( | ||
| { | ||
| method: "resources/read", | ||
| params: { | ||
| uri, | ||
|
|
||
| const doRequest = async () => | ||
| connection.client.request( | ||
| { | ||
| method: "resources/read", | ||
| params: { uri }, | ||
| }, | ||
| }, | ||
| ReadResourceResultSchema, | ||
| ) | ||
| ReadResourceResultSchema, | ||
| ) | ||
|
|
||
| try { | ||
| return await doRequest() | ||
| } catch (error) { | ||
| // Handle initial transient "Connection closed" on first invocation after install/start | ||
| if (this.isTransientClosedError(error)) { | ||
| await delay(200) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we make this retry delay configurable or at least define it as a constant? The magic number 200ms appears in both callTool and readResource. Something like: private static readonly TRANSIENT_ERROR_RETRY_DELAY_MS = 200; |
||
| connection = await this.waitForServerReady(serverName, source) | ||
| return await doRequest() | ||
| } | ||
| throw error | ||
| } | ||
| } | ||
|
|
||
| async callTool( | ||
|
|
@@ -1583,12 +1634,7 @@ export class McpHub { | |
| toolArguments?: Record<string, unknown>, | ||
| source?: "global" | "project", | ||
| ): Promise<McpToolCallResponse> { | ||
| const connection = this.findConnection(serverName, source) | ||
| if (!connection || connection.type !== "connected") { | ||
| throw new Error( | ||
| `No connection found for server: ${serverName}${source ? ` with source ${source}` : ""}. Please make sure to use MCP servers available under 'Connected MCP Servers'.`, | ||
| ) | ||
| } | ||
| let connection = await this.waitForServerReady(serverName, source) | ||
| if (connection.server.disabled) { | ||
| throw new Error(`Server "${serverName}" is disabled and cannot be used`) | ||
| } | ||
|
|
@@ -1599,23 +1645,33 @@ export class McpHub { | |
| timeout = (parsedConfig.timeout ?? 60) * 1000 | ||
| } catch (error) { | ||
| console.error("Failed to parse server config for timeout:", error) | ||
| // Default to 60 seconds if parsing fails | ||
| timeout = 60 * 1000 | ||
| } | ||
|
|
||
| return await connection.client.request( | ||
| { | ||
| method: "tools/call", | ||
| params: { | ||
| name: toolName, | ||
| arguments: toolArguments, | ||
| const doRequest = async () => | ||
| connection.client.request( | ||
| { | ||
| method: "tools/call", | ||
| params: { | ||
| name: toolName, | ||
| arguments: toolArguments, | ||
| }, | ||
| }, | ||
| }, | ||
| CallToolResultSchema, | ||
| { | ||
| timeout, | ||
| }, | ||
| ) | ||
| CallToolResultSchema, | ||
| { timeout }, | ||
| ) | ||
|
|
||
| try { | ||
| return await doRequest() | ||
| } catch (error) { | ||
| // Handle initial transient "Connection closed" that can happen on the very first call | ||
| if (this.isTransientClosedError(error)) { | ||
| await delay(200) | ||
| connection = await this.waitForServerReady(serverName, source) | ||
| return await doRequest() | ||
| } | ||
| throw error | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,180 @@ | ||
| import type { ClineProvider } from "../../../core/webview/ClineProvider" | ||
| import { McpHub } from "../McpHub" | ||
| import { vi, describe, it, expect, beforeEach } from "vitest" | ||
| import fs from "fs/promises" | ||
|
|
||
| // Minimal VSCode and FS mocks to keep constructor side-effects inert | ||
| vi.mock("vscode", () => ({ | ||
| workspace: { | ||
| createFileSystemWatcher: vi.fn().mockReturnValue({ | ||
| onDidChange: vi.fn(), | ||
| onDidCreate: vi.fn(), | ||
| onDidDelete: vi.fn(), | ||
| dispose: vi.fn(), | ||
| }), | ||
| onDidChangeWorkspaceFolders: vi.fn(), | ||
| workspaceFolders: [], | ||
| }, | ||
| window: { | ||
| showErrorMessage: vi.fn(), | ||
| showInformationMessage: vi.fn(), | ||
| showWarningMessage: vi.fn(), | ||
| }, | ||
| Disposable: { | ||
| from: vi.fn(), | ||
| }, | ||
| })) | ||
|
|
||
| vi.mock("fs/promises", () => ({ | ||
| default: { | ||
| access: vi.fn().mockResolvedValue(undefined), | ||
| writeFile: vi.fn().mockResolvedValue(undefined), | ||
| readFile: vi.fn().mockResolvedValue("{}"), | ||
| unlink: vi.fn().mockResolvedValue(undefined), | ||
| rename: vi.fn().mockResolvedValue(undefined), | ||
| lstat: vi.fn().mockResolvedValue({ isDirectory: () => true }), | ||
| mkdir: vi.fn().mockResolvedValue(undefined), | ||
| }, | ||
| access: vi.fn().mockResolvedValue(undefined), | ||
| writeFile: vi.fn().mockResolvedValue(undefined), | ||
| readFile: vi.fn().mockResolvedValue("{}"), | ||
| unlink: vi.fn().mockResolvedValue(undefined), | ||
| rename: vi.fn().mockResolvedValue(undefined), | ||
| lstat: vi.fn().mockResolvedValue({ isDirectory: () => true }), | ||
| mkdir: vi.fn().mockResolvedValue(undefined), | ||
| })) | ||
|
|
||
| describe("McpHub transient-retry and readiness", () => { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great test coverage for the happy path! Consider adding edge case tests: What happens when waitForServerReady times out? What happens on the second retry failure (should not retry a third time)? Error handling when connection is removed during the wait period. These would help ensure the retry logic is robust under various failure scenarios. |
||
| let mcpHub: McpHub | ||
| let mockProvider: Partial<ClineProvider> | ||
|
|
||
| beforeEach(() => { | ||
| vi.clearAllMocks() | ||
|
|
||
| // Prevent constructor from kicking off async initialization that races our injected connections | ||
| vi.spyOn(McpHub.prototype as any, "initializeGlobalMcpServers").mockResolvedValue(undefined as any) | ||
| vi.spyOn(McpHub.prototype as any, "initializeProjectMcpServers").mockResolvedValue(undefined as any) | ||
|
|
||
| mockProvider = { | ||
| ensureSettingsDirectoryExists: vi.fn().mockResolvedValue("/mock/settings/path"), | ||
| ensureMcpServersDirectoryExists: vi.fn().mockResolvedValue("/mock/settings/path"), | ||
| postMessageToWebview: vi.fn().mockResolvedValue(undefined), | ||
| getState: vi.fn().mockResolvedValue({ mcpEnabled: true }), | ||
| context: { | ||
| subscriptions: [], | ||
| workspaceState: {} as any, | ||
| globalState: {} as any, | ||
| secrets: {} as any, | ||
| extensionUri: { fsPath: "/test/path" } as any, | ||
| extensionPath: "/test/path", | ||
| asAbsolutePath: (p: string) => p, | ||
| globalStorageUri: { fsPath: "/test/global" } as any, | ||
| globalStoragePath: "/test/global", | ||
| extension: { | ||
| packageJSON: { version: "1.0.0" }, | ||
| } as any, | ||
| } as any, | ||
| } | ||
|
|
||
| // Keep global config empty so constructor does not attempt to connect anything | ||
| vi.mocked(fs.readFile).mockResolvedValue("{}") | ||
|
|
||
| mcpHub = new McpHub(mockProvider as ClineProvider) | ||
| }) | ||
|
|
||
| it("retries once on transient -32000 'Connection closed' for tools/call", async () => { | ||
| // Prepare a connected connection | ||
| const firstError: any = new Error("Connection closed") | ||
| firstError.code = -32000 | ||
|
|
||
| const request = vi | ||
| .fn() | ||
| // First call: throw transient | ||
| .mockRejectedValueOnce(firstError) | ||
| // Second call: succeed | ||
| .mockResolvedValueOnce({ content: [{ type: "text", text: "ok" }] }) | ||
|
|
||
| // Inject a connected connection directly | ||
| ;(mcpHub as any).connections = [ | ||
| { | ||
| type: "connected", | ||
| server: { | ||
| name: "retry-server", | ||
| config: JSON.stringify({ type: "stdio", command: "node", timeout: 60 }), | ||
| status: "connected", | ||
| source: "global", | ||
| errorHistory: [], | ||
| }, | ||
| client: { request } as any, | ||
| transport: {} as any, | ||
| }, | ||
| ] | ||
|
|
||
| const result = await mcpHub.callTool("retry-server", "do_something", { a: 1 }) | ||
| expect(result).toBeTruthy() | ||
| expect(request).toHaveBeenCalledTimes(2) | ||
| }) | ||
|
|
||
| it("retries once on transient -32000 'Connection closed' for resources/read", async () => { | ||
| const firstError: any = new Error("Connection closed before response") | ||
| firstError.code = -32000 | ||
|
|
||
| const request = vi | ||
| .fn() | ||
| .mockRejectedValueOnce(firstError) | ||
| .mockResolvedValueOnce({ contents: [{ type: "text", text: "resource" }] }) | ||
|
|
||
| ;(mcpHub as any).connections = [ | ||
| { | ||
| type: "connected", | ||
| server: { | ||
| name: "retry-server", | ||
| config: JSON.stringify({ type: "stdio", command: "node", timeout: 60 }), | ||
| status: "connected", | ||
| source: "global", | ||
| errorHistory: [], | ||
| }, | ||
| client: { request } as any, | ||
| transport: {} as any, | ||
| }, | ||
| ] | ||
|
|
||
| const result = await mcpHub.readResource("retry-server", "file:///tmp/x.txt") | ||
| expect(result).toBeTruthy() | ||
| expect(request).toHaveBeenCalledTimes(2) | ||
| }) | ||
|
|
||
| it("waits for server to become connected before first tool call", async () => { | ||
| // Start 'connecting' and then flip to 'connected' shortly after | ||
| const request = vi.fn().mockResolvedValue({ content: [{ type: "text", text: "ok" }] }) | ||
|
|
||
| const connection: any = { | ||
| type: "connected", | ||
| server: { | ||
| name: "slow-server", | ||
| config: JSON.stringify({ type: "stdio", command: "node", timeout: 60 }), | ||
| status: "connecting", | ||
| source: "global", | ||
| errorHistory: [], | ||
| }, | ||
| client: { request }, | ||
| transport: {}, | ||
| } | ||
|
|
||
| ;(mcpHub as any).connections = [connection] | ||
|
|
||
| // Flip status to 'connected' after 100ms | ||
| setTimeout(() => { | ||
| connection.server.status = "connected" | ||
| }, 100) | ||
|
|
||
| const start = Date.now() | ||
| const result = await mcpHub.callTool("slow-server", "ping", {}) | ||
| const elapsed = Date.now() - start | ||
|
|
||
| expect(result).toBeTruthy() | ||
| expect(request).toHaveBeenCalledTimes(1) | ||
| // Should have waited at least ~100ms before making the request | ||
| expect(elapsed).toBeGreaterThanOrEqual(90) | ||
| }) | ||
| }) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this intentional? The connection check happens outside the loop, which could lead to a race condition if the connection is removed between the initial check and entering the loop. Consider moving the findConnection call inside the loop for safer operation.