-
Notifications
You must be signed in to change notification settings - Fork 98
fix: turn atlas-connect-cluster async #343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
e654962
24298ed
f45b22f
d2f91d5
8eeb786
6c84179
dad0111
d2c54ae
e978c82
54dfd7b
42b3e47
e267b45
28372be
2134f16
57981cb
693d31c
4274f1b
d1b2324
0acc685
a0ce60c
11d3a14
0286a89
1127f4c
0a1e57c
7430d49
bf41f0d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,18 +11,42 @@ const EXPIRY_MS = 1000 * 60 * 60 * 12; // 12 hours | |
function sleep(ms: number): Promise<void> { | ||
return new Promise((resolve) => setTimeout(resolve, ms)); | ||
} | ||
|
||
export class ConnectClusterTool extends AtlasToolBase { | ||
protected name = "atlas-connect-cluster"; | ||
protected description = "Connect to MongoDB Atlas cluster"; | ||
protected description = "Connect to / Inspect connection of MongoDB Atlas cluster"; | ||
protected operationType: OperationType = "metadata"; | ||
protected argsShape = { | ||
projectId: z.string().describe("Atlas project ID"), | ||
clusterName: z.string().describe("Atlas cluster name"), | ||
}; | ||
|
||
protected async execute({ projectId, clusterName }: ToolArgs<typeof this.argsShape>): Promise<CallToolResult> { | ||
await this.session.disconnect(); | ||
private async queryConnection( | ||
projectId: string, | ||
clusterName: string | ||
): Promise<"connected" | "disconnected" | "connecting" | "connected-to-other-cluster"> { | ||
if (!this.session.connectedAtlasCluster) { | ||
return "disconnected"; | ||
} | ||
|
||
if ( | ||
this.session.connectedAtlasCluster.projectId !== projectId || | ||
this.session.connectedAtlasCluster.clusterName !== clusterName | ||
) { | ||
return "connected-to-other-cluster"; | ||
} | ||
|
||
if (!this.session.serviceProvider) { | ||
return "connecting"; | ||
} | ||
|
||
await this.session.serviceProvider.runCommand("admin", { | ||
ping: 1, | ||
}); | ||
return "connected"; | ||
} | ||
|
||
private async prepareClusterConnection(projectId: string, clusterName: string): Promise<string> { | ||
const cluster = await inspectCluster(this.session.apiClient, projectId, clusterName); | ||
|
||
if (!cluster.connectionString) { | ||
|
@@ -81,14 +105,37 @@ export class ConnectClusterTool extends AtlasToolBase { | |
cn.username = username; | ||
cn.password = password; | ||
cn.searchParams.set("authSource", "admin"); | ||
const connectionString = cn.toString(); | ||
return cn.toString(); | ||
} | ||
|
||
private async connectToCluster( | ||
projectId: string, | ||
clusterName: string, | ||
connectionString: string, | ||
tryCount: number | ||
): Promise<void> { | ||
let lastError: Error | undefined = undefined; | ||
|
||
for (let i = 0; i < 20; i++) { | ||
logger.debug( | ||
LogId.atlasConnectAttempt, | ||
"atlas-connect-cluster", | ||
`attempting to connect to cluster: ${this.session.connectedAtlasCluster?.clusterName}` | ||
); | ||
|
||
for (let i = 0; i < tryCount; i++) { | ||
if ( | ||
!this.session.connectedAtlasCluster || | ||
this.session.connectedAtlasCluster.projectId != projectId || | ||
this.session.connectedAtlasCluster.clusterName != clusterName | ||
) { | ||
lastError = new Error("Cluster connection aborted"); | ||
break; | ||
} | ||
|
||
try { | ||
await this.session.connectToMongoDB(connectionString, this.config.connectOptions); | ||
lastError = undefined; | ||
|
||
await this.session.connectToMongoDB(connectionString, this.config.connectOptions); | ||
break; | ||
} catch (err: unknown) { | ||
const error = err instanceof Error ? err : new Error(String(err)); | ||
|
@@ -106,16 +153,121 @@ export class ConnectClusterTool extends AtlasToolBase { | |
} | ||
|
||
if (lastError) { | ||
if (this.session.connectedAtlasCluster?.projectId && this.session.connectedAtlasCluster?.username) { | ||
void this.session.apiClient | ||
.deleteDatabaseUser({ | ||
params: { | ||
path: { | ||
groupId: this.session.connectedAtlasCluster.projectId, | ||
username: this.session.connectedAtlasCluster.username, | ||
databaseName: "admin", | ||
}, | ||
}, | ||
}) | ||
.catch((err: unknown) => { | ||
const error = err instanceof Error ? err : new Error(String(err)); | ||
logger.debug( | ||
LogId.atlasConnectFailure, | ||
"atlas-connect-cluster", | ||
`error deleting database user: ${error.message}` | ||
); | ||
}); | ||
} | ||
this.session.connectedAtlasCluster = undefined; | ||
throw lastError; | ||
} | ||
|
||
return { | ||
logger.debug( | ||
LogId.atlasConnectSucceeded, | ||
"atlas-connect-cluster", | ||
`connected to cluster: ${this.session.connectedAtlasCluster?.clusterName}` | ||
); | ||
} | ||
|
||
protected async execute({ projectId, clusterName }: ToolArgs<typeof this.argsShape>): Promise<CallToolResult> { | ||
fmenezes marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const connectingResult = { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any reason to have this at the top of the function if it's only used in the last catch clause? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, it is used in two cases: the last return and a switch case early on. |
||
content: [ | ||
{ | ||
type: "text", | ||
text: `Connected to cluster "${clusterName}"`, | ||
type: "text" as const, | ||
text: `Attempting to connect to cluster "${clusterName}"...`, | ||
}, | ||
{ | ||
type: "text" as const, | ||
text: `Warning: Provisioning a user and connecting to the cluster may take more time, please check again in a few seconds.`, | ||
}, | ||
], | ||
}; | ||
|
||
try { | ||
const state = await this.queryConnection(projectId, clusterName); | ||
switch (state) { | ||
case "connected": | ||
return { | ||
content: [ | ||
{ | ||
type: "text", | ||
text: "Cluster is already connected.", | ||
}, | ||
], | ||
}; | ||
case "connecting": | ||
return connectingResult; | ||
case "connected-to-other-cluster": | ||
case "disconnected": | ||
default: | ||
// fall through to create new connection | ||
break; | ||
} | ||
} catch (err: unknown) { | ||
const error = err instanceof Error ? err : new Error(String(err)); | ||
logger.debug( | ||
LogId.atlasConnectFailure, | ||
"atlas-connect-cluster", | ||
`error querying cluster: ${error.message}` | ||
); | ||
// fall through to create new connection | ||
} | ||
|
||
await this.session.disconnect(); | ||
const connectionString = await this.prepareClusterConnection(projectId, clusterName); | ||
|
||
try { | ||
// First, try to connect to the cluster within the current tool call. | ||
// We give it 60 attempts with 500 ms delay between each, so ~30 seconds | ||
await this.connectToCluster(projectId, clusterName, connectionString, 60); | ||
|
||
return { | ||
content: [ | ||
{ | ||
type: "text", | ||
text: `Connected to cluster "${clusterName}".`, | ||
}, | ||
], | ||
}; | ||
} catch (err: unknown) { | ||
const error = err instanceof Error ? err : new Error(String(err)); | ||
logger.debug( | ||
LogId.atlasConnectFailure, | ||
"atlas-connect-cluster", | ||
`error connecting to cluster: ${error.message}` | ||
); | ||
|
||
// We couldn't connect in ~30 seconds, likely because user creation is taking longer. | ||
// Retry the connection with longer timeout (~5 minutes), while also returning a response | ||
// to the client. Many clients will have a 1 minute timeout for tool calls, so we want to | ||
// return well before that. | ||
// | ||
// Once we add support for streamable http, we'd want to use progress notifications here. | ||
void this.connectToCluster(projectId, clusterName, connectionString, 600).catch((err) => { | ||
const error = err instanceof Error ? err : new Error(String(err)); | ||
logger.debug( | ||
LogId.atlasConnectFailure, | ||
"atlas-connect-cluster", | ||
`error connecting to cluster: ${error.message}` | ||
); | ||
}); | ||
|
||
return connectingResult; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -183,13 +183,27 @@ describeWithAtlas("clusters", (integration) => { | |
it("connects to cluster", async () => { | ||
const projectId = getProjectId(); | ||
|
||
const response = (await integration.mcpClient().callTool({ | ||
name: "atlas-connect-cluster", | ||
arguments: { projectId, clusterName }, | ||
})) as CallToolResult; | ||
expect(response.content).toBeArray(); | ||
expect(response.content).toHaveLength(1); | ||
expect(response.content[0]?.text).toContain(`Connected to cluster "${clusterName}"`); | ||
for (let i = 0; i < 600; i++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Now that we added the 30-second retry logic in the connect tool, this may be a bit excessive - worst case scenario, this will result in 5 hours of waiting for the test to fail. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd expect jest has some default test timeouts. Maybe we can set an explicit timeout here and turn this into a while loop or create a There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is actually not the case; on subsequent calls we don't try for 30 secs, we know there is a background process running so we return There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Changed to reflect what we discussed offline: now we always wait 30 secs, and the test is adjusted to 10 iterations only. |
||
const response = (await integration.mcpClient().callTool({ | ||
name: "atlas-connect-cluster", | ||
arguments: { projectId, clusterName }, | ||
})) as CallToolResult; | ||
expect(response.content).toBeArray(); | ||
expect(response.content.length).toBeGreaterThanOrEqual(1); | ||
expect(response.content[0]?.type).toEqual("text"); | ||
const c = response.content[0] as { text: string }; | ||
if ( | ||
c.text.includes("Cluster is already connected.") || | ||
c.text.includes(`Connected to cluster "${clusterName}"`) | ||
) { | ||
break; // success | ||
} else { | ||
expect(response.content[0]?.text).toContain( | ||
`Attempting to connect to cluster "${clusterName}"...` | ||
); | ||
} | ||
await sleep(500); | ||
} | ||
}); | ||
}); | ||
}); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wrap the `runCommand('admin', { ping: 1 })` call in a try/catch so transient ping errors don’t bubble up and trigger a full reconnection flow prematurely.
Copilot uses AI. Check for mistakes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need; I'm bubbling up the error.