Skip to content

Commit ba5c36f

Browse files
authored
chore(connections): disconnect when we encounter a non-retryable error code on an atlas connection CLOUDP-286331 (#6598)
1 parent dc5be52 commit ba5c36f

File tree

5 files changed

+189
-0
lines changed

5 files changed

+189
-0
lines changed

package-lock.json

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/compass-connections/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
"compass-preferences-model": "^2.32.1",
6363
"hadron-app-registry": "^9.3.1",
6464
"lodash": "^4.17.21",
65+
"mongodb": "^6.12.0",
6566
"mongodb-build-info": "^1.7.2",
6667
"mongodb-connection-string-url": "^3.0.1",
6768
"mongodb-data-service": "^22.24.1",

packages/compass-connections/src/stores/connections-store-redux.spec.tsx

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import {
1010
} from '@mongodb-js/testing-library-compass';
1111
import React from 'react';
1212
import { InMemoryConnectionStorage } from '@mongodb-js/connection-storage/provider';
13+
import { getDataServiceForConnection } from './connections-store-redux';
14+
import { type ConnectionInfo } from '@mongodb-js/connection-info';
1315

1416
const mockConnections = [
1517
{
@@ -34,6 +36,13 @@ const mockConnections = [
3436
},
3537
];
3638

39+
// Atlas metadata for the fixture below. Only the cluster name is filled in,
// hence the cast to the full metadata type.
const pineappleAtlasMetadata = {
  clusterName: 'pineapple',
} as ConnectionInfo['atlasMetadata'];

// Default connection info with the Atlas metadata above attached.
const connectionInfoWithAtlasMetadata = {
  ...createDefaultConnectionInfo(),
  atlasMetadata: pineappleAtlasMetadata,
};
45+
3746
function renderCompassConnections(opts?: RenderConnectionsOptions) {
3847
return render(
3948
<div>
@@ -274,6 +283,101 @@ describe('CompassConnections store', function () {
274283
await connectionStorage.load({ id: mockConnections[0].id })
275284
).to.have.nested.property('favorite.name', 'turtles');
276285
});
286+
287+
it('should ignore server heartbeat failed events that are not non-retryable error codes', async function () {
288+
const { connectionsStore } = renderCompassConnections({
289+
connectFn: async () => {
290+
await wait(1);
291+
return {};
292+
},
293+
});
294+
295+
// Wait till we're connected.
296+
await connectionsStore.actions.connect(connectionInfoWithAtlasMetadata);
297+
298+
const connections = connectionsStore.getState().connections;
299+
expect(connections.ids).to.have.lengthOf(1);
300+
301+
const dataService = getDataServiceForConnection(
302+
connectionInfoWithAtlasMetadata.id
303+
);
304+
305+
let didDisconnect = false;
306+
let didCheckForConnected = false;
307+
sinon.stub(dataService, 'disconnect').callsFake(async () => {
308+
didDisconnect = true;
309+
return Promise.resolve();
310+
});
311+
dataService.isConnected = () => {
312+
// If this is called we know the error wasn't handled properly.
313+
didCheckForConnected = true;
314+
return true;
315+
};
316+
317+
let didReceiveCallToHeartbeatFailedListener = false;
318+
dataService.on('serverHeartbeatFailed', () => {
319+
didReceiveCallToHeartbeatFailedListener = true;
320+
});
321+
322+
// Send a heartbeat fail with an error that's not a non-retryable error code.
323+
dataService['emit']('serverHeartbeatFailed', {
324+
failure: new Error('code: 1234, Not the error we are looking for'),
325+
});
326+
327+
// Wait for the listener to handle the message.
328+
await waitFor(() => {
329+
expect(didReceiveCallToHeartbeatFailedListener).to.be.true;
330+
});
331+
await wait(1);
332+
333+
expect(didDisconnect).to.be.false;
334+
expect(didCheckForConnected).to.be.false;
335+
});
336+
337+
it('should listen for non-retryable errors on server heartbeat failed events and disconnect the data service when encountered', async function () {
338+
const { connectionsStore } = renderCompassConnections({
339+
connectFn: async () => {
340+
await wait(1);
341+
return {};
342+
},
343+
});
344+
345+
// Wait till we're connected.
346+
await connectionsStore.actions.connect(connectionInfoWithAtlasMetadata);
347+
348+
const connections = connectionsStore.getState().connections;
349+
expect(connections.ids).to.have.lengthOf(1);
350+
351+
const dataService = getDataServiceForConnection(
352+
connectionInfoWithAtlasMetadata.id
353+
);
354+
355+
let didDisconnect = false;
356+
sinon.stub(dataService, 'disconnect').callsFake(async () => {
357+
didDisconnect = true;
358+
return Promise.resolve();
359+
});
360+
dataService.isConnected = () => true;
361+
362+
// Send a heartbeat fail with an error that's a non-retryable error code.
363+
dataService['emit']('serverHeartbeatFailed', {
364+
failure: new Error('code: 3003, reason: Insufficient permissions'),
365+
});
366+
367+
await waitFor(() => {
368+
expect(didDisconnect).to.be.true;
369+
});
370+
371+
await waitFor(function () {
372+
const titleNode = screen.getByText('Unable to connect to pineapple');
373+
expect(titleNode).to.be.visible;
374+
375+
const descriptionNode = screen.getByText(
376+
'Reason: Insufficient permissions. To use continue to use this connection either disconnect and reconnect, or refresh your page.'
377+
);
378+
expect(descriptionNode).to.be.visible;
379+
});
380+
});
277381
});
278382

279383
describe('#saveAndConnect', function () {

packages/compass-connections/src/stores/connections-store-redux.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import type { Reducer, AnyAction, Action } from 'redux';
33
import { createStore, applyMiddleware } from 'redux';
44
import type { ThunkAction } from 'redux-thunk';
55
import thunk from 'redux-thunk';
6+
import type { ServerHeartbeatFailedEvent } from 'mongodb';
67
import {
78
getConnectionTitle,
89
type ConnectionInfo,
@@ -1465,6 +1466,57 @@ function isAtlasStreamsInstance(
14651466
}
14661467
}
14671468

1469+
// Failed server heartbeats are inspected for error codes that will never
// succeed on retry. On compass web these can show up when:
// - A user's session has ended.
// - The user's roles have changed.
// - The cluster / group they are trying to connect to has since been deleted.
// When one is encountered we disconnect, both to avoid polluting logs/metrics
// and to stop reconnect attempts that we know will fail.
// The error codes are defined in
// https://github.com/10gen/mms/blob/de2a9c463cfe530efb8e2a0941033e8207b6cb11/server/src/main/com/xgen/cloud/services/clusterconnection/runtime/res/CustomCloseCodes.java
const NonRetryableErrorCodes = [3000, 3003, 4004, 1008] as const;

// Generic description for each non-retryable code, keyed by the code itself.
const NonRetryableErrorDescriptionFallbacks: Record<
  typeof NonRetryableErrorCodes[number],
  string
> = {
  1008: 'Violated policy',
  3000: 'Unauthorized',
  3003: 'Forbidden',
  4004: 'Not Found',
};
1487+
1488+
// Returns true when the failure message of a failed heartbeat contains a
// `code: <n>,` marker for any of the `NonRetryableErrorCodes`.
function isNonRetryableHeartbeatFailure(evt: ServerHeartbeatFailedEvent) {
  const failureMessage = evt.failure.message;
  for (const code of NonRetryableErrorCodes) {
    if (failureMessage.includes(`code: ${code},`)) {
      return true;
    }
  }
  return false;
}
1493+
1494+
// Builds the human readable description for a non-retryable error.
//
// Prefers the `reason: ...` text that follows a `code: <n>,` marker in the
// error message. When no reason is present, or it is blank, falls back to the
// generic description for the first `code: <n>,` found in the message, and
// finally to 'Unknown' when that code has no fallback entry.
function getDescriptionForNonRetryableError(error: Error): string {
  // Capture to the end of the line (not the end of the whole message) so that
  // any trailing lines in the message don't make us drop the reason, and trim
  // so a whitespace-only reason is treated as missing.
  const reason = error.message.match(/code: \d+, reason: (.*)/)?.[1]?.trim();
  if (reason) {
    return reason;
  }

  // `Number(undefined)` is NaN, which has no entry in the fallback map, so a
  // message without any code marker ends up as 'Unknown'.
  const code = Number(error.message.match(/code: (\d+),/)?.[1]);
  return (
    NonRetryableErrorDescriptionFallbacks[
      code as typeof NonRetryableErrorCodes[number]
    ] ?? 'Unknown'
  );
}
1506+
1507+
// Opens a warning toast for a connection that hit a non-retryable error.
// The toast id includes the connection id, the title names the connection,
// and the description carries the reason derived from the error.
const openConnectionClosedWithNonRetryableErrorToast = (
  connectionInfo: ConnectionInfo,
  error: Error
) => {
  const toastId = `non-retryable-error-encountered--${connectionInfo.id}`;
  const connectionTitle = getConnectionTitle(connectionInfo);
  const reason = getDescriptionForNonRetryableError(error);

  openToast(toastId, {
    title: `Unable to connect to ${connectionTitle}`,
    description: `Reason: ${reason}. To use continue to use this connection either disconnect and reconnect, or refresh your page.`,
    variant: 'warning',
  });
};
1519+
14681520
export const connect = (
14691521
connectionInfo: ConnectionInfo
14701522
): ConnectionsThunkAction<
@@ -1659,6 +1711,34 @@ const connectWithOptions = (
16591711
return;
16601712
}
16611713

1714+
let showedNonRetryableErrorToast = false;
1715+
// Listen for non-retry-able errors on failed server heartbeats.
1716+
// These can happen on compass web when:
1717+
// - A user's session has ended.
1718+
// - The user's roles have changed.
1719+
// - The cluster / group they are trying to connect to has since been deleted.
1720+
// When we encounter one we disconnect. This is to avoid polluting logs/metrics
1721+
// and to avoid constantly retrying to connect when we know it'll fail.
1722+
dataService.on(
1723+
'serverHeartbeatFailed',
1724+
(evt: ServerHeartbeatFailedEvent) => {
1725+
if (!isNonRetryableHeartbeatFailure(evt)) {
1726+
return;
1727+
}
1728+
1729+
if (!dataService.isConnected() || showedNonRetryableErrorToast) {
1730+
return;
1731+
}
1732+
1733+
openConnectionClosedWithNonRetryableErrorToast(
1734+
connectionInfo,
1735+
evt.failure
1736+
);
1737+
showedNonRetryableErrorToast = true;
1738+
void dataService.disconnect();
1739+
}
1740+
);
1741+
16621742
dataService.on('oidcAuthFailed', (error) => {
16631743
openToast('oidc-auth-failed', {
16641744
title: `Failed to authenticate for ${getConnectionTitle(

packages/data-service/src/data-service.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ export type ExplainExecuteOptions = ExecutionOptions & {
140140

141141
export interface DataServiceEventMap {
142142
topologyDescriptionChanged: (evt: TopologyDescriptionChangedEvent) => void;
143+
serverHeartbeatFailed: (evt: ServerHeartbeatFailedEvent) => void;
143144
connectionInfoSecretsChanged: () => void;
144145
close: () => void;
145146
oidcAuthFailed: (error: string) => void;
@@ -2414,6 +2415,7 @@ class DataServiceImpl extends WithLogContext implements DataService {
24142415
}
24152416
);
24162417
}
2418+
this._emitter.emit('serverHeartbeatFailed', evt);
24172419
});
24182420

24192421
client.on('commandSucceeded', (evt: CommandSucceededEvent) => {

0 commit comments

Comments
 (0)