From 1eccde76c13887627806fe7410d1c32fef2a1721 Mon Sep 17 00:00:00 2001 From: Rhys Howell Date: Wed, 8 Jan 2025 18:42:01 -0500 Subject: [PATCH 1/3] chore(connections): disconnect when we encounter a non-retryable error code on an atlas connection --- package-lock.json | 2 + packages/compass-connections/package.json | 1 + .../src/stores/connections-store-redux.ts | 71 +++++++++++++++++++ packages/data-service/src/data-service.ts | 2 + 4 files changed, 76 insertions(+) diff --git a/package-lock.json b/package-lock.json index b4056550cd1..f1d5a8ec6a9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -43027,6 +43027,7 @@ "compass-preferences-model": "^2.31.2", "hadron-app-registry": "^9.2.9", "lodash": "^4.17.21", + "mongodb": "^6.12.0", "mongodb-build-info": "^1.7.2", "mongodb-connection-string-url": "^3.0.1", "mongodb-data-service": "^22.23.10", @@ -55371,6 +55372,7 @@ "hadron-app-registry": "^9.2.9", "lodash": "^4.17.21", "mocha": "^10.2.0", + "mongodb": "^6.12.0", "mongodb-build-info": "^1.7.2", "mongodb-connection-string-url": "^3.0.1", "mongodb-data-service": "^22.23.10", diff --git a/packages/compass-connections/package.json b/packages/compass-connections/package.json index 766e9fe0dea..8aead63a69d 100644 --- a/packages/compass-connections/package.json +++ b/packages/compass-connections/package.json @@ -62,6 +62,7 @@ "compass-preferences-model": "^2.31.2", "hadron-app-registry": "^9.2.9", "lodash": "^4.17.21", + "mongodb": "^6.12.0", "mongodb-build-info": "^1.7.2", "mongodb-connection-string-url": "^3.0.1", "mongodb-data-service": "^22.23.10", diff --git a/packages/compass-connections/src/stores/connections-store-redux.ts b/packages/compass-connections/src/stores/connections-store-redux.ts index 80a84e2b2b5..787d06be5ff 100644 --- a/packages/compass-connections/src/stores/connections-store-redux.ts +++ b/packages/compass-connections/src/stores/connections-store-redux.ts @@ -3,6 +3,7 @@ import type { Reducer, AnyAction, Action } from 'redux'; import { createStore, applyMiddleware } from 'redux'; import type { ThunkAction } from 'redux-thunk'; import thunk from 'redux-thunk'; +import type { ServerHeartbeatFailedEvent } from 'mongodb'; import { getConnectionTitle, type ConnectionInfo, @@ -1507,6 +1508,46 @@ function isAtlasStreamsInstance( } } +const NonRetryableErrorCodes = [3000, 3003, 4004, 1008] as const; +const NonRetryableErrorDescriptionFallbacks: { + [code in typeof NonRetryableErrorCodes[number]]: string; +} = { + 3000: 'Unauthorized', + 3003: 'Forbidden', + 4004: 'Not Found', + 1008: 'Violated policy', +}; + +function isNonRetryableHeartbeatFailure(evt: ServerHeartbeatFailedEvent) { + return NonRetryableErrorCodes.some((code) => + evt.failure.message.includes(`code: ${code},`) + ); +} + +function getDescriptionForNonRetryableError(error: Error): string { + // Give a description from the error message when provided, otherwise fallback + // to the generic error description. + const reason = error.message.match(/code: \d+, reason: (.*)$/)?.[1]; + return reason && reason.length > 0 + ? reason + : NonRetryableErrorDescriptionFallbacks[ + Number( + error.message.match(/code: (\d+),/)?.[1] + ) as typeof NonRetryableErrorCodes[number] + ] ?? 'Unknown'; +} + +const openConnectionClosedWithNonRetryableErrorToast = ( + connectionInfo: ConnectionInfo, + error: Error +) => { + openToast(`non-retryable-error-encountered--${connectionInfo.id}`, { + title: `Unable to connect to ${getConnectionTitle(connectionInfo)}`, + description: `Reason: ${getDescriptionForNonRetryableError(error)}`, + variant: 'warning', + }); +}; + export const connect = ( connectionInfo: ConnectionInfo ): ConnectionsThunkAction< @@ -1701,6 +1742,36 @@ const connectWithOptions = ( return; } + let showedNonRetryableErrorToast = false; + if (connectionInfo.atlasMetadata) { + // When we're on cloud we listen for non-retry-able errors on failed server + // heartbeats. These can happen when: + // - A user's session has ended. + // - The user's roles have changed. + // - The cluster / group they are trying to connect to has since been deleted. + // When we encounter one we disconnect. This is to avoid polluting logs/metrics + // and to avoid constantly retrying to connect when we know it'll fail. + dataService.on( + 'serverHeartbeatFailed', + (evt: ServerHeartbeatFailedEvent) => { + if (!isNonRetryableHeartbeatFailure(evt)) { + return; + } + + if (!dataService.isConnected() || showedNonRetryableErrorToast) { + return; + } + + openConnectionClosedWithNonRetryableErrorToast( + connectionInfo, + evt.failure + ); + showedNonRetryableErrorToast = true; + void dataService.disconnect(); + } + ); + } + dataService.on('oidcAuthFailed', (error) => { openToast('oidc-auth-failed', { title: `Failed to authenticate for ${getConnectionTitle( diff --git a/packages/data-service/src/data-service.ts b/packages/data-service/src/data-service.ts index 761f60809f4..195046b6776 100644 --- a/packages/data-service/src/data-service.ts +++ b/packages/data-service/src/data-service.ts @@ -140,6 +140,7 @@ export type ExplainExecuteOptions = ExecutionOptions & { export interface DataServiceEventMap { topologyDescriptionChanged: (evt: TopologyDescriptionChangedEvent) => void; + serverHeartbeatFailed: (evt: ServerHeartbeatFailedEvent) => void; connectionInfoSecretsChanged: () => void; close: () => void; oidcAuthFailed: (error: string) => void; @@ -2414,6 +2415,7 @@ class DataServiceImpl extends WithLogContext implements DataService { } ); } + this._emitter.emit('serverHeartbeatFailed', evt); }); client.on('commandSucceeded', (evt: CommandSucceededEvent) => { From 1aa74b740b62715def4dda40cc2f150d87089b27 Mon Sep 17 00:00:00 2001 From: Rhys Howell Date: Mon, 13 Jan 2025 13:33:23 -0500 Subject: [PATCH 2/3] fixup: remove atlas restrictive check, add unit tests --- .../stores/connections-store-redux.spec.tsx | 104 ++++++++++++++++++ .../src/stores/connections-store-redux.ts | 63 ++++++----- 2 files changed, 140 insertions(+), 27 deletions(-) diff --git a/packages/compass-connections/src/stores/connections-store-redux.spec.tsx b/packages/compass-connections/src/stores/connections-store-redux.spec.tsx index a238f5c7ff6..e54b48226b3 100644 --- a/packages/compass-connections/src/stores/connections-store-redux.spec.tsx +++ b/packages/compass-connections/src/stores/connections-store-redux.spec.tsx @@ -10,6 +10,8 @@ import { } from '@mongodb-js/testing-library-compass'; import React from 'react'; import { InMemoryConnectionStorage } from '@mongodb-js/connection-storage/provider'; +import { getDataServiceForConnection } from './connections-store-redux'; +import { type ConnectionInfo } from '@mongodb-js/connection-info'; const mockConnections = [ { @@ -34,6 +36,13 @@ const mockConnections = [ }, ]; +const connectionInfoWithAtlasMetadata = { + ...createDefaultConnectionInfo(), + atlasMetadata: { + clusterName: 'pineapple', + } as ConnectionInfo['atlasMetadata'], +}; + function renderCompassConnections(opts?: RenderConnectionsOptions) { return render(
@@ -274,6 +283,101 @@ describe('CompassConnections store', function () { await connectionStorage.load({ id: mockConnections[0].id }) ).to.have.nested.property('favorite.name', 'turtles'); }); + + it('should ignore server heartbeat failed events that are not non-retryable error codes', async function () { + const { connectionsStore } = renderCompassConnections({ + connectFn: async () => { + await wait(1); + return {}; + }, + }); + + // Wait till we're connected. + await connectionsStore.actions.connect(connectionInfoWithAtlasMetadata); + + const connections = connectionsStore.getState().connections; + expect(connections.ids).to.have.lengthOf(1); + + const dataService = getDataServiceForConnection( + connectionInfoWithAtlasMetadata.id + ); + + let didDisconnect = false; + let didCheckForConnected = false; + sinon.stub(dataService, 'disconnect').callsFake(async () => { + didDisconnect = true; + return Promise.resolve(); + }); + dataService.isConnected = () => { + // If this is called we know the error wasn't handled properly. + didCheckForConnected = true; + return true; + }; + + let didReceiveCallToHeartbeatFailedListener = false; + dataService.on('serverHeartbeatFailed', () => { + didReceiveCallToHeartbeatFailedListener = true; + }); + + // Send a heartbeat fail with an error that's not a non-retryable error code. + dataService['emit']('serverHeartbeatFailed', { + failure: new Error('code: 1234, Not the error we are looking for'), + }); + + // Wait for the listener to handle the message. + await waitFor(() => { + expect(didReceiveCallToHeartbeatFailedListener).to.be.true; + }); + await wait(1); + + expect(didDisconnect).to.be.false; + expect(didCheckForConnected).to.be.false; + }); + + it('should listen for non-retryable errors on server heartbeat failed events and disconnect the data service when encountered', async function () { + const { connectionsStore } = renderCompassConnections({ + connectFn: async () => { + await wait(1); + return {}; + }, + }); + + // Wait till we're connected. + await connectionsStore.actions.connect(connectionInfoWithAtlasMetadata); + + const connections = connectionsStore.getState().connections; + expect(connections.ids).to.have.lengthOf(1); + + const dataService = getDataServiceForConnection( + connectionInfoWithAtlasMetadata.id + ); + + let didDisconnect = false; + sinon.stub(dataService, 'disconnect').callsFake(async () => { + didDisconnect = true; + return Promise.resolve(); + }); + dataService.isConnected = () => true; + + // Send a heartbeat fail with an error that's a non-retryable error code. + dataService['emit']('serverHeartbeatFailed', { + failure: new Error('code: 3003, reason: Insufficient permissions'), + }); + + await waitFor(() => { + expect(didDisconnect).to.be.true; + }); + + await waitFor(function () { + const titleNode = screen.getByText('Unable to connect to pineapple'); + expect(titleNode).to.be.visible; + + const descriptionNode = screen.getByText( + 'Reason: Insufficient permissions. To use continue to use this connection either disconnect and reconnect, or refresh your page.' + ); + expect(descriptionNode).to.be.visible; + }); + }); }); describe('#saveAndConnect', function () { diff --git a/packages/compass-connections/src/stores/connections-store-redux.ts b/packages/compass-connections/src/stores/connections-store-redux.ts index 41e395d502d..7b282db1dbe 100644 --- a/packages/compass-connections/src/stores/connections-store-redux.ts +++ b/packages/compass-connections/src/stores/connections-store-redux.ts @@ -1466,6 +1466,15 @@ function isAtlasStreamsInstance( } } +// We listen for non-retry-able errors on failed server heartbeats. +// These can happen when: +// - A user's session has ended. +// - The user's roles have changed. +// - The cluster / group they are trying to connect to has since been deleted. +// When we encounter one we disconnect. This is to avoid polluting logs/metrics +// and to avoid constantly retrying to connect when we know it'll fail. +// These error codes can be found at +// https://github.com/10gen/mms/blob/de2a9c463cfe530efb8e2a0941033e8207b6cb11/server/src/main/com/xgen/cloud/services/clusterconnection/runtime/res/CustomCloseCodes.java const NonRetryableErrorCodes = [3000, 3003, 4004, 1008] as const; const NonRetryableErrorDescriptionFallbacks: { [code in typeof NonRetryableErrorCodes[number]]: string; @@ -1501,7 +1510,9 @@ const openConnectionClosedWithNonRetryableErrorToast = ( ) => { openToast(`non-retryable-error-encountered--${connectionInfo.id}`, { title: `Unable to connect to ${getConnectionTitle(connectionInfo)}`, - description: `Reason: ${getDescriptionForNonRetryableError(error)}`, + description: `Reason: ${getDescriptionForNonRetryableError( + error + )}. To use continue to use this connection either disconnect and reconnect, or refresh your page.`, variant: 'warning', }); }; @@ -1701,34 +1712,32 @@ const connectWithOptions = ( } let showedNonRetryableErrorToast = false; - if (connectionInfo.atlasMetadata) { - // When we're on cloud we listen for non-retry-able errors on failed server - // heartbeats. These can happen when: - // - A user's session has ended. - // - The user's roles have changed. - // - The cluster / group they are trying to connect to has since been deleted. - // When we encounter one we disconnect. This is to avoid polluting logs/metrics - // and to avoid constantly retrying to connect when we know it'll fail. - dataService.on( - 'serverHeartbeatFailed', - (evt: ServerHeartbeatFailedEvent) => { - if (!isNonRetryableHeartbeatFailure(evt)) { - return; - } - - if (!dataService.isConnected() || showedNonRetryableErrorToast) { - return; - } + // Listen for non-retry-able errors on failed server heartbeats. + // These can happen when: + // - A user's session has ended. + // - The user's roles have changed. + // - The cluster / group they are trying to connect to has since been deleted. + // When we encounter one we disconnect. This is to avoid polluting logs/metrics + // and to avoid constantly retrying to connect when we know it'll fail. + dataService.on( + 'serverHeartbeatFailed', + (evt: ServerHeartbeatFailedEvent) => { + if (!isNonRetryableHeartbeatFailure(evt)) { + return; + } - openConnectionClosedWithNonRetryableErrorToast( - connectionInfo, - evt.failure - ); - showedNonRetryableErrorToast = true; - void dataService.disconnect(); + if (!dataService.isConnected() || showedNonRetryableErrorToast) { + return; } - ); - } + + openConnectionClosedWithNonRetryableErrorToast( + connectionInfo, + evt.failure + ); + showedNonRetryableErrorToast = true; + void dataService.disconnect(); + } + ); dataService.on('oidcAuthFailed', (error) => { openToast('oidc-auth-failed', { From acac16d64fd13c265f2ac290c9398de8a19861ce Mon Sep 17 00:00:00 2001 From: Rhys Howell Date: Tue, 14 Jan 2025 17:06:32 -0500 Subject: [PATCH 3/3] fixup: mention compass web in comment --- .../compass-connections/src/stores/connections-store-redux.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/compass-connections/src/stores/connections-store-redux.ts b/packages/compass-connections/src/stores/connections-store-redux.ts index 7b282db1dbe..7626587fba7 100644 --- a/packages/compass-connections/src/stores/connections-store-redux.ts +++ b/packages/compass-connections/src/stores/connections-store-redux.ts @@ -1467,7 +1467,7 @@ function isAtlasStreamsInstance( } // We listen for non-retry-able errors on failed server heartbeats. -// These can happen when: +// These can happen on compass web when: // - A user's session has ended. // - The user's roles have changed. // - The cluster / group they are trying to connect to has since been deleted. @@ -1713,7 +1713,7 @@ const connectWithOptions = ( let showedNonRetryableErrorToast = false; // Listen for non-retry-able errors on failed server heartbeats. - // These can happen when: + // These can happen on compass web when: // - A user's session has ended. // - The user's roles have changed. // - The cluster / group they are trying to connect to has since been deleted.