Skip to content

Commit 3645ec4

Browse files
authored
fix(auth): handle network errors during restore/getChatAuthState (aws#5476)
## Problem During extension startup, the auth system tries to restore the connection. This may result in a `refreshToken` call. If a network error happens during this, the connection restore fails and no connection is set to active. Then, Amazon Q will clean up any "leftover" connections, including this one, and the user will be completely logged out. The connection is technically valid despite having an expired token, and another call may clear up the network error and allow us to get better token. ## Solution The auth system will restore this connection even if it runs into a network error. Then, Amazon Q will display as if its logged in normally. If the network error was one-off, then there is no indication that there was an issue to the user. If there are still network issues, using chat or inline-suggestions will make an error display until `refreshToken` is successful (or the registration expires). Also, some telemetry updates were included to help make this visible. Side effects: - On session startup (with network error): - No notification for new CW customizations - "featureConfig" AKA A/B testing? will not initialize. <!--- REMINDER: - Read CONTRIBUTING.md first. - Add test coverage for your changes. - Update the changelog using `npm run newChange`. - Link to related issues/commits. - Testing: how did you test your changes? - Screenshots (if the pull request is related to UI/UX then please include light and dark theme screenshots) --> ## License By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent a063f08 commit 3645ec4

File tree

10 files changed

+208
-41
lines changed

10 files changed

+208
-41
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "Bug Fix",
3+
"description": "Auth: Users may be silently logged out due to network issues when starting the extension."
4+
}

packages/amazonq/src/extension.ts

Lines changed: 47 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
isAnySsoConnection,
1313
} from 'aws-core-vscode/auth'
1414
import {
15+
AuthState,
1516
AuthUtil,
1617
activate as activateCodeWhisperer,
1718
shutdown as shutdownCodeWhisperer,
@@ -34,6 +35,7 @@ import {
3435
globals,
3536
initialize,
3637
initializeComputeRegion,
38+
isNetworkError,
3739
messages,
3840
placeholder,
3941
setContext,
@@ -141,32 +143,52 @@ export async function activateAmazonQCommon(context: vscode.ExtensionContext, is
141143
}, 1000)
142144
}
143145

144-
await telemetry.auth_userState.run(async () => {
145-
telemetry.record({ passive: true })
146-
147-
const firstUse = AuthUtils.ExtensionUse.instance.isFirstUse()
148-
const wasUpdated = AuthUtils.ExtensionUse.instance.wasUpdated()
149-
150-
if (firstUse) {
151-
telemetry.record({ source: ExtStartUpSources.firstStartUp })
152-
} else if (wasUpdated) {
153-
telemetry.record({ source: ExtStartUpSources.update })
154-
} else {
155-
telemetry.record({ source: ExtStartUpSources.reload })
156-
}
157-
158-
const authState = (await AuthUtil.instance.getChatAuthState()).codewhispererChat
159-
const currConn = AuthUtil.instance.conn
160-
if (currConn !== undefined && !isAnySsoConnection(currConn)) {
161-
getLogger().error(`Current Amazon Q connection is not SSO, type is: %s`, currConn?.type)
162-
}
163-
164-
telemetry.record({
165-
authStatus: authState === 'connected' || authState === 'expired' ? authState : 'notConnected',
166-
authEnabledConnections: AuthUtils.getAuthFormIdsFromConnection(currConn).join(','),
167-
...(await getTelemetryMetadataForConn(currConn)),
146+
await telemetry.auth_userState
147+
.run(async () => {
148+
telemetry.record({ passive: true })
149+
150+
const firstUse = AuthUtils.ExtensionUse.instance.isFirstUse()
151+
const wasUpdated = AuthUtils.ExtensionUse.instance.wasUpdated()
152+
153+
if (firstUse) {
154+
telemetry.record({ source: ExtStartUpSources.firstStartUp })
155+
} else if (wasUpdated) {
156+
telemetry.record({ source: ExtStartUpSources.update })
157+
} else {
158+
telemetry.record({ source: ExtStartUpSources.reload })
159+
}
160+
161+
let authState: AuthState = 'disconnected'
162+
try {
163+
// May call connection validate functions that try to refresh the token.
164+
// This could result in network errors.
165+
authState = (await AuthUtil.instance.getChatAuthState(false)).codewhispererChat
166+
} catch (err) {
167+
if (
168+
isNetworkError(err) &&
169+
AuthUtil.instance.conn &&
170+
AuthUtil.instance.auth.getConnectionState(AuthUtil.instance.conn) === 'valid'
171+
) {
172+
authState = 'connectedWithNetworkError'
173+
} else {
174+
throw err
175+
}
176+
}
177+
const currConn = AuthUtil.instance.conn
178+
if (currConn !== undefined && !isAnySsoConnection(currConn)) {
179+
getLogger().error(`Current Amazon Q connection is not SSO, type is: %s`, currConn?.type)
180+
}
181+
182+
telemetry.record({
183+
authStatus:
184+
authState === 'connected' || authState === 'expired' || authState === 'connectedWithNetworkError'
185+
? authState
186+
: 'notConnected',
187+
authEnabledConnections: AuthUtils.getAuthFormIdsFromConnection(currConn).join(','),
188+
...(await getTelemetryMetadataForConn(currConn)),
189+
})
168190
})
169-
})
191+
.catch((err) => getLogger().error('Error collecting telemetry for auth_userState: %s', err))
170192
}
171193

172194
export async function deactivateCommon() {

packages/core/src/auth/auth.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,11 @@ export class Auth implements AuthService, ConnectionManager {
601601
})
602602
}
603603

604+
/**
605+
* Updates the state of the connection based on a series of validations.
606+
* Will throw for network errors encoutered during connection checks, but the state will not be
607+
* invalidated for these errors.
608+
*/
604609
@withTelemetryContext({ name: 'validateConnection', class: authClassName })
605610
private async validateConnection<T extends Profile>(id: Connection['id'], profile: StoredProfile<T>) {
606611
const runCheck = async () => {
@@ -843,6 +848,7 @@ export class Auth implements AuthService, ConnectionManager {
843848
if (isNetworkError(e)) {
844849
throw new ToolkitError('Failed to update connection due to networking issues', {
845850
cause: e,
851+
code: e.code,
846852
})
847853
}
848854
}

packages/core/src/auth/secondaryAuth.ts

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ import { isNonNullable } from '../shared/utilities/tsUtils'
1414
import { ToolIdStateKey } from '../shared/globalState'
1515
import { Connection, getTelemetryMetadataForConn, SsoConnection, StatefulConnection } from './connection'
1616
import { indent } from '../shared/utilities/textUtilities'
17-
import { telemetry } from '../shared/telemetry/telemetry'
17+
import { AuthStatus, telemetry } from '../shared/telemetry/telemetry'
1818
import { asStringifiedStack } from '../shared/telemetry/spans'
1919
import { withTelemetryContext } from '../shared/telemetry/util'
20+
import { isNetworkError } from '../shared/errors'
2021

2122
export type ToolId = 'codecatalyst' | 'codewhisperer' | 'testId'
2223

@@ -262,25 +263,21 @@ export class SecondaryAuth<T extends Connection = Connection> {
262263
connectionState: 'undefined',
263264
})
264265
await this.auth.tryAutoConnect()
265-
this.#savedConnection = await this.loadSavedConnection()
266+
this.#savedConnection = await this._loadSavedConnection()
266267
this.#onDidChangeActiveConnection.fire(this.activeConnection)
267268

268-
const conn = this.#activeConnection
269-
if (conn) {
270-
telemetry.record({
271-
connectionState: this.auth.getConnectionState(conn),
272-
...(await getTelemetryMetadataForConn(conn)),
273-
})
274-
}
275-
269+
telemetry.record(await getTelemetryMetadataForConn(this.#savedConnection))
276270
return this.#savedConnection
277271
})
278272
} catch (err) {
279273
getLogger().warn(`auth (${this.toolId}): failed to restore connection: %s`, err)
280274
}
281275
}
282276

283-
private async loadSavedConnection() {
277+
/**
278+
* Provides telemetry if called by restoreConnection() (or another auth_modifyConnection context)
279+
*/
280+
private async _loadSavedConnection() {
284281
// TODO: fix this
285282
// eslint-disable-next-line aws-toolkits/no-banned-usages
286283
const globalState = globals.context.globalState
@@ -297,7 +294,36 @@ export class SecondaryAuth<T extends Connection = Connection> {
297294
getLogger().warn(`auth (${this.toolId}): saved connection "${this.key}" is not valid`)
298295
await globalState.update(this.key, undefined)
299296
} else {
300-
await this.auth.refreshConnectionState(conn)
297+
const getAuthStatus = (state: ReturnType<typeof this.auth.getConnectionState>): AuthStatus => {
298+
return state === 'invalid' ? 'expired' : 'connected'
299+
}
300+
301+
let connectionState = this.auth.getConnectionState(conn)
302+
303+
// This function is expected to be called in the context of restoreConnection()
304+
telemetry.auth_modifyConnection.record({
305+
connectionState,
306+
authStatus: getAuthStatus(connectionState),
307+
})
308+
309+
try {
310+
await this.auth.refreshConnectionState(conn)
311+
312+
connectionState = this.auth.getConnectionState(conn)
313+
telemetry.auth_modifyConnection.record({
314+
connectionState,
315+
authStatus: getAuthStatus(connectionState),
316+
})
317+
} catch (err) {
318+
// The purpose of this function is load a saved connection into memory, not manage the state.
319+
// If updating the state fails, then we should delegate downstream to handle getting the proper state.
320+
getLogger().error('loadSavedConnection: Failed to refresh connection state: %s', err)
321+
if (isNetworkError(err) && connectionState === 'valid') {
322+
telemetry.auth_modifyConnection.record({
323+
authStatus: 'connectedWithNetworkError',
324+
})
325+
}
326+
}
301327
return conn
302328
}
303329
}

packages/core/src/codewhisperer/util/authUtil.ts

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import * as vscode from 'vscode'
77
import * as localizedText from '../../shared/localizedText'
88
import { Auth } from '../../auth/auth'
9-
import { ToolkitError } from '../../shared/errors'
9+
import { ToolkitError, isNetworkError, tryRun } from '../../shared/errors'
1010
import { getSecondaryAuth, setScopes } from '../../auth/secondaryAuth'
1111
import { isCloud9, isSageMaker } from '../../shared/extensionUtilities'
1212
import { AmazonQPromptSettings } from '../../shared/settings'
@@ -403,12 +403,24 @@ export class AuthUtil {
403403
* Asynchronously returns a snapshot of the overall auth state of CodeWhisperer + Chat features.
404404
* It guarantees the latest state is correct at the risk of modifying connection state.
405405
* If this guarantee is not required, use sync method getChatAuthStateSync()
406+
*
407+
* By default, network errors are ignored when determining auth state since they may be silently
408+
* recoverable later.
406409
*/
407410
@withTelemetryContext({ name: 'getChatAuthState', class: authClassName })
408-
public async getChatAuthState(): Promise<FeatureAuthState> {
411+
public async getChatAuthState(ignoreNetErr: boolean = true): Promise<FeatureAuthState> {
409412
// The state of the connection may not have been properly validated
410413
// and the current state we see may be stale, so refresh for latest state.
411-
await this.auth.refreshConnectionState(this.conn)
414+
if (ignoreNetErr) {
415+
await tryRun(
416+
() => this.auth.refreshConnectionState(this.conn),
417+
(err) => !isNetworkError(err),
418+
'getChatAuthState: Cannot refresh connection state due to network error: %s'
419+
)
420+
} else {
421+
await this.auth.refreshConnectionState(this.conn)
422+
}
423+
412424
return this.getChatAuthStateSync(this.conn)
413425
}
414426

@@ -525,6 +537,11 @@ export const AuthStates = {
525537
* Eg: We are currently using Builder ID, but must use Identity Center.
526538
*/
527539
unsupported: 'unsupported',
540+
/**
541+
* The current connection exists and isn't expired,
542+
* but fetching/refreshing the token resulted in a network error.
543+
*/
544+
connectedWithNetworkError: 'connectedWithNetworkError',
528545
} as const
529546
const Features = {
530547
codewhispererCore: 'codewhispererCore',

packages/core/src/shared/errors.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import type * as nodefs from 'fs'
1414
import type * as os from 'os'
1515
import { CodeWhispererStreamingServiceException } from '@amzn/codewhisperer-streaming'
1616
import { driveLetterRegex } from './utilities/pathUtils'
17+
import { getLogger } from './logger/logger'
1718

1819
let _username = 'unknown-user'
1920
let _isAutomation = false
@@ -911,3 +912,38 @@ export function getReasonFromSyntaxError(err: Error): string | undefined {
911912

912913
return undefined
913914
}
915+
916+
/**
917+
* Run a function and swallow any errors that are not specified by `shouldThrow`
918+
*/
919+
export function tryRun<T>(fn: () => T, shouldThrow: (err: Error) => boolean, logMsg?: string): T | undefined
920+
export function tryRun<T>(
921+
fn: () => Promise<T>,
922+
shouldThrow: (err: Error) => boolean,
923+
logMsg?: string
924+
): Promise<T> | undefined
925+
export function tryRun<T>(
926+
fn: () => T | Promise<T>,
927+
shouldThrow: (err: Error) => boolean,
928+
logMsg?: string
929+
): T | Promise<T | void> | undefined {
930+
// The function signature pattern will accept both async and non-async types.
931+
932+
const catchErr = (err: Error) => {
933+
if (shouldThrow(err)) {
934+
throw err
935+
}
936+
937+
getLogger().error(logMsg ?? 'unknown caller: Error ignored: %s', err)
938+
}
939+
940+
try {
941+
const result = fn()
942+
if (result instanceof Promise) {
943+
return result.catch(catchErr)
944+
}
945+
return result
946+
} catch (error: any) {
947+
catchErr(error)
948+
}
949+
}

packages/core/src/shared/telemetry/vscodeTelemetry.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@
248248
{
249249
"name": "authStatus",
250250
"type": "string",
251-
"allowedValues": ["connected", "notConnected", "expired"],
251+
"allowedValues": ["connected", "notConnected", "expired", "connectedWithNetworkError"],
252252
"description": "Status of the an auth connection."
253253
},
254254
{
@@ -1068,6 +1068,10 @@
10681068
"type": "authScopes",
10691069
"required": false
10701070
},
1071+
{
1072+
"type": "authStatus",
1073+
"required": false
1074+
},
10711075
{
10721076
"type": "connectionState",
10731077
"required": true

packages/core/src/test/credentials/auth.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ describe('Auth', function () {
289289
const networkError = new ToolkitError('test', { code: 'ETIMEDOUT' })
290290
const expectedError = new ToolkitError('Failed to update connection due to networking issues', {
291291
cause: networkError,
292+
code: 'ETIMEDOUT',
292293
})
293294
const conn = await auth.createConnection(ssoProfile)
294295
auth.getTestTokenProvider(conn)?.getToken.rejects(networkError)

packages/core/src/test/shared/errors.test.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import {
1616
resolveErrorMessageToDisplay,
1717
scrubNames,
1818
ToolkitError,
19+
tryRun,
1920
UnknownError,
2021
} from '../../shared/errors'
2122
import { CancellationError } from '../../shared/utilities/timeoutUtils'
@@ -558,3 +559,49 @@ describe('util', function () {
558559
assert.deepStrictEqual(scrubNames('unix ~/.aws/config failed', fakeUser), 'unix ~/.aws/config failed')
559560
})
560561
})
562+
563+
describe('errors.tryRun()', function () {
564+
it('swallows error from sync fn', function () {
565+
const err = new Error('err')
566+
tryRun(
567+
() => {
568+
throw err
569+
},
570+
() => false
571+
)
572+
})
573+
574+
it('swallows error from async fn', async function () {
575+
const err = new Error('err')
576+
await tryRun(
577+
async () => {
578+
throw err
579+
},
580+
() => false
581+
)
582+
})
583+
584+
it('throws error from sync fn', function () {
585+
const err = new Error('err')
586+
assert.throws(() => {
587+
tryRun(
588+
() => {
589+
throw err
590+
},
591+
() => true
592+
)
593+
}, err)
594+
})
595+
596+
it('throws error from async fn', async function () {
597+
const err = new Error('err')
598+
await assert.rejects(async () => {
599+
await tryRun(
600+
async () => {
601+
throw err
602+
},
603+
() => true
604+
)
605+
}, err)
606+
})
607+
})
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "Bug Fix",
3+
"description": "Auth: Users may be silently logged out due to network issues when starting the extension."
4+
}

0 commit comments

Comments
 (0)