Skip to content

Commit 2bd23c4

Browse files
telemetry: add function stack to auth connection change (#5465)
* telemetry: add function stack to auth connection change Problem: When an auth connection changes, eg goes from valid to invalid, there were cases for some users where it happened unexpectedly. It was difficult to answer why the users connection became invalid Solution: Wrap many auth related functions with their "function entry" context. This will then allow the final telemetry event in `Auth.updateConnectionState()` to report the entire call stack in its `source` field for the metric `auth_modifyConnection`. What this results in is a string like: `AmazonQ#signoutButton:AuthUtil#signout:Auth#signout,deleteConnection,updateConnectionState` which tells us the path taken to get to updating the connection state. In telemetry we can then follow the change in a users connection state and hopefully determine why they may have expired prematurely since the functions which can expire a connection will now emit telemetry. Implementation Details: - A decorator, `withTelemetryContext` was created to wrap a Class Method with its context for telemetry - Every method that uses it adds their function name and class as context - Under the hood it wraps the Class Method in a `telemetry.function_call.run()` with some predefined values - It does not emit a telemetry event by default, as we only need it to contribute to the function stack - This decorator significantly reduces the diff compared to using the explicit `run()` since all method code would need to be explicitly wrapped in it. - **Note that decorators only work with class methods** - For non class methods we just use `telemetry.function_call.run()` Signed-off-by: Nikolas Komonen <[email protected]> * fix: failing unit test Unit test was failing here in CI with a "Reference error", and once I removed the decorator it was working fine again. Signed-off-by: Nikolas Komonen <[email protected]> * update tests for decorator Signed-off-by: Nikolas Komonen <[email protected]> * add context for dev mode expire connections Signed-off-by: Nikolas Komonen <[email protected]> * add more tests for asserting expected telemetry in auth Signed-off-by: Nikolas Komonen <[email protected]> * add sessionDuration to connection state change When a connection becomes invalid we will add the sessionDuration to the `auth_modifyConnection` metric emitted. This will allow us to filter for SSO connections that prematurely became invalid. Signed-off-by: Nikolas Komonen <[email protected]> --------- Signed-off-by: Nikolas Komonen <[email protected]>
1 parent 8c155e6 commit 2bd23c4

File tree

22 files changed

+486
-183
lines changed

22 files changed

+486
-183
lines changed

packages/amazonq/test/unit/codewhisperer/util/authUtil.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ describe('AuthUtil', async function () {
290290
)
291291
await authUtil.secondaryAuth.useNewConnection(conn3)
292292

293-
await authUtil.clearExtraConnections('test') // method under test
293+
await authUtil.clearExtraConnections() // method under test
294294

295295
// Only the conn that AuthUtil is using is remaining
296296
assert.deepStrictEqual(

packages/core/src/auth/auth.ts

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ import {
6262
import { isSageMaker, isCloud9, isAmazonQ } from '../shared/extensionUtilities'
6363
import { telemetry } from '../shared/telemetry/telemetry'
6464
import { randomUUID } from '../shared/crypto'
65+
import { asStringifiedStack } from '../shared/telemetry/spans'
66+
import { withTelemetryContext } from '../shared/telemetry/util'
6567

6668
interface AuthService {
6769
/**
@@ -124,6 +126,9 @@ export type AuthType = Auth
124126
export type DeletedConnection = { connId: Connection['id']; storedProfile?: StoredProfile }
125127
type DeclaredConnection = Pick<SsoProfile, 'ssoRegion' | 'startUrl'> & { source: string }
126128

129+
// Must be declared outside of class for use by decorator
130+
const authClassName = 'Auth'
131+
127132
export class Auth implements AuthService, ConnectionManager {
128133
readonly #ssoCache = getCache()
129134
readonly #validationErrors = new Map<Connection['id'], Error>()
@@ -162,6 +167,7 @@ export class Auth implements AuthService, ConnectionManager {
162167
return Object.values(this._declaredConnections)
163168
}
164169

170+
@withTelemetryContext({ name: 'restorePreviousSession', class: authClassName })
165171
public async restorePreviousSession(): Promise<Connection | undefined> {
166172
const id = this.store.getCurrentProfileId()
167173
if (id === undefined) {
@@ -177,6 +183,7 @@ export class Auth implements AuthService, ConnectionManager {
177183

178184
public async reauthenticate({ id }: Pick<SsoConnection, 'id'>, invalidate?: boolean): Promise<SsoConnection>
179185
public async reauthenticate({ id }: Pick<IamConnection, 'id'>, invalidate?: boolean): Promise<IamConnection>
186+
@withTelemetryContext({ name: 'reauthenticate', class: authClassName })
180187
public async reauthenticate({ id }: Pick<Connection, 'id'>, invalidate?: boolean): Promise<Connection> {
181188
const shouldInvalidate = invalidate ?? true
182189
const profile = this.store.getProfileOrThrow(id)
@@ -195,6 +202,7 @@ export class Auth implements AuthService, ConnectionManager {
195202

196203
public async useConnection({ id }: Pick<SsoConnection, 'id'>): Promise<SsoConnection>
197204
public async useConnection({ id }: Pick<IamConnection, 'id'>): Promise<IamConnection>
205+
@withTelemetryContext({ name: 'useConnection', class: authClassName })
198206
public async useConnection({ id }: Pick<Connection, 'id'>): Promise<Connection> {
199207
await this.refreshConnectionState({ id })
200208

@@ -211,6 +219,7 @@ export class Auth implements AuthService, ConnectionManager {
211219
return conn
212220
}
213221

222+
@withTelemetryContext({ name: 'logout', class: authClassName })
214223
public async logout(): Promise<void> {
215224
if (this.activeConnection === undefined) {
216225
return
@@ -222,6 +231,7 @@ export class Auth implements AuthService, ConnectionManager {
222231
this.#onDidChangeActiveConnection.fire(undefined)
223232
}
224233

234+
@withTelemetryContext({ name: 'listConnections', class: authClassName })
225235
public async listConnections(): Promise<Connection[]> {
226236
await loadIamProfilesIntoStore(this.store, this.iamProfileProvider)
227237

@@ -281,6 +291,7 @@ export class Auth implements AuthService, ConnectionManager {
281291
}
282292

283293
public async createConnection(profile: SsoProfile): Promise<SsoConnection>
294+
@withTelemetryContext({ name: 'createConnection', class: authClassName })
284295
public async createConnection(profile: Profile): Promise<Connection> {
285296
if (profile.type === 'iam') {
286297
throw new Error('Creating IAM connections is not supported')
@@ -311,6 +322,7 @@ export class Auth implements AuthService, ConnectionManager {
311322
}
312323
}
313324

325+
@withTelemetryContext({ name: 'deleteConnection', class: authClassName })
314326
public async deleteConnection(connection: Pick<Connection, 'id'>): Promise<void> {
315327
const connId = connection.id
316328
const profile = this.store.getProfile(connId)
@@ -343,6 +355,7 @@ export class Auth implements AuthService, ConnectionManager {
343355
*
344356
* Forget about a connection without logging out, invalidating it, or deleting it from disk.
345357
*/
358+
@withTelemetryContext({ name: 'forgetConnection', class: authClassName })
346359
public async forgetConnection(connection: Pick<Connection, 'id'>): Promise<void> {
347360
const connId = connection.id
348361
const profile = this.store.getProfile(connId)
@@ -355,6 +368,7 @@ export class Auth implements AuthService, ConnectionManager {
355368
this.#onDidDeleteConnection.fire({ connId, storedProfile: profile })
356369
}
357370

371+
@withTelemetryContext({ name: 'clearStaleLinkedIamConnections', class: authClassName })
358372
private async clearStaleLinkedIamConnections() {
359373
// Updates our store, evicting stale IAM credential profiles if the
360374
// SSO they are linked to was removed.
@@ -373,6 +387,7 @@ export class Auth implements AuthService, ConnectionManager {
373387
*
374388
* Put the SSO connection in to an expired state
375389
*/
390+
@withTelemetryContext({ name: 'expireConnection', class: authClassName })
376391
public async expireConnection(conn: Pick<SsoConnection, 'id'>): Promise<void> {
377392
getLogger().info(`auth: Expiring connection ${conn.id}`)
378393
const profile = this.store.getProfileOrThrow(conn.id)
@@ -398,6 +413,7 @@ export class Auth implements AuthService, ConnectionManager {
398413
* Alternatively you can use the `getToken()` call on an SSO connection to do the same thing,
399414
* but it will additionally prompt for reauthentication if the connection is invalid.
400415
*/
416+
@withTelemetryContext({ name: 'refreshConnectionState', class: authClassName })
401417
public async refreshConnectionState(connection?: Pick<Connection, 'id'>): Promise<undefined> {
402418
if (connection === undefined) {
403419
return
@@ -416,6 +432,7 @@ export class Auth implements AuthService, ConnectionManager {
416432
profile: SsoProfile,
417433
invalidate?: boolean
418434
): Promise<SsoConnection>
435+
@withTelemetryContext({ name: 'updateConnection', class: authClassName })
419436
public async updateConnection(
420437
connection: Pick<Connection, 'id'>,
421438
profile: Profile,
@@ -458,6 +475,7 @@ export class Auth implements AuthService, ConnectionManager {
458475
*
459476
* @returns undefined if authentication succeeds, otherwise object with error info
460477
*/
478+
@withTelemetryContext({ name: 'authenticateData', class: authClassName })
461479
public async authenticateData(data: StaticProfile): Promise<StaticProfileKeyErrorMessage | undefined> {
462480
const tempId = await this.addTempCredential(data)
463481
const tempIdString = asString(tempId)
@@ -507,6 +525,7 @@ export class Auth implements AuthService, ConnectionManager {
507525
* For SSO, this involves an API call to clear server-side state. The call happens
508526
* before the local token(s) are cleared as they are needed in the request.
509527
*/
528+
@withTelemetryContext({ name: 'invalidateConnection', class: authClassName })
510529
private async invalidateConnection(id: Connection['id'], opt?: { skipGlobalLogout?: boolean }) {
511530
getLogger().info(`auth: Invalidating connection: ${id}`)
512531
const profile = this.store.getProfileOrThrow(id)
@@ -534,6 +553,7 @@ export class Auth implements AuthService, ConnectionManager {
534553
await this.updateConnectionState(id, 'invalid')
535554
}
536555

556+
@withTelemetryContext({ name: 'updateConnectionState', class: authClassName })
537557
private async updateConnectionState(id: Connection['id'], connectionState: ProfileMetadata['connectionState']) {
538558
getLogger().info(`auth: Updating connection state of ${id} to ${connectionState}`)
539559

@@ -544,24 +564,44 @@ export class Auth implements AuthService, ConnectionManager {
544564

545565
const oldProfile = this.store.getProfileOrThrow(id)
546566
if (oldProfile.metadata.connectionState === connectionState) {
567+
// new state is same as old state, no need to do anything
547568
return oldProfile
548569
}
549570

550-
const profile = await this.store.updateMetadata(id, { connectionState })
551-
if (connectionState !== 'invalid') {
552-
this.#validationErrors.delete(id)
553-
this.#invalidCredentialsTimeouts.get(id)?.dispose()
554-
}
571+
// Emit an event for when the connection changes state. This the the root of the
572+
// state change. We rely on functions higher in the stack to have added their contexts
573+
// so this event has useful information in the `source` field.
574+
return telemetry.auth_modifyConnection.run(async (span) => {
575+
// if we have an Sso session that became invalid, we will add its session duration to the event
576+
const ssoSessionDuration =
577+
connectionState === 'invalid' && oldProfile.type === 'sso'
578+
? this.getSsoTokenProvider(id, oldProfile).getSessionDuration()
579+
: undefined
580+
span.record({
581+
action: 'updateConnectionState',
582+
connectionState,
583+
source: asStringifiedStack(telemetry.getFunctionStack()),
584+
credentialStartUrl: oldProfile.type === 'sso' ? oldProfile.startUrl : undefined,
585+
sessionDuration: ssoSessionDuration,
586+
})
555587

556-
if (this.#activeConnection?.id === id) {
557-
this.#activeConnection.state = connectionState
558-
this.#onDidChangeActiveConnection.fire(this.#activeConnection)
559-
}
560-
this.#onDidChangeConnectionState.fire({ id, state: connectionState })
588+
const profile = await this.store.updateMetadata(id, { connectionState })
589+
if (connectionState !== 'invalid') {
590+
this.#validationErrors.delete(id)
591+
this.#invalidCredentialsTimeouts.get(id)?.dispose()
592+
}
561593

562-
return profile
594+
if (this.#activeConnection?.id === id) {
595+
this.#activeConnection.state = connectionState
596+
this.#onDidChangeActiveConnection.fire(this.#activeConnection)
597+
}
598+
this.#onDidChangeConnectionState.fire({ id, state: connectionState })
599+
600+
return profile
601+
})
563602
}
564603

604+
@withTelemetryContext({ name: 'validateConnection', class: authClassName })
565605
private async validateConnection<T extends Profile>(id: Connection['id'], profile: StoredProfile<T>) {
566606
const runCheck = async () => {
567607
if (profile.type === 'sso') {
@@ -747,6 +787,7 @@ export class Auth implements AuthService, ConnectionManager {
747787
}
748788

749789
private readonly authenticate = keyedDebounce(this._authenticate.bind(this))
790+
@withTelemetryContext({ name: 'authenticate', class: authClassName })
750791
private async _authenticate<T>(id: Connection['id'], callback: () => Promise<T>, invalidate?: boolean): Promise<T> {
751792
const originalState = this.getConnectionState({ id }) ?? 'unauthenticated'
752793
await this.updateConnectionState(id, 'authenticating')
@@ -780,6 +821,7 @@ export class Auth implements AuthService, ConnectionManager {
780821
}
781822

782823
private readonly getToken = keyedDebounce(this._getToken.bind(this))
824+
@withTelemetryContext({ name: '_getToken', class: authClassName })
783825
private async _getToken(id: Connection['id'], provider: SsoAccessTokenProvider): Promise<SsoToken> {
784826
const token = await provider.getToken().catch((err) => {
785827
this.throwOnNetworkError(err)
@@ -817,6 +859,7 @@ export class Auth implements AuthService, ConnectionManager {
817859
}
818860
}
819861

862+
@withTelemetryContext({ name: 'handleInvalidCredentials', class: authClassName })
820863
private async handleInvalidCredentials<T>(id: Connection['id'], refresh: () => Promise<T>): Promise<T> {
821864
getLogger().info(`auth: Handling invalid credentials of connection: ${id}`)
822865
const profile = this.store.getProfile(id)
@@ -861,7 +904,9 @@ export class Auth implements AuthService, ConnectionManager {
861904
return this.authenticate(id, refresh)
862905
}
863906

864-
public readonly tryAutoConnect = once(async () => {
907+
public readonly tryAutoConnect = once(async () => this._tryAutoConnect())
908+
@withTelemetryContext({ name: 'tryAutoConnect', class: authClassName })
909+
private async _tryAutoConnect() {
865910
if (this.activeConnection !== undefined) {
866911
return
867912
}
@@ -940,7 +985,7 @@ export class Auth implements AuthService, ConnectionManager {
940985
}
941986
}
942987
}
943-
})
988+
}
944989

945990
static #instance: Auth | undefined
946991
public static get instance() {

packages/core/src/auth/secondaryAuth.ts

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ import { ToolIdStateKey } from '../shared/globalState'
1515
import { Connection, getTelemetryMetadataForConn, SsoConnection, StatefulConnection } from './connection'
1616
import { indent } from '../shared/utilities/textUtilities'
1717
import { telemetry } from '../shared/telemetry/telemetry'
18+
import { asStringifiedStack } from '../shared/telemetry/spans'
19+
import { withTelemetryContext } from '../shared/telemetry/util'
1820

1921
export type ToolId = 'codecatalyst' | 'codewhisperer' | 'testId'
2022

@@ -78,6 +80,8 @@ export function getAllConnectionsInUse(auth: Auth): StatefulConnection[] {
7880
const onDidChangeConnectionsEmitter = new vscode.EventEmitter<void>()
7981
export const onDidChangeConnections = onDidChangeConnectionsEmitter.event
8082

83+
// Variable must be declared outside of class to work with decorators
84+
const secondaryAuthClassName = 'SecondaryAuth'
8185
/**
8286
* Enables a tool to bind to a connection independently from the global {@link Auth} service.
8387
*
@@ -187,6 +191,7 @@ export class SecondaryAuth<T extends Connection = Connection> {
187191
* Globally deletes the connection that this secondary auth is using,
188192
* effectively doing a signout.
189193
*/
194+
@withTelemetryContext({ name: 'deleteConnection', class: secondaryAuthClassName })
190195
public async deleteConnection() {
191196
if (this.activeConnection) {
192197
await this.auth.deleteConnection(this.activeConnection)
@@ -228,6 +233,7 @@ export class SecondaryAuth<T extends Connection = Connection> {
228233
this.#onDidChangeActiveConnection.fire(undefined)
229234
}
230235

236+
@withTelemetryContext({ name: 'useNewConnection', class: secondaryAuthClassName })
231237
public async useNewConnection(conn: T): Promise<T> {
232238
await this.saveConnection(conn)
233239
if (this.auth.activeConnection === undefined) {
@@ -245,11 +251,13 @@ export class SecondaryAuth<T extends Connection = Connection> {
245251

246252
// Used to lazily restore persisted connections.
247253
// Kind of clunky. We need an async module loader layer to make things ergonomic.
248-
public readonly restoreConnection: (source?: string) => Promise<T | undefined> = once(async (source?: string) => {
254+
public readonly restoreConnection: typeof this._restoreConnection = once(async () => this._restoreConnection())
255+
@withTelemetryContext({ name: 'restoreConnection', class: secondaryAuthClassName })
256+
private async _restoreConnection(): Promise<T | undefined> {
249257
try {
250258
return await telemetry.auth_modifyConnection.run(async () => {
251259
telemetry.record({
252-
source,
260+
source: asStringifiedStack(telemetry.getFunctionStack()),
253261
action: 'restore',
254262
connectionState: 'undefined',
255263
})
@@ -270,7 +278,7 @@ export class SecondaryAuth<T extends Connection = Connection> {
270278
} catch (err) {
271279
getLogger().warn(`auth (${this.toolId}): failed to restore connection: %s`, err)
272280
}
273-
})
281+
}
274282

275283
private async loadSavedConnection() {
276284
// TODO: fix this
@@ -309,28 +317,38 @@ export class SecondaryAuth<T extends Connection = Connection> {
309317
* Note: This should exist in connection.ts or utils.ts, but due to circular dependencies, it must go here.
310318
*/
311319
export async function addScopes(conn: SsoConnection, extraScopes: string[], auth = Auth.instance) {
312-
const oldScopes = conn.scopes ?? []
313-
const newScopes = Array.from(new Set([...oldScopes, ...extraScopes]))
314-
315-
const updatedConn = await setScopes(conn, newScopes, auth)
316-
317-
try {
318-
return await auth.reauthenticate(updatedConn, false)
319-
} catch (e) {
320-
// We updated the connection scopes pre-emptively, but if there is some issue (e.g. user cancels,
321-
// InvalidGrantException, etc), then we need to revert to the old connection scopes. Otherwise,
322-
// this could soft-lock users into a broken connection that cannot be re-authenticated without
323-
// first deleting the connection.
324-
await setScopes(conn, oldScopes, auth)
325-
throw e
326-
}
320+
return telemetry.function_call.run(
321+
async () => {
322+
const oldScopes = conn.scopes ?? []
323+
const newScopes = Array.from(new Set([...oldScopes, ...extraScopes]))
324+
325+
const updatedConn = await setScopes(conn, newScopes, auth)
326+
327+
try {
328+
return await auth.reauthenticate(updatedConn, false)
329+
} catch (e) {
330+
// We updated the connection scopes pre-emptively, but if there is some issue (e.g. user cancels,
331+
// InvalidGrantException, etc), then we need to revert to the old connection scopes. Otherwise,
332+
// this could soft-lock users into a broken connection that cannot be re-authenticated without
333+
// first deleting the connection.
334+
await setScopes(conn, oldScopes, auth)
335+
throw e
336+
}
337+
},
338+
{ emit: false, functionId: { name: 'addScopesSecondaryAuth' } }
339+
)
327340
}
328341

329342
export function setScopes(conn: SsoConnection, scopes: string[], auth = Auth.instance): Promise<SsoConnection> {
330-
return auth.updateConnection(conn, {
331-
type: 'sso',
332-
scopes,
333-
startUrl: conn.startUrl,
334-
ssoRegion: conn.ssoRegion,
335-
})
343+
return telemetry.function_call.run(
344+
() => {
345+
return auth.updateConnection(conn, {
346+
type: 'sso',
347+
scopes,
348+
startUrl: conn.startUrl,
349+
ssoRegion: conn.ssoRegion,
350+
})
351+
},
352+
{ emit: false, functionId: { name: 'setScopesSecondaryAuth' } }
353+
)
336354
}

0 commit comments

Comments
 (0)