From 728c54c25a3db7f13335cfef2db7de1c32cf450a Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Tue, 24 Sep 2024 12:19:17 -0400 Subject: [PATCH 1/9] dev: Command to force IDE ExtHost to crash Signed-off-by: nkomonen-amazon --- packages/amazonq/src/extensionNode.ts | 1 + packages/core/src/dev/activation.ts | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/packages/amazonq/src/extensionNode.ts b/packages/amazonq/src/extensionNode.ts index 84bc76290b3..aed087d3701 100644 --- a/packages/amazonq/src/extensionNode.ts +++ b/packages/amazonq/src/extensionNode.ts @@ -77,6 +77,7 @@ async function setupDevMode(context: vscode.ExtensionContext) { 'deleteSsoConnections', 'expireSsoConnections', 'editAuthConnections', + 'forceIdeCrash', ], } diff --git a/packages/core/src/dev/activation.ts b/packages/core/src/dev/activation.ts index fac6cbeb11c..0eac16f5c98 100644 --- a/packages/core/src/dev/activation.ts +++ b/packages/core/src/dev/activation.ts @@ -41,6 +41,7 @@ export type DevFunction = | 'deleteSsoConnections' | 'expireSsoConnections' | 'editAuthConnections' + | 'forceIdeCrash' export type DevOptions = { context: vscode.ExtensionContext @@ -105,6 +106,11 @@ const menuOptions: Record = { detail: 'Opens editor to all Auth Connections the extension is using.', executor: editSsoConnections, }, + forceIdeCrash: { + label: 'Crash: Force IDE ExtHost Crash', + detail: `Will SIGKILL ExtHost with pid, ${process.pid}, but IDE will not crash itself.`, + executor: forceQuitIde, + }, } /** @@ -435,6 +441,16 @@ async function expireSsoConnections() { ) } +export function forceQuitIde() { + // This current process is the ExtensionHost. Killing it will cause all the extensions to crash + // for the current ExtensionHost (unless using "extensions.experimental.affinity"). + // The IDE instance itself will remaing running, but a new ExtHost will spawn within it. + // The PPID does not seem to be the IDE instance but the root VS Code process, killing + // this crashes all VS Code instances. + const vsCodePid = process.pid + process.kill(vsCodePid, 'SIGKILL') // SIGTERM would be the graceful shutdown +} + async function showState(path: string) { const uri = vscode.Uri.parse(`aws-dev2://state/${path}-${targetContext.extension.id}`) const doc = await vscode.workspace.openTextDocument(uri) From d710b3733b9cc2e212b8792e48819a3fb1a5277f Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Mon, 23 Sep 2024 12:15:14 -0400 Subject: [PATCH 2/9] add doc about vscode behaviors Signed-off-by: nkomonen-amazon --- docs/vscode_behaviors.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 docs/vscode_behaviors.md diff --git a/docs/vscode_behaviors.md b/docs/vscode_behaviors.md new file mode 100644 index 00000000000..a80abceadfe --- /dev/null +++ b/docs/vscode_behaviors.md @@ -0,0 +1,22 @@ +# VS Code Behaviors + +Many VS Code behavoirs for certain APIs or user interactions with the IDE are not clearly documented, +or documented at all. Please add any findings to this document. + +## `deactivate()` - extension shutdown + +This method is defined as part of the VS Code extension API, and is run on a **graceful** shutdown +for each extension. + +- Our extension and its `deactivate()` function are in the Extension Host process [1] +- The Extension Host process has at most 5 seconds to shut down, after which it will exit. [1] +- The vscode API will be unreliable at deactivation time. So certain VSC APIs like the filesystem may not work. [1] + - The VSC Filesystem API has been confirmed to not work +- In `Run & Debug` mode, closing the Debug IDE instance behaves differently depending on how it is closed + - The regular close button in the Debug IDE instance results in a graceful shutdown + - The red square in the root IDE instance to stop the debugging session results on a non-graceful shutdown, meaning `deactivate()` is not run. + +Sources: + +- [[1]](https://github.com/Microsoft/vscode/issues/47881#issuecomment-381910587) +- [[2]](https://github.com/microsoft/vscode/issues/122825#issuecomment-814218149) From 4113d7a1114b7c07353f07b8fa2739bee000bd83 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 26 Sep 2024 14:13:42 -0400 Subject: [PATCH 3/9] fix OS Utils Pc Session start method, should work for all OS Signed-off-by: nkomonen-amazon --- packages/core/src/shared/utilities/osUtils.ts | 61 ++++++------------ .../src/test/shared/utilities/osUtils.test.ts | 64 +++++++++++++------ 2 files changed, 62 insertions(+), 63 deletions(-) diff --git a/packages/core/src/shared/utilities/osUtils.ts b/packages/core/src/shared/utilities/osUtils.ts index a37149aff64..b0266f73e1a 100644 --- a/packages/core/src/shared/utilities/osUtils.ts +++ b/packages/core/src/shared/utilities/osUtils.ts @@ -4,10 +4,7 @@ */ import globals from '../extensionGlobals' -import { fs } from '../fs/fs' -import { tempDirPath } from '../filesystemUtilities' -import path from 'path' -import { getExtensionId } from '../extensionUtilities' +import * as os from 'os' /** * Checks if the current OS session is new. @@ -18,44 +15,24 @@ import { getExtensionId } from '../extensionUtilities' * Use this function to perform one-time initialization tasks that should only happen * once per OS session, regardless of how many extension instances are running. */ -export async function isNewOsSession(tmpDir = tempDirPath) { - switch (process.platform) { - case 'win32': { - // Windows does not have an ephemeral /tmp/ folder that deletes on shutdown, while unix-like os's do. - // So in Windows we calculate the start time and see if it changed from the previous known start time. - - const lastStartTime = globals.globalState.tryGet('lastOsStartTime', Number) - // uptime() returns seconds, convert to ms - const currentOsStartTime = Date.now() - process.uptime() * 1000 - - if (lastStartTime === undefined) { - await globals.globalState.update('lastOsStartTime', currentOsStartTime) - return true - } - - // If the current start time is later than the last, it means we are in a new session - // But remove a few seconds to account for a small delay between getting the current time and - // actually comparing it. - if (currentOsStartTime - 2000 > lastStartTime) { - await globals.globalState.update('lastOsStartTime', currentOsStartTime) - return true - } - - return false - } - default: { - // Store a file in /tmp/ that acts as a flag. The OS automatically deletes on system shutdown. - const unixNewSessionPath = path.join(tmpDir, `sessionStart-${getExtensionId()}`) - - if (await fs.exists(unixNewSessionPath)) { - return false - } +export async function isNewOsSession(now = () => globals.clock.Date.now(), uptime = () => os.uptime()) { + // Windows does not have an ephemeral /tmp/ folder that deletes on shutdown, while unix-like os's do. + // So in Windows we calculate the start time and see if it changed from the previous known start time. + const lastStartTime = globals.globalState.tryGet('lastOsStartTime', Number) + // uptime() returns seconds, convert to ms + const currentOsStartTime = now() - uptime() * 1000 * 60 + + if (lastStartTime === undefined) { + await globals.globalState.update('lastOsStartTime', currentOsStartTime) + return true + } - await fs.writeFile( - unixNewSessionPath, - `This file is autogenerated by the VS Code extension: ${getExtensionId()}` - ) - return true - } + // If the current start time is later than the last, it means we are in a new session since they should be the same value. + // But to account for small differences in how the current time is calculate, we add in a 5 second buffer. + if (currentOsStartTime - 1000 * 5 > lastStartTime) { + await globals.globalState.update('lastOsStartTime', currentOsStartTime) + return true } + + return false } diff --git a/packages/core/src/test/shared/utilities/osUtils.test.ts b/packages/core/src/test/shared/utilities/osUtils.test.ts index 1b64eb7518f..1654aca8cc8 100644 --- a/packages/core/src/test/shared/utilities/osUtils.test.ts +++ b/packages/core/src/test/shared/utilities/osUtils.test.ts @@ -5,27 +5,49 @@ import assert from 'assert' import { isNewOsSession } from '../../../shared/utilities/osUtils' -import { fs } from '../../../shared' -import { isWin } from '../../../shared/vscode/env' -import { TestFolder } from '../../testUtil' +import { InstalledClock } from '@sinonjs/fake-timers' +import { createSandbox, SinonSandbox } from 'sinon' describe('isNewOsSession', () => { - if (!isWin()) { - it('unix-like: returns true when expected', async () => { - const tmpDir = (await TestFolder.create()).path - - // On a new session the first caller will get true - assert.strictEqual(await isNewOsSession(tmpDir), true) - // Subsequent callers will get false - assert.strictEqual(await isNewOsSession(tmpDir), false) - - // Mimic a fresh restart (/tmp/ folder is deleted) - const files = await fs.readdir(tmpDir) - await Promise.all(files.map(async (file) => await fs.delete(`${tmpDir}/${file[0]}`))) - - // Since the tmp/ folder was cleared it is considered a new session - assert.strictEqual(await isNewOsSession(tmpDir), true) - assert.strictEqual(await isNewOsSession(tmpDir), false) - }) - } + let clock: InstalledClock | undefined + let sandbox: SinonSandbox + + beforeEach(function () { + sandbox = createSandbox() + }) + + afterEach(function () { + clock?.uninstall() + sandbox.restore() + }) + + it('unix-like: returns true when expected', async () => { + const uptimeStub = sandbox.stub() + const now = sandbox.stub() + // We started our computer at 2 minutes since epoch (time - pc uptime) + // and the comptuer has been on for 1 minute. So the OS started 1 minute since epoch. + now.returns(60_000 + 60_000) + uptimeStub.returns(1) + + // On a brand new session the first caller will get true + assert.strictEqual(await isNewOsSession(now, uptimeStub), true) + // Subsequent callers will get false + assert.strictEqual(await isNewOsSession(now, uptimeStub), false) + + // Start a computer session 10 minutes from epoch + uptimeStub.returns(0) + now.returns(60_000 * 10) + assert.strictEqual(await isNewOsSession(now, uptimeStub), true) + // Anything that is within a 5 second threshold of the last session time, is considered the same session + now.returns(60_000 * 10 + 5000) + assert.strictEqual(await isNewOsSession(now, uptimeStub), false) + now.returns(60_000 * 10 + 5000 + 1) + assert.strictEqual(await isNewOsSession(now, uptimeStub), true) + + // A non-zero uptime + uptimeStub.returns(5) // The computer has been running for 5 minutes already, so the start time is relative to this. + now.returns(60_000 * 10 + 5000 + 60_000 * 10) // 5 minutes since last session + // Nothing changes since the diff between uptime and the last start has not changed + assert.strictEqual(await isNewOsSession(now, uptimeStub), true) + }) }) From 0abd0ec786448a61830e8c97a07a7c37f86140f7 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Sat, 28 Sep 2024 13:42:46 -0400 Subject: [PATCH 4/9] feat: Crash Monitoring Signed-off-by: nkomonen-amazon redid the constructors + extracted state Signed-off-by: nkomonen-amazon --- packages/amazonq/src/extensionNode.ts | 7 +- packages/core/src/dev/activation.ts | 14 +- packages/core/src/extensionNode.ts | 7 +- .../src/shared/crashMonitoring/constants.ts | 9 + .../shared/crashMonitoring/crashMonitoring.ts | 608 ++++++++++++++++++ packages/core/src/shared/errors.ts | 7 + packages/core/src/shared/index.ts | 1 + .../core/src/shared/logger/toolkitLogger.ts | 2 +- packages/core/src/shared/settings.ts | 1 + .../src/shared/telemetry/telemetryService.ts | 5 +- .../src/shared/telemetry/vscodeTelemetry.json | 25 +- .../src/shared/utilities/workspaceUtils.ts | 5 + .../core/src/test/fakeExtensionContext.ts | 6 + .../crashMonitoring/crashMonitoring.test.ts | 227 +++++++ .../core/src/test/shared/globalState.test.ts | 7 +- .../crashMonitoring/crashMonitoring.test.ts | 9 + packages/webpack.web.config.js | 2 +- 17 files changed, 926 insertions(+), 16 deletions(-) create mode 100644 packages/core/src/shared/crashMonitoring/constants.ts create mode 100644 packages/core/src/shared/crashMonitoring/crashMonitoring.ts create mode 100644 packages/core/src/test/shared/crashMonitoring/crashMonitoring.test.ts create mode 100644 packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts diff --git a/packages/amazonq/src/extensionNode.ts b/packages/amazonq/src/extensionNode.ts index aed087d3701..b203e541a84 100644 --- a/packages/amazonq/src/extensionNode.ts +++ b/packages/amazonq/src/extensionNode.ts @@ -7,7 +7,7 @@ import * as vscode from 'vscode' import { activateAmazonQCommon, amazonQContextPrefix, deactivateCommon } from './extension' import { DefaultAmazonQAppInitContext } from 'aws-core-vscode/amazonq' import { activate as activateQGumby } from 'aws-core-vscode/amazonqGumby' -import { ExtContext, globals } from 'aws-core-vscode/shared' +import { ExtContext, globals, CrashMonitoring } from 'aws-core-vscode/shared' import { filetypes, SchemaService } from 'aws-core-vscode/sharedNode' import { updateDevMode } from 'aws-core-vscode/dev' import { CommonAuthViewProvider } from 'aws-core-vscode/login' @@ -32,6 +32,8 @@ export async function activate(context: vscode.ExtensionContext) { * the code compatible with web and move it to {@link activateAmazonQCommon}. */ async function activateAmazonQNode(context: vscode.ExtensionContext) { + await (await CrashMonitoring.instance()).start() + const extContext = { extensionContext: context, } @@ -93,5 +95,6 @@ async function setupDevMode(context: vscode.ExtensionContext) { } export async function deactivate() { - await deactivateCommon() + // Run concurrently to speed up execution. stop() does not throw so it is safe + await Promise.all([(await CrashMonitoring.instance()).stop(), deactivateCommon()]) } diff --git a/packages/core/src/dev/activation.ts b/packages/core/src/dev/activation.ts index 0eac16f5c98..07de708fc14 100644 --- a/packages/core/src/dev/activation.ts +++ b/packages/core/src/dev/activation.ts @@ -24,6 +24,7 @@ import { entries } from '../shared/utilities/tsUtils' import { getEnvironmentSpecificMemento } from '../shared/utilities/mementos' import { setContext } from '../shared' import { telemetry } from '../shared/telemetry' +import { getSessionId } from '../shared/telemetry/util' interface MenuOption { readonly label: string @@ -108,7 +109,7 @@ const menuOptions: Record = { }, forceIdeCrash: { label: 'Crash: Force IDE ExtHost Crash', - detail: `Will SIGKILL ExtHost with pid, ${process.pid}, but IDE will not crash itself.`, + detail: `Will SIGKILL ExtHost, { pid: ${process.pid}, sessionId: '${getSessionId().slice(0, 8)}-...' }, but the IDE itself will not crash.`, executor: forceQuitIde, }, } @@ -222,6 +223,7 @@ function isSecrets(obj: vscode.Memento | vscode.SecretStorage): obj is vscode.Se class VirtualObjectFile implements FileProvider { private mTime = 0 + private size = 0 private readonly onDidChangeEmitter = new vscode.EventEmitter() public readonly onDidChange = this.onDidChangeEmitter.event @@ -233,8 +235,8 @@ class VirtualObjectFile implements FileProvider { /** Emits an event indicating this file's content has changed */ public refresh() { /** - * Per {@link vscode.FileSystemProvider.onDidChangeFile}, if the mTime does not change, new file content may - * not be retrieved. Without this, when we emit a change the text editor did not update. + * Per {@link vscode.FileSystemProvider.onDidChangeFile}, if the mTime and/or size does not change, new file content may + * not be retrieved due to optimizations. Without this, when we emit a change the text editor did not update. */ this.mTime++ this.onDidChangeEmitter.fire() @@ -242,13 +244,15 @@ class VirtualObjectFile implements FileProvider { public stat(): { ctime: number; mtime: number; size: number } { // This would need to be filled out to track conflicts - return { ctime: 0, mtime: this.mTime, size: 0 } + return { ctime: 0, mtime: this.mTime, size: this.size } } public async read(): Promise { const encoder = new TextEncoder() - return encoder.encode(await this.readStore(this.key)) + const data = encoder.encode(await this.readStore(this.key)) + this.size = data.length + return data } public async write(content: Uint8Array): Promise { diff --git a/packages/core/src/extensionNode.ts b/packages/core/src/extensionNode.ts index aa81b943239..f91fa321f59 100644 --- a/packages/core/src/extensionNode.ts +++ b/packages/core/src/extensionNode.ts @@ -59,7 +59,7 @@ import { ExtensionUse } from './auth/utils' import { ExtStartUpSources } from './shared/telemetry' import { activate as activateThreatComposerEditor } from './threatComposer/activation' import { isSsoConnection, hasScopes } from './auth/connection' -import { setContext } from './shared' +import { CrashMonitoring, setContext } from './shared' let localize: nls.LocalizeFunc @@ -78,6 +78,8 @@ export async function activate(context: vscode.ExtensionContext) { // IMPORTANT: If you are doing setup that should also work in web mode (browser), it should be done in the function below const extContext = await activateCommon(context, contextPrefix, false) + await (await CrashMonitoring.instance()).start() + initializeCredentialsProviderManager() const toolkitEnvDetails = getExtEnvironmentDetails() @@ -251,7 +253,8 @@ export async function activate(context: vscode.ExtensionContext) { } export async function deactivate() { - await deactivateCommon() + // Run concurrently to speed up execution. stop() does not throw so it is safe + await Promise.all([await (await CrashMonitoring.instance()).stop(), deactivateCommon()]) await globals.resourceManager.dispose() } diff --git a/packages/core/src/shared/crashMonitoring/constants.ts b/packages/core/src/shared/crashMonitoring/constants.ts new file mode 100644 index 00000000000..53db57ac24d --- /dev/null +++ b/packages/core/src/shared/crashMonitoring/constants.ts @@ -0,0 +1,9 @@ +/*! + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +// Needed here due to circular dependency issues +export const rootDir = 'crashMonitoring' +export const runningExtDir = 'runningExts' +export const shutdownExtDir = 'shutdownExts' diff --git a/packages/core/src/shared/crashMonitoring/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring/crashMonitoring.ts new file mode 100644 index 00000000000..0aba8fd88c2 --- /dev/null +++ b/packages/core/src/shared/crashMonitoring/crashMonitoring.ts @@ -0,0 +1,608 @@ +/*! + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +import path from 'path' +import globals, { isWeb } from '../extensionGlobals' +import { getSessionId as _getSessionId } from '../telemetry/util' +import { getErrorId, getTelemetryReason, getTelemetryReasonDesc, isFileNotFoundError, ToolkitError } from '../errors' +import { isAutomation, isDebugInstance } from '../vscode/env' +import { DevSettings } from '../settings' +import vscode from 'vscode' +import { telemetry } from '../telemetry' +import { Logger } from '../logger' +import { isNewOsSession } from '../utilities/osUtils' +import nodeFs from 'fs/promises' +import fs from '../fs/fs' +import * as CrashMonitoringConstants from './constants' +import { getLogger } from '../logger/logger' + +const className = 'CrashMonitoring' + +/** + * Handles crash reporting for the extension. + * + * ### Pre-requisite knowledge: + * + * - If an extension crashes it cannot report that it crashed. + * - The ExtensionHost is a separate process from the main VS Code editor process where all extensions run in + * - Read about the [`deactivate()` behavior](../../../../../docs/vscode_behaviors.md) + * - An IDE instance is one instance of VS Code, and Extension Instance is 1 instance of our extension. These are 1:1. + * + * ### How it works at a high level: + * + * - Each IDE instance will start its own crash reporting process on startup + * - The crash reporting process works with each instance sending heartbeats to a centralized state. Separately each instance + * has a "Checker" the each entry in the centralized to see if it is not running anymore, and appropriately handles when needed. + * + * - On a crash we will emit a `session_end` metrics with `{ result: 'Failed', reason: 'ExtHostCrashed', crashedSessionId: '...' }` + * - On successful shutdown a `session_end` with a successful result is already emitted elsewhere. + * - IMPORTANT: There is potential for duplicate metrics to be emitted since all Checkers can emit, so for `session_end` with `'Failed'`, + * deduplicate by on the key `proxiedSessionId`. + * + * - To get the most verbose debug logs, configure the devmode setting: `crashReportInterval` + * + * ### Limitations + * - We will never truly know if we are the cause of the crash + * - Since all extensions run in the same Ext Host process, any one of them could cause it to crash and we wouldn't be + * able to differentiate + * - If the IDE itself crashes, unrelated to the extensions, it will still be seen as a crash in our telemetry + * - We are not able to explicitly determine if we were the cause of the crash + * - If the user shuts down their computer after a crash before the next interval of the Primary can run, that info is lost + * - We cannot persist crash information on computer restart + */ +export class CrashMonitoring { + private isStarted: boolean = false + + protected heartbeat: Heartbeat | undefined + protected crashChecker: CrashChecker | undefined + + constructor( + private readonly state: FileSystemState, + private readonly checkInterval: number, + private readonly isDevMode: boolean, + private readonly isAutomation: boolean, + private readonly devLogger: Logger | undefined + ) {} + + static #instance: CrashMonitoring | undefined + public static async instance(): Promise { + const isDevMode = getIsDevMode() + const devModeLogger: Logger | undefined = isDevMode ? getLogger() : undefined + return (this.#instance ??= new CrashMonitoring( + await crashMonitoringStateFactory(), + DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 3), + isDevMode, + isAutomation(), + devModeLogger + )) + } + + /** Start the Crash Monitoring process */ + public async start() { + if (isWeb()) { + return + } + + // In the Prod code this runs by default and interferes as it reports its own heartbeats. + if (this.isAutomation) { + return + } + + // Dont throw since this feature is not critical and shouldn't prevent extension execution + try { + this.heartbeat = new Heartbeat(this.state, this.checkInterval, this.isDevMode) + this.crashChecker = new CrashChecker(this.state, this.checkInterval, this.isDevMode, this.devLogger) + + await this.heartbeat.start() + await this.crashChecker.start() + + this.isStarted = true + } catch (error) { + emitFailure({ functionName: 'start', error }) + // In development this gives us a useful stacktrace + if (this.isDevMode) { + throw error + } + } + } + + /** Stop the Crash Monitoring process, signifying a graceful shutdown */ + public async stop() { + if (!this.isStarted) { + return + } + + // Dont throw since this feature is not critical and shouldn't prevent extension shutdown + try { + this.crashChecker?.stop() + await this.heartbeat?.stop() + } catch (error) { + try { + // This probably wont emit in time before shutdown, but may be written to the logs + emitFailure({ functionName: 'stop', error }) + } catch (e) { + // In case emit fails, do nothing + } + if (this.isDevMode) { + throw error + } + } + } + + /** + * Mimic a crash of the extension, or can just be used as cleanup. + * Only use this for tests. + */ + protected crash() { + if (!this.isStarted) { + return + } + + this.crashChecker?.stop() + this.heartbeat?.crash() + } +} + +/** + * Heartbeats that indicate the extension instance is still running. + * {@link CrashChecker} listens for these. + */ +class Heartbeat { + private isRunning: boolean = false + private intervalRef: NodeJS.Timer | undefined + constructor( + private readonly state: FileSystemState, + private readonly checkInterval: number, + private readonly isDevMode: boolean + ) {} + + public async start() { + this.isRunning = true + + // heartbeat 2 times per check + const heartbeatInterval = this.checkInterval / 2 + + // Send an initial heartbeat immediately + await withFailCtx('initialSendHeartbeat', () => this.state.sendHeartbeat()) + + // Send a heartbeat every interval + this.intervalRef = globals.clock.setInterval(async () => { + try { + await this.state.sendHeartbeat() + } catch (e) { + emitFailure({ functionName: 'sendHeartbeat', error: e }) + + // Since there was an error we want to stop crash monitoring since it is pointless. + // We will need to monitor telemetry to see if we can determine widespread issues. + // Make sure it is signaled as a graceful shutdown to reduce noise of crashed extensions. + await this.stop() + + // During development we are fine with impacting extension execution, so throw + if (this.isDevMode) { + throw e + } + } + }, heartbeatInterval) + } + + public async stop() { + // non-happy path where heartbeats were never started. + if (!this.isRunning) { + return + } + + globals.clock.clearInterval(this.intervalRef) + return this.state.indicateGracefulShutdown() + } + + public crash() { + globals.clock.clearInterval(this.intervalRef) + } +} + +/** + * This checks for if an extension has crashed and handles that result appropriately. + * It listens to heartbeats sent by {@link Heartbeat}, and then handles appropriately when the heartbeats + * stop. + * + * --- + * + * This follows the Primary/Secondary design where one of the extension instances is the Primary checker + * and all others are Secondary. + * + * The Primary actually reads the state and reports crashes if detected. + * + * The Secondary continuously attempts to become the Primary if the previous Primary is no longer responsive. + * This helps to reduce raceconditions for operations on the state. + */ +class CrashChecker { + private intervalRef: NodeJS.Timer | undefined + + constructor( + private readonly state: FileSystemState, + private readonly checkInterval: number, + private readonly isDevMode: boolean, + private readonly devLogger: Logger | undefined + ) {} + + public async start() { + { + this.devLogger?.debug(`crashMonitoring: checkInterval ${this.checkInterval}`) + + // do an initial check + await withFailCtx('initialCrashCheck', () => + tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger) + ) + + this.intervalRef = globals.clock.setInterval(async () => { + try { + await tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger) + } catch (e) { + emitFailure({ functionName: 'checkCrashInterval', error: e }) + + // Since there was an error we want to stop crash monitoring since it is pointless. + // We will need to monitor telemetry to see if we can determine widespread issues. + this.stop() + + // During development we are fine with impacting extension execution, so throw + if (this.isDevMode) { + throw e + } + } + }, this.checkInterval) + } + + // ------------ Inner Functions ------------ + + async function tryCheckCrash( + state: FileSystemState, + checkInterval: number, + isDevMode: boolean, + devLogger: Logger | undefined + ) { + // Iterate all known extensions and for each check if they have crashed + const knownExts = await state.getAllExts() + const runningExts: ExtInstanceHeartbeat[] = [] + for (const ext of knownExts) { + if (!isStoppedHeartbeats(ext, checkInterval)) { + runningExts.push(ext) + continue + } + + // Ext is not running anymore, handle appropriately depending on why it stopped running + await state.handleExtNotRunning(ext, { + shutdown: async () => { + // Nothing to do, just log info if necessary + devLogger?.debug( + `crashMonitoring: SHUTDOWN: following has gracefully shutdown: pid ${ext.extHostPid} + sessionId: ${ext.sessionId}` + ) + }, + crash: async () => { + // Debugger instances may incorrectly look like they crashed, so don't emit. + // Example is if I hit the red square in the debug menu, it is a non-graceful shutdown. But the regular + // 'x' button in the Debug IDE instance is a graceful shutdown. + if (ext.isDebug) { + devLogger?.debug(`crashMonitoring: DEBUG instance crashed: ${JSON.stringify(ext)}`) + return + } + + // This is the metric to let us know the extension crashed + telemetry.session_end.emit({ + result: 'Failed', + proxiedSessionId: ext.sessionId, + reason: 'ExtHostCrashed', + passive: true, + }) + + devLogger?.debug( + `crashMonitoring: CRASH: following has crashed: pid ${ext.extHostPid} + sessionId: ${ext.sessionId}` + ) + }, + }) + } + + if (isDevMode) { + const before = knownExts.map((i) => i.extHostPid) + const after = runningExts.map((i) => i.extHostPid) + // Sanity check: ENSURE THAT AFTER === ACTUAL or this implies that our data is out of sync + const afterActual = (await state.getAllExts()).map((i) => i.extHostPid) + devLogger?.debug( + `crashMonitoring: CHECKED: Result of cleaning up stopped instances\nBEFORE: ${JSON.stringify(before)}\nAFTER: ${JSON.stringify(after)}\nACTUAL: ${JSON.stringify(afterActual)}` + ) + } + + return + } + + function isStoppedHeartbeats(ext: ExtInstanceHeartbeat, checkInterval: number) { + const millisSinceLastHeartbeat = globals.clock.Date.now() - ext.lastHeartbeat + // since heartbeats happen 2 times per check interval it will have occured + // at least once in the timespan of the check interval. + // + // But if we want to be more flexible this condition can be modified since + // something like global state taking time to sync can return the incorrect last heartbeat value. + return millisSinceLastHeartbeat >= checkInterval + } + } + + public stop() { + globals.clock.clearInterval(this.intervalRef) + } +} + +/** + * We define this externally so that we have a single source of truth for the contructor args. + * Ideally we'd use ConstructorParameters, but it does not work when the constructor is protected. + */ +type MementoStateDependencies = { + memento: vscode.Memento + pid: number + sessionId: string + workDirPath: string + isDevMode: boolean + isStateStale: () => Promise + now: () => number + devLogger: Logger | undefined +} + +function getDefaultDependencies(): MementoStateDependencies { + return { + now: () => globals.clock.Date.now(), + workDirPath: path.join(globals.context.globalStorageUri.fsPath), + memento: globals.globalState as vscode.Memento, + isStateStale: () => isNewOsSession(), + pid: process.pid, + sessionId: _getSessionId(), + isDevMode: getIsDevMode(), + devLogger: getIsDevMode() ? getLogger() : undefined, + } +} +export async function crashMonitoringStateFactory(deps = getDefaultDependencies()): Promise { + const state: FileSystemState = new FileSystemState(deps) + await state.init() + return state +} + +/** + * The state of all running extensions. This state is globally shared with all other extension instances. + * This state specifically uses the File System. + */ +export class FileSystemState { + private readonly stateDirPath: string + + /** + * Use {@link crashMonitoringStateFactory} to make an instance + */ + constructor(protected readonly deps: MementoStateDependencies) { + this.stateDirPath = path.join(this.deps.workDirPath, CrashMonitoringConstants.rootDir) + + this.deps.devLogger?.debug(`crashMonitoring: pid: ${this.deps.pid}`) + this.deps.devLogger?.debug(`crashMonitoring: sessionId: ${this.deps.sessionId.slice(0, 8)}-...`) + this.deps.devLogger?.debug(`crashMonitoring: dir: ${this.stateDirPath}`) + } + + /** + * Does the required initialization steps, this must always be run after + * creation of the instance. + */ + public async init() { + // Clear the state if the user did something like a computer restart + if (await this.deps.isStateStale()) { + await this.clearState() + } + } + + // ------------------ Heartbeat methods ------------------ + public async sendHeartbeat() { + await withFailCtx('sendHeartbeatState', async () => { + const dir = await this.runningExtsDir() + const extId = this.createExtId(this.ext) + await fs.writeFile( + path.join(dir, extId), + JSON.stringify({ ...this.ext, lastHeartbeat: this.deps.now() }, undefined, 4) + ) + this.deps.devLogger?.debug( + `crashMonitoring: HEARTBEAT pid ${this.deps.pid} + sessionId: ${this.deps.sessionId.slice(0, 8)}-...` + ) + }) + } + + /** + * Signal that this extension is gracefully shutting down. This will prevent the IDE from thinking it crashed. + * + * IMPORTANT: This code is being run in `deactivate()` where VS Code api is not available. Due to this we cannot + * easily update the state to indicate a graceful shutdown. So the next best option is to write to a file on disk, + * and its existence indicates a graceful shutdown. + * + * IMPORTANT: Since the VSC FileSystem api is not available during deactivation we MUST use Node FS in anything this + * function touches. + */ + public async indicateGracefulShutdown(): Promise { + const dir = await this.shutdownExtsDir() + await withFailCtx('writeShutdownFile', () => nodeFs.writeFile(path.join(dir, this.extId), '')) + } + + // ------------------ Checker Methods ------------------ + + /** + * Signals the state that the given extension is not running, allowing the state to appropriately update + * depending on a graceful shutdown or crash. + * + * NOTE: This does NOT run in the `deactivate()` method, so it CAN reliably use the VS Code FS api + * + * @param opts - functions to run depending on why the extension stopped running + */ + public async handleExtNotRunning( + ext: ExtInstance, + opts: { shutdown: () => Promise; crash: () => Promise } + ): Promise { + const extId = this.createExtId(ext) + const shutdownFilePath = path.join(await this.shutdownExtsDir(), extId) + + if (await withFailCtx('existsShutdownFile', () => fs.exists(shutdownFilePath))) { + await opts.shutdown() + // We intentionally do not clean up the file in shutdown since there may be another + // extension may be doing the same thing in parallel, and would read the extension as + // crashed since the file was missing. The file will be cleared on computer restart though. + + // TODO: Be smart and clean up the file after some time. + } else { + await opts.crash() + } + + // Clean up the running extension file since it is no longer exists + const dir = await this.runningExtsDir() + // Use force since another checker may have already removed this file before this is ran + await withFailCtx('deleteStaleRunningFile', () => fs.delete(path.join(dir, extId), { force: true })) + } + + // ------------------ State data ------------------ + public get extId(): ExtInstanceId { + return this.createExtId(this.ext) + } + /** This extensions metadata */ + protected get ext(): ExtInstance { + return { + sessionId: this.deps.sessionId, + extHostPid: this.deps.pid, + isDebug: isDebugInstance() ? true : undefined, + } + } + /** + * Returns a value that uniquely identifies an Extension Instance. + * + * - When an ExtHost crashes the VSCode instance itself does not crash, this means the session ID stays the + * same. This is the current behavior of vscode's sessionId api that we use under the hood (verified manually). + * - The Extension Host PID used in addition to the session ID should be good enough to uniquely identiy. + */ + protected createExtId(ext: ExtInstance): ExtInstanceId { + return `${ext.extHostPid}_${ext.sessionId}` + } + private async runningExtsDir(): Promise { + const p = path.join(this.stateDirPath, CrashMonitoringConstants.runningExtDir) + // ensure the dir exists + await withFailCtx('ensureRunningExtsDir', () => fs.mkdir(p)) + return p + } + private async shutdownExtsDir() { + const p = path.join(this.stateDirPath, CrashMonitoringConstants.shutdownExtDir) + // Since this runs in `deactivate()` it cannot use the VS Code FS api + await withFailCtx('ensureShutdownExtsDir', () => nodeFs.mkdir(p, { recursive: true })) + return p + } + public async clearState(): Promise { + await withFailCtx('clearState', async () => fs.delete(this.stateDirPath, { force: true })) + } + public async getAllExts(): Promise { + const res = await withFailCtx('getAllExts', async () => { + // The file names are intentionally the IDs for easy mapping + const allExtIds: ExtInstanceId[] = await withFailCtx('readdir', async () => + (await fs.readdir(await this.runningExtsDir())).map((k) => k[0]) + ) + + const allExts = allExtIds.map>(async (extId: string) => { + // Due to a race condition, a separate extension instance may have removed this file by this point. It is okay since + // we will assume that other instance handled its termination appropriately. + const ext = await withFailCtx('parseRunningExtFile', async () => + ignoreBadFileError(async () => { + const text = await fs.readFileAsString(path.join(await this.runningExtsDir(), extId)) + + if (!text) { + return undefined + } + + // This was sometimes throwing SyntaxError + return JSON.parse(text) as ExtInstanceHeartbeat + }) + ) + + if (ext === undefined) { + return + } + + if (!isExtHeartbeat(ext)) { + throw new CrashMonitoringError(`Unexpected result from state for ext with key, ${extId}: ${ext}`) + } + return ext + }) + // filter out undefined before returning + const result = (await Promise.all(allExts)).filter(isExtHeartbeat) + return result + }) + return res + } +} + +/** + * Runs the given callback, returning undefined in the case a common file operation + * error occured + */ +async function ignoreBadFileError(fn: () => Promise): Promise { + try { + return await fn() + } catch (e) { + if (isFileNotFoundError(e) || (e instanceof Error && getErrorId(e) === 'SyntaxError')) { + return undefined + } + throw e + } +} + +/** + * Returns true if the context this is being run in is development. + * Enabling dev mode will slightly change behavior: + * - Adding more verbose debug logs + * - throwing errors, when typically in prod they would be swallowed + */ +function getIsDevMode() { + return DevSettings.instance.get('crashCheckInterval', -1) !== -1 +} + +type ExtInstanceId = string + +/** The static metadata of an instance of this extension */ +export type ExtInstance = { + extHostPid: number + sessionId: string + lastHeartbeat?: number + /** + * True if this instance was being run in a `Run & Debug` VS Code instance. + */ + isDebug?: boolean +} + +type ExtInstanceHeartbeat = ExtInstance & { lastHeartbeat: number } + +function isExtHeartbeat(ext: unknown): ext is ExtInstanceHeartbeat { + return typeof ext === 'object' && ext !== null && 'lastHeartbeat' in ext && ext.lastHeartbeat !== undefined +} + +// Use this error for all crash reporting as it gives context to this feature +const CrashMonitoringError = ToolkitError.named(className) + +/** + * Executes the given function, and wraps and throw exceptions with relevant context. + * This is helpful in telemetry as it gives more context about the error since we do not + * have a stacktrace there. + */ +async function withFailCtx(ctx: string, fn: () => Promise): Promise { + try { + // make sure we await the function so it actually executes within the try/catch + return await fn() + } catch (err) { + throw CrashMonitoringError.chain(err, `Failed "${ctx}"`, { code: className }) + } +} + +/** Emits a metric for the given failure, but wraps it with relevant context. */ +function emitFailure(args: { functionName: string; error: unknown }) { + telemetry.function_call.emit({ + className, + functionName: args.functionName, + result: 'Failed', + reason: getTelemetryReason(args.error), + reasonDesc: getTelemetryReasonDesc(args.error), + passive: true, + }) +} diff --git a/packages/core/src/shared/errors.ts b/packages/core/src/shared/errors.ts index 3a7ec27c5a0..6f12f59ee1c 100644 --- a/packages/core/src/shared/errors.ts +++ b/packages/core/src/shared/errors.ts @@ -15,6 +15,7 @@ import type * as os from 'os' import { CodeWhispererStreamingServiceException } from '@amzn/codewhisperer-streaming' import { driveLetterRegex } from './utilities/pathUtils' import { getLogger } from './logger/logger' +import * as CrashMonitoring from './crashMonitoring/constants' let _username = 'unknown-user' let _isAutomation = false @@ -376,6 +377,12 @@ export function scrubNames(s: string, username?: string) { 'Users', 'users', 'home', + 'tmp', + 'aws-toolkit-vscode', + 'globalStorage', // from vscode globalStorageUri + CrashMonitoring.rootDir, + CrashMonitoring.runningExtDir, + CrashMonitoring.shutdownExtDir, ]) if (username && username.length > 2) { diff --git a/packages/core/src/shared/index.ts b/packages/core/src/shared/index.ts index 3abc5d58f7b..fff64a564d1 100644 --- a/packages/core/src/shared/index.ts +++ b/packages/core/src/shared/index.ts @@ -51,3 +51,4 @@ export * as errors from './errors' export * as funcUtil from './utilities/functionUtils' export { fs } from './fs/fs' export * from './handleUninstall' +export { CrashMonitoring } from './crashMonitoring/crashMonitoring' diff --git a/packages/core/src/shared/logger/toolkitLogger.ts b/packages/core/src/shared/logger/toolkitLogger.ts index 35d1ba1f5c1..901294d1a11 100644 --- a/packages/core/src/shared/logger/toolkitLogger.ts +++ b/packages/core/src/shared/logger/toolkitLogger.ts @@ -15,7 +15,7 @@ import { ConsoleLogTransport } from './consoleLogTransport' import { isWeb } from '../extensionGlobals' /* define log topics */ -export type LogTopic = 'unknown' | 'test' +export type LogTopic = 'unknown' | 'test' | 'crashReport' class ErrorLog { constructor( diff --git a/packages/core/src/shared/settings.ts b/packages/core/src/shared/settings.ts index 74911fe6946..6b9f506ba0c 100644 --- a/packages/core/src/shared/settings.ts +++ b/packages/core/src/shared/settings.ts @@ -745,6 +745,7 @@ export class Experiments extends Settings.define( } const devSettings = { + crashCheckInterval: Number, logfile: String, forceCloud9: Boolean, forceDevMode: Boolean, diff --git a/packages/core/src/shared/telemetry/telemetryService.ts b/packages/core/src/shared/telemetry/telemetryService.ts index 6472101f6ac..a249a747cf8 100644 --- a/packages/core/src/shared/telemetry/telemetryService.ts +++ b/packages/core/src/shared/telemetry/telemetryService.ts @@ -110,7 +110,10 @@ export class DefaultTelemetryService { if (this.telemetryEnabled && !isAutomation()) { const currTime = new globals.clock.Date() // This is noisy when running tests in vscode. - telemetry.session_end.emit({ value: currTime.getTime() - this.startTime.getTime(), result: 'Succeeded' }) + telemetry.session_end.emit({ + result: 'Succeeded', + duration: currTime.getTime() - this.startTime.getTime(), + }) try { await fs.writeFile(this.persistFilePath, JSON.stringify(this._eventQueue)) diff --git a/packages/core/src/shared/telemetry/vscodeTelemetry.json b/packages/core/src/shared/telemetry/vscodeTelemetry.json index 85b42d98daf..b71debaf75f 100644 --- a/packages/core/src/shared/telemetry/vscodeTelemetry.json +++ b/packages/core/src/shared/telemetry/vscodeTelemetry.json @@ -5,6 +5,11 @@ "type": "double", "description": "The time it takes to generate approach response" }, + { + "name": "proxiedSessionId", + "type": "string", + "description": "The sessionId of the extension instance that this instance is reporting on behalf of." + }, { "name": "documentFormat", "type": "string", @@ -292,6 +297,11 @@ "type": "int", "description": "Memory heap usage in MB" }, + { + "name": "className", + "type": "string", + "description": "The name of the class" + }, { "name": "functionName", "type": "string", @@ -1166,6 +1176,10 @@ { "type": "functionName", "required": true + }, + { + "type": "className", + "required": false } ], "passive": true @@ -1223,7 +1237,16 @@ "type": "amazonqMessageDisplayedMs" }, { - "type": "source", + "type": "source" + } + ] + }, + { + "name": "session_end", + "description": "Called when stopping the IDE on a best effort basis", + "metadata": [ + { + "type": "proxiedSessionId", "required": false } ] diff --git a/packages/core/src/shared/utilities/workspaceUtils.ts b/packages/core/src/shared/utilities/workspaceUtils.ts index c60a79bd885..f8b7d9ce00e 100644 --- a/packages/core/src/shared/utilities/workspaceUtils.ts +++ b/packages/core/src/shared/utilities/workspaceUtils.ts @@ -68,6 +68,11 @@ export class GitIgnoreFilter { export type CurrentWsFolders = [vscode.WorkspaceFolder, ...vscode.WorkspaceFolder[]] +export function hasWorkspace() { + const wsFolders = vscode.workspace.workspaceFolders + return wsFolders !== undefined && wsFolders.length > 0 +} + /** * Resolves `relPath` against parent `workspaceFolder`, or returns `relPath` if * already absolute or the operation fails. diff --git a/packages/core/src/test/fakeExtensionContext.ts b/packages/core/src/test/fakeExtensionContext.ts index 195f77bd993..d16bd8a1dd4 100644 --- a/packages/core/src/test/fakeExtensionContext.ts +++ b/packages/core/src/test/fakeExtensionContext.ts @@ -159,6 +159,12 @@ export class FakeMemento implements vscode.Memento { return this._storage[key] ?? defaultValue } public update(key: string, value: any): Thenable { + /** From the docs of {@link vscode.Memento.update*()} if a value is updated to undefined, it should be deleted */ + if (value === undefined) { + delete this._storage[key] + return Promise.resolve() + } + this._storage[key] = value return Promise.resolve() diff --git a/packages/core/src/test/shared/crashMonitoring/crashMonitoring.test.ts b/packages/core/src/test/shared/crashMonitoring/crashMonitoring.test.ts new file mode 100644 index 00000000000..6a73c498d25 --- /dev/null +++ b/packages/core/src/test/shared/crashMonitoring/crashMonitoring.test.ts @@ -0,0 +1,227 @@ +/*! + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +import { assertTelemetry, getMetrics, partialDeepCompare, TestFolder } from '../../testUtil' +import assert from 'assert' +import globals from '../../../shared/extensionGlobals' +import { + CrashMonitoring, + ExtInstance, + crashMonitoringStateFactory, +} from '../../../shared/crashMonitoring/crashMonitoring' +import { isCI } from '../../../shared/vscode/env' +import { getLogger } from '../../../shared/logger/logger' + +class TestCrashMonitoring extends CrashMonitoring { + public constructor(...deps: ConstructorParameters) { + super(...deps) + } + public override crash() { + super.crash() + } +} + +export const crashMonitoringTest = async () => { + let testFolder: TestFolder + let spawnedExtensions: TestCrashMonitoring[] + + // Scale down the default interval we heartbeat and check for crashes to something much short for testing. + const checkInterval = 200 + + // Add some buffer since after 1 interval the work is actually done, including file i/o which may be slow. + // **IF FLAKY**, see if increasing the buffer helps. + const oneInterval = checkInterval + 500 + + /** + * Makes N "extension instances" that can be used for testing. + * Each instances is actually the Crash Reporting instance, but there is a + * 1:1 mapping between Crash Reporting instances and the Extension instances. + */ + async function makeTestExtensions(amount: number) { + const devLogger = getLogger() + + const extensions: TestExtension[] = [] + for (let i = 0; i < amount; i++) { + const sessionId = `sessionId-${i}` + const pid = Number(String(i).repeat(6)) + const state = await crashMonitoringStateFactory({ + workDirPath: testFolder.path, + isStateStale: async () => false, + pid, + sessionId: sessionId, + now: () => globals.clock.Date.now(), + memento: globals.globalState, + isDevMode: true, + devLogger, + }) + const ext = new TestCrashMonitoring(state, checkInterval, true, false, devLogger) + spawnedExtensions.push(ext) + const metadata = { + extHostPid: pid, + sessionId, + lastHeartbeat: globals.clock.Date.now(), + isDebug: undefined, + } + extensions[i] = { ext, metadata } + } + return extensions + } + + beforeEach(async function () { + testFolder = await TestFolder.create() + spawnedExtensions = [] + }) + + afterEach(async function () { + // clean up all running instances + spawnedExtensions?.forEach((e) => e.crash()) + }) + + it('graceful shutdown no metric emitted', async function () { + // this.retries(3) + + const exts = await makeTestExtensions(2) + + await exts[0].ext.start() + await awaitIntervals(oneInterval) // allow time to become primary checker + // There is no other active instance to report the issue + assertTelemetry('session_end', []) + + // Ext 1 does a graceful shutdown + await exts[1].ext.start() + await exts[1].ext.stop() + await awaitIntervals(oneInterval) + // Ext 1 did a graceful shutdown so no metric emitted + assertTelemetry('session_end', []) + }) + + it('single running instances crashes, so nothing is reported, but a new instaces appears and reports', async function () { + // this.retries(3) + + const exts = await makeTestExtensions(2) + + await exts[0].ext.start() + exts[0].ext.crash() + await awaitIntervals(oneInterval) + // There is no other active instance to report the issue + assertTelemetry('session_end', []) + + await exts[1].ext.start() + await awaitIntervals(oneInterval) + // Starting a new instance will detect the previously crashed one + assertCrashedExtensions([exts[0]]) + }) + + it('start the first extension, then start many subsequent ones and crash them all at once', async function () { + // this.retries(3) + const latestCrashedExts: TestExtension[] = [] + + const extCount = 10 + const exts = await makeTestExtensions(extCount) + for (let i = 0; i < extCount; i++) { + await exts[i].ext.start() + } + + for (let i = 1; i < extCount; i++) { + exts[i].ext.crash() + latestCrashedExts.push(exts[i]) + } + + // Give some extra time since there is a lot of file i/o + await awaitIntervals(oneInterval * 2) + + assertCrashedExtensions(latestCrashedExts) + }) + + it('the Primary checker crashes and another checker is promoted to Primary', async function () { + // this.retries(3) + const latestCrashedExts: TestExtension[] = [] + + const exts = await makeTestExtensions(4) + // Ext 0 is the Primary checker + await exts[0].ext.start() + await awaitIntervals(oneInterval) + + // start Ext 1 then crash it, Ext 0 finds the crash + await exts[1].ext.start() + exts[1].ext.crash() + latestCrashedExts.push(exts[1]) + await awaitIntervals(oneInterval * 1) + + assertCrashedExtensions(latestCrashedExts) + + // start Ext 2 and crash Ext 0, Ext 2 is promoted to Primary checker + await exts[2].ext.start() + exts[0].ext.crash() + latestCrashedExts.push(exts[0]) + await awaitIntervals(oneInterval * 1) + assertCrashedExtensions(latestCrashedExts) + + // Ext 3 starts, then crashes. Ext 2 reports the crash since it is the Primary checker + await exts[3].ext.start() + exts[3].ext.crash() + latestCrashedExts.push(exts[3]) + await awaitIntervals(oneInterval * 1) + assertCrashedExtensions(latestCrashedExts) + }) + + /** + * Something like the following code can switch contexts early and the test will + * finish before it has completed. Certain async functions that may take longer to run + * can cause the issue, an example is a FileSystem call like delete(). + * + * Example: + * ```ts + * globals.clock.setInterval(async () => { + * await thisThing + * + * await fs.delete(fileName) + * + * return 'result' + * }) + * ``` + * + * Because of this we need be able to block and await on something so that the callback can + * properly finish. Separately I noticed that if this function Timeout time was too small + * it would not actually block, and would simply continue. + * + * In general this is related to event loop Task Queues. + * + */ + async function awaitIntervals(milliseconds: number) { + await new Promise((resolve) => { + setTimeout(resolve, milliseconds) // The lower this is, the less the chance this function will actually block the test from continuing + }) + } + + function assertCrashedExtensions(expectedExts: TestExtension[]) { + const allSessionEnds = getMetrics('session_end') + + const deduplicatedSessionEnds = deduplicate( + [...allSessionEnds], + (a, b) => a.proxiedSessionId === b.proxiedSessionId + ) + assert.strictEqual(deduplicatedSessionEnds.length, expectedExts.length) + + expectedExts.sort((a, b) => a.metadata.sessionId.localeCompare(b.metadata.sessionId)) + deduplicatedSessionEnds.sort((a, b) => a.proxiedSessionId!.localeCompare(b.proxiedSessionId!)) + + expectedExts.forEach((ext, i) => { + partialDeepCompare(deduplicatedSessionEnds[i], { + result: 'Failed', + proxiedSessionId: ext.metadata.sessionId, + reason: 'ExtHostCrashed', + }) + }) + } + + function deduplicate(array: T[], predicate: (a: T, b: T) => boolean): T[] { + return array.filter((item, index, self) => index === self.findIndex((t) => predicate(item, t))) + } +} +// This test is slow, so we only want to run it locally and not in CI. It will be run in the integ CI tests though. +;(isCI() ? describe.skip : describe)('CrashReporting', crashMonitoringTest) + +type TestExtension = { ext: TestCrashMonitoring; metadata: ExtInstance } diff --git a/packages/core/src/test/shared/globalState.test.ts b/packages/core/src/test/shared/globalState.test.ts index f43fd5cbe20..6788abc50e9 100644 --- a/packages/core/src/test/shared/globalState.test.ts +++ b/packages/core/src/test/shared/globalState.test.ts @@ -104,10 +104,11 @@ describe('GlobalState', function () { await globalState.update(keys[2] as any, 'val3') assert.deepStrictEqual(globalState.keys(), keys) assert.deepStrictEqual(globalState.values(), ['val1', 'val2', 'val3']) + await globalState.clear() - // XXX: no way to actually delete the key? - assert.deepStrictEqual(globalState.keys(), keys) - assert.deepStrictEqual(globalState.values(), [undefined, undefined, undefined]) + + assert.deepStrictEqual(globalState.keys(), []) + assert.deepStrictEqual(globalState.values(), []) }) describe('redshift state', function () { diff --git a/packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts b/packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts new file mode 100644 index 00000000000..7fb8ed88427 --- /dev/null +++ b/packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts @@ -0,0 +1,9 @@ +/*! + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +import { crashMonitoringTest } from '../../../test/shared/crashMonitoring/crashMonitoring.test' + +// This test is slower so we want to it to run as an integ test for CI +describe('CrashMonitoring', crashMonitoringTest) diff --git a/packages/webpack.web.config.js b/packages/webpack.web.config.js index 0031134d9f3..6481d84e8e1 100644 --- a/packages/webpack.web.config.js +++ b/packages/webpack.web.config.js @@ -38,7 +38,7 @@ module.exports = (env, argv) => { }), /** * HACK: the HttpResourceFetcher breaks Web mode if imported, BUT we still dynamically import this module for non web mode - * environments. The following allows compilation to pass in Web mode by never bundling the module in the final output. + * environments. The following allows compilation to pass in Web mode by never bundling the module in the final output for web mode. */ new webpack.IgnorePlugin({ resourceRegExp: /httpResourceFetcher/, // matches the path in the require() statement From 0083d36383812893e299008894e97697a6a03f57 Mon Sep 17 00:00:00 2001 From: "Justin M. Keyes" Date: Wed, 2 Oct 2024 16:41:45 -0700 Subject: [PATCH 5/9] Update packages/core/src/dev/activation.ts --- packages/core/src/dev/activation.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/core/src/dev/activation.ts b/packages/core/src/dev/activation.ts index 07de708fc14..df69c018791 100644 --- a/packages/core/src/dev/activation.ts +++ b/packages/core/src/dev/activation.ts @@ -449,8 +449,7 @@ export function forceQuitIde() { // This current process is the ExtensionHost. Killing it will cause all the extensions to crash // for the current ExtensionHost (unless using "extensions.experimental.affinity"). // The IDE instance itself will remaing running, but a new ExtHost will spawn within it. - // The PPID does not seem to be the IDE instance but the root VS Code process, killing - // this crashes all VS Code instances. + // The PPID (parent process) is vscode itself, killing it crashes all vscode instances. const vsCodePid = process.pid process.kill(vsCodePid, 'SIGKILL') // SIGTERM would be the graceful shutdown } From 08128a2737e8881a7a84a5371cf86327222f7aef Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Wed, 2 Oct 2024 23:14:08 -0400 Subject: [PATCH 6/9] move constants to the main constant file Signed-off-by: nkomonen-amazon --- packages/core/src/shared/constants.ts | 11 +++++++++++ packages/core/src/shared/crashMonitoring/constants.ts | 9 --------- .../src/shared/crashMonitoring/crashMonitoring.ts | 8 ++++---- packages/core/src/shared/errors.ts | 8 ++++---- 4 files changed, 19 insertions(+), 17 deletions(-) delete mode 100644 packages/core/src/shared/crashMonitoring/constants.ts diff --git a/packages/core/src/shared/constants.ts b/packages/core/src/shared/constants.ts index b343a329968..700e21f2220 100644 --- a/packages/core/src/shared/constants.ts +++ b/packages/core/src/shared/constants.ts @@ -157,3 +157,14 @@ export const amazonQHelpUrl = 'https://aws.amazon.com/q/' // URL for Amazon Q VS Code export const amazonQVscodeMarketplace = 'https://marketplace.visualstudio.com/items?itemName=AmazonWebServices.amazon-q-vscode' + +/** + * Names of directories relevant to the crash reporting functionality. + * + * Moved here to resolve circular dependency issues. + */ +export const crashMonitoringDirName = { + root: 'crashMonitoring', + running: 'running', + shutdown: 'shutdown', +} as const diff --git a/packages/core/src/shared/crashMonitoring/constants.ts b/packages/core/src/shared/crashMonitoring/constants.ts deleted file mode 100644 index 53db57ac24d..00000000000 --- a/packages/core/src/shared/crashMonitoring/constants.ts +++ /dev/null @@ -1,9 +0,0 @@ -/*! - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0 - */ - -// Needed here due to circular dependency issues -export const rootDir = 'crashMonitoring' -export const runningExtDir = 'runningExts' -export const shutdownExtDir = 'shutdownExts' diff --git a/packages/core/src/shared/crashMonitoring/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring/crashMonitoring.ts index 0aba8fd88c2..dba6bbd6846 100644 --- a/packages/core/src/shared/crashMonitoring/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring/crashMonitoring.ts @@ -14,8 +14,8 @@ import { Logger } from '../logger' import { isNewOsSession } from '../utilities/osUtils' import nodeFs from 'fs/promises' import fs from '../fs/fs' -import * as CrashMonitoringConstants from './constants' import { getLogger } from '../logger/logger' +import { crashMonitoringDirName } from '../constants' const className = 'CrashMonitoring' @@ -375,7 +375,7 @@ export class FileSystemState { * Use {@link crashMonitoringStateFactory} to make an instance */ constructor(protected readonly deps: MementoStateDependencies) { - this.stateDirPath = path.join(this.deps.workDirPath, CrashMonitoringConstants.rootDir) + this.stateDirPath = path.join(this.deps.workDirPath, crashMonitoringDirName.root) this.deps.devLogger?.debug(`crashMonitoring: pid: ${this.deps.pid}`) this.deps.devLogger?.debug(`crashMonitoring: sessionId: ${this.deps.sessionId.slice(0, 8)}-...`) @@ -480,13 +480,13 @@ export class FileSystemState { return `${ext.extHostPid}_${ext.sessionId}` } private async runningExtsDir(): Promise { - const p = path.join(this.stateDirPath, CrashMonitoringConstants.runningExtDir) + const p = path.join(this.stateDirPath, crashMonitoringDirName.running) // ensure the dir exists await withFailCtx('ensureRunningExtsDir', () => fs.mkdir(p)) return p } private async shutdownExtsDir() { - const p = path.join(this.stateDirPath, CrashMonitoringConstants.shutdownExtDir) + const p = path.join(this.stateDirPath, crashMonitoringDirName.shutdown) // Since this runs in `deactivate()` it cannot use the VS Code FS api await withFailCtx('ensureShutdownExtsDir', () => nodeFs.mkdir(p, { recursive: true })) return p diff --git a/packages/core/src/shared/errors.ts b/packages/core/src/shared/errors.ts index 6f12f59ee1c..88e4f6a5f61 100644 --- a/packages/core/src/shared/errors.ts +++ b/packages/core/src/shared/errors.ts @@ -15,7 +15,7 @@ import type * as os from 'os' import { CodeWhispererStreamingServiceException } from '@amzn/codewhisperer-streaming' import { driveLetterRegex } from './utilities/pathUtils' import { getLogger } from './logger/logger' -import * as CrashMonitoring from './crashMonitoring/constants' +import { crashMonitoringDirName } from './constants' let _username = 'unknown-user' let _isAutomation = false @@ -380,9 +380,9 @@ export function scrubNames(s: string, username?: string) { 'tmp', 'aws-toolkit-vscode', 'globalStorage', // from vscode globalStorageUri - CrashMonitoring.rootDir, - CrashMonitoring.runningExtDir, - CrashMonitoring.shutdownExtDir, + crashMonitoringDirName.root, + crashMonitoringDirName.running, + crashMonitoringDirName.shutdown, ]) if (username && username.length > 2) { From 85f04b26232e15a6c59cd69792920d093d859497 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Wed, 2 Oct 2024 23:15:33 -0400 Subject: [PATCH 7/9] extracted out of topic folder no longer needed that we have a single file for crash reporting Signed-off-by: nkomonen-amazon --- .../{crashMonitoring => }/crashMonitoring.ts | 22 +++++++++---------- packages/core/src/shared/index.ts | 2 +- .../crashMonitoring.test.ts | 14 +++++------- .../crashMonitoring/crashMonitoring.test.ts | 2 +- 4 files changed, 18 insertions(+), 22 deletions(-) rename packages/core/src/shared/{crashMonitoring => }/crashMonitoring.ts (97%) rename packages/core/src/test/shared/{crashMonitoring => }/crashMonitoring.test.ts (96%) diff --git a/packages/core/src/shared/crashMonitoring/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts similarity index 97% rename from packages/core/src/shared/crashMonitoring/crashMonitoring.ts rename to packages/core/src/shared/crashMonitoring.ts index dba6bbd6846..3c6dd81a604 100644 --- a/packages/core/src/shared/crashMonitoring/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -3,19 +3,19 @@ * SPDX-License-Identifier: Apache-2.0 */ import path from 'path' -import globals, { isWeb } from '../extensionGlobals' -import { getSessionId as _getSessionId } from '../telemetry/util' -import { getErrorId, getTelemetryReason, getTelemetryReasonDesc, isFileNotFoundError, ToolkitError } from '../errors' -import { isAutomation, isDebugInstance } from '../vscode/env' -import { DevSettings } from '../settings' +import globals, { isWeb } from './extensionGlobals' +import { getSessionId as _getSessionId } from './telemetry/util' +import { getErrorId, getTelemetryReason, getTelemetryReasonDesc, isFileNotFoundError, ToolkitError } from './errors' +import { isAutomation, isDebugInstance } from './vscode/env' +import { DevSettings } from './settings' import vscode from 'vscode' -import { telemetry } from '../telemetry' -import { Logger } from '../logger' -import { isNewOsSession } from '../utilities/osUtils' +import { telemetry } from './telemetry' +import { Logger } from './logger' +import { isNewOsSession } from './utilities/osUtils' import nodeFs from 'fs/promises' -import fs from '../fs/fs' -import { getLogger } from '../logger/logger' -import { crashMonitoringDirName } from '../constants' +import fs from './fs/fs' +import { getLogger } from './logger/logger' +import { crashMonitoringDirName } from './constants' const className = 'CrashMonitoring' diff --git a/packages/core/src/shared/index.ts b/packages/core/src/shared/index.ts index fff64a564d1..a31c647e226 100644 --- a/packages/core/src/shared/index.ts +++ b/packages/core/src/shared/index.ts @@ -51,4 +51,4 @@ export * as errors from './errors' export * as funcUtil from './utilities/functionUtils' export { fs } from './fs/fs' export * from './handleUninstall' -export { CrashMonitoring } from './crashMonitoring/crashMonitoring' +export { CrashMonitoring } from './crashMonitoring' diff --git a/packages/core/src/test/shared/crashMonitoring/crashMonitoring.test.ts b/packages/core/src/test/shared/crashMonitoring.test.ts similarity index 96% rename from packages/core/src/test/shared/crashMonitoring/crashMonitoring.test.ts rename to packages/core/src/test/shared/crashMonitoring.test.ts index 6a73c498d25..ec61d5487f3 100644 --- a/packages/core/src/test/shared/crashMonitoring/crashMonitoring.test.ts +++ b/packages/core/src/test/shared/crashMonitoring.test.ts @@ -3,16 +3,12 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { assertTelemetry, getMetrics, partialDeepCompare, TestFolder } from '../../testUtil' +import { assertTelemetry, getMetrics, partialDeepCompare, TestFolder } from '../testUtil' import assert from 'assert' -import globals from '../../../shared/extensionGlobals' -import { - CrashMonitoring, - ExtInstance, - crashMonitoringStateFactory, -} from '../../../shared/crashMonitoring/crashMonitoring' -import { isCI } from '../../../shared/vscode/env' -import { getLogger } from '../../../shared/logger/logger' +import globals from '../../shared/extensionGlobals' +import { CrashMonitoring, ExtInstance, crashMonitoringStateFactory } from '../../shared/crashMonitoring' +import { isCI } from '../../shared/vscode/env' +import { getLogger } from '../../shared/logger/logger' class TestCrashMonitoring extends CrashMonitoring { public constructor(...deps: ConstructorParameters) { diff --git a/packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts b/packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts index 7fb8ed88427..8dacc56f61c 100644 --- a/packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts +++ b/packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { crashMonitoringTest } from '../../../test/shared/crashMonitoring/crashMonitoring.test' +import { crashMonitoringTest } from '../../../test/shared/crashMonitoring.test' // This test is slower so we want to it to run as an integ test for CI describe('CrashMonitoring', crashMonitoringTest) From 35e675abeb08908954a26de816e4aa827c583ab2 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 3 Oct 2024 11:28:21 -0400 Subject: [PATCH 8/9] rename variable Signed-off-by: nkomonen-amazon --- packages/core/src/shared/constants.ts | 2 +- packages/core/src/shared/crashMonitoring.ts | 8 ++++---- packages/core/src/shared/errors.ts | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/core/src/shared/constants.ts b/packages/core/src/shared/constants.ts index 700e21f2220..6d6cf842552 100644 --- a/packages/core/src/shared/constants.ts +++ b/packages/core/src/shared/constants.ts @@ -163,7 +163,7 @@ export const amazonQVscodeMarketplace = * * Moved here to resolve circular dependency issues. */ -export const crashMonitoringDirName = { +export const crashMonitoringDirNames = { root: 'crashMonitoring', running: 'running', shutdown: 'shutdown', diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index 3c6dd81a604..c5d7358ba14 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -15,7 +15,7 @@ import { isNewOsSession } from './utilities/osUtils' import nodeFs from 'fs/promises' import fs from './fs/fs' import { getLogger } from './logger/logger' -import { crashMonitoringDirName } from './constants' +import { crashMonitoringDirNames } from './constants' const className = 'CrashMonitoring' @@ -375,7 +375,7 @@ export class FileSystemState { * Use {@link crashMonitoringStateFactory} to make an instance */ constructor(protected readonly deps: MementoStateDependencies) { - this.stateDirPath = path.join(this.deps.workDirPath, crashMonitoringDirName.root) + this.stateDirPath = path.join(this.deps.workDirPath, crashMonitoringDirNames.root) this.deps.devLogger?.debug(`crashMonitoring: pid: ${this.deps.pid}`) this.deps.devLogger?.debug(`crashMonitoring: sessionId: ${this.deps.sessionId.slice(0, 8)}-...`) @@ -480,13 +480,13 @@ export class FileSystemState { return `${ext.extHostPid}_${ext.sessionId}` } private async runningExtsDir(): Promise { - const p = path.join(this.stateDirPath, crashMonitoringDirName.running) + const p = path.join(this.stateDirPath, crashMonitoringDirNames.running) // ensure the dir exists await withFailCtx('ensureRunningExtsDir', () => fs.mkdir(p)) return p } private async shutdownExtsDir() { - const p = path.join(this.stateDirPath, crashMonitoringDirName.shutdown) + const p = path.join(this.stateDirPath, crashMonitoringDirNames.shutdown) // Since this runs in `deactivate()` it cannot use the VS Code FS api await withFailCtx('ensureShutdownExtsDir', () => nodeFs.mkdir(p, { recursive: true })) return p diff --git a/packages/core/src/shared/errors.ts b/packages/core/src/shared/errors.ts index 88e4f6a5f61..e2822c2fb31 100644 --- a/packages/core/src/shared/errors.ts +++ b/packages/core/src/shared/errors.ts @@ -15,7 +15,7 @@ import type * as os from 'os' import { CodeWhispererStreamingServiceException } from '@amzn/codewhisperer-streaming' import { driveLetterRegex } from './utilities/pathUtils' import { getLogger } from './logger/logger' -import { crashMonitoringDirName } from './constants' +import { crashMonitoringDirNames } from './constants' let _username = 'unknown-user' let _isAutomation = false @@ -380,9 +380,9 @@ export function scrubNames(s: string, username?: string) { 'tmp', 'aws-toolkit-vscode', 'globalStorage', // from vscode globalStorageUri - crashMonitoringDirName.root, - crashMonitoringDirName.running, - crashMonitoringDirName.shutdown, + crashMonitoringDirNames.root, + crashMonitoringDirNames.running, + crashMonitoringDirNames.shutdown, ]) if (username && username.length > 2) { From d6012a50c6632b8761a866ec23874182c59a1a3c Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 3 Oct 2024 11:43:02 -0400 Subject: [PATCH 9/9] move file Signed-off-by: nkomonen-amazon --- .../shared/{crashMonitoring => }/crashMonitoring.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename packages/core/src/testInteg/shared/{crashMonitoring => }/crashMonitoring.test.ts (74%) diff --git a/packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts b/packages/core/src/testInteg/shared/crashMonitoring.test.ts similarity index 74% rename from packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts rename to packages/core/src/testInteg/shared/crashMonitoring.test.ts index 8dacc56f61c..7f27e2fdc94 100644 --- a/packages/core/src/testInteg/shared/crashMonitoring/crashMonitoring.test.ts +++ b/packages/core/src/testInteg/shared/crashMonitoring.test.ts @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { crashMonitoringTest } from '../../../test/shared/crashMonitoring.test' +import { crashMonitoringTest } from '../../test/shared/crashMonitoring.test' // This test is slower so we want to it to run as an integ test for CI describe('CrashMonitoring', crashMonitoringTest)