diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts
index 3336a1e72fc..bdbc66af230 100644
--- a/packages/core/src/shared/crashMonitoring.ts
+++ b/packages/core/src/shared/crashMonitoring.ts
@@ -4,7 +4,7 @@
  */
 import path from 'path'
 import globals, { isWeb } from './extensionGlobals'
-import { getSessionId as _getSessionId } from './telemetry/util'
+import { getSessionId as _getSessionId, getComputeEnvType } from './telemetry/util'
 import { getErrorId, getTelemetryReason, getTelemetryReasonDesc, isFileNotFoundError, ToolkitError } from './errors'
 import { isAutomation, isDebugInstance } from './vscode/env'
 import { DevSettings } from './settings'
@@ -18,7 +18,7 @@ import { getLogger } from './logger/logger'
 import { crashMonitoringDirName } from './constants'
 import { throwOnUnstableFileSystem } from './filesystemUtilities'
 import { withRetries } from './utilities/functionUtils'
-import { TimeLag } from './utilities/timeoutUtils'
+import { truncateUuid } from './crypto'
 
 const className = 'CrashMonitoring'
 
@@ -61,22 +61,33 @@ const className = 'CrashMonitoring'
  * - To mitigate this we do not run crash reporting on machines that we detect have a flaky filesystem
  */
 export class CrashMonitoring {
-    protected heartbeat: Heartbeat | undefined
-    protected crashChecker: CrashChecker | undefined
+    protected heartbeat: Heartbeat
+    protected crashChecker: CrashChecker
 
     constructor(
         private readonly state: FileSystemState,
         private readonly checkInterval: number,
         private readonly isDevMode: boolean,
         private readonly isAutomation: boolean,
-        private readonly devLogger: Logger | undefined,
-        private readonly timeLag: TimeLag
-    ) {}
+        private readonly devLogger: Logger | undefined
+    ) {
+        this.heartbeat = new Heartbeat(this.state, this.checkInterval, this.isDevMode)
+        this.heartbeat.onFailure(() => this.cleanup())
+
+        this.crashChecker = new CrashChecker(this.state, this.checkInterval, this.isDevMode, this.devLogger)
+        this.crashChecker.onFailure(() => this.cleanup())
+    }
 
     static #didTryCreate = false
     static #instance: CrashMonitoring | undefined
     /** Returns an instance of this class or undefined if any initial validation fails. */
     public static async instance(): Promise<CrashMonitoring | undefined> {
+        // - We do not support web due to some incompatibilities
+        // - We do not support cloud desktop since sleep+vpn+ssh timeouts with the IDE cause false crash reports
+        if (isWeb() || (await getComputeEnvType()) === 'cloudDesktop-amzn') {
+            return undefined
+        }
+
         // Since the first attempt to create an instance may have failed, we do not
         // attempt to create an instance again and return whatever we have
         if (this.#didTryCreate === true) {
@@ -90,11 +101,10 @@ export class CrashMonitoring {
             const state = await crashMonitoringStateFactory() // can throw
             return (this.#instance ??= new CrashMonitoring(
                 state,
-                DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 10), // check every 10 minutes
+                DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 20), // check every 20 minutes
                 isDevMode,
                 isAutomation(),
-                devModeLogger,
-                new TimeLag()
+                devModeLogger
             ))
         } catch (error) {
             emitFailure({ functionName: 'instance', error })
@@ -104,35 +114,21 @@
 
     /** Start the Crash Monitoring process */
     public async start() {
-        if (isWeb()) {
-            return
-        }
-
         // During tests, the Prod code also runs this function. It interferes with telemetry assertion since it reports additional heartbeats.
         if (this.isAutomation) {
             return
         }
 
         try {
-            this.heartbeat = new Heartbeat(this.state, this.checkInterval, this.isDevMode)
-            this.heartbeat.onFailure(() => this.cleanup())
-
-            this.crashChecker = new CrashChecker(
-                this.state,
-                this.checkInterval,
-                this.isDevMode,
-                this.devLogger,
-                this.timeLag
-            )
-            this.crashChecker.onFailure(() => this.cleanup())
-
             await this.heartbeat.start()
             await this.crashChecker.start()
         } catch (error) {
             emitFailure({ functionName: 'start', error })
             try {
                 await this.cleanup()
-            } catch {}
+            } catch (e) {
+                emitFailure({ functionName: 'startErrorHandler', error: e })
+            }
 
             // Surface errors during development, otherwise it can be missed.
             if (this.isDevMode) {
@@ -144,8 +140,7 @@
     /** Stop the Crash Monitoring process, signifying a graceful shutdown */
     public async shutdown() {
         try {
-            this.crashChecker?.cleanup()
-            await this.heartbeat?.shutdown()
+            await this.cleanup()
         } catch (error) {
             try {
                 // This probably wont emit in time before shutdown, but may be written to the logs
@@ -159,8 +154,8 @@
     }
 
     public async cleanup() {
-        this.crashChecker?.cleanup()
-        await this.heartbeat?.shutdown()
+        this.crashChecker.cleanup()
+        await this.heartbeat.shutdown()
     }
 }
 
@@ -192,9 +187,12 @@
                 await this.state.sendHeartbeat()
             } catch (e) {
                 try {
+                    // Since something went wrong, clean up everything to prevent a false crash report
                     await this.shutdown()
                     emitFailure({ functionName: 'sendHeartbeatInterval', error: e })
-                } catch {}
+                } catch (e) {
+                    emitFailure({ functionName: 'sendHeartbeatIntervalErrorHandler', error: e })
+                }
 
                 if (this.isDevMode) {
                     throw e
@@ -223,45 +221,46 @@ class CrashChecker {
     private intervalRef: NodeJS.Timer | undefined
     private _onFailure = new vscode.EventEmitter()
     public onFailure = this._onFailure.event
+    public timeLag: TimeLag
 
     constructor(
         private readonly state: FileSystemState,
         private readonly checkInterval: number,
         private readonly isDevMode: boolean,
-        private readonly devLogger: Logger | undefined,
-        /**
-         * This class is required for the following edge case:
-         *   1. Heartbeat is sent
-         *   2. Computer goes to sleep for X minutes
-         *   3. Wake up computer. But before a new heartbeat can be sent, a crash checker (can be from another ext instance) runs
-         *      and sees a stale heartbeat. It assumes a crash.
-         *
-         * Why? Intervals do not run while the computer is asleep, so the latest heartbeat has a "lag" since it wasn't able to send
-         * a new heartbeat.
-         * Then on wake, there is a racecondition for the next heartbeat to be sent before the next crash check. If the crash checker
-         * runs first it will incorrectly conclude a crash.
-         *
-         * Solution: Keep track of the lag, and then skip the next crash check if there was a lag. This will give time for the
-         * next heartbeat to be sent.
-         */
-        private readonly timeLag: TimeLag
-    ) {}
+        private readonly devLogger: Logger | undefined
+    ) {
+        this.timeLag = new TimeLag(this.checkInterval)
+    }
 
     public async start() {
         {
             this.devLogger?.debug(`crashMonitoring: checkInterval ${this.checkInterval}`)
 
-            this.timeLag.start()
-
             // do an initial check
             await withFailCtx('initialCrashCheck', () =>
-                tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger, this.timeLag)
+                tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger)
             )
 
+            this.timeLag.init()
+
             // check on an interval
             this.intervalRef = globals.clock.setInterval(async () => {
                 try {
-                    await tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger, this.timeLag)
+                    // Due to sleep+wake the heartbeat has not been sent consistently. Skip 1 crash check interval
+                    // to allow for a new heartbeat to be sent
+                    if (this.timeLag.didLag()) {
+                        this.devLogger?.warn('crashMonitoring: LAG detected time lag and skipped a crash check')
+                        telemetry.function_call.emit({
+                            className: className,
+                            functionName: 'timeLag',
+                            result: 'Succeeded',
+                            duration: this.timeLag.getLag(), // How much the lag actually was
+                        })
+                    } else {
+                        await tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger)
+                    }
+
+                    this.timeLag.updateLastExecution()
                 } catch (e) {
                     emitFailure({ functionName: 'checkCrashInterval', error: e })
 
@@ -280,15 +279,8 @@
         state: FileSystemState,
         checkInterval: number,
         isDevMode: boolean,
-        devLogger: Logger | undefined,
-        timeLag: TimeLag
+        devLogger: Logger | undefined
     ) {
-        if (await timeLag.didLag()) {
-            timeLag.reset()
-            devLogger?.warn('crashMonitoring: SKIPPED check crash due to time lag')
-            return
-        }
-
         // check each extension if it crashed
         const knownExts = await state.getAllExts()
         const runningExts: ExtInstanceHeartbeat[] = []
@@ -315,21 +307,18 @@
                     proxiedSessionId: ext.sessionId,
                     reason: 'ExtHostCrashed',
                     passive: true,
+                    timestamp: ext.lastHeartbeat,
                 })
-
-                devLogger?.debug(
-                    `crashMonitoring: CRASH: following has crashed: pid ${ext.extHostPid} + sessionId: ${ext.sessionId}`
-                )
             })
         }
 
         if (isDevMode) {
-            const before = knownExts.map((i) => i.extHostPid)
-            const after = runningExts.map((i) => i.extHostPid)
+            const before = knownExts.map((i) => truncateUuid(i.sessionId))
+            const after = runningExts.map((i) => truncateUuid(i.sessionId))
             // Sanity check: ENSURE THAT AFTER === ACTUAL or this implies that our data is out of sync
-            const afterActual = (await state.getAllExts()).map((i) => i.extHostPid)
+            const afterActual = (await state.getAllExts()).map((i) => truncateUuid(i.sessionId))
             devLogger?.debug(
-                `crashMonitoring: CHECKED: Result of cleaning up stopped instances\nBEFORE: ${JSON.stringify(before)}\nAFTER: ${JSON.stringify(after)}\nACTUAL: ${JSON.stringify(afterActual)}`
+                `crashMonitoring: CHECKED: Result of cleaning up crashed instances\nBEFORE: ${JSON.stringify(before)}\nAFTER: ${JSON.stringify(after)}\nACTUAL: ${JSON.stringify(afterActual)}`
             )
         }
 
@@ -345,7 +334,6 @@
     /** Use this on failures to terminate the crash checker */
     public cleanup() {
        globals.clock.clearInterval(this.intervalRef)
-        this.timeLag.cleanup()
    }
 
    /** Mimics a crash, only for testing */
@@ -354,13 +342,49 @@
     }
 }
 
+/**
+ * Helper class to detect when an interval does not execute at the expected cadence.
+ * It works by calculating the time diff between interval executions, indicating a lag if this diff
+ * is greater than the expected interval.
+ *
+ * When the computer goes to sleep, intervals do not run. So when the computer wakes it will
+ * pick up where it left off. We have cases where we want to detect this.
+ *
+ * TODO: Merge this in with the Interval class so that it comes for free with it.
+ */
+class TimeLag {
+    private lastExecution: number = 0
+
+    constructor(private readonly interval: number) {}
+
+    /** Call this right before the interval is defined */
+    public init() {
+        this.updateLastExecution()
+    }
+
+    /** Call this at the beginning of each interval execution to determine if a lag was detected */
+    public didLag() {
+        const buffer = 10_000 // accounts for the actual work done in an interval
+        return this.getLag() > this.interval + buffer
+    }
+
+    /** Get the time diff between the last interval execution and now */
+    public getLag() {
+        return globals.clock.Date.now() - this.lastExecution
+    }
+
+    /** Call this at the end of your interval callback */
+    public updateLastExecution() {
+        this.lastExecution = globals.clock.Date.now()
+    }
+}
+
 /**
  * We define this externally so that we have a single source of truth for the contructor args.
  * Ideally we'd use ConstructorParameters, but it does not work when the constructor is protected.
  */
 type MementoStateDependencies = {
     memento: vscode.Memento
-    pid: number
     sessionId: string
     workDirPath: string
     isDevMode: boolean
@@ -375,7 +399,6 @@ function getDefaultDependencies(): MementoStateDependencies {
         workDirPath: path.join(globals.context.globalStorageUri.fsPath),
         memento: globals.globalState as vscode.Memento,
         isStateStale: () => isNewOsSession(),
-        pid: process.pid,
         sessionId: _getSessionId(),
         isDevMode: getIsDevMode(),
         devLogger: getIsDevMode() ? getLogger() : undefined,
@@ -407,7 +430,6 @@ export class FileSystemState {
     constructor(protected readonly deps: MementoStateDependencies) {
         this.stateDirPath = path.join(this.deps.workDirPath, crashMonitoringDirName)
 
-        this.deps.devLogger?.debug(`crashMonitoring: pid: ${this.deps.pid}`)
         this.deps.devLogger?.debug(`crashMonitoring: sessionId: ${this.deps.sessionId.slice(0, 8)}-...`)
         this.deps.devLogger?.debug(`crashMonitoring: dir: ${this.stateDirPath}`)
     }
@@ -420,16 +442,14 @@ export class FileSystemState {
      */
     public async init() {
         // IMPORTANT: do not run crash reporting on unstable filesystem to reduce invalid crash data
-        //
-        // NOTE: Emits a metric to know how many clients we skipped
-        await telemetry.function_call.run(async (span) => {
-            span.record({ className, functionName: 'FileSystemStateValidation' })
-            await withFailCtx('validateFileSystemStability', () => throwOnUnstableFileSystem())
-        })
+        await withFailCtx('validateFileSystemStability', () => throwOnUnstableFileSystem())
 
         // Clear the state if the user did something like a computer restart
         if (await this.deps.isStateStale()) {
-            await this.clearState()
+            await telemetry.function_call.run(async (span) => {
+                span.record({ functionName: 'handleComputerRestart', className })
+                await this.clearState()
+            })
         }
 
         await withFailCtx('init', () => fs.mkdir(this.stateDirPath))
@@ -437,29 +457,38 @@ export class FileSystemState {
 
     // ------------------ Heartbeat methods ------------------
     public async sendHeartbeat() {
+        const extId = this.extId
         try {
+            const now = this.deps.now()
             const func = async () => {
-                await fs.writeFile(
-                    this.makeStateFilePath(this.extId),
-                    JSON.stringify({ ...this.ext, lastHeartbeat: this.deps.now() }, undefined, 4)
-                )
-                this.deps.devLogger?.debug(
-                    `crashMonitoring: HEARTBEAT pid ${this.deps.pid} + sessionId: ${this.deps.sessionId.slice(0, 8)}-...`
-                )
+                const filePath = this.makeStateFilePath(extId)
+
+                // We were seeing weird behavior where we possibly read an old file, even though we overwrote it.
+                // So this is a sanity check.
+                await fs.delete(filePath, { force: true })
+
+                await fs.writeFile(filePath, JSON.stringify({ ...this.ext, lastHeartbeat: now }, undefined, 4))
+
+                // Sanity check to verify the write is accessible immediately after
+                const heartbeatData = JSON.parse(await fs.readFileText(filePath)) as ExtInstanceHeartbeat
+                if (heartbeatData.lastHeartbeat !== now) {
+                    throw new CrashMonitoringError('Heartbeat write validation failed', { code: className })
+                }
             }
             const funcWithCtx = () => withFailCtx('sendHeartbeatState', func)
-            const funcWithRetries = withRetries(funcWithCtx, { maxRetries: 8, delay: 100, backoff: 2 })
-            return await funcWithRetries
+            const funcWithRetries = withRetries(funcWithCtx, { maxRetries: 6, delay: 100, backoff: 2 })
+            const funcWithTelemetryRun = await telemetry.ide_heartbeat.run((span) => {
+                span.record({ id: className, timestamp: now })
+                return funcWithRetries
+            })
+
+            return funcWithTelemetryRun
         } catch (e) {
             // delete this ext from the state to avoid an incorrectly reported crash since we could not send a new heartbeat
-            await withFailCtx('sendHeartbeatFailureCleanup', () => this.clearHeartbeat())
+            await this.deleteHeartbeatFile(extId, 'sendHeartbeatFailureCleanup')
             throw e
         }
     }
-    /** Clears this extentions heartbeat from the state */
-    public async clearHeartbeat() {
-        await this.deleteHeartbeatFile(this.extId)
-    }
 
     /**
      * Indicates that this extension instance has gracefully shutdown.
     *
@@ -474,28 +503,22 @@ export class FileSystemState {
     public async indicateGracefulShutdown(): Promise<void> {
         // By removing the heartbeat entry, the crash checkers will not be able to find this entry anymore, making it
         // impossible to report on since the file system is the source of truth
-        await withFailCtx('indicateGracefulShutdown', () =>
-            nodeFs.rm(this.makeStateFilePath(this.extId), { force: true })
-        )
+        await this.deleteHeartbeatFile(this.extId, 'indicateGracefulShutdown')
     }
 
     // ------------------ Checker Methods ------------------
     public async handleCrashedExt(ext: ExtInstance, fn: () => void) {
-        await withFailCtx('handleCrashedExt', async () => {
-            await this.deleteHeartbeatFile(ext)
-            fn()
-        })
+        await this.deleteHeartbeatFile(ext, 'handleCrashedExt')
+        await withFailCtx('handleCrashedExt', async () => fn())
     }
 
-    private async deleteHeartbeatFile(ext: ExtInstanceId | ExtInstance) {
-        // Retry file deletion to prevent incorrect crash reports. Common Windows errors seen in telemetry: EPERM/EBUSY.
-        // See: https://github.com/aws/aws-toolkit-vscode/pull/5335
-        await withRetries(() => withFailCtx('deleteStaleRunningFile', () => fs.delete(this.makeStateFilePath(ext))), {
-            maxRetries: 8,
-            delay: 100,
-            backoff: 2,
-        })
+    private async deleteHeartbeatFile(ext: ExtInstanceId | ExtInstance, ctx: string) {
+        // IMPORTANT: Must use NodeFs here since this is used during shutdown
+        const func = () => nodeFs.rm(this.makeStateFilePath(ext), { force: true })
+        const funcWithCtx = () => withFailCtx(ctx, func)
+        const funcWithRetries = withRetries(funcWithCtx, { maxRetries: 6, delay: 100, backoff: 2 })
+        await funcWithRetries
     }
 
     // ------------------ State data ------------------
@@ -506,7 +529,6 @@ export class FileSystemState {
     protected get ext(): ExtInstance {
         return {
             sessionId: this.deps.sessionId,
-            extHostPid: this.deps.pid,
             isDebug: isDebugInstance() ? true : undefined,
         }
     }
@@ -518,9 +540,9 @@ export class FileSystemState {
      * - The Extension Host PID used in addition to the session ID should be good enough to uniquely identiy.
      */
     protected createExtId(ext: ExtInstance): ExtInstanceId {
-        return `${ext.extHostPid}_${ext.sessionId}`
+        return ext.sessionId
     }
-    private readonly fileSuffix = 'running'
+    private readonly fileSuffix = 'v1'
     private makeStateFilePath(ext: ExtInstance | ExtInstanceId) {
         const extId = typeof ext === 'string' ? ext : this.createExtId(ext)
         return path.join(this.stateDirPath, extId + `.${this.fileSuffix}`)
     }
@@ -533,24 +555,15 @@ export class FileSystemState {
        })
    }
     public async getAllExts(): Promise<ExtInstanceHeartbeat[]> {
-        const res = await withFailCtx('getAllExts', async () => {
+        return withFailCtx('getAllExts', async () => {
             // Read all the exts from the filesystem, deserializing as needed
             const allExtIds: ExtInstanceId[] = await withFailCtx('readdir', async () => {
                 const filesInDir = await fs.readdir(this.stateDirPath)
-                const relevantFiles = filesInDir.filter((file: [string, vscode.FileType]) => {
-                    const name = file[0]
-                    const type = file[1]
-                    if (type !== vscode.FileType.File) {
-                        return false
-                    }
-                    if (path.extname(name) !== `.${this.fileSuffix}`) {
-                        return false
-                    }
-                    return true
+                const relevantFiles = filesInDir.filter((file: [name: string, type: vscode.FileType]) => {
+                    return file[1] === vscode.FileType.File && path.extname(file[0]) === `.${this.fileSuffix}`
                 })
-                const idsFromFileNames = relevantFiles.map((file: [string, vscode.FileType]) => {
-                    const name = file[0]
-                    return name.split('.')[0]
+                const idsFromFileNames = relevantFiles.map((file: [name: string, type: vscode.FileType]) => {
                    return file[0].split('.')[0]
                 })
                 return idsFromFileNames
             })
@@ -559,22 +572,21 @@ export class FileSystemState {
                 // Due to a race condition, a separate extension instance may have removed this file by this point. It is okay since
                 // we will assume that other instance handled its termination appropriately.
                 // NOTE: On Windows we were failing on EBUSY, so we retry on failure.
-                const ext: ExtInstanceHeartbeat | undefined = await withRetries(
-                    () =>
-                        withFailCtx('parseRunningExtFile', async () =>
-                            ignoreBadFileError(async () => {
-                                const text = await fs.readFileText(this.makeStateFilePath(extId))
-
-                                if (!text) {
-                                    return undefined
-                                }
-
-                                // This was sometimes throwing SyntaxError
-                                return JSON.parse(text) as ExtInstanceHeartbeat
-                            })
-                        ),
-                    { maxRetries: 6, delay: 100, backoff: 2 }
-                )
+                const loadExtFromDisk = async () => {
+                    const text = await fs.readFileText(this.makeStateFilePath(extId))
+
+                    if (!text) {
+                        return undefined
+                    }
+
+                    // This was sometimes throwing SyntaxError
+                    return JSON.parse(text) as ExtInstanceHeartbeat
+                }
+                const funcWithIgnoreBadFile = () => ignoreBadFileError(loadExtFromDisk)
+                const funcWithRetries = () =>
+                    withRetries(funcWithIgnoreBadFile, { maxRetries: 6, delay: 100, backoff: 2 })
+                const funcWithCtx = () => withFailCtx('parseRunningExtFile', funcWithRetries)
+                const ext: ExtInstanceHeartbeat | undefined = await funcWithCtx()
 
                 if (ext === undefined) {
                     return
@@ -586,10 +598,8 @@ export class FileSystemState {
                 return ext
             })
 
             // filter out undefined before returning
-            const result = (await Promise.all(allExts)).filter(isExtHeartbeat)
-            return result
+            return (await Promise.all(allExts)).filter(isExtHeartbeat)
         })
-        return res
     }
 }
@@ -622,7 +632,6 @@ type ExtInstanceId = string
 
 /** The static metadata of an instance of this extension */
 export type ExtInstance = {
-    extHostPid: number
     sessionId: string
     lastHeartbeat?: number
     /**
diff --git a/packages/core/src/shared/crypto.ts b/packages/core/src/shared/crypto.ts
index a1496ad054a..808f5730d81 100644
--- a/packages/core/src/shared/crypto.ts
+++ b/packages/core/src/shared/crypto.ts
@@ -42,3 +42,15 @@ export function isUuid(uuid: string): boolean {
     const uuidPattern = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i
     return uuidPattern.test(uuid)
 }
+
+/**
+ * Eg: 'aaaabbbb-cccc-dddd-eeee-ffffhhhhiiii' -> 'aaaa...iiii'
+ */
+export function truncateUuid(uuid: string) {
+    if (uuid.length !== 36) {
+        throw new Error(`Cannot truncate uuid of value: "${uuid}"`)
+    }
+
+    const cleanedUUID = uuid.replace(/-/g, '')
+    return `${cleanedUUID.substring(0, 4)}...${cleanedUUID.substring(cleanedUUID.length - 4)}`
+}
diff --git a/packages/core/src/shared/telemetry/vscodeTelemetry.json b/packages/core/src/shared/telemetry/vscodeTelemetry.json
index 9c3dbbdfd67..3f2f4be53b4 100644
--- a/packages/core/src/shared/telemetry/vscodeTelemetry.json
+++ b/packages/core/src/shared/telemetry/vscodeTelemetry.json
@@ -311,6 +311,11 @@
             "type": "string",
             "description": "The CPU architecture"
         },
+        {
+            "name": "timestamp",
+            "type": "int",
+            "description": "A timestamp in milliseconds"
+        },
         {
             "name": "traceId",
             "type": "string",
@@ -1184,8 +1189,31 @@
                 {
                     "type": "proxiedSessionId",
                     "required": false
+                },
+                {
+                    "type": "timestamp",
+                    "required": false
                 }
             ]
+        },
+        {
+            "name": "ide_heartbeat",
+            "description": "A heartbeat sent by the extension",
+            "metadata": [
+                {
+                    "type": "id",
+                    "required": true
+                },
+                {
+                    "type": "name",
+                    "required": false
+                },
+                {
+                    "type": "timestamp",
+                    "required": false
+                }
+            ],
+            "passive": true
         }
     ]
 }
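[Note, not part of the patch: the new `ide_heartbeat` metric defined above is consumed through the generated telemetry client, as the `sendHeartbeat()` hunk earlier in this diff shows. Below is a condensed, illustrative sketch of that call pattern; the `writeHeartbeatFile` parameter and the import path are stand-ins, not code from this PR.]

```ts
// Illustrative sketch only: mirrors how crashMonitoring.ts records the new metric.
// Import path shown as seen from packages/core/src/shared; adjust for your location.
import { telemetry } from './telemetry/telemetry'

async function emitHeartbeat(writeHeartbeatFile: () => Promise<void>) {
    const now = Date.now()
    return telemetry.ide_heartbeat.run(async (span) => {
        // `id` is the only required metadata; `timestamp` is the newly added optional field
        span.record({ id: 'CrashMonitoring', timestamp: now })
        await writeHeartbeatFile()
    })
}
```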
diff --git a/packages/core/src/shared/utilities/timeoutUtils.ts b/packages/core/src/shared/utilities/timeoutUtils.ts
index 11710589ec0..8dc38cfef5c 100644
--- a/packages/core/src/shared/utilities/timeoutUtils.ts
+++ b/packages/core/src/shared/utilities/timeoutUtils.ts
@@ -320,70 +320,3 @@ export function sleep(duration: number = 0): Promise<void> {
     const schedule = globals?.clock?.setTimeout ?? setTimeout
     return new Promise((r) => schedule(r, Math.max(duration, 0)))
 }
-
-/**
- * Time lag occurs when the computer goes to sleep, and intervals cannot run on the expected
- * cadence. This keeps track of that lag in cadence.
- */
-export class TimeLag {
-    private intervalRef: NodeJS.Timer | undefined
-    private isCompleted: Promise<void> | undefined
-    /** Resolves {@link isCompleted} when the next interval of lag checking is completed */
-    private setCompleted: (() => void) | undefined
-
-    /** The last timestamp we remember. If this is more than 1 {@link lagCheckInterval} we probably did a sleep+wake */
-    private latestTimestamp: number = 0
-    /** The accumulation of lag before the next crash checker interval, since the user can sleep+wake multiple times in between */
-    private totalLag: number = 0
-    private readonly lagCheckInterval = 1000
-
-    public start() {
-        this.reset() // initialize
-
-        // Every interval calculate the lag
-        this.intervalRef = globals.clock.setInterval(() => {
-            const expectedNow = this.latestTimestamp + this.lagCheckInterval
-            const actualNow = globals.clock.Date.now()
-            const lag = actualNow - expectedNow
-
-            // interval callback execution is not exact, so this buffer avoids micro lag from being
-            // considered actual lag
-            if (lag > 5000) {
-                this.totalLag += lag
-            }
-
-            this.latestTimestamp = Date.now()
-
-            // race condition between crash checker and lag checker on computer wake. So we have
-            // the crash checker block until this is completed.
-            this.setCompleted!()
-            this.setInProgress()
-        }, this.lagCheckInterval)
-    }
-
-    /** True if there is a time lag */
-    public async didLag() {
-        await this.isCompleted!
-        return this.totalLag > 0
-    }
-
-    /** Indicates the next time lag interval check is not completed */
-    private setInProgress() {
-        this.isCompleted = new Promise((resolve) => {
-            this.setCompleted = resolve
-        })
-    }
-
-    /**
-     * Call this once the user of this instance has handled the lag. If not done, {@link didLag} can
-     * be permanently true.
-     */
-    public reset() {
-        this.totalLag = 0
-        this.latestTimestamp = globals.clock.Date.now()
-        this.setInProgress()
-    }
-    public cleanup() {
-        globals.clock.clearInterval(this.intervalRef)
-    }
-}
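[Note, not part of the patch: the interval-based `TimeLag` removed above is replaced by the much smaller timestamp-diff version added to crashMonitoring.ts earlier in this diff. A minimal usage sketch of the new helper follows; it uses plain `setInterval`/`Date.now` instead of `globals.clock`, which is an assumption for brevity.]

```ts
// Condensed copy of the new TimeLag (crashMonitoring.ts) plus the intended call pattern.
class TimeLag {
    private lastExecution = 0
    constructor(private readonly interval: number) {}
    public init() { this.updateLastExecution() }
    // 10s buffer accounts for the work done inside an interval callback
    public didLag() { return this.getLag() > this.interval + 10_000 }
    public getLag() { return Date.now() - this.lastExecution }
    public updateLastExecution() { this.lastExecution = Date.now() }
}

const checkInterval = 1000 * 60 * 20 // matches the new default crash check interval
const lag = new TimeLag(checkInterval)
lag.init()
setInterval(() => {
    if (lag.didLag()) {
        // Interval fired late (e.g. after machine sleep): skip this crash check so a fresh
        // heartbeat can be written first, avoiding a false crash report.
    } else {
        // run the crash check as usual
    }
    lag.updateLastExecution()
}, checkInterval)
```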
diff --git a/packages/core/src/test/shared/crashMonitoring.test.ts b/packages/core/src/test/shared/crashMonitoring.test.ts
index 6d1b311aa62..c06b8b167a1 100644
--- a/packages/core/src/test/shared/crashMonitoring.test.ts
+++ b/packages/core/src/test/shared/crashMonitoring.test.ts
@@ -9,9 +9,8 @@ import globals from '../../shared/extensionGlobals'
 import { CrashMonitoring, ExtInstance, crashMonitoringStateFactory } from '../../shared/crashMonitoring'
 import { isCI } from '../../shared/vscode/env'
 import { getLogger } from '../../shared/logger/logger'
-import { TimeLag } from '../../shared/utilities/timeoutUtils'
 import { SinonSandbox, createSandbox } from 'sinon'
-import { fs } from '../../shared'
+import { fs, randomUUID } from '../../shared'
 import path from 'path'
 
 class TestCrashMonitoring extends CrashMonitoring {
@@ -20,15 +19,18 @@
     }
     /** Imitates an extension crash */
     public async crash() {
-        this.crashChecker?.testCrash()
-        this.heartbeat?.testCrash()
+        this.crashChecker.testCrash()
+        this.heartbeat.testCrash()
+    }
+
+    public getTimeLag() {
+        return this.crashChecker.timeLag
     }
 }
 
 export const crashMonitoringTest = async () => {
     let testFolder: TestFolder
     let spawnedExtensions: TestCrashMonitoring[]
-    let timeLag: TimeLag
     let sandbox: SinonSandbox
 
     // Scale down the default interval we heartbeat and check for crashes to something much short for testing.
@@ -46,30 +48,27 @@
     async function makeTestExtensions(amount: number) {
         const extensions: TestExtension[] = []
         for (let i = 0; i < amount; i++) {
-            extensions[i] = await makeTestExtension(i, new TimeLag())
+            extensions[i] = await makeTestExtension(i)
         }
         return extensions
     }
 
-    async function makeTestExtension(id: number, timeLag: TimeLag, opts?: { isStateStale: () => Promise<boolean> }) {
+    async function makeTestExtension(id: number, opts?: { isStateStale: () => Promise<boolean> }) {
         const isStateStale = opts?.isStateStale ?? (() => Promise.resolve(false))
-        const sessionId = `sessionId-${id}`
-        const pid = Number(String(id).repeat(6))
+        const sessionId = randomUUID()
         const state = await crashMonitoringStateFactory({
             workDirPath: testFolder.path,
             isStateStale,
-            pid,
             sessionId: sessionId,
             now: () => globals.clock.Date.now(),
             memento: globals.globalState,
             isDevMode: true,
             devLogger: getLogger(),
         })
 
-        const ext = new TestCrashMonitoring(state, checkInterval, true, false, getLogger(), timeLag)
+        const ext = new TestCrashMonitoring(state, checkInterval, true, false, getLogger())
         spawnedExtensions.push(ext)
         const metadata = {
-            extHostPid: pid,
             sessionId,
             lastHeartbeat: globals.clock.Date.now(),
             isDebug: undefined,
@@ -80,14 +79,12 @@
     beforeEach(async function () {
         testFolder = await TestFolder.create()
         spawnedExtensions = []
-        timeLag = new TimeLag()
         sandbox = createSandbox()
     })
 
     afterEach(async function () {
         // clean up all running instances
         spawnedExtensions?.forEach((e) => e.crash())
-        timeLag.cleanup()
         sandbox.restore()
     })
 
@@ -164,7 +161,7 @@
         assertTelemetry('session_end', [])
 
         // This extension clears the state due to it being stale, not reporting the previously crashed ext
-        const ext1 = await makeTestExtension(1, timeLag, { isStateStale: () => Promise.resolve(true) })
+        const ext1 = await makeTestExtension(1, { isStateStale: () => Promise.resolve(true) })
         await ext1.ext.start()
         await awaitIntervals(oneInterval * 1)
         assertCrashedExtensions([])
@@ -195,27 +192,24 @@
 
         // This test handles the case for a users computer doing a sleep+wake and
         // then a crash was incorrectly reported since a new heartbeat could not be sent in time
-        const timeLagStub = sandbox.stub(timeLag)
-        timeLagStub.start.resolves()
-        timeLagStub.didLag.resolves(false)
-
         // Load up a crash
-        const ext0 = await makeTestExtension(0, timeLagStub as unknown as TimeLag)
+        const ext0 = await makeTestExtension(0)
         await ext0.ext.start()
         await ext0.ext.crash()
 
-        const ext1 = await makeTestExtension(1, timeLagStub as unknown as TimeLag)
-        await ext1.ext.start()
-
+        const ext1 = await makeTestExtension(1)
         // Indicate that we have a time lag, and until it returns false
         // we will skip crash checking
-        timeLagStub.didLag.resolves(true)
-        await awaitIntervals(oneInterval)
-        assertCrashedExtensions([])
-        await awaitIntervals(oneInterval)
+        const didLagStub = sandbox.stub(ext1.ext.getTimeLag(), 'didLag')
+        didLagStub.returns(true)
+        await ext1.ext.start()
+
+        // Since we have a time lag, the crash checker will not run
+        await awaitIntervals(oneInterval * 2)
         assertCrashedExtensions([])
+
         // Now that the time lag is true, we will check for a crash
-        timeLagStub.didLag.resolves(false)
+        didLagStub.returns(false)
         await awaitIntervals(oneInterval)
         assertCrashedExtensions([ext0])
     })
@@ -279,8 +273,7 @@
         const state = await crashMonitoringStateFactory({
             workDirPath: testFolder.path,
             isStateStale: () => Promise.resolve(false),
-            pid: 1111,
-            sessionId: 'sessionId_1111',
+            sessionId: randomUUID(),
             now: () => globals.clock.Date.now(),
             memento: globals.globalState,
             isDevMode: true,
diff --git a/packages/core/src/test/shared/crypto.test.ts b/packages/core/src/test/shared/crypto.test.ts
index daefe0debe1..f9620d41233 100644
--- a/packages/core/src/test/shared/crypto.test.ts
+++ b/packages/core/src/test/shared/crypto.test.ts
@@ -4,7 +4,7 @@
  */
 
 import assert from 'assert'
-import { isUuid, randomUUID } from '../../shared/crypto'
+import { isUuid, randomUUID, truncateUuid } from '../../shared/crypto'
 
 describe('crypto', function () {
     describe('randomUUID()', function () {
@@ -44,3 +44,21 @@
         assert.equal(isUuid('47fe01cf-f37a-4e7c-b971-d10fe5897763'.toUpperCase()), true)
     })
 })
+
+describe('truncateUUID', function () {
+    it('should return the first 4 and last 4 characters of a valid UUID', function () {
+        const fullUUID1 = 'aaaabbbb-cccc-dddd-eeee-ffffhhhhiiii'
+        const result1 = truncateUuid(fullUUID1)
+        assert.strictEqual(result1, 'aaaa...iiii')
+
+        const fullUUID2 = '12340000-0000-0000-0000-000000005678'
+        const result2 = truncateUuid(fullUUID2)
+        assert.strictEqual(result2, '1234...5678')
+    })
+
+    it('should throw an error if the input is not 36 characters long', function () {
+        assert.throws(() => {
+            truncateUuid('invalid-uuid')
+        }, /Cannot truncate uuid of value: "invalid-uuid"/)
+    })
+})
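[Note, not part of the patch: a rough sketch of the on-disk heartbeat state implied by the changes above — one JSON file per extension instance, now keyed by session id alone with the new `v1` suffix instead of the old `<pid>_<sessionId>.running`. The directory path and values are illustrative; the real directory comes from `globalStorageUri` plus `crashMonitoringDirName`.]

```ts
import * as path from 'path'
import { randomUUID } from 'crypto'

const sessionId = randomUUID()
const stateDir = '/path/to/globalStorage/crashMonitoring' // illustrative location

// File written by sendHeartbeat() every interval and deleted on graceful shutdown or crash handling:
const heartbeatFile = path.join(stateDir, `${sessionId}.v1`)
const heartbeatContents = {
    sessionId,
    isDebug: undefined, // only set when running under the debugger
    lastHeartbeat: Date.now(),
}

// Dev logs now print a shortened session id via truncateUuid(), e.g. 'aaaa...iiii'
console.log(heartbeatFile, JSON.stringify(heartbeatContents, undefined, 4))
```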