From f2ed2048dc184fbffc8dae2d0550292a80365117 Mon Sep 17 00:00:00 2001
From: nkomonen-amazon
Date: Tue, 22 Oct 2024 16:28:06 -0400
Subject: [PATCH 01/12] refactor: remove all time lag code

This is in preparation for a simpler implementation in the next commit.

Signed-off-by: nkomonen-amazon
---
 packages/core/src/shared/crashMonitoring.ts   | 49 ++------------
 .../core/src/shared/utilities/timeoutUtils.ts | 67 -------------------
 .../src/test/shared/crashMonitoring.test.ts   | 41 ++----------
 3 files changed, 11 insertions(+), 146 deletions(-)

diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts
index 3336a1e72fc..8823da6b0ed 100644
--- a/packages/core/src/shared/crashMonitoring.ts
+++ b/packages/core/src/shared/crashMonitoring.ts
@@ -18,7 +18,6 @@ import { getLogger } from './logger/logger'
 import { crashMonitoringDirName } from './constants'
 import { throwOnUnstableFileSystem } from './filesystemUtilities'
 import { withRetries } from './utilities/functionUtils'
-import { TimeLag } from './utilities/timeoutUtils'
 
 const className = 'CrashMonitoring'
 
@@ -69,8 +68,7 @@ export class CrashMonitoring {
         private readonly checkInterval: number,
         private readonly isDevMode: boolean,
         private readonly isAutomation: boolean,
-        private readonly devLogger: Logger | undefined,
-        private readonly timeLag: TimeLag
+        private readonly devLogger: Logger | undefined
     ) {}
 
     static #didTryCreate = false
@@ -93,8 +91,7 @@ export class CrashMonitoring {
                 DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 10), // check every 10 minutes
                 isDevMode,
                 isAutomation(),
-                devModeLogger,
-                new TimeLag()
+                devModeLogger
             ))
         } catch (error) {
             emitFailure({ functionName: 'instance', error })
@@ -117,13 +114,7 @@ export class CrashMonitoring {
             this.heartbeat = new Heartbeat(this.state, this.checkInterval, this.isDevMode)
             this.heartbeat.onFailure(() => this.cleanup())
 
-            this.crashChecker = new CrashChecker(
-                this.state,
-                this.checkInterval,
-                this.isDevMode,
-                this.devLogger,
-                this.timeLag
-            )
+            this.crashChecker = new CrashChecker(this.state, this.checkInterval, this.isDevMode, this.devLogger)
             this.crashChecker.onFailure(() => this.cleanup())
 
             await this.heartbeat.start()
@@ -228,40 +219,22 @@ class CrashChecker {
         private readonly state: FileSystemState,
         private readonly checkInterval: number,
         private readonly isDevMode: boolean,
-        private readonly devLogger: Logger | undefined,
-        /**
-         * This class is required for the following edge case:
-         *   1. Heartbeat is sent
-         *   2. Computer goes to sleep for X minutes
-         *   3. Wake up computer. But before a new heartbeat can be sent, a crash checker (can be from another ext instance) runs
-         *      and sees a stale heartbeat. It assumes a crash.
-         *
-         * Why? Intervals do not run while the computer is asleep, so the latest heartbeat has a "lag" since it wasn't able to send
-         * a new heartbeat.
-         * Then on wake, there is a race condition for the next heartbeat to be sent before the next crash check. If the crash checker
-         * runs first it will incorrectly conclude a crash.
-         *
-         * Solution: Keep track of the lag, and then skip the next crash check if there was a lag. This will give time for the
-         * next heartbeat to be sent.
- */ - private readonly timeLag: TimeLag + private readonly devLogger: Logger | undefined ) {} public async start() { { this.devLogger?.debug(`crashMonitoring: checkInterval ${this.checkInterval}`) - this.timeLag.start() - // do an initial check await withFailCtx('initialCrashCheck', () => - tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger, this.timeLag) + tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger) ) // check on an interval this.intervalRef = globals.clock.setInterval(async () => { try { - await tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger, this.timeLag) + await tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger) } catch (e) { emitFailure({ functionName: 'checkCrashInterval', error: e }) @@ -280,15 +253,8 @@ class CrashChecker { state: FileSystemState, checkInterval: number, isDevMode: boolean, - devLogger: Logger | undefined, - timeLag: TimeLag + devLogger: Logger | undefined ) { - if (await timeLag.didLag()) { - timeLag.reset() - devLogger?.warn('crashMonitoring: SKIPPED check crash due to time lag') - return - } - // check each extension if it crashed const knownExts = await state.getAllExts() const runningExts: ExtInstanceHeartbeat[] = [] @@ -345,7 +311,6 @@ class CrashChecker { /** Use this on failures to terminate the crash checker */ public cleanup() { globals.clock.clearInterval(this.intervalRef) - this.timeLag.cleanup() } /** Mimics a crash, only for testing */ diff --git a/packages/core/src/shared/utilities/timeoutUtils.ts b/packages/core/src/shared/utilities/timeoutUtils.ts index 11710589ec0..8dc38cfef5c 100644 --- a/packages/core/src/shared/utilities/timeoutUtils.ts +++ b/packages/core/src/shared/utilities/timeoutUtils.ts @@ -320,70 +320,3 @@ export function sleep(duration: number = 0): Promise { const schedule = globals?.clock?.setTimeout ?? setTimeout return new Promise((r) => schedule(r, Math.max(duration, 0))) } - -/** - * Time lag occurs when the computer goes to sleep, and intervals cannot run on the expected - * cadence. This keeps track of that lag in cadence. - */ -export class TimeLag { - private intervalRef: NodeJS.Timer | undefined - private isCompleted: Promise | undefined - /** Resolves {@link isCompleted} when the next interval of lag checking is completed */ - private setCompleted: (() => void) | undefined - - /** The last timestamp we remember. If this is more than 1 {@link lagCheckInterval} we probably did a sleep+wake */ - private latestTimestamp: number = 0 - /** The accumulation of lag before the next crash checker interval, since the user can sleep+wake multiple times in between */ - private totalLag: number = 0 - private readonly lagCheckInterval = 1000 - - public start() { - this.reset() // initialize - - // Every interval calculate the lag - this.intervalRef = globals.clock.setInterval(() => { - const expectedNow = this.latestTimestamp + this.lagCheckInterval - const actualNow = globals.clock.Date.now() - const lag = actualNow - expectedNow - - // interval callback execution is not exact, so this buffer avoids micro lag from being - // considered actual lag - if (lag > 5000) { - this.totalLag += lag - } - - this.latestTimestamp = Date.now() - - // race condition between crash checker and lag checker on computer wake. So we have - // the crash checker block until this is completed. 
- this.setCompleted!() - this.setInProgress() - }, this.lagCheckInterval) - } - - /** True if there is a time lag */ - public async didLag() { - await this.isCompleted! - return this.totalLag > 0 - } - - /** Indicates the next time lag interval check is not completed */ - private setInProgress() { - this.isCompleted = new Promise((resolve) => { - this.setCompleted = resolve - }) - } - - /** - * Call this once the user of this instance has handled the lag. If not done, {@link didLag} can - * be permanently true. - */ - public reset() { - this.totalLag = 0 - this.latestTimestamp = globals.clock.Date.now() - this.setInProgress() - } - public cleanup() { - globals.clock.clearInterval(this.intervalRef) - } -} diff --git a/packages/core/src/test/shared/crashMonitoring.test.ts b/packages/core/src/test/shared/crashMonitoring.test.ts index 6d1b311aa62..161598c84f8 100644 --- a/packages/core/src/test/shared/crashMonitoring.test.ts +++ b/packages/core/src/test/shared/crashMonitoring.test.ts @@ -9,7 +9,6 @@ import globals from '../../shared/extensionGlobals' import { CrashMonitoring, ExtInstance, crashMonitoringStateFactory } from '../../shared/crashMonitoring' import { isCI } from '../../shared/vscode/env' import { getLogger } from '../../shared/logger/logger' -import { TimeLag } from '../../shared/utilities/timeoutUtils' import { SinonSandbox, createSandbox } from 'sinon' import { fs } from '../../shared' import path from 'path' @@ -28,7 +27,6 @@ class TestCrashMonitoring extends CrashMonitoring { export const crashMonitoringTest = async () => { let testFolder: TestFolder let spawnedExtensions: TestCrashMonitoring[] - let timeLag: TimeLag let sandbox: SinonSandbox // Scale down the default interval we heartbeat and check for crashes to something much short for testing. @@ -46,12 +44,12 @@ export const crashMonitoringTest = async () => { async function makeTestExtensions(amount: number) { const extensions: TestExtension[] = [] for (let i = 0; i < amount; i++) { - extensions[i] = await makeTestExtension(i, new TimeLag()) + extensions[i] = await makeTestExtension(i) } return extensions } - async function makeTestExtension(id: number, timeLag: TimeLag, opts?: { isStateStale: () => Promise }) { + async function makeTestExtension(id: number, opts?: { isStateStale: () => Promise }) { const isStateStale = opts?.isStateStale ?? 
(() => Promise.resolve(false)) const sessionId = `sessionId-${id}` const pid = Number(String(id).repeat(6)) @@ -66,7 +64,7 @@ export const crashMonitoringTest = async () => { isDevMode: true, devLogger: getLogger(), }) - const ext = new TestCrashMonitoring(state, checkInterval, true, false, getLogger(), timeLag) + const ext = new TestCrashMonitoring(state, checkInterval, true, false, getLogger()) spawnedExtensions.push(ext) const metadata = { extHostPid: pid, @@ -80,14 +78,12 @@ export const crashMonitoringTest = async () => { beforeEach(async function () { testFolder = await TestFolder.create() spawnedExtensions = [] - timeLag = new TimeLag() sandbox = createSandbox() }) afterEach(async function () { // clean up all running instances spawnedExtensions?.forEach((e) => e.crash()) - timeLag.cleanup() sandbox.restore() }) @@ -164,7 +160,7 @@ export const crashMonitoringTest = async () => { assertTelemetry('session_end', []) // This extension clears the state due to it being stale, not reporting the previously crashed ext - const ext1 = await makeTestExtension(1, timeLag, { isStateStale: () => Promise.resolve(true) }) + const ext1 = await makeTestExtension(1, { isStateStale: () => Promise.resolve(true) }) await ext1.ext.start() await awaitIntervals(oneInterval * 1) assertCrashedExtensions([]) @@ -191,35 +187,6 @@ export const crashMonitoringTest = async () => { assertCrashedExtensions(latestCrashedExts) }) - it('does not check for crashes when there is a time lag', async function () { - // This test handles the case for a users computer doing a sleep+wake and - // then a crash was incorrectly reported since a new heartbeat could not be sent in time - - const timeLagStub = sandbox.stub(timeLag) - timeLagStub.start.resolves() - timeLagStub.didLag.resolves(false) - - // Load up a crash - const ext0 = await makeTestExtension(0, timeLagStub as unknown as TimeLag) - await ext0.ext.start() - await ext0.ext.crash() - - const ext1 = await makeTestExtension(1, timeLagStub as unknown as TimeLag) - await ext1.ext.start() - - // Indicate that we have a time lag, and until it returns false - // we will skip crash checking - timeLagStub.didLag.resolves(true) - await awaitIntervals(oneInterval) - assertCrashedExtensions([]) - await awaitIntervals(oneInterval) - assertCrashedExtensions([]) - // Now that the time lag is true, we will check for a crash - timeLagStub.didLag.resolves(false) - await awaitIntervals(oneInterval) - assertCrashedExtensions([ext0]) - }) - /** * Something like the following code can switch contexts early and the test will * finish before it has completed. 
Certain async functions that may take longer to run From e2a5682698a32c29a79a42179065361154c1b148 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Fri, 18 Oct 2024 18:53:05 -0400 Subject: [PATCH 02/12] refactor: Add more simple Time Lag implementation Signed-off-by: nkomonen-amazon --- packages/core/src/shared/crashMonitoring.ts | 93 ++++++++++++++----- .../src/test/shared/crashMonitoring.test.ts | 34 ++++++- 2 files changed, 104 insertions(+), 23 deletions(-) diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index 8823da6b0ed..5a852f4deb0 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -60,8 +60,8 @@ const className = 'CrashMonitoring' * - To mitigate this we do not run crash reporting on machines that we detect have a flaky filesystem */ export class CrashMonitoring { - protected heartbeat: Heartbeat | undefined - protected crashChecker: CrashChecker | undefined + protected heartbeat: Heartbeat + protected crashChecker: CrashChecker constructor( private readonly state: FileSystemState, @@ -69,7 +69,13 @@ export class CrashMonitoring { private readonly isDevMode: boolean, private readonly isAutomation: boolean, private readonly devLogger: Logger | undefined - ) {} + ) { + this.heartbeat = new Heartbeat(this.state, this.checkInterval, this.isDevMode) + this.heartbeat.onFailure(() => this.cleanup()) + + this.crashChecker = new CrashChecker(this.state, this.checkInterval, this.isDevMode, this.devLogger) + this.crashChecker.onFailure(() => this.cleanup()) + } static #didTryCreate = false static #instance: CrashMonitoring | undefined @@ -111,12 +117,6 @@ export class CrashMonitoring { } try { - this.heartbeat = new Heartbeat(this.state, this.checkInterval, this.isDevMode) - this.heartbeat.onFailure(() => this.cleanup()) - - this.crashChecker = new CrashChecker(this.state, this.checkInterval, this.isDevMode, this.devLogger) - this.crashChecker.onFailure(() => this.cleanup()) - await this.heartbeat.start() await this.crashChecker.start() } catch (error) { @@ -135,8 +135,8 @@ export class CrashMonitoring { /** Stop the Crash Monitoring process, signifying a graceful shutdown */ public async shutdown() { try { - this.crashChecker?.cleanup() - await this.heartbeat?.shutdown() + this.crashChecker.cleanup() + await this.heartbeat.shutdown() } catch (error) { try { // This probably wont emit in time before shutdown, but may be written to the logs @@ -150,8 +150,8 @@ export class CrashMonitoring { } public async cleanup() { - this.crashChecker?.cleanup() - await this.heartbeat?.shutdown() + this.crashChecker.cleanup() + await this.heartbeat.shutdown() } } @@ -214,13 +214,16 @@ class CrashChecker { private intervalRef: NodeJS.Timer | undefined private _onFailure = new vscode.EventEmitter() public onFailure = this._onFailure.event + public timeLag: TimeLag constructor( private readonly state: FileSystemState, private readonly checkInterval: number, private readonly isDevMode: boolean, private readonly devLogger: Logger | undefined - ) {} + ) { + this.timeLag = new TimeLag(this.checkInterval) + } public async start() { { @@ -231,10 +234,26 @@ class CrashChecker { tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger) ) + this.timeLag.init() + // check on an interval this.intervalRef = globals.clock.setInterval(async () => { try { - await tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger) + // Due to sleep+wake the heartbeat has not been 
sent consistently. Skip 1 crash check interval + // to allow for a new heartbeat to be sent + if (this.timeLag.didLag()) { + this.devLogger?.warn('crashMonitoring: LAG detected time lag and skipped a crash check') + telemetry.function_call.emit({ + className: className, + functionName: 'timeLag', + result: 'Succeeded', + duration: this.timeLag.getLag(), // How much the lag actually was + }) + } else { + await tryCheckCrash(this.state, this.checkInterval, this.isDevMode, this.devLogger) + } + + this.timeLag.updateLastExecution() } catch (e) { emitFailure({ functionName: 'checkCrashInterval', error: e }) @@ -319,6 +338,43 @@ class CrashChecker { } } +/** + * Helper class to detect when an interval does not execute at the expected cadence. + * It works by calculating the time diff between interval executions, indicating a lag if this diff + * is greater than the expected interval. + * + * When the computer goes to sleep, intervals do not run. So when the computer wakes it will + * pick up where it left off. We have cases where we want to detect this. + * + * TODO: Merge this in with the Interval class so that it comes for free with it. + */ +class TimeLag { + private lastExecution: number = 0 + + constructor(private readonly interval: number) {} + + /** Call this right before the interval is defined */ + public init() { + this.updateLastExecution() + } + + /** Call this at the beginning of each interval execution to determine if a lag was detected */ + public didLag() { + const buffer = 10_000 // accounts for the actual work done in an interval + return this.getLag() > this.interval + buffer + } + + /** Get the time diff between the last interval execution and now */ + public getLag() { + return globals.clock.Date.now() - this.lastExecution + } + + /** Call this at the end of your interval callback */ + public updateLastExecution() { + this.lastExecution = globals.clock.Date.now() + } +} + /** * We define this externally so that we have a single source of truth for the contructor args. * Ideally we'd use ConstructorParameters, but it does not work when the constructor is protected. 
@@ -385,12 +441,7 @@ export class FileSystemState { */ public async init() { // IMPORTANT: do not run crash reporting on unstable filesystem to reduce invalid crash data - // - // NOTE: Emits a metric to know how many clients we skipped - await telemetry.function_call.run(async (span) => { - span.record({ className, functionName: 'FileSystemStateValidation' }) - await withFailCtx('validateFileSystemStability', () => throwOnUnstableFileSystem()) - }) + await withFailCtx('validateFileSystemStability', () => throwOnUnstableFileSystem()) // Clear the state if the user did something like a computer restart if (await this.deps.isStateStale()) { diff --git a/packages/core/src/test/shared/crashMonitoring.test.ts b/packages/core/src/test/shared/crashMonitoring.test.ts index 161598c84f8..f971d8561e2 100644 --- a/packages/core/src/test/shared/crashMonitoring.test.ts +++ b/packages/core/src/test/shared/crashMonitoring.test.ts @@ -19,8 +19,12 @@ class TestCrashMonitoring extends CrashMonitoring { } /** Imitates an extension crash */ public async crash() { - this.crashChecker?.testCrash() - this.heartbeat?.testCrash() + this.crashChecker.testCrash() + this.heartbeat.testCrash() + } + + public getTimeLag() { + return this.crashChecker.timeLag } } @@ -187,6 +191,32 @@ export const crashMonitoringTest = async () => { assertCrashedExtensions(latestCrashedExts) }) + it('does not check for crashes when there is a time lag', async function () { + // This test handles the case for a users computer doing a sleep+wake and + // then a crash was incorrectly reported since a new heartbeat could not be sent in time + + // Load up a crash + const ext0 = await makeTestExtension(0) + await ext0.ext.start() + await ext0.ext.crash() + + const ext1 = await makeTestExtension(1) + // Indicate that we have a time lag, and until it returns false + // we will skip crash checking + const didLagStub = sandbox.stub(ext1.ext.getTimeLag(), 'didLag') + didLagStub.returns(true) + await ext1.ext.start() + + // Since we have a time lag the crash checker will not run + await awaitIntervals(oneInterval * 2) + assertCrashedExtensions([]) + + // Now that the time lag is true, we will check for a crash + didLagStub.returns(false) + await awaitIntervals(oneInterval) + assertCrashedExtensions([ext0]) + }) + /** * Something like the following code can switch contexts early and the test will * finish before it has completed. Certain async functions that may take longer to run From aee976d7a180f680924150e8afe2a1e9212ba726 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Wed, 23 Oct 2024 16:56:09 -0400 Subject: [PATCH 03/12] minor fixes - Increase check interval from 10 to 30 minutes, so 15 minute heartbeats. - Emit failure if heartbeat abort itself fails. This will let us know that if heartbeats fail and we try to clean them up, that the cleanup failed. We need to know this otherwise there will be incorrectly reported crashes. - Emit a metric when we detect a computer restart and handle it - Validate that when we write a new heartbeat we can immediately read it from disk. 
Signed-off-by: nkomonen-amazon --- packages/core/src/shared/crashMonitoring.ts | 31 +++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index 5a852f4deb0..19b4d3efa46 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -94,7 +94,7 @@ export class CrashMonitoring { const state = await crashMonitoringStateFactory() // can throw return (this.#instance ??= new CrashMonitoring( state, - DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 10), // check every 10 minutes + DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 30), // check every 30 minutes isDevMode, isAutomation(), devModeLogger @@ -123,7 +123,9 @@ export class CrashMonitoring { emitFailure({ functionName: 'start', error }) try { await this.cleanup() - } catch {} + } catch (e) { + emitFailure({ functionName: 'startErrorHandler', error: e }) + } // Surface errors during development, otherwise it can be missed. if (this.isDevMode) { @@ -183,9 +185,12 @@ class Heartbeat { await this.state.sendHeartbeat() } catch (e) { try { + // Since something went wrong cleanup everything to prevent false crash report await this.shutdown() emitFailure({ functionName: 'sendHeartbeatInterval', error: e }) - } catch {} + } catch (e) { + emitFailure({ functionName: 'sendHeartbeatIntervalErrorHandler', error: e }) + } if (this.isDevMode) { throw e @@ -445,7 +450,10 @@ export class FileSystemState { // Clear the state if the user did something like a computer restart if (await this.deps.isStateStale()) { - await this.clearState() + await telemetry.function_call.run(async (span) => { + span.record({ functionName: 'handleComputerRestart', className }) + await this.clearState() + }) } await withFailCtx('init', () => fs.mkdir(this.stateDirPath)) @@ -455,13 +463,20 @@ export class FileSystemState { public async sendHeartbeat() { try { const func = async () => { - await fs.writeFile( - this.makeStateFilePath(this.extId), - JSON.stringify({ ...this.ext, lastHeartbeat: this.deps.now() }, undefined, 4) - ) + const filePath = this.makeStateFilePath(this.extId) + const lastHeartbeat = this.deps.now() + + await fs.writeFile(filePath, JSON.stringify({ ...this.ext, lastHeartbeat }, undefined, 4)) + this.deps.devLogger?.debug( `crashMonitoring: HEARTBEAT pid ${this.deps.pid} + sessionId: ${this.deps.sessionId.slice(0, 8)}-...` ) + + // Sanity check to verify the write is accessible immediately after + const heartbeatData = JSON.parse(await fs.readFileText(filePath)) as ExtInstanceHeartbeat + if (heartbeatData.lastHeartbeat !== lastHeartbeat) { + throw new CrashMonitoringError('Heartbeat write validation failed', { code: className }) + } } const funcWithCtx = () => withFailCtx('sendHeartbeatState', func) const funcWithRetries = withRetries(funcWithCtx, { maxRetries: 8, delay: 100, backoff: 2 }) From ed1fe8f17ebd8f0d459525bdb4d81f8562a9e032 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 24 Oct 2024 00:05:07 -0400 Subject: [PATCH 04/12] telemetry: emit on heartbeats + add timestamp field - Now we emit an event for every heartbeat (every 10 minutes) - Include a timestamp field in the heartbeat and session_end so we can see the diff if any Signed-off-by: nkomonen-amazon --- packages/core/src/shared/crashMonitoring.ts | 24 ++++++++-------- .../src/shared/telemetry/vscodeTelemetry.json | 28 +++++++++++++++++++ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git 
a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index 19b4d3efa46..4e02bfeaac1 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -94,7 +94,7 @@ export class CrashMonitoring { const state = await crashMonitoringStateFactory() // can throw return (this.#instance ??= new CrashMonitoring( state, - DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 30), // check every 30 minutes + DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 20), // check every 20 minutes isDevMode, isAutomation(), devModeLogger @@ -305,6 +305,7 @@ class CrashChecker { proxiedSessionId: ext.sessionId, reason: 'ExtHostCrashed', passive: true, + timestamp: ext.lastHeartbeat, }) devLogger?.debug( @@ -462,25 +463,26 @@ export class FileSystemState { // ------------------ Heartbeat methods ------------------ public async sendHeartbeat() { try { + const now = this.deps.now() const func = async () => { const filePath = this.makeStateFilePath(this.extId) - const lastHeartbeat = this.deps.now() - await fs.writeFile(filePath, JSON.stringify({ ...this.ext, lastHeartbeat }, undefined, 4)) - - this.deps.devLogger?.debug( - `crashMonitoring: HEARTBEAT pid ${this.deps.pid} + sessionId: ${this.deps.sessionId.slice(0, 8)}-...` - ) + await fs.writeFile(filePath, JSON.stringify({ ...this.ext, lastHeartbeat: now }, undefined, 4)) // Sanity check to verify the write is accessible immediately after const heartbeatData = JSON.parse(await fs.readFileText(filePath)) as ExtInstanceHeartbeat - if (heartbeatData.lastHeartbeat !== lastHeartbeat) { + if (heartbeatData.lastHeartbeat !== now) { throw new CrashMonitoringError('Heartbeat write validation failed', { code: className }) } } const funcWithCtx = () => withFailCtx('sendHeartbeatState', func) - const funcWithRetries = withRetries(funcWithCtx, { maxRetries: 8, delay: 100, backoff: 2 }) - return await funcWithRetries + const funcWithRetries = withRetries(funcWithCtx, { maxRetries: 6, delay: 100, backoff: 2 }) + const funcWithTelemetryRun = await telemetry.ide_heartbeat.run((span) => { + span.record({ id: className, timestamp: now }) + return funcWithRetries + }) + + return funcWithTelemetryRun } catch (e) { // delete this ext from the state to avoid an incorrectly reported crash since we could not send a new heartbeat await withFailCtx('sendHeartbeatFailureCleanup', () => this.clearHeartbeat()) @@ -523,7 +525,7 @@ export class FileSystemState { // Retry file deletion to prevent incorrect crash reports. Common Windows errors seen in telemetry: EPERM/EBUSY. 
// See: https://github.com/aws/aws-toolkit-vscode/pull/5335 await withRetries(() => withFailCtx('deleteStaleRunningFile', () => fs.delete(this.makeStateFilePath(ext))), { - maxRetries: 8, + maxRetries: 6, delay: 100, backoff: 2, }) diff --git a/packages/core/src/shared/telemetry/vscodeTelemetry.json b/packages/core/src/shared/telemetry/vscodeTelemetry.json index 9c3dbbdfd67..3f2f4be53b4 100644 --- a/packages/core/src/shared/telemetry/vscodeTelemetry.json +++ b/packages/core/src/shared/telemetry/vscodeTelemetry.json @@ -311,6 +311,11 @@ "type": "string", "description": "The CPU architecture" }, + { + "name": "timestamp", + "type": "int", + "description": "A timestamp in milliseconds" + }, { "name": "traceId", "type": "string", @@ -1184,8 +1189,31 @@ { "type": "proxiedSessionId", "required": false + }, + { + "type": "timestamp", + "required": false } ] + }, + { + "name": "ide_heartbeat", + "description": "A heartbeat sent by the extension", + "metadata": [ + { + "type": "id", + "required": true + }, + { + "type": "name", + "required": false + }, + { + "type": "timestamp", + "required": false + } + ], + "passive": true } ] } From 340418c6b770434703c3f1ba9d74945203047338 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 24 Oct 2024 00:23:15 -0400 Subject: [PATCH 05/12] minor refactor Signed-off-by: nkomonen-amazon --- packages/core/src/shared/crashMonitoring.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index 4e02bfeaac1..dcc571b2655 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -137,8 +137,7 @@ export class CrashMonitoring { /** Stop the Crash Monitoring process, signifying a graceful shutdown */ public async shutdown() { try { - this.crashChecker.cleanup() - await this.heartbeat.shutdown() + await this.cleanup() } catch (error) { try { // This probably wont emit in time before shutdown, but may be written to the logs From 35eb271baa151aab05908f4b075f7bded70ceb55 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 24 Oct 2024 00:50:42 -0400 Subject: [PATCH 06/12] refactor: remove PID requirement We do not need the PID anymore since we fixed the sessionId so that a new one is generated even after the ext host crashes and restarts. 
The PID previously allowed us to differentiate between IDE instances with the
same sessionId but different PIDs.

Signed-off-by: nkomonen-amazon
---
 packages/core/src/shared/crashMonitoring.ts   | 20 ++++++-----------
 packages/core/src/shared/crypto.ts            | 12 ++++++++++
 .../src/test/shared/crashMonitoring.test.ts   | 10 +++------
 packages/core/src/test/shared/crypto.test.ts  | 22 ++++++++++++++++++-
 4 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts
index dcc571b2655..671e07a179a 100644
--- a/packages/core/src/shared/crashMonitoring.ts
+++ b/packages/core/src/shared/crashMonitoring.ts
@@ -18,6 +18,7 @@ import { getLogger } from './logger/logger'
 import { crashMonitoringDirName } from './constants'
 import { throwOnUnstableFileSystem } from './filesystemUtilities'
 import { withRetries } from './utilities/functionUtils'
+import { truncateUuid } from './crypto'
 
 const className = 'CrashMonitoring'
 
@@ -307,17 +308,15 @@ class CrashChecker {
                     timestamp: ext.lastHeartbeat,
                 })
 
-                devLogger?.debug(
-                    `crashMonitoring: CRASH: following has crashed: pid ${ext.extHostPid} + sessionId: ${ext.sessionId}`
-                )
+                devLogger?.debug(`crashMonitoring: CRASH: following has crashed: sessionId: ${ext.sessionId}`)
             })
         }
 
         if (isDevMode) {
-            const before = knownExts.map((i) => i.extHostPid)
-            const after = runningExts.map((i) => i.extHostPid)
+            const before = knownExts.map((i) => truncateUuid(i.sessionId))
+            const after = runningExts.map((i) => truncateUuid(i.sessionId))
             // Sanity check: ENSURE THAT AFTER === ACTUAL or this implies that our data is out of sync
-            const afterActual = (await state.getAllExts()).map((i) => i.extHostPid)
+            const afterActual = (await state.getAllExts()).map((i) => truncateUuid(i.sessionId))
             devLogger?.debug(
                 `crashMonitoring: CHECKED: Result of cleaning up stopped instances\nBEFORE: ${JSON.stringify(before)}\nAFTER: ${JSON.stringify(after)}\nACTUAL: ${JSON.stringify(afterActual)}`
             )
@@ -386,7 +385,6 @@ class TimeLag {
  */
 type MementoStateDependencies = {
     memento: vscode.Memento
-    pid: number
     sessionId: string
     workDirPath: string
     isDevMode: boolean
@@ -401,7 +399,6 @@ function getDefaultDependencies(): MementoStateDependencies {
         workDirPath: path.join(globals.context.globalStorageUri.fsPath),
         memento: globals.globalState as vscode.Memento,
         isStateStale: () => isNewOsSession(),
-        pid: process.pid,
         sessionId: _getSessionId(),
         isDevMode: getIsDevMode(),
         devLogger: getIsDevMode() ? getLogger() : undefined,
@@ -433,7 +430,6 @@ export class FileSystemState {
 
     constructor(protected readonly deps: MementoStateDependencies) {
         this.stateDirPath = path.join(this.deps.workDirPath, crashMonitoringDirName)
-        this.deps.devLogger?.debug(`crashMonitoring: pid: ${this.deps.pid}`)
         this.deps.devLogger?.debug(`crashMonitoring: sessionId: ${this.deps.sessionId.slice(0, 8)}-...`)
         this.deps.devLogger?.debug(`crashMonitoring: dir: ${this.stateDirPath}`)
     }
@@ -538,7 +534,6 @@ export class FileSystemState {
     protected get ext(): ExtInstance {
         return {
             sessionId: this.deps.sessionId,
-            extHostPid: this.deps.pid,
             isDebug: isDebugInstance() ? true : undefined,
         }
     }
@@ -550,9 +545,9 @@ export class FileSystemState {
      * - The Extension Host PID used in addition to the session ID should be good enough to uniquely identify.
*/ protected createExtId(ext: ExtInstance): ExtInstanceId { - return `${ext.extHostPid}_${ext.sessionId}` + return ext.sessionId } - private readonly fileSuffix = 'running' + private readonly fileSuffix = 'v1' private makeStateFilePath(ext: ExtInstance | ExtInstanceId) { const extId = typeof ext === 'string' ? ext : this.createExtId(ext) return path.join(this.stateDirPath, extId + `.${this.fileSuffix}`) @@ -654,7 +649,6 @@ type ExtInstanceId = string /** The static metadata of an instance of this extension */ export type ExtInstance = { - extHostPid: number sessionId: string lastHeartbeat?: number /** diff --git a/packages/core/src/shared/crypto.ts b/packages/core/src/shared/crypto.ts index a1496ad054a..808f5730d81 100644 --- a/packages/core/src/shared/crypto.ts +++ b/packages/core/src/shared/crypto.ts @@ -42,3 +42,15 @@ export function isUuid(uuid: string): boolean { const uuidPattern = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i return uuidPattern.test(uuid) } + +/** + * Eg: 'aaaabbbb-cccc-dddd-eeee-ffffhhhhiiii' -> 'aaaa...iiii' + */ +export function truncateUuid(uuid: string) { + if (uuid.length !== 36) { + throw new Error(`Cannot truncate uuid of value: "${uuid}"`) + } + + const cleanedUUID = uuid.replace(/-/g, '') + return `${cleanedUUID.substring(0, 4)}...${cleanedUUID.substring(cleanedUUID.length - 4)}` +} diff --git a/packages/core/src/test/shared/crashMonitoring.test.ts b/packages/core/src/test/shared/crashMonitoring.test.ts index f971d8561e2..c06b8b167a1 100644 --- a/packages/core/src/test/shared/crashMonitoring.test.ts +++ b/packages/core/src/test/shared/crashMonitoring.test.ts @@ -10,7 +10,7 @@ import { CrashMonitoring, ExtInstance, crashMonitoringStateFactory } from '../.. import { isCI } from '../../shared/vscode/env' import { getLogger } from '../../shared/logger/logger' import { SinonSandbox, createSandbox } from 'sinon' -import { fs } from '../../shared' +import { fs, randomUUID } from '../../shared' import path from 'path' class TestCrashMonitoring extends CrashMonitoring { @@ -55,13 +55,11 @@ export const crashMonitoringTest = async () => { async function makeTestExtension(id: number, opts?: { isStateStale: () => Promise }) { const isStateStale = opts?.isStateStale ?? 
(() => Promise.resolve(false)) - const sessionId = `sessionId-${id}` - const pid = Number(String(id).repeat(6)) + const sessionId = randomUUID() const state = await crashMonitoringStateFactory({ workDirPath: testFolder.path, isStateStale, - pid, sessionId: sessionId, now: () => globals.clock.Date.now(), memento: globals.globalState, @@ -71,7 +69,6 @@ export const crashMonitoringTest = async () => { const ext = new TestCrashMonitoring(state, checkInterval, true, false, getLogger()) spawnedExtensions.push(ext) const metadata = { - extHostPid: pid, sessionId, lastHeartbeat: globals.clock.Date.now(), isDebug: undefined, @@ -276,8 +273,7 @@ export const crashMonitoringTest = async () => { const state = await crashMonitoringStateFactory({ workDirPath: testFolder.path, isStateStale: () => Promise.resolve(false), - pid: 1111, - sessionId: 'sessionId_1111', + sessionId: randomUUID(), now: () => globals.clock.Date.now(), memento: globals.globalState, isDevMode: true, diff --git a/packages/core/src/test/shared/crypto.test.ts b/packages/core/src/test/shared/crypto.test.ts index daefe0debe1..77e4131bb78 100644 --- a/packages/core/src/test/shared/crypto.test.ts +++ b/packages/core/src/test/shared/crypto.test.ts @@ -4,7 +4,7 @@ */ import assert from 'assert' -import { isUuid, randomUUID } from '../../shared/crypto' +import { isUuid, randomUUID, truncateUuid } from '../../shared/crypto' describe('crypto', function () { describe('randomUUID()', function () { @@ -44,3 +44,23 @@ describe('crypto', function () { assert.equal(isUuid('47fe01cf-f37a-4e7c-b971-d10fe5897763'.toUpperCase()), true) }) }) + +describe('truncateUUID', function () { + it('should return the first 4 and last 4 characters of a valid UUID', function () { + const fullUUID = 'aaaabbbb-cccc-dddd-eeee-ffffhhhhiiii' + const result = truncateUuid(fullUUID) + assert.strictEqual(result, 'aaaa...iiii') + }) + + it('should handle a different valid UUID correctly', function () { + const fullUUID = '12340000-0000-0000-0000-000000005678' + const result = truncateUuid(fullUUID) + assert.strictEqual(result, '1234...5678') + }) + + it('should throw an error if the input is not 36 characters long', function () { + assert.throws(() => { + truncateUuid('invalid-uuid') + }, /Cannot truncate uuid of value: "invalid-uuid"/) + }) +}) From a858f957d9330706a2064dee12997a612f8c3b43 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 24 Oct 2024 01:07:11 -0400 Subject: [PATCH 07/12] minor refactor simplify the code that deletes the heartbeat file Signed-off-by: nkomonen-amazon --- packages/core/src/shared/crashMonitoring.ts | 33 ++++++++------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index 671e07a179a..797a9974b70 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -457,10 +457,11 @@ export class FileSystemState { // ------------------ Heartbeat methods ------------------ public async sendHeartbeat() { + const extId = this.extId try { const now = this.deps.now() const func = async () => { - const filePath = this.makeStateFilePath(this.extId) + const filePath = this.makeStateFilePath(extId) await fs.writeFile(filePath, JSON.stringify({ ...this.ext, lastHeartbeat: now }, undefined, 4)) @@ -480,14 +481,10 @@ export class FileSystemState { return funcWithTelemetryRun } catch (e) { // delete this ext from the state to avoid an incorrectly reported crash since we could not send a new heartbeat 
- await withFailCtx('sendHeartbeatFailureCleanup', () => this.clearHeartbeat()) + await this.deleteHeartbeatFile(extId, 'sendHeartbeatFailureCleanup') throw e } } - /** Clears this extentions heartbeat from the state */ - public async clearHeartbeat() { - await this.deleteHeartbeatFile(this.extId) - } /** * Indicates that this extension instance has gracefully shutdown. @@ -502,28 +499,22 @@ export class FileSystemState { public async indicateGracefulShutdown(): Promise { // By removing the heartbeat entry, the crash checkers will not be able to find this entry anymore, making it // impossible to report on since the file system is the source of truth - await withFailCtx('indicateGracefulShutdown', () => - nodeFs.rm(this.makeStateFilePath(this.extId), { force: true }) - ) + await this.deleteHeartbeatFile(this.extId, 'indicateGracefulShutdown') } // ------------------ Checker Methods ------------------ public async handleCrashedExt(ext: ExtInstance, fn: () => void) { - await withFailCtx('handleCrashedExt', async () => { - await this.deleteHeartbeatFile(ext) - fn() - }) + await this.deleteHeartbeatFile(ext, 'handleCrashedExt') + await withFailCtx('handleCrashedExt', async () => fn()) } - private async deleteHeartbeatFile(ext: ExtInstanceId | ExtInstance) { - // Retry file deletion to prevent incorrect crash reports. Common Windows errors seen in telemetry: EPERM/EBUSY. - // See: https://github.com/aws/aws-toolkit-vscode/pull/5335 - await withRetries(() => withFailCtx('deleteStaleRunningFile', () => fs.delete(this.makeStateFilePath(ext))), { - maxRetries: 6, - delay: 100, - backoff: 2, - }) + private async deleteHeartbeatFile(ext: ExtInstanceId | ExtInstance, ctx: string) { + // IMPORTANT: Must use NodeFs here since this is used during shutdown + const func = () => nodeFs.rm(this.makeStateFilePath(ext), { force: true }) + const funcWithCtx = () => withFailCtx(ctx, func) + const funcWithRetries = withRetries(funcWithCtx, { maxRetries: 6, delay: 100, backoff: 2 }) + await funcWithRetries } // ------------------ State data ------------------ From 4026148be50446196abd83eb1ffcabd57cee77f1 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 24 Oct 2024 01:35:05 -0400 Subject: [PATCH 08/12] minor refactor - reduce log messages Signed-off-by: nkomonen-amazon --- packages/core/src/shared/crashMonitoring.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index 797a9974b70..f70cb1049bd 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -307,8 +307,6 @@ class CrashChecker { passive: true, timestamp: ext.lastHeartbeat, }) - - devLogger?.debug(`crashMonitoring: CRASH: following has crashed: sessionId: ${ext.sessionId}`) }) } @@ -318,7 +316,7 @@ class CrashChecker { // Sanity check: ENSURE THAT AFTER === ACTUAL or this implies that our data is out of sync const afterActual = (await state.getAllExts()).map((i) => truncateUuid(i.sessionId)) devLogger?.debug( - `crashMonitoring: CHECKED: Result of cleaning up stopped instances\nBEFORE: ${JSON.stringify(before)}\nAFTER: ${JSON.stringify(after)}\nACTUAL: ${JSON.stringify(afterActual)}` + `crashMonitoring: CHECKED: Result of cleaning up crashed instances\nBEFORE: ${JSON.stringify(before)}\nAFTER: ${JSON.stringify(after)}\nACTUAL: ${JSON.stringify(afterActual)}` ) } From 0b7dc77cdfa29f0e59c920ea57764f1c573b792b Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 24 Oct 2024 01:53:13 
-0400 Subject: [PATCH 09/12] minor refactor - clean up code Signed-off-by: nkomonen-amazon --- packages/core/src/shared/crashMonitoring.ts | 54 ++++++++------------- 1 file changed, 21 insertions(+), 33 deletions(-) diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index f70cb1049bd..6604733a353 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -549,24 +549,15 @@ export class FileSystemState { }) } public async getAllExts(): Promise { - const res = await withFailCtx('getAllExts', async () => { + return withFailCtx('getAllExts', async () => { // Read all the exts from the filesystem, deserializing as needed const allExtIds: ExtInstanceId[] = await withFailCtx('readdir', async () => { const filesInDir = await fs.readdir(this.stateDirPath) - const relevantFiles = filesInDir.filter((file: [string, vscode.FileType]) => { - const name = file[0] - const type = file[1] - if (type !== vscode.FileType.File) { - return false - } - if (path.extname(name) !== `.${this.fileSuffix}`) { - return false - } - return true + const relevantFiles = filesInDir.filter((file: [name: string, type: vscode.FileType]) => { + return file[1] === vscode.FileType.File && path.extname(file[0]) === `.${this.fileSuffix}` }) - const idsFromFileNames = relevantFiles.map((file: [string, vscode.FileType]) => { - const name = file[0] - return name.split('.')[0] + const idsFromFileNames = relevantFiles.map((file: [name: string, type: vscode.FileType]) => { + return file[0].split('.')[0] }) return idsFromFileNames }) @@ -575,22 +566,21 @@ export class FileSystemState { // Due to a race condition, a separate extension instance may have removed this file by this point. It is okay since // we will assume that other instance handled its termination appropriately. // NOTE: On Windows we were failing on EBUSY, so we retry on failure. 
- const ext: ExtInstanceHeartbeat | undefined = await withRetries( - () => - withFailCtx('parseRunningExtFile', async () => - ignoreBadFileError(async () => { - const text = await fs.readFileText(this.makeStateFilePath(extId)) - - if (!text) { - return undefined - } - - // This was sometimes throwing SyntaxError - return JSON.parse(text) as ExtInstanceHeartbeat - }) - ), - { maxRetries: 6, delay: 100, backoff: 2 } - ) + const loadExtFromDisk = async () => { + const text = await fs.readFileText(this.makeStateFilePath(extId)) + + if (!text) { + return undefined + } + + // This was sometimes throwing SyntaxError + return JSON.parse(text) as ExtInstanceHeartbeat + } + const funcWithIgnoreBadFile = () => ignoreBadFileError(loadExtFromDisk) + const funcWithRetries = () => + withRetries(funcWithIgnoreBadFile, { maxRetries: 6, delay: 100, backoff: 2 }) + const funcWithCtx = () => withFailCtx('parseRunningExtFile', funcWithRetries) + const ext: ExtInstanceHeartbeat | undefined = await funcWithCtx() if (ext === undefined) { return @@ -602,10 +592,8 @@ export class FileSystemState { return ext }) // filter out undefined before returning - const result = (await Promise.all(allExts)).filter(isExtHeartbeat) - return result + return (await Promise.all(allExts)).filter(isExtHeartbeat) }) - return res } } From b6ee6409b0b64ce98346025f813bb3a7d7767525 Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 24 Oct 2024 11:20:34 -0400 Subject: [PATCH 10/12] do not run for cloud desktop Signed-off-by: nkomonen-amazon --- packages/core/src/shared/crashMonitoring.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index 6604733a353..69375be5e5c 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -4,7 +4,7 @@ */ import path from 'path' import globals, { isWeb } from './extensionGlobals' -import { getSessionId as _getSessionId } from './telemetry/util' +import { getSessionId as _getSessionId, getComputeEnvType } from './telemetry/util' import { getErrorId, getTelemetryReason, getTelemetryReasonDesc, isFileNotFoundError, ToolkitError } from './errors' import { isAutomation, isDebugInstance } from './vscode/env' import { DevSettings } from './settings' @@ -82,6 +82,12 @@ export class CrashMonitoring { static #instance: CrashMonitoring | undefined /** Returns an instance of this class or undefined if any initial validation fails. */ public static async instance(): Promise { + // - We do not support web due to some incompatibilities + // - We do not support cloud desktop due to false crash reports due to sleep+vpn+ssh timeouts with the IDE + if (isWeb() || (await getComputeEnvType()) === 'cloudDesktop-amzn') { + return undefined + } + // Since the first attempt to create an instance may have failed, we do not // attempt to create an instance again and return whatever we have if (this.#didTryCreate === true) { @@ -108,10 +114,6 @@ export class CrashMonitoring { /** Start the Crash Monitoring process */ public async start() { - if (isWeb()) { - return - } - // During tests, the Prod code also runs this function. It interferes with telemetry assertion since it reports additional heartbeats. 
if (this.isAutomation) { return From 36b7e5467bac0e7eb9d24deb464f6ea43867728a Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 24 Oct 2024 12:15:28 -0400 Subject: [PATCH 11/12] fix PR comment Signed-off-by: nkomonen-amazon --- packages/core/src/test/shared/crypto.test.ts | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/packages/core/src/test/shared/crypto.test.ts b/packages/core/src/test/shared/crypto.test.ts index 77e4131bb78..f9620d41233 100644 --- a/packages/core/src/test/shared/crypto.test.ts +++ b/packages/core/src/test/shared/crypto.test.ts @@ -47,15 +47,13 @@ describe('crypto', function () { describe('truncateUUID', function () { it('should return the first 4 and last 4 characters of a valid UUID', function () { - const fullUUID = 'aaaabbbb-cccc-dddd-eeee-ffffhhhhiiii' - const result = truncateUuid(fullUUID) - assert.strictEqual(result, 'aaaa...iiii') - }) + const fullUUID1 = 'aaaabbbb-cccc-dddd-eeee-ffffhhhhiiii' + const result1 = truncateUuid(fullUUID1) + assert.strictEqual(result1, 'aaaa...iiii') - it('should handle a different valid UUID correctly', function () { - const fullUUID = '12340000-0000-0000-0000-000000005678' - const result = truncateUuid(fullUUID) - assert.strictEqual(result, '1234...5678') + const fullUUID2 = '12340000-0000-0000-0000-000000005678' + const result2 = truncateUuid(fullUUID2) + assert.strictEqual(result2, '1234...5678') }) it('should throw an error if the input is not 36 characters long', function () { From f7822ab6a2a7aa300b648f1e9f347f8aa76bf21f Mon Sep 17 00:00:00 2001 From: nkomonen-amazon Date: Thu, 24 Oct 2024 12:21:28 -0400 Subject: [PATCH 12/12] heartbeat - delete before overwrite Handles a possible edge case when sending a heartbeat Signed-off-by: nkomonen-amazon --- packages/core/src/shared/crashMonitoring.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/core/src/shared/crashMonitoring.ts b/packages/core/src/shared/crashMonitoring.ts index 69375be5e5c..bdbc66af230 100644 --- a/packages/core/src/shared/crashMonitoring.ts +++ b/packages/core/src/shared/crashMonitoring.ts @@ -463,6 +463,10 @@ export class FileSystemState { const func = async () => { const filePath = this.makeStateFilePath(extId) + // We were seeing weird behavior where we possibly read an old file, even though we overwrote it. + // So this is a sanity check. + await fs.delete(filePath, { force: true }) + await fs.writeFile(filePath, JSON.stringify({ ...this.ext, lastHeartbeat: now }, undefined, 4)) // Sanity check to verify the write is accessible immediately after
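
For illustration, the lag-detection pattern introduced in patch 02 can be
exercised on its own. The following is a minimal standalone sketch, assuming
plain Node.js timers and Date.now() in place of the toolkit's globals.clock;
the 10-second buffer mirrors the one in didLag():

    // Mirrors the TimeLag class from patch 02, outside the extension codebase.
    class TimeLag {
        private lastExecution = 0

        constructor(private readonly interval: number) {}

        /** Call right before the interval is created */
        public init() {
            this.updateLastExecution()
        }

        /** True if the interval fired much later than scheduled, e.g. after sleep+wake */
        public didLag() {
            const buffer = 10_000 // tolerates timer jitter and in-callback work
            return this.getLag() > this.interval + buffer
        }

        /** Time since the last completed execution */
        public getLag() {
            return Date.now() - this.lastExecution
        }

        /** Call at the end of each interval callback */
        public updateLastExecution() {
            this.lastExecution = Date.now()
        }
    }

    const checkInterval = 5000
    const timeLag = new TimeLag(checkInterval)
    timeLag.init()
    setInterval(() => {
        if (timeLag.didLag()) {
            // e.g. the machine slept: skip this check so a fresh heartbeat can land first
            console.log(`lag of ${timeLag.getLag()}ms detected, skipping crash check`)
        } else {
            console.log('no lag, running crash check')
        }
        timeLag.updateLastExecution()
    }, checkInterval)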