Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
312 changes: 159 additions & 153 deletions packages/core/src/test/shared/crashMonitoring.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
* SPDX-License-Identifier: Apache-2.0
*/

import { assertTelemetry, getMetrics, partialDeepCompare, TestFolder } from '../testUtil'
import { getMetrics, partialDeepCompare, TestFolder } from '../testUtil'
import assert from 'assert'
import globals from '../../shared/extensionGlobals'
import { CrashMonitoring, ExtInstance, crashMonitoringStateFactory } from '../../shared/crashMonitoring'
import { isCI } from '../../shared/vscode/env'
import { getLogger } from '../../shared/logger/logger'
import { SinonSandbox, createSandbox } from 'sinon'
import { fs, randomUUID } from '../../shared'
import path from 'path'
import { randomUUID } from '../../shared'

class TestCrashMonitoring extends CrashMonitoring {
public constructor(...deps: ConstructorParameters<typeof CrashMonitoring>) {
Expand Down Expand Up @@ -88,131 +87,133 @@ export const crashMonitoringTest = async () => {
sandbox.restore()
})

it('graceful shutdown no metric emitted', async function () {
    const [checker, quitter] = await makeTestExtensions(2)

    // Bring the first instance up alone and give it one interval to become the primary checker.
    await checker.ext.start()
    await awaitIntervals(oneInterval)
    // No other instance exists that could have reported anything.
    assertTelemetry('session_end', [])

    // The second instance starts and then exits cleanly.
    await quitter.ext.start()
    await quitter.ext.shutdown()
    await awaitIntervals(oneInterval)
    // A graceful shutdown must not produce a session_end metric.
    assertTelemetry('session_end', [])
})

it('single running instance crashes, so nothing is reported, but a new instaces appears and reports', async function () {
    const [crasher, observer] = await makeTestExtensions(2)

    // The only running instance crashes, so nobody is left to report it.
    await crasher.ext.start()
    await crasher.ext.crash()
    await awaitIntervals(oneInterval)
    assertTelemetry('session_end', [])

    // Once a new instance starts, it is expected to detect the earlier crash.
    await observer.ext.start()
    await awaitIntervals(oneInterval)
    assertCrashedExtensions([crasher])
})

// Scenario with four instances crashing in three phases. After each phase the
// accumulated list of crashed instances is asserted, so the statement order
// (start -> crash -> wait -> assert) within each phase matters.
it('multiple running instances start+crash at different times, but another instance always reports', async function () {
// Every instance that has crashed so far, in crash order
const latestCrashedExts: TestExtension[] = []

const exts = await makeTestExtensions(4)

// Ext 0 starts alone and gets one interval before anything else happens
await exts[0].ext.start()
await awaitIntervals(oneInterval)

// start Ext 1 then crash it, Ext 0 finds the crash
await exts[1].ext.start()
await exts[1].ext.crash()
latestCrashedExts.push(exts[1])
await awaitIntervals(oneInterval * 1)

assertCrashedExtensions(latestCrashedExts)

// start Ext 2 and crash Ext 0, Ext 2 is promoted to Primary checker
await exts[2].ext.start()
await exts[0].ext.crash()
latestCrashedExts.push(exts[0])
await awaitIntervals(oneInterval * 1)
assertCrashedExtensions(latestCrashedExts)

// Ext 3 starts, then crashes. Ext 2 reports the crash since it is the Primary checker
await exts[3].ext.start()
await exts[3].ext.crash()
latestCrashedExts.push(exts[3])
await awaitIntervals(oneInterval * 1)
assertCrashedExtensions(latestCrashedExts)
})

it('clears the state when a new os session is determined', async function () {
    const [crashed] = await makeTestExtensions(1)

    // Start a single instance and crash it; with no other instance running, nothing gets reported.
    await crashed.ext.start()
    await crashed.ext.crash()
    await awaitIntervals(oneInterval)
    assertTelemetry('session_end', [])

    // The next instance is told that the stored state is stale, so it is expected
    // to clear that state instead of reporting the instance that crashed earlier.
    const staleStateOptions = { isStateStale: () => Promise.resolve(true) }
    const freshInstance = await makeTestExtension(1, staleStateOptions)
    await freshInstance.ext.start()
    await awaitIntervals(oneInterval)
    assertCrashedExtensions([])
})

it('start the first extension, then start many subsequent ones and crash them all at once', async function () {
    const extCount = 10
    const exts = await makeTestExtensions(extCount)

    // Start every instance sequentially, in index order.
    for (const instance of exts) {
        await instance.ext.start()
    }

    // Everything after the 0th instance gets crashed, one after another.
    const [, ...latestCrashedExts] = exts
    for (const victim of latestCrashedExts) {
        await victim.ext.crash()
    }

    // Wait longer than usual since there is a lot of file i/o
    await awaitIntervals(oneInterval * 3)

    assertCrashedExtensions(latestCrashedExts)
})

it('does not check for crashes when there is a time lag', async function () {
// This test covers the case of a user's computer doing a sleep+wake, after which
// a crash was incorrectly reported because a new heartbeat could not be sent in time

// Load up a crash
const ext0 = await makeTestExtension(0)
await ext0.ext.start()
await ext0.ext.crash()

const ext1 = await makeTestExtension(1)
// Indicate that we have a time lag, and until it returns false
// we will skip crash checking
const didLagStub = sandbox.stub(ext1.ext.getTimeLag(), 'didLag')
didLagStub.returns(true)
await ext1.ext.start()

// Since we have a time lag the crash checker will not run
await awaitIntervals(oneInterval * 2)
assertCrashedExtensions([])

// Now that the time lag is over (didLag returns false), we will check for a crash
didLagStub.returns(false)
await awaitIntervals(oneInterval)
assertCrashedExtensions([ext0])
})
// it('graceful shutdown no metric emitted', async function () {
// const exts = await makeTestExtensions(2)
//
// await exts[0].ext.start()
// await awaitIntervals(oneInterval) // allow time to become primary checker
// // There is no other active instance to report the issue
// assertTelemetry('session_end', [])
//
// // Ext 1 does a graceful shutdown
// await exts[1].ext.start()
// await exts[1].ext.shutdown()
// await awaitIntervals(oneInterval)
// // Ext 1 did a graceful shutdown so no metric emitted
// assertTelemetry('session_end', [])
// })
//
// it('single running instance crashes, so nothing is reported, but a new instaces appears and reports', async function () {
// const exts = await makeTestExtensions(2)
//
// await exts[0].ext.start()
// await exts[0].ext.crash()
// await awaitIntervals(oneInterval)
// // There is no other active instance to report the issue
// assertTelemetry('session_end', [])
//
// await exts[1].ext.start()
// await awaitIntervals(oneInterval)
// // Starting a new instance will detect the previously crashed one
// assertCrashedExtensions([exts[0]])
// })

// Register the same scenario 10 times (presumably to reproduce intermittent failures).
// Each registration carries a run suffix so a failing iteration can be told apart in the report.
for (let i = 0; i < 10; i++) {
    it(`multiple running instances start+crash at different times, but another instance always reports (run ${i + 1} of 10)`, async function () {
        // Every instance that has crashed so far, in crash order
        const latestCrashedExts: TestExtension[] = []

        const exts = await makeTestExtensions(4)

        await exts[0].ext.start()
        await awaitIntervals(oneInterval)

        // start Ext 1 then crash it, Ext 0 finds the crash
        await exts[1].ext.start()
        await exts[1].ext.crash()
        latestCrashedExts.push(exts[1])
        await awaitIntervals(oneInterval * 1)

        assertCrashedExtensions(latestCrashedExts)

        // start Ext 2 and crash Ext 0, Ext 2 is promoted to Primary checker
        await exts[2].ext.start()
        await exts[0].ext.crash()
        latestCrashedExts.push(exts[0])
        await awaitIntervals(oneInterval * 1)
        assertCrashedExtensions(latestCrashedExts)

        // Ext 3 starts, then crashes. Ext 2 reports the crash since it is the Primary checker
        await exts[3].ext.start()
        await exts[3].ext.crash()
        latestCrashedExts.push(exts[3])
        await awaitIntervals(oneInterval * 1)
        assertCrashedExtensions(latestCrashedExts)
    })
}
// it('clears the state when a new os session is determined', async function () {
// const exts = await makeTestExtensions(1)
//
// // Start an extension then crash it
// await exts[0].ext.start()
// await exts[0].ext.crash()
// await awaitIntervals(oneInterval)
// // There is no other active instance to report the issue
// assertTelemetry('session_end', [])
//
// // This extension clears the state due to it being stale, not reporting the previously crashed ext
// const ext1 = await makeTestExtension(1, { isStateStale: () => Promise.resolve(true) })
// await ext1.ext.start()
// await awaitIntervals(oneInterval * 1)
// assertCrashedExtensions([])
// })
//
// it('start the first extension, then start many subsequent ones and crash them all at once', async function () {
// const latestCrashedExts: TestExtension[] = []
//
// const extCount = 10
// const exts = await makeTestExtensions(extCount)
// for (let i = 0; i < extCount; i++) {
// await exts[i].ext.start()
// }
//
// // Crash all exts except the 0th one
// for (let i = 1; i < extCount; i++) {
// await exts[i].ext.crash()
// latestCrashedExts.push(exts[i])
// }
//
// // Give some extra time since there is a lot of file i/o
// await awaitIntervals(oneInterval * 3)
//
// assertCrashedExtensions(latestCrashedExts)
// })
//
// it('does not check for crashes when there is a time lag', async function () {
// // This test handles the case for a users computer doing a sleep+wake and
// // then a crash was incorrectly reported since a new heartbeat could not be sent in time
//
// // Load up a crash
// const ext0 = await makeTestExtension(0)
// await ext0.ext.start()
// await ext0.ext.crash()
//
// const ext1 = await makeTestExtension(1)
// // Indicate that we have a time lag, and until it returns false
// // we will skip crash checking
// const didLagStub = sandbox.stub(ext1.ext.getTimeLag(), 'didLag')
// didLagStub.returns(true)
// await ext1.ext.start()
//
// // Since we have a time lag the crash checker will not run
// await awaitIntervals(oneInterval * 2)
// assertCrashedExtensions([])
//
// // Now that the time lag is over (didLag returns false), we will check for a crash
// didLagStub.returns(false)
// await awaitIntervals(oneInterval)
// assertCrashedExtensions([ext0])
// })

/**
* Something like the following code can switch contexts early and the test will
Expand Down Expand Up @@ -250,11 +251,16 @@ export const crashMonitoringTest = async () => {
[...allSessionEnds],
(a, b) => a.proxiedSessionId === b.proxiedSessionId
)
assert.strictEqual(deduplicatedSessionEnds.length, expectedExts.length)

expectedExts.sort((a, b) => a.metadata.sessionId.localeCompare(b.metadata.sessionId))
deduplicatedSessionEnds.sort((a, b) => a.proxiedSessionId!.localeCompare(b.proxiedSessionId!))

// On a count mismatch, show both sides in the failure message. `expectedExts` holds
// objects, so interpolating the array directly would print "[object Object]";
// list the expected session ids instead.
assert.strictEqual(
    deduplicatedSessionEnds.length,
    expectedExts.length,
    `Actual: ${JSON.stringify(deduplicatedSessionEnds)}, Expected: ${JSON.stringify(
        expectedExts.map((expected) => expected.metadata.sessionId)
    )}`
)

expectedExts.forEach((ext, i) => {
partialDeepCompare(deduplicatedSessionEnds[i], {
result: 'Failed',
Expand All @@ -268,31 +274,31 @@ export const crashMonitoringTest = async () => {
return array.filter((item, index, self) => index === self.findIndex((t) => predicate(item, t)))
}

// Mocha does not await suite callbacks, so the callback is a plain (non-async)
// function; only the test itself is async.
describe('FileSystemState', function () {
    /**
     * Entries in the state directory that were not created by a heartbeat
     * (a stray file and a stray folder) must not be returned by `getAllExts()`.
     */
    it('ignores irrelevant files in state', async function () {
        const state = await crashMonitoringStateFactory({
            workDirPath: testFolder.path,
            isStateStale: () => Promise.resolve(false),
            sessionId: randomUUID(),
            now: () => globals.clock.Date.now(),
            memento: globals.globalState,
            isDevMode: true,
            devLogger: getLogger(),
        })
        const stateDirPath = state.stateDirPath

        // The state directory starts out empty
        assert.deepStrictEqual((await fs.readdir(stateDirPath)).length, 0)
        // Add one unrelated file and one unrelated folder
        await fs.writeFile(path.join(stateDirPath, 'ignoreMe.json'), '')
        await fs.mkdir(path.join(stateDirPath, 'ignoreMe'))
        await state.sendHeartbeat() // creates a relevant file in the state
        // Three entries on disk: the two unrelated ones plus the heartbeat
        assert.deepStrictEqual((await fs.readdir(stateDirPath)).length, 3)

        // Only the heartbeat entry counts as an extension instance
        const result = await state.getAllExts()
        assert.deepStrictEqual(result.length, 1)
    })
})
// describe('FileSystemState', async function () {
// it('ignores irrelevant files in state', async function () {
// const state = await crashMonitoringStateFactory({
// workDirPath: testFolder.path,
// isStateStale: () => Promise.resolve(false),
// sessionId: randomUUID(),
// now: () => globals.clock.Date.now(),
// memento: globals.globalState,
// isDevMode: true,
// devLogger: getLogger(),
// })
// const stateDirPath = state.stateDirPath
//
// assert.deepStrictEqual((await fs.readdir(stateDirPath)).length, 0)
// await fs.writeFile(path.join(stateDirPath, 'ignoreMe.json'), '')
// await fs.mkdir(path.join(stateDirPath, 'ignoreMe'))
// await state.sendHeartbeat() // creates a relevant file in the state
// assert.deepStrictEqual((await fs.readdir(stateDirPath)).length, 3)
//
// const result = await state.getAllExts()
// assert.deepStrictEqual(result.length, 1)
// })
// })
}
// This test is slow, so we only want to run it locally and not in CI. It will be run in the integ CI tests though.
// The suite is registered exactly once: skipped when isCI() is true, run otherwise.
;(isCI() ? describe.skip : describe)('CrashReporting', crashMonitoringTest)

/** Pairs a crash-monitoring test instance with the metadata describing it. */
type TestExtension = {
    /** The crash monitoring instance under test. */
    ext: TestCrashMonitoring
    /** The `ExtInstance` record for `ext`; its `sessionId` is what the assertions compare against. */
    metadata: ExtInstance
}
Loading