Skip to content

Commit 5f0829c

Browse files
do not crash monitor on unstable fs
Signed-off-by: nkomonen-amazon <[email protected]>
1 parent a425c97 commit 5f0829c

File tree

6 files changed

+172
-20
lines changed

6 files changed

+172
-20
lines changed

packages/amazonq/src/extensionNode.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ export async function activate(context: vscode.ExtensionContext) {
3232
* the code compatible with web and move it to {@link activateAmazonQCommon}.
3333
*/
3434
async function activateAmazonQNode(context: vscode.ExtensionContext) {
35-
await (await CrashMonitoring.instance()).start()
35+
await (await CrashMonitoring.instance())?.start()
3636

3737
const extContext = {
3838
extensionContext: context,
@@ -96,5 +96,5 @@ async function setupDevMode(context: vscode.ExtensionContext) {
9696

9797
export async function deactivate() {
9898
// Run concurrently to speed up execution. stop() does not throw so it is safe
99-
await Promise.all([(await CrashMonitoring.instance()).stop(), deactivateCommon()])
99+
await Promise.all([(await CrashMonitoring.instance())?.stop(), deactivateCommon()])
100100
}

packages/core/src/extensionNode.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ export async function activate(context: vscode.ExtensionContext) {
7878
// IMPORTANT: If you are doing setup that should also work in web mode (browser), it should be done in the function below
7979
const extContext = await activateCommon(context, contextPrefix, false)
8080

81-
await (await CrashMonitoring.instance()).start()
81+
await (await CrashMonitoring.instance())?.start()
8282

8383
initializeCredentialsProviderManager()
8484

@@ -254,7 +254,7 @@ export async function activate(context: vscode.ExtensionContext) {
254254

255255
export async function deactivate() {
256256
// Run concurrently to speed up execution. stop() does not throw so it is safe
257-
await Promise.all([await (await CrashMonitoring.instance()).stop(), deactivateCommon()])
257+
await Promise.all([await (await CrashMonitoring.instance())?.stop(), deactivateCommon()])
258258
await globals.resourceManager.dispose()
259259
}
260260

packages/core/src/shared/crashMonitoring.ts

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import nodeFs from 'fs/promises'
1616
import fs from './fs/fs'
1717
import { getLogger } from './logger/logger'
1818
import { crashMonitoringDirNames } from './constants'
19+
import { throwOnUnstableFileSystem } from './filesystemUtilities'
1920

2021
const className = 'CrashMonitoring'
2122

@@ -65,17 +66,32 @@ export class CrashMonitoring {
6566
private readonly devLogger: Logger | undefined
6667
) {}
6768

69+
static #didTryInstance = false
6870
static #instance: CrashMonitoring | undefined
69-
public static async instance(): Promise<CrashMonitoring> {
70-
const isDevMode = getIsDevMode()
71-
const devModeLogger: Logger | undefined = isDevMode ? getLogger() : undefined
72-
return (this.#instance ??= new CrashMonitoring(
73-
await crashMonitoringStateFactory(),
74-
DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 3),
75-
isDevMode,
76-
isAutomation(),
77-
devModeLogger
78-
))
71+
public static async instance(): Promise<CrashMonitoring | undefined> {
72+
// Since the first attempt to create an instance may have failed, we do not
73+
// attempt to create an instance again and return whatever we have
74+
if (this.#didTryInstance === true) {
75+
return this.#instance
76+
}
77+
78+
try {
79+
this.#didTryInstance = true
80+
const isDevMode = getIsDevMode()
81+
const devModeLogger: Logger | undefined = isDevMode ? getLogger() : undefined
82+
const state = await crashMonitoringStateFactory()
83+
return (this.#instance ??= new CrashMonitoring(
84+
state,
85+
DevSettings.instance.get('crashCheckInterval', 1000 * 60 * 3),
86+
isDevMode,
87+
isAutomation(),
88+
devModeLogger
89+
))
90+
} catch (error) {
91+
// Creating the crash monitoring state can throw/fail
92+
emitFailure({ functionName: 'instance', error })
93+
return undefined
94+
}
7995
}
8096

8197
/** Start the Crash Monitoring process */
@@ -358,6 +374,12 @@ function getDefaultDependencies(): MementoStateDependencies {
358374
devLogger: getIsDevMode() ? getLogger() : undefined,
359375
}
360376
}
377+
/**
378+
* Factory to create an instance of the state. Do not create an instance of the
379+
* class manually since there are required setup steps.
380+
*
381+
* @throws if the filesystem state cannot get in to a good state
382+
*/
361383
export async function crashMonitoringStateFactory(deps = getDefaultDependencies()): Promise<FileSystemState> {
362384
const state: FileSystemState = new FileSystemState(deps)
363385
await state.init()
@@ -385,8 +407,18 @@ export class FileSystemState {
385407
/**
386408
* Does the required initialization steps, this must always be run after
387409
* creation of the instance.
410+
*
411+
* @throws if the filesystem state cannot get in to a good state
388412
*/
389413
public async init() {
414+
// IMPORTANT: do not run crash reporting on unstable filesystem to reduce invalid crash data
415+
// NOTE: We want to get a diff between clients that succeeded vs failed this check,
416+
// so emit a metric to track that
417+
await telemetry.function_call.run(async (span) => {
418+
span.record({ className, functionName: 'FileSystemStateValidation' })
419+
await withFailCtx('validateFileSystemStability', () => throwOnUnstableFileSystem(this.deps.workDirPath))
420+
})
421+
390422
// Clear the state if the user did something like a computer restart
391423
if (await this.deps.isStateStale()) {
392424
await this.clearState()
@@ -595,7 +627,7 @@ async function withFailCtx<T>(ctx: string, fn: () => Promise<T>): Promise<T> {
595627
// make sure we await the function so it actually executes within the try/catch
596628
return await fn()
597629
} catch (err) {
598-
throw CrashMonitoringError.chain(err, `Failed "${ctx}"`, { code: className })
630+
throw CrashMonitoringError.chain(err, `Context: "${ctx}"`, { code: className })
599631
}
600632
}
601633

packages/core/src/shared/filesystemUtilities.ts

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { getLogger } from './logger'
1111
import * as pathutils from './utilities/pathUtils'
1212
import globals from '../shared/extensionGlobals'
1313
import fs from '../shared/fs/fs'
14+
import { ToolkitError } from './errors'
1415

1516
export const tempDirPath = path.join(
1617
// https://github.com/aws/aws-toolkit-vscode/issues/240
@@ -246,3 +247,117 @@ export async function setDefaultDownloadPath(downloadPath: string) {
246247
getLogger().error('Error while setting "aws.downloadPath"', err as Error)
247248
}
248249
}
250+
251+
const FileSystemStabilityExceptionId = 'FileSystemStabilityException'
252+
const FileSystemStabilityException = ToolkitError.named(FileSystemStabilityExceptionId)
253+
254+
/**
255+
* Run this function to validate common file system calls/flows, ensuring they work
256+
* as expected.
257+
*
258+
* The intent of this function is to catch potential fs issues early,
259+
* testing common cases of fs usage. We need this since each machine can behave
260+
* differently depending on things like OS, permissions, disk speed, ...
261+
*
262+
* @throws a {@link FileSystemStabilityException} which wraps the underlying failed fs operation error.
263+
*/
264+
export async function throwOnUnstableFileSystem(tmpRoot: string) {
265+
const tmpFolder = path.join(tmpRoot, `validateStableFS-${crypto.randomBytes(4).toString('hex')}`)
266+
const tmpFile = path.join(tmpFolder, 'file.txt')
267+
268+
try {
269+
// test basic folder operations
270+
await withFailCtx('mkdirInitial', () => fs.mkdir(tmpFolder))
271+
// Verifies that we do not throw if the dir exists and we try to make it again
272+
await withFailCtx('mkdirButAlreadyExists', () => fs.mkdir(tmpFolder))
273+
await withFailCtx('mkdirSubfolder', () => fs.mkdir(path.join(tmpFolder, 'subfolder')))
274+
await withFailCtx('rmdirInitial', () => fs.delete(tmpFolder, { recursive: true }))
275+
276+
// test basic file operations
277+
await withFailCtx('mkdirForFileOpsTest', () => fs.mkdir(tmpFolder))
278+
await withFailCtx('writeFile1', () => fs.writeFile(tmpFile, 'test1'))
279+
await withFailCtx('readFile1', async () => {
280+
const text = await fs.readFileText(tmpFile)
281+
if (text !== 'test1') {
282+
throw new Error(`Unexpected file contents: "${text}"`)
283+
}
284+
})
285+
// overwrite the file content multiple times
286+
await withFailCtx('writeFile2', () => fs.writeFile(tmpFile, 'test2'))
287+
await withFailCtx('writeFile3', () => fs.writeFile(tmpFile, 'test3'))
288+
await withFailCtx('writeFile4', () => fs.writeFile(tmpFile, 'test4'))
289+
await withFailCtx('writeFile5', () => fs.writeFile(tmpFile, 'test5'))
290+
await withFailCtx('readFile5', async () => {
291+
const text = await fs.readFileText(tmpFile)
292+
if (text !== 'test5') {
293+
throw new Error(`Unexpected file contents after multiple writes: "${text}"`)
294+
}
295+
})
296+
// test concurrent reads on a file
297+
await withFailCtx('writeFileConcurrencyTest', () => fs.writeFile(tmpFile, 'concurrencyTest'))
298+
const result = await Promise.all([
299+
withFailCtx('readFileConcurrent1', () => fs.readFileText(tmpFile)),
300+
withFailCtx('readFileConcurrent2', () => fs.readFileText(tmpFile)),
301+
withFailCtx('readFileConcurrent3', () => fs.readFileText(tmpFile)),
302+
withFailCtx('readFileConcurrent4', () => fs.readFileText(tmpFile)),
303+
withFailCtx('readFileConcurrent5', () => fs.readFileText(tmpFile)),
304+
])
305+
if (result.some((text) => text !== 'concurrencyTest')) {
306+
throw new Error(`Unexpected concurrent file reads: ${result}`)
307+
}
308+
// test deleting a file
309+
await withFailCtx('deleteFileInitial', () => fs.delete(tmpFile))
310+
await withFailCtx('writeFileAfterDelete', () => fs.writeFile(tmpFile, 'afterDelete'))
311+
await withFailCtx('readNewFileAfterDelete', async () => {
312+
const text = await fs.readFileText(tmpFile)
313+
if (text !== 'afterDelete') {
314+
throw new Error(`Unexpected file content after writing to deleted file: "${text}"`)
315+
}
316+
})
317+
await withFailCtx('deleteFileFully', () => fs.delete(tmpFile))
318+
await withFailCtx('notExistsFileAfterDelete', async () => {
319+
const res = await fs.exists(tmpFile)
320+
if (res) {
321+
throw new Error(`Expected file to NOT exist: "${tmpFile}"`)
322+
}
323+
})
324+
325+
// test rename
326+
await withFailCtx('writeFileForRename', () => fs.writeFile(tmpFile, 'TestingRename'))
327+
const tmpFileRenamed = tmpFile + '.renamed'
328+
await withFailCtx('renameFile', () => fs.rename(tmpFile, tmpFileRenamed))
329+
await withFailCtx('existsRenamedFile', async () => {
330+
const res = await fs.exists(tmpFileRenamed)
331+
if (!res) {
332+
throw new Error(`Expected RENAMED file to exist: "${tmpFileRenamed}"`)
333+
}
334+
})
335+
await withFailCtx('writeToRenamedFile', async () => fs.writeFile(tmpFileRenamed, 'hello'))
336+
await withFailCtx('readFromRenamedFile', async () => {
337+
const res = await fs.readFileText(tmpFileRenamed)
338+
if (res !== 'hello') {
339+
throw new Error(`Expected RENAMED file to be writable: "${tmpFileRenamed}"`)
340+
}
341+
})
342+
await withFailCtx('renameFileReset', () => fs.rename(tmpFileRenamed, tmpFile))
343+
await withFailCtx('renameFileResetExists', async () => {
344+
const res = await fs.exists(tmpFile)
345+
if (!res) {
346+
throw new Error(`Expected reverted renamed file to exist: "${tmpFile}"`)
347+
}
348+
})
349+
} finally {
350+
await fs.delete(tmpFolder, { recursive: true, force: true })
351+
}
352+
353+
async function withFailCtx<T>(ctx: string, fn: () => Promise<T>): Promise<T> {
354+
try {
355+
return await fn()
356+
} catch (e) {
357+
if (!(e instanceof Error)) {
358+
throw e
359+
}
360+
throw FileSystemStabilityException.chain(e, `context: "${ctx}"`, { code: FileSystemStabilityExceptionId })
361+
}
362+
}
363+
}

packages/core/src/test/shared/filesystemUtilities.test.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ import {
1313
isInDirectory,
1414
makeTemporaryToolkitFolder,
1515
tempDirPath,
16+
throwOnUnstableFileSystem,
1617
} from '../../shared/filesystemUtilities'
1718
import { fs } from '../../shared'
19+
import { TestFolder } from '../testUtil'
1820

1921
describe('filesystemUtilities', function () {
2022
const targetFilename = 'findThisFile12345.txt'
@@ -191,4 +193,11 @@ describe('filesystemUtilities', function () {
191193
assert.strictEqual(actual, 3)
192194
})
193195
})
196+
197+
describe('throwOnUnstableFileSystem', async function () {
198+
it('does not throw on stable filesystem', async function () {
199+
const testFolder = await TestFolder.create()
200+
await throwOnUnstableFileSystem(testFolder.path)
201+
})
202+
})
194203
})

packages/core/src/test/testUtil.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ export function getWorkspaceFolder(dir: string): vscode.WorkspaceFolder {
7575
* But if the day comes that we need it for web, we should be able to add some agnostic FS methods in here.
7676
*/
7777
export class TestFolder {
78-
protected constructor(private readonly rootFolder: string) {}
78+
protected constructor(public readonly path: string) {}
7979

8080
/** Creates a folder that deletes itself once all tests are done running. */
8181
static async create() {
@@ -118,10 +118,6 @@ export class TestFolder {
118118
pathFrom(relativePath: string): string {
119119
return path.join(this.path, relativePath)
120120
}
121-
122-
get path(): string {
123-
return path.join(this.rootFolder)
124-
}
125121
}
126122

127123
/**

0 commit comments

Comments
 (0)