@@ -179,7 +179,7 @@ class Heartbeat {
 
     public async start() {
         // Send an initial heartbeat immediately
-        await withFailCtx('initialSendHeartbeat', () => this.state.sendHeartbeat())
+        await withFailCtx('sendHeartbeatInitial', () => this.state.sendHeartbeat())
 
         // Send a heartbeat every interval
         this.intervalRef = globals.clock.setInterval(async () => {
@@ -200,6 +200,10 @@ class Heartbeat {
                 this._onFailure.fire()
             }
         }, this.heartbeatInterval)
+
+        // We will know the first heartbeat, and can infer the next ones starting from this timestamp.
+        // In case of heartbeat failure we have a separate failure metric.
+        telemetry.ide_heartbeat.emit({ timestamp: globals.clock.Date.now(), id: className, result: 'Succeeded' })
     }
 
     /** Stops everything, signifying a graceful shutdown */
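
The added comment states that only the initial heartbeat is emitted as telemetry, and that later heartbeats can be inferred from that emitted timestamp plus the fixed interval. A minimal sketch of that inference; the helper name and parameters below are illustrative, not taken from the codebase:

// Hypothetical helper, not part of the codebase: reconstructs the expected heartbeat times
// from the single emitted timestamp and the known interval.
function inferHeartbeatTimestamps(initialTimestamp: number, intervalMs: number, count: number): number[] {
    return Array.from({ length: count }, (_, i) => initialTimestamp + i * intervalMs)
}

// Example: the first three expected heartbeats after an emit at t0 are
// [t0, t0 + intervalMs, t0 + 2 * intervalMs].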
@@ -455,38 +459,40 @@ export class FileSystemState {
             })
         }
 
-        await withFailCtx('init', () => fs.mkdir(this.stateDirPath))
+        await withFailCtx('init', () => nodeFs.mkdir(this.stateDirPath, { recursive: true }))
     }
 
     // ------------------ Heartbeat methods ------------------
     public async sendHeartbeat() {
         const extId = this.extId
+        const filePath = this.makeStateFilePath(extId)
+        const now = this.deps.now()
+
+        let fileHandle: nodeFs.FileHandle | undefined
         try {
-            const now = this.deps.now()
             const func = async () => {
-                const filePath = this.makeStateFilePath(extId)
-
-                // We were seeing weird behavior where we possibly read an old file, even though we overwrote it.
-                // So this is a sanity check.
-                await fs.delete(filePath, { force: true })
-
-                await fs.writeFile(filePath, JSON.stringify({ ...this.ext, lastHeartbeat: now }, undefined, 4))
-
-                // Sanity check to verify the write is accessible immediately after
-                const heartbeatData = JSON.parse(await fs.readFileText(filePath)) as ExtInstanceHeartbeat
+                fileHandle = await nodeFs.open(filePath, 'w')
+                await fileHandle.writeFile(JSON.stringify({ ...this.ext, lastHeartbeat: now }, undefined, 4))
+                // Noticing that some file reads are not immediately available after write. `fsync` is known to address this.
+                await fileHandle.sync()
+                await fileHandle.close()
+                fileHandle = undefined
+
+                // Sanity check to verify the latest write is accessible immediately
+                const heartbeatData = JSON.parse(await nodeFs.readFile(filePath, 'utf-8')) as ExtInstanceHeartbeat
                 if (heartbeatData.lastHeartbeat !== now) {
                     throw new CrashMonitoringError('Heartbeat write validation failed', { code: className })
                 }
+
+                this.deps.devLogger?.debug(`crashMonitoring: HEARTBEAT sent for ${truncateUuid(this.ext.sessionId)}`)
             }
             const funcWithCtx = () => withFailCtx('sendHeartbeatState', func)
             const funcWithRetries = withRetries(funcWithCtx, { maxRetries: 6, delay: 100, backoff: 2 })
-            const funcWithTelemetryRun = await telemetry.ide_heartbeat.run((span) => {
-                span.record({ id: className, timestamp: now })
-                return funcWithRetries
-            })
 
-            return funcWithTelemetryRun
+            return funcWithRetries
         } catch (e) {
+            await fileHandle?.close()
+
             // delete this ext from the state to avoid an incorrectly reported crash since we could not send a new heartbeat
             await this.deleteHeartbeatFile(extId, 'sendHeartbeatFailureCleanup')
             throw e
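
The rewritten sendHeartbeat writes through a file handle, fsyncs, closes, and only then reads the file back to confirm the write is visible. A standalone sketch of that write-then-verify ordering using node:fs/promises; the function and parameter names here are illustrative, not part of the codebase:

import * as nodeFs from 'fs/promises'

// Illustrative sketch of the write -> fsync -> close -> read-back pattern.
async function writeAndVerify(filePath: string, payload: object): Promise<void> {
    const serialized = JSON.stringify(payload, undefined, 4)
    let handle: nodeFs.FileHandle | undefined
    try {
        handle = await nodeFs.open(filePath, 'w')
        await handle.writeFile(serialized)
        // Flush to disk so a subsequent read (possibly by another process) sees the new contents.
        await handle.sync()
    } finally {
        await handle?.close()
    }

    // Verify with a fresh read that the latest write is what readers will observe.
    const readBack = await nodeFs.readFile(filePath, 'utf-8')
    if (readBack !== serialized) {
        throw new Error(`write verification failed for ${filePath}`)
    }
}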
@@ -518,7 +524,21 @@ export class FileSystemState {
 
     private async deleteHeartbeatFile(ext: ExtInstanceId | ExtInstance, ctx: string) {
         // IMPORTANT: Must use NodeFs here since this is used during shutdown
-        const func = () => nodeFs.rm(this.makeStateFilePath(ext), { force: true })
+        const func = async () => {
+            const filePath = this.makeStateFilePath(ext)
+
+            // Even when deleting a file, if there is an open file handle it may still exist. This empties the
+            // contents, so that any following reads will get no data.
+            let fileHandle: nodeFs.FileHandle | undefined
+            try {
+                fileHandle = await nodeFs.open(filePath, 'w')
+                await fileHandle.sync()
+            } finally {
+                await fileHandle?.close()
+            }
+
+            await nodeFs.rm(filePath, { force: true })
+        }
         const funcWithCtx = () => withFailCtx(ctx, func)
         const funcWithRetries = withRetries(funcWithCtx, { maxRetries: 6, delay: 100, backoff: 2 })
         await funcWithRetries
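
The comment in deleteHeartbeatFile relies on the fact that opening a file with the 'w' flag truncates it in place, so until the rm completes a concurrent reader sees an empty file rather than a missing one. A small illustrative check of that behavior (the function name is hypothetical, not from the codebase):

import * as nodeFs from 'fs/promises'

// Illustrative only: demonstrates the window that deleteHeartbeatFile creates on purpose.
async function demoTruncateWindow(filePath: string): Promise<void> {
    const handle = await nodeFs.open(filePath, 'w') // 'w' truncates any existing contents
    await handle.sync()
    await handle.close()

    const text = await nodeFs.readFile(filePath, 'utf-8')
    console.log(text === '') // true: the file still exists but holds no data
}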
@@ -553,7 +573,7 @@ export class FileSystemState {
     public async clearState(): Promise<void> {
         this.deps.devLogger?.debug('crashMonitoring: CLEAR_STATE: Started')
         await withFailCtx('clearState', async () => {
-            await fs.delete(this.stateDirPath, { force: true, recursive: true })
+            await nodeFs.rm(this.stateDirPath, { force: true, recursive: true })
             this.deps.devLogger?.debug('crashMonitoring: CLEAR_STATE: Succeeded')
         })
     }
@@ -576,7 +596,7 @@ export class FileSystemState {
         // we will assume that other instance handled its termination appropriately.
         // NOTE: On Windows we were failing on EBUSY, so we retry on failure.
         const loadExtFromDisk = async () => {
-            const text = await fs.readFileText(this.makeStateFilePath(extId))
+            const text = await nodeFs.readFile(this.makeStateFilePath(extId), 'utf-8')
 
             if (!text) {
                 return undefined
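
The NOTE above explains why reads and deletes here are retried: Windows was intermittently failing with EBUSY. The earlier hunks wrap their work in withRetries({ maxRetries: 6, delay: 100, backoff: 2 }); that helper is not part of this diff, so the following is only a hedged approximation of what an exponential-backoff wrapper with that option shape typically looks like, and the real implementation may differ:

// Approximation only; the actual `withRetries` lives elsewhere in the repository.
interface RetryOptions {
    maxRetries: number
    delay: number // initial delay in milliseconds
    backoff: number // multiplier applied to the delay after each failed attempt
}

async function withRetriesSketch<T>(fn: () => Promise<T>, opts: RetryOptions): Promise<T> {
    let delay = opts.delay
    for (let attempt = 0; ; attempt++) {
        try {
            return await fn()
        } catch (e) {
            if (attempt >= opts.maxRetries) {
                throw e
            }
            await new Promise((resolve) => setTimeout(resolve, delay))
            delay *= opts.backoff
        }
    }
}

With delay: 100 and backoff: 2, the waits between attempts grow roughly as 100, 200, 400, 800, 1600, 3200 ms, giving transient failures such as the EBUSY errors noted above time to clear.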