@@ -26,7 +26,7 @@ export default class WikiUpdates {
2626
// Last recorded health state per channel type: whether the most recent check
// succeeded (isHealthy), when that state was stored (lastCheck, a Date.now()
// value, or 0 before any check has run), and whether a failure alert has
// already been sent for the current outage (alertSent).
2727 private apiHealthStatus = new Map <
2828 ChannelTypes ,
29- { isHealthy : boolean ; lastCheck : number }
29+ { isHealthy : boolean ; lastCheck : number ; alertSent : boolean }
3030 > ( )
3131
3232 constructor ( ) {
@@ -276,6 +276,10 @@ export default class WikiUpdates {
276276 return result as ApiResponse
277277 }
278278
/**
 * Pauses the calling async flow for `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`, in milliseconds.
 * @returns A promise that settles (with no value) once the timer fires.
 */
private async sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms)
  })
}
282+
/**
 * Probes the API for the given channel (DEV_API_URL for ChannelTypes.DEV,
 * PROD_API_URL otherwise) and records the outcome in apiHealthStatus.
 *
 * Up to maxRetries (3) attempts are made through makeApiCallWithTimeout,
 * passing 30000 as its last argument (presumably a timeout in milliseconds;
 * confirm against that helper). After a failed attempt that is not the last
 * one, the method sleeps retryDelay (10000 ms) before trying again.
 *
 * - First successful attempt: stores isHealthy = true, alertSent = false and
 *   returns true.
 * - All attempts failed: stores isHealthy = false, carries over the alertSent
 *   value of the existing entry (false when there is none) and returns false.
 *
 * NOTE(review): lastError is assigned in the catch block but never read in
 * the visible code. The retry log message hard-codes "10 seconds" rather than
 * deriving the text from retryDelay, so the two can drift apart.
 * NOTE(review): part of this method (the start of the query text) is elided
 * by the diff view and is not described here.
 *
 * @param channelType Channel whose API endpoint is probed.
 * @returns true if any attempt succeeded, false once every attempt failed.
 */
279283 async checkApiHealth ( channelType : ChannelTypes ) : Promise < boolean > {
280284 const link =
281285 channelType === ChannelTypes . DEV ? this . DEV_API_URL : this . PROD_API_URL
@@ -288,21 +292,49 @@ export default class WikiUpdates {
288292 }
289293 `
290294
291- try {
292- await this . makeApiCallWithTimeout ( link , simpleQuery , 30000 )
293- this . apiHealthStatus . set ( channelType , {
294- isHealthy : true ,
295- lastCheck : Date . now ( ) ,
296- } )
297- return true
298- } catch ( error ) {
299- this . apiHealthStatus . set ( channelType , {
300- isHealthy : false ,
301- lastCheck : Date . now ( ) ,
302- } )
303- console . error ( `API Health Check Failed for ${ channelType } :` , error )
304- return false
295+ const maxRetries = 3
296+ const retryDelay = 10000 // 10 seconds
297+
298+ let lastError : any
299+
300+ // Try up to 3 times with 10 second delays between attempts
301+ for ( let attempt = 1 ; attempt <= maxRetries ; attempt ++ ) {
302+ try {
303+ await this . makeApiCallWithTimeout ( link , simpleQuery , 30000 )
304+
305+ // Success - update health status and return
306+ this . apiHealthStatus . set ( channelType , {
307+ isHealthy : true ,
308+ lastCheck : Date . now ( ) ,
309+ alertSent : false ,
310+ } )
311+
312+ if ( attempt > 1 ) {
313+ console . log ( `✅ API ${ channelType } health check succeeded on attempt ${ attempt } /${ maxRetries } ` )
314+ }
315+
316+ return true
317+ } catch ( error ) {
318+ lastError = error
319+ console . error ( `❌ API Health Check Failed for ${ channelType } (attempt ${ attempt } /${ maxRetries } ):` , error )
320+
321+ // If not the last attempt, wait before retrying
322+ if ( attempt < maxRetries ) {
323+ console . log ( `⏳ Retrying API health check for ${ channelType } in 10 seconds...` )
324+ await this . sleep ( retryDelay )
325+ }
326+ }
305327 }
328+
329+ // All attempts failed - preserve alertSent state
330+ const currentStatus = this . apiHealthStatus . get ( channelType )
331+ this . apiHealthStatus . set ( channelType , {
332+ isHealthy : false ,
333+ lastCheck : Date . now ( ) ,
334+ alertSent : currentStatus ?. alertSent || false ,
335+ } )
336+ console . error ( `❌ API Health Check Failed for ${ channelType } after ${ maxRetries } attempts` )
337+ return false
306338 }
307339
308340 async startApiHealthMonitoring ( ) : Promise < void > {
@@ -313,10 +345,12 @@ export default class WikiUpdates {
313345 this . apiHealthStatus . set ( ChannelTypes . DEV , {
314346 isHealthy : true ,
315347 lastCheck : 0 ,
348+ alertSent : false ,
316349 } )
317350 this . apiHealthStatus . set ( ChannelTypes . PROD , {
318351 isHealthy : true ,
319352 lastCheck : 0 ,
353+ alertSent : false ,
320354 } )
321355
322356 setInterval ( async ( ) => {
@@ -325,26 +359,41 @@ export default class WikiUpdates {
325359 for ( const channelType of [ ChannelTypes . DEV , ChannelTypes . PROD ] ) {
326360 const previousStatus = this . apiHealthStatus . get ( channelType )
327361 const isHealthy = await this . checkApiHealth ( channelType )
362+ const currentStatus = this . apiHealthStatus . get ( channelType )
328363
329364 if ( ! isHealthy ) {
330365 console . warn (
331366 `⚠️ API ${ channelType } is unresponsive at ${ new Date ( ) . toISOString ( ) } ` ,
332367 )
333368
334- await this . notifyError (
335- 1 ,
336- channelType ,
337- channelType === ChannelTypes . DEV
338- ? this . DEV_API_URL
339- : this . PROD_API_URL ,
340- 'HEALTH_CHECK_FAILED' ,
341- )
369+ // Only send alert if we haven't already sent one for this failure
370+ if ( ! currentStatus ?. alertSent ) {
371+ console . log ( `🚨 Sending initial error alert for ${ channelType } ` )
372+ await this . notifyError (
373+ 1 ,
374+ channelType ,
375+ channelType === ChannelTypes . DEV
376+ ? this . DEV_API_URL
377+ : this . PROD_API_URL ,
378+ 'HEALTH_CHECK_FAILED' ,
379+ )
380+
381+ // Mark that we've sent the alert
382+ this . apiHealthStatus . set ( channelType , {
383+ isHealthy : false ,
384+ lastCheck : Date . now ( ) ,
385+ alertSent : true ,
386+ } )
387+ } else {
388+ console . log ( `⏳ API ${ channelType } still down, continuing to monitor silently...` )
389+ }
342390 } else {
343391 console . log (
344392 `✅ API ${ channelType } is healthy at ${ new Date ( ) . toISOString ( ) } ` ,
345393 )
346394
347- if ( previousStatus && ! previousStatus . isHealthy ) {
395+ // Send recovery message only if API was previously unhealthy AND we sent an alert
396+ if ( previousStatus && ! previousStatus . isHealthy && previousStatus . alertSent ) {
348397 console . log ( `🎉 API ${ channelType } has recovered!` )
349398 const webhookUrl =
350399 channelType === ChannelTypes . DEV
@@ -357,6 +406,13 @@ export default class WikiUpdates {
357406 )
358407 webhook . destroy ( )
359408 }
409+
410+ // Reset alertSent flag since API is healthy again
411+ this . apiHealthStatus . set ( channelType , {
412+ isHealthy : true ,
413+ lastCheck : Date . now ( ) ,
414+ alertSent : false ,
415+ } )
360416 }
361417 }
362418 }
@@ -387,7 +443,7 @@ export default class WikiUpdates {
387443
/**
 * Returns a snapshot of the per-channel API health state.
 *
 * Both the map and every status record in it are copied. Copying only the
 * map (`new Map(map)`) would still hand out the internal record objects, so
 * a caller mutating e.g. `alertSent` on a returned record would silently
 * change the monitor's own bookkeeping. With per-record copies, nothing a
 * caller does to the result affects internal state.
 *
 * @returns A new map from channel type to a copy of its latest status record.
 */
getApiHealthStatus(): Map<
  ChannelTypes,
  { isHealthy: boolean; lastCheck: number; alertSent: boolean }
> {
  const snapshot = new Map<
    ChannelTypes,
    { isHealthy: boolean; lastCheck: number; alertSent: boolean }
  >()
  for (const [channelType, status] of this.apiHealthStatus) {
    // Spread into a fresh object so the caller never holds an internal record.
    snapshot.set(channelType, { ...status })
  }
  return snapshot
}
0 commit comments