Skip to content

Commit be29964

Browse files
committed
Refactor error alerts
1 parent 781b854 commit be29964

File tree

1 file changed

+81
-25
lines changed

1 file changed

+81
-25
lines changed

src/services/wikiUpdates.ts

Lines changed: 81 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ export default class WikiUpdates {
2626

2727
/**
 * Health record kept per channel type.
 *
 * - `isHealthy`: outcome of the most recent `checkApiHealth` call.
 * - `lastCheck`: `Date.now()` timestamp of the last status write; seeded
 *   with `0` when monitoring starts.
 * - `alertSent`: (added by this change) set to `true` once the monitoring
 *   loop has sent a failure alert for the current outage, and reset to
 *   `false` whenever a health check succeeds.
 */
private apiHealthStatus = new Map<
2828
ChannelTypes,
29-
{ isHealthy: boolean; lastCheck: number }
29+
{ isHealthy: boolean; lastCheck: number; alertSent: boolean }
3030
>()
3131

3232
constructor() {
@@ -276,6 +276,10 @@ export default class WikiUpdates {
276276
return result as ApiResponse
277277
}
278278

279+
/**
 * Pauses the awaiting caller for a fixed duration.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
private async sleep(ms: number): Promise<void> {
280+
// Wrap the callback-style timer in a promise so callers can `await` it.
return new Promise(resolve => setTimeout(resolve, ms))
281+
}
282+
279283
async checkApiHealth(channelType: ChannelTypes): Promise<boolean> {
280284
const link =
281285
channelType === ChannelTypes.DEV ? this.DEV_API_URL : this.PROD_API_URL
@@ -288,21 +292,49 @@ export default class WikiUpdates {
288292
}
289293
`
290294

291-
try {
292-
await this.makeApiCallWithTimeout(link, simpleQuery, 30000)
293-
this.apiHealthStatus.set(channelType, {
294-
isHealthy: true,
295-
lastCheck: Date.now(),
296-
})
297-
return true
298-
} catch (error) {
299-
this.apiHealthStatus.set(channelType, {
300-
isHealthy: false,
301-
lastCheck: Date.now(),
302-
})
303-
console.error(`API Health Check Failed for ${channelType}:`, error)
304-
return false
295+
const maxRetries = 3
296+
const retryDelay = 10000 // 10 seconds
297+
298+
let lastError: any
299+
300+
// Try up to 3 times with 10 second delays between attempts
301+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
302+
try {
303+
await this.makeApiCallWithTimeout(link, simpleQuery, 30000)
304+
305+
// Success - update health status and return
306+
this.apiHealthStatus.set(channelType, {
307+
isHealthy: true,
308+
lastCheck: Date.now(),
309+
alertSent: false,
310+
})
311+
312+
if (attempt > 1) {
313+
console.log(`✅ API ${channelType} health check succeeded on attempt ${attempt}/${maxRetries}`)
314+
}
315+
316+
return true
317+
} catch (error) {
318+
lastError = error
319+
console.error(`❌ API Health Check Failed for ${channelType} (attempt ${attempt}/${maxRetries}):`, error)
320+
321+
// If not the last attempt, wait before retrying
322+
if (attempt < maxRetries) {
323+
console.log(`⏳ Retrying API health check for ${channelType} in 10 seconds...`)
324+
await this.sleep(retryDelay)
325+
}
326+
}
305327
}
328+
329+
// All attempts failed - preserve alertSent state
330+
const currentStatus = this.apiHealthStatus.get(channelType)
331+
this.apiHealthStatus.set(channelType, {
332+
isHealthy: false,
333+
lastCheck: Date.now(),
334+
alertSent: currentStatus?.alertSent || false,
335+
})
336+
console.error(`❌ API Health Check Failed for ${channelType} after ${maxRetries} attempts`)
337+
return false
306338
}
307339

308340
async startApiHealthMonitoring(): Promise<void> {
@@ -313,10 +345,12 @@ export default class WikiUpdates {
313345
this.apiHealthStatus.set(ChannelTypes.DEV, {
314346
isHealthy: true,
315347
lastCheck: 0,
348+
alertSent: false,
316349
})
317350
this.apiHealthStatus.set(ChannelTypes.PROD, {
318351
isHealthy: true,
319352
lastCheck: 0,
353+
alertSent: false,
320354
})
321355

322356
setInterval(async () => {
@@ -325,26 +359,41 @@ export default class WikiUpdates {
325359
for (const channelType of [ChannelTypes.DEV, ChannelTypes.PROD]) {
326360
const previousStatus = this.apiHealthStatus.get(channelType)
327361
const isHealthy = await this.checkApiHealth(channelType)
362+
const currentStatus = this.apiHealthStatus.get(channelType)
328363

329364
if (!isHealthy) {
330365
console.warn(
331366
`⚠️ API ${channelType} is unresponsive at ${new Date().toISOString()}`,
332367
)
333368

334-
await this.notifyError(
335-
1,
336-
channelType,
337-
channelType === ChannelTypes.DEV
338-
? this.DEV_API_URL
339-
: this.PROD_API_URL,
340-
'HEALTH_CHECK_FAILED',
341-
)
369+
// Only send alert if we haven't already sent one for this failure
370+
if (!currentStatus?.alertSent) {
371+
console.log(`🚨 Sending initial error alert for ${channelType}`)
372+
await this.notifyError(
373+
1,
374+
channelType,
375+
channelType === ChannelTypes.DEV
376+
? this.DEV_API_URL
377+
: this.PROD_API_URL,
378+
'HEALTH_CHECK_FAILED',
379+
)
380+
381+
// Mark that we've sent the alert
382+
this.apiHealthStatus.set(channelType, {
383+
isHealthy: false,
384+
lastCheck: Date.now(),
385+
alertSent: true,
386+
})
387+
} else {
388+
console.log(`⏳ API ${channelType} still down, continuing to monitor silently...`)
389+
}
342390
} else {
343391
console.log(
344392
`✅ API ${channelType} is healthy at ${new Date().toISOString()}`,
345393
)
346394

347-
if (previousStatus && !previousStatus.isHealthy) {
395+
// Send recovery message only if API was previously unhealthy AND we sent an alert
396+
if (previousStatus && !previousStatus.isHealthy && previousStatus.alertSent) {
348397
console.log(`🎉 API ${channelType} has recovered!`)
349398
const webhookUrl =
350399
channelType === ChannelTypes.DEV
@@ -357,6 +406,13 @@ export default class WikiUpdates {
357406
)
358407
webhook.destroy()
359408
}
409+
410+
// Reset alertSent flag since API is healthy again
411+
this.apiHealthStatus.set(channelType, {
412+
isHealthy: true,
413+
lastCheck: Date.now(),
414+
alertSent: false,
415+
})
360416
}
361417
}
362418
}
@@ -387,7 +443,7 @@ export default class WikiUpdates {
387443

388444
/**
 * Returns a snapshot of the per-channel health records.
 *
 * The result is a new `Map` built from the internal one, so adding or
 * removing entries on it does not affect this instance. The copy is
 * shallow: the record objects themselves are the same references that the
 * internal map holds.
 *
 * @returns A new map from channel type to its health record.
 */
getApiHealthStatus(): Map<
389445
ChannelTypes,
390-
{ isHealthy: boolean; lastCheck: number }
446+
{ isHealthy: boolean; lastCheck: number; alertSent: boolean }
391447
> {
392448
return new Map(this.apiHealthStatus)
393449
}

0 commit comments

Comments
 (0)