@@ -17,6 +17,8 @@ const CONFIG = {
17
17
MAX_RETRIES : 3 ,
18
18
RETRY_DELAY : 1000 , // 1 second base delay
19
19
CONNECTION_POOL_SIZE : 10 ,
20
+ HEALTH_CHECK_TIMEOUT : 5000 , // 5 seconds for health check
21
+ MAX_HEALTH_CHECK_FAILURES : 3 , // Max consecutive failures before re-resolution
20
22
} as const ;
21
23
22
24
const STORE_META_ENVELOPE = `
@@ -126,6 +128,10 @@ export class EVaultClient {
126
128
private endpoints : Map < string , string > = new Map ( ) ;
127
129
private tokenInfo : TokenInfo | null = null ;
128
130
private isDisposed = false ;
131
+
132
+ // Health check tracking
133
+ private healthCheckFailures : Map < string , number > = new Map ( ) ;
134
+ private lastHealthCheck : Map < string , number > = new Map ( ) ;
129
135
130
136
constructor (
131
137
private registryUrl : string ,
@@ -141,6 +147,8 @@ export class EVaultClient {
141
147
this . isDisposed = true ;
142
148
this . clients . clear ( ) ;
143
149
this . endpoints . clear ( ) ;
150
+ this . healthCheckFailures . clear ( ) ;
151
+ this . lastHealthCheck . clear ( ) ;
144
152
this . tokenInfo = null ;
145
153
}
146
154
@@ -274,8 +282,15 @@ export class EVaultClient {
274
282
if ( this . clients . has ( w3id ) ) {
275
283
const client = this . clients . get ( w3id ) ! ;
276
284
const endpoint = this . endpoints . get ( w3id ) ! ;
277
- console . log ( 'reusing existing client for w3id:' , w3id , 'endpoint:' , endpoint ) ;
278
- return client ;
285
+
286
+ // Check if the cached endpoint is still healthy
287
+ if ( await this . isEndpointHealthy ( w3id , endpoint ) ) {
288
+ console . log ( 'reusing existing client for w3id:' , w3id , 'endpoint:' , endpoint ) ;
289
+ return client ;
290
+ } else {
291
+ console . log ( 'cached endpoint is unhealthy, removing and re-resolving for w3id:' , w3id ) ;
292
+ this . removeCachedClient ( w3id ) ;
293
+ }
279
294
}
280
295
281
296
// Resolve endpoint for this specific w3id
@@ -294,10 +309,153 @@ export class EVaultClient {
294
309
this . clients . set ( w3id , client ) ;
295
310
this . endpoints . set ( w3id , endpoint ) ;
296
311
312
+ // Initialize health check tracking
313
+ this . healthCheckFailures . set ( w3id , 0 ) ;
314
+ this . lastHealthCheck . set ( w3id , Date . now ( ) ) ;
315
+
297
316
console . log ( 'created new client for w3id:' , w3id , 'endpoint:' , endpoint ) ;
298
317
return client ;
299
318
}
300
319
320
/**
 * Decide whether the cached endpoint for `w3id` may keep being used.
 *
 * Probes are throttled: when the previous probe recorded for this w3id is
 * less than 30 seconds old, the endpoint is assumed healthy and no request
 * is made. Otherwise a `HEAD` request is sent to `<base>/whois`, where
 * `<base>` is `endpoint` without its trailing `/graphql` segment, and the
 * request is aborted after `CONFIG.HEALTH_CHECK_TIMEOUT` milliseconds.
 *
 * A failed probe (network error, abort, or non-OK status) increments the
 * consecutive-failure counter for the w3id; the endpoint is only reported
 * unhealthy once that counter reaches `CONFIG.MAX_HEALTH_CHECK_FAILURES`.
 * A successful probe resets the counter to 0.
 *
 * @param w3id - Key under which the failure counter and last-probe time are stored.
 * @param endpoint - Cached GraphQL endpoint URL for that w3id.
 * @returns `false` only when the failure threshold has been reached, `true`
 *   in every other case (including a failed probe below the threshold).
 */
private async isEndpointHealthy(w3id: string, endpoint: string): Promise<boolean> {
  // Minimum spacing between two probes of the same w3id, to limit overhead.
  const minProbeIntervalMs = 30000;

  try {
    const now = Date.now();
    const lastCheck = this.lastHealthCheck.get(w3id) ?? 0;

    if (now - lastCheck < minProbeIntervalMs) {
      return true; // Assume healthy if checked recently
    }

    // Strip only a trailing "/graphql" segment. Replacing the first
    // occurrence anywhere would corrupt URLs whose host starts with
    // "graphql" (the "//graphql" of "http://graphql.example.com" matches).
    const baseUrl = endpoint.replace(/\/graphql\/?$/, '');
    const healthCheckUrl = `${baseUrl}/whois`;
    console.log(`Health checking endpoint for ${ w3id } : ${ healthCheckUrl } `);

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), CONFIG.HEALTH_CHECK_TIMEOUT);

    // Always cancel the abort timer, including when fetch rejects, so no
    // timer is left pending after a failed probe.
    const response = await fetch(healthCheckUrl, {
      method: 'HEAD',
      signal: controller.signal,
    }).finally(() => clearTimeout(timeoutId));

    if (!response.ok) {
      throw new Error(`Health check failed with status: ${ response . status } `);
    }

    // Reset failure count on success
    this.healthCheckFailures.set(w3id, 0);
    this.lastHealthCheck.set(w3id, now);
    return true;
  } catch (error) {
    console.log(`Health check failed for ${ w3id } :`, error instanceof Error ? error.message : 'Unknown error');

    // Record the failure and restart the throttle window from this attempt.
    const newFailures = (this.healthCheckFailures.get(w3id) ?? 0) + 1;
    this.healthCheckFailures.set(w3id, newFailures);
    this.lastHealthCheck.set(w3id, Date.now());

    // Too many consecutive failures: report unhealthy so the caller can re-resolve.
    if (newFailures >= CONFIG.MAX_HEALTH_CHECK_FAILURES) {
      console.log(`Endpoint for ${ w3id } marked as unhealthy after ${ newFailures } consecutive failures`);
      return false;
    }

    // Still below the threshold: tolerate the failure.
    return true;
  }
}
380
+
381
/**
 * Drop every per-w3id cache entry: the client, its endpoint, the failure
 * counter and the last health-check timestamp.
 *
 * @param w3id - Key whose entries are deleted from all four maps.
 */
private removeCachedClient(w3id: string): void {
  const perIdCaches = [
    this.clients,
    this.endpoints,
    this.healthCheckFailures,
    this.lastHealthCheck,
  ];

  for (const cache of perIdCaches) {
    cache.delete(w3id);
  }

  console.log(`Removed cached client for ${ w3id } `);
}
391
+
392
/**
 * Run a health check for one w3id immediately, bypassing the throttle.
 *
 * The stored last-check time is reset to 0 before delegating to
 * `isEndpointHealthy`, so the time-based skip in that method does not apply.
 * The result is whatever `isEndpointHealthy` returns; when it is `false`
 * the cached client for the w3id is removed.
 *
 * @param w3id - Key of the cached client to check.
 * @returns `false` when no client or endpoint is cached for `w3id`, or when
 *   `isEndpointHealthy` reports the endpoint unhealthy; `true` otherwise.
 */
public async forceHealthCheck(w3id: string): Promise<boolean> {
  if (this.clients.has(w3id) === false) {
    console.log(`No cached client found for ${ w3id } `);
    return false;
  }

  const cachedEndpoint = this.endpoints.get(w3id);
  if (!cachedEndpoint) {
    console.log(`No cached endpoint found for ${ w3id } `);
    return false;
  }

  // A last-check time of 0 makes the elapsed time exceed the throttle window.
  this.lastHealthCheck.set(w3id, 0);

  const healthy = await this.isEndpointHealthy(w3id, cachedEndpoint);
  if (healthy) {
    return healthy;
  }

  console.log(`Forced health check failed for ${ w3id } , removing cached client`);
  this.removeCachedClient(w3id);
  return healthy;
}
420
+
421
/**
 * Snapshot the health bookkeeping for every cached endpoint.
 *
 * No network request is made; the values come from the tracking maps.
 * Missing counters or timestamps are reported as 0.
 *
 * @returns An object keyed by w3id. For each entry, `endpoint` is the cached
 *   URL, `failures` the recorded failure count, `lastCheck` the recorded
 *   last-check time, and `isHealthy` is `true` while `failures` is below
 *   `CONFIG.MAX_HEALTH_CHECK_FAILURES`.
 */
public getHealthStatus(): Record<string, {
  endpoint: string;
  failures: number;
  lastCheck: number;
  isHealthy: boolean;
}> {
  // Typed precisely (instead of `any`) so each entry is checked against the
  // declared return shape.
  type EndpointHealth = {
    endpoint: string;
    failures: number;
    lastCheck: number;
    isHealthy: boolean;
  };
  const status: Record<string, EndpointHealth> = {};

  for (const [w3id, endpoint] of this.endpoints) {
    const failures = this.healthCheckFailures.get(w3id) ?? 0;
    const lastCheck = this.lastHealthCheck.get(w3id) ?? 0;

    status[w3id] = {
      endpoint,
      failures,
      lastCheck,
      isHealthy: failures < CONFIG.MAX_HEALTH_CHECK_FAILURES,
    };
  }

  return status;
}
447
+
448
/**
 * Empty all four per-w3id caches (clients, endpoints, failure counters and
 * last-check timestamps) after logging that the cache is being cleared.
 * Useful for testing or forcing fresh connections.
 */
public clearCache(): void {
  console.log('Clearing all cached clients and endpoints');

  const caches = [
    this.clients,
    this.endpoints,
    this.healthCheckFailures,
    this.lastHealthCheck,
  ];

  for (const cache of caches) {
    cache.clear();
  }
}
458
+
301
459
async storeMetaEnvelope ( envelope : MetaEnvelope ) : Promise < string > {
302
460
return this . withRetry ( async ( ) => {
303
461
const client = await this . ensureClient ( envelope . w3id ) . catch ( ( ) => {
0 commit comments