@@ -349,34 +349,156 @@ export class SentinelFramework extends DockerBase {
349349 ) ;
350350 }
351351
352- return [
353- ...await Promise . all ( promises )
354- ]
352+ const sentinels = await Promise . all ( promises ) ;
353+
354+ await this . validateSentinelConfiguration ( sentinels ) ;
355+
356+ return sentinels ;
357+ }
358+
359+ // Add this new method to validate sentinel configuration
360+ private async validateSentinelConfiguration (
361+ sentinels : Awaited < ReturnType < SentinelFramework [ 'spawnRedisSentinelSentinelDocker' ] > > [ ]
362+ ) : Promise < void > {
363+ const maxRetries = 10 ;
364+ const retryDelay = 2000 ;
365+
366+ // Wait for all sentinels to recognize each other
367+ for ( let retry = 0 ; retry < maxRetries ; retry ++ ) {
368+ try {
369+ let allConfigured = true ;
370+
371+ // Check that each sentinel recognizes all other sentinels
372+ for ( const sentinel of sentinels ) {
373+ if ( ! sentinel . client . isReady ) {
374+ allConfigured = false ;
375+ break ;
376+ }
377+
378+ // Check if this sentinel can see the master
379+ const masterInfo = await sentinel . client . sentinel . sentinelMaster ( this . config . sentinelName )
380+ . catch ( ( ) => null ) ;
381+
382+ if ( ! masterInfo ) {
383+ allConfigured = false ;
384+ break ;
385+ }
386+
387+ // Check if this sentinel can see other sentinels
388+ const knownSentinels = await sentinel . client . sentinel . sentinelSentinels ( this . config . sentinelName )
389+ . catch ( ( ) => [ ] ) ;
390+
391+ // Ensure this sentinel knows about all other sentinels (minus itself)
392+ if ( knownSentinels . length < sentinels . length - 1 ) {
393+ allConfigured = false ;
394+ break ;
395+ }
396+ }
397+
398+ if ( allConfigured ) {
399+ // All sentinels are properly configured
400+ return ;
401+ }
402+
403+ // Wait before retrying
404+ await setTimeout ( retryDelay ) ;
405+ } catch ( err ) {
406+ // Wait before retrying after an error
407+ await setTimeout ( retryDelay ) ;
408+ }
355409 }
410+
411+ throw new Error ( 'Sentinel configuration did not propagate correctly within the timeout period' ) ;
412+ }
356413
357- async getAllRunning ( ) {
414+ async getAllRunning ( ) : Promise < void > {
415+ const MAX_RETRIES = 5 ;
416+ const RETRY_DELAY = 500 ;
417+
418+ // Fix for Redis nodes
358419 for ( const port of this . getAllNodesPort ( ) ) {
359- let first = true ;
360- while ( await isPortAvailable ( port ) ) {
361- if ( ! first ) {
362- console . log ( `problematic restart ${ port } ` ) ;
363- await setTimeout ( 500 ) ;
364- } else {
365- first = false ;
420+ let retries = 0 ;
421+
422+ while ( await isPortAvailable ( port ) ) {
423+ if ( retries >= MAX_RETRIES ) {
424+ throw new Error ( `Failed to restart Redis node at port ${ port } after ${ MAX_RETRIES } attempts` ) ;
425+ }
426+
427+ try {
428+ await this . restartNode ( port . toString ( ) ) ;
429+ await setTimeout ( RETRY_DELAY ) ; // Give the node time to start
430+ } catch ( err ) {
431+ console . error ( `Error restarting Redis node at port ${ port } :` , err ) ;
366432 }
367- await this . restartNode ( port . toString ( ) ) ;
433+
434+ retries ++ ;
368435 }
369436 }
370437
438+ // Fix for Sentinel nodes
371439 for ( const port of this . getAllSentinelsPort ( ) ) {
372- let first = true ;
373- while ( await isPortAvailable ( port ) ) {
374- if ( ! first ) {
375- await setTimeout ( 500 ) ;
376- } else {
377- first = false ;
440+ let retries = 0 ;
441+
442+ while ( await isPortAvailable ( port ) ) {
443+ if ( retries >= MAX_RETRIES ) {
444+ throw new Error ( `Failed to restart Sentinel node at port ${ port } after ${ MAX_RETRIES } attempts` ) ;
445+ }
446+
447+ try {
448+ await this . restartSentinel ( port . toString ( ) ) ;
449+ await setTimeout ( RETRY_DELAY ) ; // Give the sentinel time to start
450+ } catch ( err ) {
451+ console . error ( `Error restarting Sentinel at port ${ port } :` , err ) ;
452+ }
453+
454+ retries ++ ;
455+ }
456+ }
457+
458+ // Verify all nodes are actually responsive
459+ await this . verifyNodesResponsive ( ) ;
460+ }
461+
462+ // Add a method to verify nodes are responsive
463+ private async verifyNodesResponsive ( ) : Promise < void > {
464+ const MAX_ATTEMPTS = 10 ;
465+ const ATTEMPT_DELAY = 2000 ;
466+
467+ // Check Redis nodes
468+ for ( const nodeInfo of this . #nodeMap. values ( ) ) {
469+ if ( ! nodeInfo . client . isReady ) {
470+ // Try to reconnect client if not ready
471+ for ( let attempt = 0 ; attempt < MAX_ATTEMPTS ; attempt ++ ) {
472+ try {
473+ await nodeInfo . client . connect ( ) ;
474+ await nodeInfo . client . ping ( ) ;
475+ break ;
476+ } catch ( err ) {
477+ if ( attempt === MAX_ATTEMPTS - 1 ) {
478+ throw new Error ( `Node at port ${ nodeInfo . docker . port } is not responsive after ${ MAX_ATTEMPTS } attempts` ) ;
479+ }
480+ await setTimeout ( ATTEMPT_DELAY ) ;
481+ }
482+ }
483+ }
484+ }
485+
486+ // Check Sentinel nodes
487+ for ( const sentinelInfo of this . #sentinelMap. values ( ) ) {
488+ if ( ! sentinelInfo . client . isReady ) {
489+ // Try to reconnect client if not ready
490+ for ( let attempt = 0 ; attempt < MAX_ATTEMPTS ; attempt ++ ) {
491+ try {
492+ await sentinelInfo . client . connect ( ) ;
493+ await sentinelInfo . client . ping ( ) ;
494+ break ;
495+ } catch ( err ) {
496+ if ( attempt === MAX_ATTEMPTS - 1 ) {
497+ throw new Error ( `Sentinel at port ${ sentinelInfo . docker . port } is not responsive after ${ MAX_ATTEMPTS } attempts` ) ;
498+ }
499+ await setTimeout ( ATTEMPT_DELAY ) ;
500+ }
378501 }
379- await this . restartSentinel ( port . toString ( ) ) ;
380502 }
381503 }
382504 }
@@ -486,8 +608,16 @@ export class SentinelFramework extends DockerBase {
486608 if ( node === undefined ) {
487609 throw new Error ( "unknown node: " + id ) ;
488610 }
489-
611+
612+ let masterPort : number | null = null ;
613+ try {
614+ masterPort = await this . getMasterPort ( ) ;
615+ } catch ( err ) {
616+ console . log ( `Could not determine master before restarting node ${ id } : ${ err } ` ) ;
617+ }
618+
490619 await this . dockerStart ( node . docker . dockerId ) ;
620+
491621 if ( ! node . client . isOpen ) {
492622 node . client = await RedisClient . create ( {
493623 password : this . config . password ,
@@ -496,6 +626,17 @@ export class SentinelFramework extends DockerBase {
496626 }
497627 } ) . on ( "error" , ( ) => { } ) . connect ( ) ;
498628 }
629+
630+ // Wait for node to be ready
631+ await setTimeout ( 500 ) ;
632+
633+ if ( masterPort && node . docker . port !== masterPort ) {
634+ try {
635+ await node . client . replicaOf ( '127.0.0.1' , masterPort ) ;
636+ } catch ( err ) {
637+ console . error ( `Failed to reconfigure node ${ id } as replica: ${ err } ` ) ;
638+ }
639+ }
499640 }
500641
501642 async stopSentinel ( id : string ) {
0 commit comments