@@ -114,6 +114,23 @@ impl Default for FutureHeightTracker {
114114 }
115115}
116116
/// Two-state mode switch used by the chain actor for blocksync handling.
///
/// A node that has fallen behind the network (for example after a restart
/// or a network partition) is meant to catch up in `Blocksync` mode instead
/// of taking part in consensus at a stale height. This is the mitigation for
/// stuck-consensus scenarios such as TM-B5, where a node misses commits and
/// keeps cycling rounds indefinitely.
///
/// The default value is [`ConsensusMode::Blocksync`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ConsensusMode {
    /// Catch-up mode: the node is still syncing towards network height, so
    /// consensus messages are rejected and sync blocks are accepted.
    #[default]
    Blocksync,
    /// Normal operation: the node is at the network tip and takes part in
    /// Tendermint consensus (proposing and voting).
    Consensus,
}
133+
117134/// Simplified ChainActor - core blockchain functionality (Clone-enabled for async handlers)
118135#[ derive( Clone ) ]
119136pub struct ChainActor {
@@ -200,6 +217,11 @@ pub struct ChainActor {
200217 /// When votes for heights beyond our current height arrive, this helps determine
201218 /// if we've fallen behind and need to trigger catch-up sync.
202219 pub ( crate ) future_height_tracker : Arc < RwLock < FutureHeightTracker > > ,
220+
221+ /// Consensus mode state machine.
222+ /// Determines whether to process consensus messages (Consensus mode) or
223+ /// defer them while catching up (Blocksync mode).
224+ pub ( crate ) consensus_mode : Arc < RwLock < ConsensusMode > > ,
203225}
204226
205227impl ChainActor {
@@ -250,6 +272,8 @@ impl ChainActor {
250272 tendermint_sync_validator : None ,
251273 // Future height vote tracking for sync detection
252274 future_height_tracker : Arc :: new ( RwLock :: new ( FutureHeightTracker :: new ( ) ) ) ,
275+ // Start in Blocksync mode - transition to Consensus after sync completes
276+ consensus_mode : Arc :: new ( RwLock :: new ( ConsensusMode :: Blocksync ) ) ,
253277 }
254278 }
255279
@@ -352,6 +376,96 @@ impl ChainActor {
352376 self . engine_actor = Some ( addr) ;
353377 }
354378
379+ // =========================================================================
380+ // Consensus Mode Transitions (TM-B5 Fix)
381+ // =========================================================================
382+
383+ /// Transition to Consensus mode after sync completion.
384+ ///
385+ /// Called when node reaches network tip and should participate in consensus.
386+ /// This resumes the TendermintDriver and resets the future height tracker.
387+ ///
388+ /// # Arguments
389+ /// * `height` - The height at which to resume consensus
390+ /// * `correlation_id` - Tracing correlation ID
391+ pub async fn enter_consensus_mode ( & self , height : u64 , correlation_id : Uuid ) {
392+ let mut mode = self . consensus_mode . write ( ) . await ;
393+ if * mode == ConsensusMode :: Blocksync {
394+ * mode = ConsensusMode :: Consensus ;
395+ info ! (
396+ correlation_id = %correlation_id,
397+ height = height,
398+ "Entered Consensus mode - resuming participation"
399+ ) ;
400+
401+ // Reset future height tracker since we're now caught up
402+ self . future_height_tracker . write ( ) . await . reset ( ) ;
403+
404+ // Resume TendermintDriver
405+ if let Some ( ref driver) = self . tendermint_driver {
406+ driver. do_send ( crate :: actors_v2:: tendermint_driver:: TendermintDriverMessage :: Resume {
407+ height,
408+ } ) ;
409+ }
410+ } else {
411+ debug ! (
412+ correlation_id = %correlation_id,
413+ "Already in Consensus mode"
414+ ) ;
415+ }
416+ }
417+
418+ /// Transition to Blocksync mode when node falls behind.
419+ ///
420+ /// Called when node detects it's behind network height (via future height
421+ /// votes, NewRound announcements, or sync health checks). This pauses
422+ /// consensus to prevent the node from cycling rounds at the wrong height.
423+ ///
424+ /// # Arguments
425+ /// * `correlation_id` - Tracing correlation ID
426+ pub async fn enter_blocksync_mode ( & self , correlation_id : Uuid ) {
427+ let mut mode = self . consensus_mode . write ( ) . await ;
428+ if * mode == ConsensusMode :: Consensus {
429+ * mode = ConsensusMode :: Blocksync ;
430+ info ! (
431+ correlation_id = %correlation_id,
432+ "Entered Blocksync mode - pausing consensus participation"
433+ ) ;
434+
435+ // Pause TendermintDriver
436+ if let Some ( ref driver) = self . tendermint_driver {
437+ driver. do_send ( crate :: actors_v2:: tendermint_driver:: TendermintDriverMessage :: Pause ) ;
438+ }
439+ } else {
440+ debug ! (
441+ correlation_id = %correlation_id,
442+ "Already in Blocksync mode"
443+ ) ;
444+ }
445+ }
446+
447+ /// Check if currently in Consensus mode.
448+ pub async fn is_consensus_mode ( & self ) -> bool {
449+ * self . consensus_mode . read ( ) . await == ConsensusMode :: Consensus
450+ }
451+
452+ /// Get current consensus mode.
453+ pub async fn get_consensus_mode ( & self ) -> ConsensusMode {
454+ * self . consensus_mode . read ( ) . await
455+ }
456+
457+ /// Get current Tendermint height from state machine.
458+ /// Returns 0 if Tendermint is not configured.
459+ pub async fn get_tendermint_height ( & self ) -> Result < u64 , ChainError > {
460+ if let Some ( ref tm_state) = self . tendermint_state {
461+ let state = tm_state. read ( ) . await ;
462+ Ok ( state. height )
463+ } else {
464+ // Not configured - return storage height as fallback
465+ Ok ( self . state . get_height ( ) . await )
466+ }
467+ }
468+
355469 /// Record activity and update metrics
356470 pub ( crate ) fn record_activity ( & mut self ) {
357471 self . last_activity = Instant :: now ( ) ;
@@ -860,7 +974,9 @@ impl ChainActor {
860974 }
861975 } ;
862976
863- const HEALTH_THRESHOLD : u64 = 10 ;
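977+ // TM-B5 Fix: Lowered from 10 to 1 so that a lagging node is detected quickly.
978+ // NOTE(review): the strict `>` check below fires only when the node is 2+ blocks behind; catching a single-block lag needs `>=` or a threshold of 0.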
979+ const HEALTH_THRESHOLD : u64 = 1 ;
864980
865981 if network_height > storage_height + HEALTH_THRESHOLD {
866982 warn ! (
0 commit comments