Skip to content

Commit c031b64

Browse files
committed
fix(consensus): fix bootstrap stuck at genesis after blocksync mode changes
Two issues prevented consensus from starting at bootstrap:

1. TendermintNewHeight was incorrectly guarded by a blocksync mode check, preventing state machine initialization. State must be initialized regardless of mode - only active participation should be blocked.
2. The Resume handler did nothing when is_paused=false (the initial state). At bootstrap, enter_consensus_mode sends Resume, but resume_consensus returned early due to the is_paused guard, so start_height was never called and no timeouts were scheduled.

Fixes:

- Remove the blocksync guard from the TendermintNewHeight handler
- Set is_paused=true before calling resume_consensus to ensure it executes
- Handle the height==current_height case for bootstrap/WAL recovery scenarios
1 parent 692d239 commit c031b64

File tree

2 files changed

+16
-23
lines changed

2 files changed

+16
-23
lines changed

app/src/actors_v2/chain/handlers.rs

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2052,21 +2052,10 @@ impl Handler<ChainMessage> for ChainActor {
20522052

20532053
let actor = self.clone();
20542054
Box::pin(async move {
2055-
// TM-B5 Fix: Check consensus mode - skip height init during blocksync
2056-
// NewHeight is part of startup/recovery, so we queue it implicitly
2057-
// by not processing until consensus mode is entered
2058-
let mode = actor.get_consensus_mode().await;
2059-
if mode == ConsensusMode::Blocksync {
2060-
debug!(
2061-
correlation_id = %correlation_id,
2062-
height = height,
2063-
"TendermintNewHeight deferred - in Blocksync mode"
2064-
);
2065-
// Return success but don't actually initialize
2066-
// The height will be initialized when entering consensus mode
2067-
return Ok(ChainResponse::TendermintHeightStarted { height, round: 0 });
2068-
}
2069-
2055+
// TM-B5 Fix Note: TendermintNewHeight MUST be processed regardless of mode.
2056+
// This initializes the state machine - without it, consensus can never start.
2057+
// The mode guards on TendermintPropose/Timeout prevent active participation
2058+
// during blocksync, but the state machine must be initialized.
20702059
let (height, round) = actor
20712060
.handle_tendermint_new_height(height, correlation_id)
20722061
.await?;

app/src/actors_v2/tendermint_driver.rs

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,22 +1038,26 @@ impl Handler<TendermintDriverMessage> for TendermintDriver {
10381038
resume_height = height,
10391039
"Resuming consensus at new height after sync"
10401040
);
1041+
// Need to set is_paused so resume_consensus will execute
1042+
self.is_paused = true;
10411043
self.resume_consensus(height, ctx);
1042-
} else if self.is_paused {
1043-
// Same or lower height but we're paused - just unpause
1044+
} else if height == self.current_height {
1045+
// TM-B5 Fix: Same height - restart consensus round
1046+
// This handles bootstrap case where initial proposal was rejected
1047+
// due to blocksync guards, but now we're in consensus mode.
10441048
info!(
10451049
current_height = self.current_height,
1046-
requested_height = height,
1047-
"Unpausing consensus at current height"
1050+
"Restarting consensus at current height after sync"
10481051
);
1049-
self.is_paused = false;
1050-
// Re-schedule timeout for current position
1051-
self.schedule_timeout(ctx);
1052+
// Force restart by setting paused and calling resume
1053+
self.is_paused = true;
1054+
self.resume_consensus(height, ctx);
10521055
} else {
1056+
// height < current_height - stale resume, ignore
10531057
trace!(
10541058
current_height = self.current_height,
10551059
requested_height = height,
1056-
"Resume ignored - already running at higher height"
1060+
"Resume ignored - already at higher height"
10571061
);
10581062
}
10591063
}

0 commit comments

Comments
 (0)