Skip to content

Commit 3121f40

Browse files
committed
fix(consensus): prevent blocksync race condition during commit step
When a node was in the Commit step for height H and received NewRound(H+1), it incorrectly entered blocksync mode even though it was about to advance to H+1 naturally. This caused network-wide deadlocks in which the committing node waited for a block that could not be finalized without its own vote. Add a get_tendermint_height_and_step() helper that fetches both values atomically, and skip blocksync when NewRound(H+1) arrives while the node is in the Commit step for H.
1 parent c031b64 commit 3121f40

File tree

2 files changed

+44
-7
lines changed

2 files changed

+44
-7
lines changed

app/src/actors_v2/chain/actor.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ use lighthouse_wrapper::types::MainnetEthSpec;
3232

3333
// Tendermint imports
3434
use super::tendermint::{
35-
Commit, ConsensusWAL, TendermintState, TimeoutEvent, TimeoutScheduler, ValidatorSet,
35+
Commit, ConsensusWAL, TendermintState, TendermintStep, TimeoutEvent, TimeoutScheduler,
36+
ValidatorSet,
3637
};
3738

3839
pub(crate) const DEFAULT_MAX_PENDING_IMPORTS: usize = 1000;
@@ -466,6 +467,17 @@ impl ChainActor {
466467
}
467468
}
468469

470+
/// Get current Tendermint height and step atomically.
471+
/// Returns (height, step). Returns (storage_height, Propose) if Tendermint is not configured.
472+
pub async fn get_tendermint_height_and_step(&self) -> Result<(u64, TendermintStep), ChainError> {
473+
if let Some(ref tm_state) = self.tendermint_state {
474+
let state = tm_state.read().await;
475+
Ok((state.height, state.step))
476+
} else {
477+
Ok((self.state.get_height().await, TendermintStep::Propose))
478+
}
479+
}
480+
469481
/// Record activity and update metrics
470482
pub(crate) fn record_activity(&mut self) {
471483
self.last_activity = Instant::now();

app/src/actors_v2/chain/handlers.rs

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use super::{
1717
AuxPowParams, BlockSource, ChainManagerMessage, ChainManagerResponse, ChainMessage,
1818
ChainResponse, CreateAuxBlock, SubmitAuxBlock,
1919
},
20+
tendermint::TendermintStep,
2021
ChainActor, ChainError,
2122
};
2223

@@ -2349,23 +2350,47 @@ impl Handler<ChainMessage> for ChainActor {
23492350
let correlation_id = correlation_id.unwrap_or_else(uuid::Uuid::new_v4);
23502351

23512352
Box::pin(async move {
2352-
// TM-B5 Fix: Check if peer is at a height we haven't reached
2353-
let current_height = actor.get_tendermint_height().await.unwrap_or(0);
2353+
// Get current height AND step atomically to avoid race condition
2354+
let (current_height, current_step) = actor
2355+
.get_tendermint_height_and_step()
2356+
.await
2357+
.unwrap_or((0, TendermintStep::Propose));
2358+
2359+
// Determine if we should enter blocksync
2360+
let should_sync = if height > current_height {
2361+
// TM-B5 Fix: Don't enter blocksync if we're in Commit step for height H
2362+
// and receive NewRound(H+1) - we're about to advance naturally
2363+
if height == current_height + 1 && current_step == TendermintStep::Commit {
2364+
debug!(
2365+
correlation_id = %correlation_id,
2366+
announced_height = height,
2367+
current_height = current_height,
2368+
current_step = ?current_step,
2369+
peer_id = ?peer_id,
2370+
"NewRound(H+1) received while in Commit step - not behind, continuing commit"
2371+
);
2372+
false
2373+
} else {
2374+
// Genuine gap: either gap > 1, or we're not in Commit step
2375+
true
2376+
}
2377+
} else {
2378+
false
2379+
};
23542380

2355-
if height > current_height {
2381+
if should_sync {
23562382
info!(
23572383
correlation_id = %correlation_id,
23582384
announced_height = height,
23592385
announced_round = round,
23602386
current_height = current_height,
2387+
current_step = ?current_step,
23612388
peer_id = ?peer_id,
23622389
"NewRound announcement indicates we're behind - triggering sync"
23632390
);
23642391

2365-
// Enter blocksync mode and trigger sync
23662392
actor.enter_blocksync_mode(correlation_id).await;
23672393

2368-
// Trigger sync to catch up
23692394
if let Some(ref sync_actor) = actor.sync_actor {
23702395
sync_actor.do_send(crate::actors_v2::network::SyncMessage::StartSync {
23712396
start_height: current_height,
@@ -2377,7 +2402,7 @@ impl Handler<ChainMessage> for ChainActor {
23772402
correlation_id = %correlation_id,
23782403
announced_height = height,
23792404
current_height = current_height,
2380-
"NewRound announcement for current/past height - ignoring"
2405+
"NewRound for current/past height or expected advance - ignoring"
23812406
);
23822407
}
23832408

0 commit comments

Comments
 (0)