Skip to content

Commit ab272e8

Browse files
michaelgpt and claude committed
fix(sync): reduce quorum to 1 and add orphan-triggered re-sync
Fixes two critical bugs identified in chaos test analysis:

1. Quorum Requirement (config.rs): Reduced min_peer_quorum from 2 to 1
   - In 2-node networks, a quorum of 2 blocked gap detection since only 1 peer existed
   - Single peer height reports are now trusted for recovery

2. Orphan-Triggered Re-sync (handlers.rs): Added gap check after caching orphan blocks
   - If orphan gap >= 5 blocks, triggers ForceResync immediately
   - Handles case where gossipsub delivers blocks but Active Height Monitoring fails to detect the gap
   - Example: Node 2 receives orphan block 68 while at height 47, gap of 21 triggers immediate re-sync

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 4d56a9d commit ab272e8

File tree

2 files changed

+45
-1
lines changed

2 files changed

+45
-1
lines changed

app/src/actors_v2/chain/handlers.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,49 @@ impl Handler<ChainMessage> for ChainActor {
867867
block_hash = %block_hash,
868868
"Orphan block cached successfully"
869869
);
870+
871+
// Bug Fix: Orphan-triggered re-sync
872+
// If we receive orphan blocks that are far ahead of our current height,
873+
// it indicates we've fallen behind and need to re-sync.
874+
// This handles the case where gossipsub delivers blocks but
875+
// Active Height Monitoring fails to detect the gap.
876+
const ORPHAN_RESYNC_THRESHOLD: u64 = 5;
877+
878+
let observed_height = {
879+
let cache = self_clone.orphan_cache.read().await;
880+
cache.observed_height()
881+
};
882+
883+
let gap = observed_height.saturating_sub(current_height);
884+
885+
if gap >= ORPHAN_RESYNC_THRESHOLD {
886+
warn!(
887+
correlation_id = %correlation_id,
888+
current_height = current_height,
889+
observed_height = observed_height,
890+
gap = gap,
891+
threshold = ORPHAN_RESYNC_THRESHOLD,
892+
"Large orphan gap detected - triggering re-sync"
893+
);
894+
895+
// Trigger ForceResync to fetch missing blocks
896+
if let Some(ref sync_actor) = self_clone.sync_actor {
897+
let reason = format!(
898+
"Orphan gap {} exceeds threshold {} (current: {}, observed: {})",
899+
gap, ORPHAN_RESYNC_THRESHOLD, current_height, observed_height
900+
);
901+
if let Err(e) = sync_actor.send(
902+
crate::actors_v2::network::SyncMessage::ForceResync { reason }
903+
).await {
904+
warn!(
905+
correlation_id = %correlation_id,
906+
error = %e,
907+
"Failed to trigger ForceResync from orphan detection"
908+
);
909+
}
910+
}
911+
}
912+
870913
// Return Ok - the orphan block is cached rather than dropped, but it is still reported via BlockRejected with an explanatory reason
871914
return Ok(ChainResponse::BlockRejected {
872915
reason: format!("Orphan block cached: parent {} not found", orphan_parent_hash),

app/src/actors_v2/network/config.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,8 @@ impl Default for SyncConfig {
153153
// Network height monitoring defaults
154154
peer_height_poll_interval_secs: 30,
155155
resync_threshold: 10,
156-
min_peer_quorum: 2,
156+
// Set to 1 for 2-node networks: a quorum of 2 prevents recovery when only 1 peer exists
157+
min_peer_quorum: 1,
157158
peer_height_max_age_secs: 60,
158159
sync_cooldown_secs: 30,
159160
}

0 commit comments

Comments
 (0)