Skip to content

Commit 6a43b8d

Browse files
committed
Sync fixes for fusaka-devnet-4 (#7876)
Squashed commit of the following: commit d8b6a99 Author: Jimmy Chen <[email protected]> Date: Mon Aug 18 23:48:28 2025 +1000 Remove unnecessary test commit 832d862 Author: Jimmy Chen <[email protected]> Date: Fri Aug 15 16:22:48 2025 +1000 Clippy commit 0437243 Merge: a250daa 317dc0f Author: Jimmy Chen <[email protected]> Date: Fri Aug 15 16:22:30 2025 +1000 Merge branch 'unstable' into fusaka-devnet-4-revive commit a250daa Author: Jimmy Chen <[email protected]> Date: Fri Aug 15 16:02:05 2025 +1000 Clean up and refactor custody peer checks commit ec7956b Author: Jimmy Chen <[email protected]> Date: Fri Aug 15 14:40:06 2025 +1000 Revert "Increase target peer count to 200." This reverts commit 5f38f31. commit cdc417f Author: Jimmy Chen <[email protected]> Date: Thu Aug 14 23:39:10 2025 +1000 Prevent sampling subnets from getting pruned if below min target threshold. commit 9ce9318 Author: Jimmy Chen <[email protected]> Date: Thu Aug 14 17:39:57 2025 +1000 Check for sync status for custody subnet discovery commit 1b8f0e4 Author: Jimmy Chen <[email protected]> Date: Thu Aug 14 17:30:30 2025 +1000 Fix tests. commit 1465d72 Author: Jimmy Chen <[email protected]> Date: Thu Aug 14 11:47:33 2025 +1000 Only send lookup requests to peers that are synced or advacned. commit 5f38f31 Author: Jimmy Chen <[email protected]> Date: Thu Aug 14 11:30:04 2025 +1000 Increase target peer count to 200. commit 56ca5bf Author: Jimmy Chen <[email protected]> Date: Thu Aug 14 11:14:20 2025 +1000 Remove penalties for block sidecar coupling. commit 8e95a54 Author: Jimmy Chen <[email protected]> Date: Thu Aug 14 11:07:50 2025 +1000 Revert "Reduce number of head chains we sync to" This reverts commit 1f776a4 commit 0144205 Author: Pawan Dhananjay <[email protected]> Date: Wed Aug 13 17:33:47 2025 -0700 Request columns from global peer pool commit a8f6801 Author: Pawan Dhananjay <[email protected]> Date: Wed Aug 13 16:37:12 2025 -0700 Priorotize status v2 commit c8bbe47 Author: Pawan Dhananjay <[email protected]> Date: Wed Aug 13 16:36:56 2025 -0700 Penalize if invalid EL block commit 1f776a4 Author: Pawan Dhananjay <[email protected]> Date: Wed Aug 13 16:36:13 2025 -0700 Reduce number of head chains we sync to
1 parent 140d61a commit 6a43b8d

File tree

13 files changed

+392
-88
lines changed

13 files changed

+392
-88
lines changed

beacon_node/lighthouse_network/src/peer_manager/mod.rs

Lines changed: 272 additions & 13 deletions
Large diffs are not rendered by default.

beacon_node/lighthouse_network/src/peer_manager/peerdb.rs

Lines changed: 57 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ impl<E: EthSpec> PeerDB<E> {
300300
.filter(move |(_, info)| {
301301
// We check both the metadata and gossipsub data as we only want to count long-lived subscribed peers
302302
info.is_connected()
303+
&& info.is_synced_or_advanced()
303304
&& info.on_subnet_metadata(&subnet)
304305
&& info.on_subnet_gossipsub(&subnet)
305306
&& info.is_good_gossipsub_peer()
@@ -318,40 +319,72 @@ impl<E: EthSpec> PeerDB<E> {
318319
.filter(move |(_, info)| {
319320
// The custody_subnets hashset can be populated via enr or metadata
320321
let is_custody_subnet_peer = info.is_assigned_to_custody_subnet(&subnet);
321-
info.is_connected() && info.is_good_gossipsub_peer() && is_custody_subnet_peer
322+
info.is_connected()
323+
&& info.is_good_gossipsub_peer()
324+
&& is_custody_subnet_peer
325+
&& matches!(
326+
info.sync_status(),
327+
SyncStatus::Synced { .. } | SyncStatus::Advanced { .. }
328+
)
322329
})
323330
.map(|(peer_id, _)| peer_id)
324331
}
325332

326-
/// Returns an iterator of all peers that are supposed to be custodying
327-
/// the given subnet id.
328-
pub fn good_range_sync_custody_subnet_peers(
333+
/// Checks if there is at least one good peer for each specified custody subnet for the given epoch.
334+
/// A "good" peer is one that is both connected and synced (or advanced) for the specified epoch.
335+
pub fn has_good_custody_range_sync_peer(
329336
&self,
330-
subnet: DataColumnSubnetId,
331-
) -> impl Iterator<Item = &PeerId> {
332-
self.peers
333-
.iter()
334-
.filter(move |(_, info)| {
335-
// The custody_subnets hashset can be populated via enr or metadata
336-
info.is_connected() && info.is_assigned_to_custody_subnet(&subnet)
337-
})
338-
.map(|(peer_id, _)| peer_id)
337+
subnets: &HashSet<DataColumnSubnetId>,
338+
epoch: Epoch,
339+
) -> bool {
340+
let mut remaining_subnets = subnets.clone();
341+
342+
let good_sync_peers_for_epoch = self.peers.values().filter(|&info| {
343+
info.is_connected()
344+
&& match info.sync_status() {
345+
SyncStatus::Synced { info } | SyncStatus::Advanced { info } => {
346+
info.has_slot(epoch.end_slot(E::slots_per_epoch()))
347+
}
348+
SyncStatus::IrrelevantPeer
349+
| SyncStatus::Behind { .. }
350+
| SyncStatus::Unknown => false,
351+
}
352+
});
353+
354+
for info in good_sync_peers_for_epoch {
355+
for subnet in info.custody_subnets_iter() {
356+
if remaining_subnets.remove(subnet) && remaining_subnets.is_empty() {
357+
return true;
358+
}
359+
}
360+
}
361+
362+
false
339363
}
340364

341-
/// Returns `true` if the given peer is assigned to the given subnet.
342-
/// else returns `false`
343-
///
344-
/// Returns `false` if peer doesn't exist in peerdb.
345-
pub fn is_good_range_sync_custody_subnet_peer(
365+
/// Checks if there are sufficient good peers for a single custody subnet.
366+
/// A "good" peer is one that is both connected and synced (or advanced).
367+
pub fn has_good_peers_in_custody_subnet(
346368
&self,
347-
subnet: DataColumnSubnetId,
348-
peer: &PeerId,
369+
subnet: &DataColumnSubnetId,
370+
target_peers: usize,
349371
) -> bool {
350-
if let Some(info) = self.peers.get(peer) {
351-
info.is_connected() && info.is_assigned_to_custody_subnet(&subnet)
352-
} else {
353-
false
372+
let mut peer_count = 0usize;
373+
for info in self
374+
.peers
375+
.values()
376+
.filter(|info| info.is_connected() && info.is_synced_or_advanced())
377+
{
378+
if info.is_assigned_to_custody_subnet(subnet) {
379+
peer_count += 1;
380+
}
381+
382+
if peer_count >= target_peers {
383+
return true;
384+
}
354385
}
386+
387+
false
355388
}
356389

357390
/// Gives the ids of all known disconnected peers.

beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ impl<E: EthSpec> PeerInfo<E> {
176176

177177
/// Returns the number of long lived subnets a peer is subscribed to.
178178
// NOTE: This currently excludes sync committee subnets
179-
pub fn long_lived_subnet_count(&self) -> usize {
179+
pub fn long_lived_attnet_count(&self) -> usize {
180180
if let Some(meta_data) = self.meta_data.as_ref() {
181181
return meta_data.attnets().num_set_bits();
182182
} else if let Some(enr) = self.enr.as_ref()
@@ -222,6 +222,13 @@ impl<E: EthSpec> PeerInfo<E> {
222222
}
223223
}
224224
}
225+
226+
long_lived_subnets.extend(
227+
self.custody_subnets
228+
.iter()
229+
.map(|&id| Subnet::DataColumn(id)),
230+
);
231+
225232
long_lived_subnets
226233
}
227234

@@ -262,6 +269,11 @@ impl<E: EthSpec> PeerInfo<E> {
262269
{
263270
return true;
264271
}
272+
273+
if !self.custody_subnets.is_empty() {
274+
return true;
275+
}
276+
265277
false
266278
}
267279

@@ -318,6 +330,14 @@ impl<E: EthSpec> PeerInfo<E> {
318330
)
319331
}
320332

333+
/// Checks if the peer is synced or advanced.
334+
pub fn is_synced_or_advanced(&self) -> bool {
335+
matches!(
336+
self.sync_status,
337+
SyncStatus::Synced { .. } | SyncStatus::Advanced { .. }
338+
)
339+
}
340+
321341
/// Checks if the status is connected.
322342
pub fn is_dialing(&self) -> bool {
323343
matches!(self.connection_status, PeerConnectionStatus::Dialing { .. })

beacon_node/lighthouse_network/src/rpc/protocol.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -825,8 +825,8 @@ impl<E: EthSpec> RequestType<E> {
825825
match self {
826826
// add more protocols when versions/encodings are supported
827827
RequestType::Status(_) => vec![
828-
ProtocolId::new(SupportedProtocol::StatusV1, Encoding::SSZSnappy),
829828
ProtocolId::new(SupportedProtocol::StatusV2, Encoding::SSZSnappy),
829+
ProtocolId::new(SupportedProtocol::StatusV1, Encoding::SSZSnappy),
830830
],
831831
RequestType::Goodbye(_) => vec![ProtocolId::new(
832832
SupportedProtocol::GoodbyeV1,

beacon_node/lighthouse_network/src/service/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1911,7 +1911,7 @@ impl<E: EthSpec> Network<E> {
19111911
}
19121912
},
19131913
};
1914-
debug!(our_addr = %local_addr, from = %send_back_addr, error = error_repr, "Failed incoming connection");
1914+
tracing::trace!(our_addr = %local_addr, from = %send_back_addr, error = error_repr, "Failed incoming connection");
19151915
None
19161916
}
19171917
SwarmEvent::OutgoingConnectionError {

beacon_node/lighthouse_network/tests/rpc_tests.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,21 +76,23 @@ fn test_tcp_status_rpc() {
7676
.await;
7777

7878
// Dummy STATUS RPC message
79-
let rpc_request = RequestType::Status(StatusMessage::V1(StatusMessageV1 {
79+
let rpc_request = RequestType::Status(StatusMessage::V2(StatusMessageV2 {
8080
fork_digest: [0; 4],
8181
finalized_root: Hash256::zero(),
8282
finalized_epoch: Epoch::new(1),
8383
head_root: Hash256::zero(),
8484
head_slot: Slot::new(1),
85+
earliest_available_slot: Slot::new(0),
8586
}));
8687

8788
// Dummy STATUS RPC message
88-
let rpc_response = Response::Status(StatusMessage::V1(StatusMessageV1 {
89+
let rpc_response = Response::Status(StatusMessage::V2(StatusMessageV2 {
8990
fork_digest: [0; 4],
9091
finalized_root: Hash256::zero(),
9192
finalized_epoch: Epoch::new(1),
9293
head_root: Hash256::zero(),
9394
head_slot: Slot::new(1),
95+
earliest_available_slot: Slot::new(0),
9496
}));
9597

9698
// build the sender future
@@ -1205,21 +1207,23 @@ fn test_delayed_rpc_response() {
12051207
.await;
12061208

12071209
// Dummy STATUS RPC message
1208-
let rpc_request = RequestType::Status(StatusMessage::V1(StatusMessageV1 {
1210+
let rpc_request = RequestType::Status(StatusMessage::V2(StatusMessageV2 {
12091211
fork_digest: [0; 4],
12101212
finalized_root: Hash256::from_low_u64_be(0),
12111213
finalized_epoch: Epoch::new(1),
12121214
head_root: Hash256::from_low_u64_be(0),
12131215
head_slot: Slot::new(1),
1216+
earliest_available_slot: Slot::new(0),
12141217
}));
12151218

12161219
// Dummy STATUS RPC message
1217-
let rpc_response = Response::Status(StatusMessage::V1(StatusMessageV1 {
1220+
let rpc_response = Response::Status(StatusMessage::V2(StatusMessageV2 {
12181221
fork_digest: [0; 4],
12191222
finalized_root: Hash256::from_low_u64_be(0),
12201223
finalized_epoch: Epoch::new(1),
12211224
head_root: Hash256::from_low_u64_be(0),
12221225
head_slot: Slot::new(1),
1226+
earliest_available_slot: Slot::new(0),
12231227
}));
12241228

12251229
// build the sender future
@@ -1335,21 +1339,23 @@ fn test_active_requests() {
13351339
.await;
13361340

13371341
// Dummy STATUS RPC request.
1338-
let rpc_request = RequestType::Status(StatusMessage::V1(StatusMessageV1 {
1342+
let rpc_request = RequestType::Status(StatusMessage::V2(StatusMessageV2 {
13391343
fork_digest: [0; 4],
13401344
finalized_root: Hash256::from_low_u64_be(0),
13411345
finalized_epoch: Epoch::new(1),
13421346
head_root: Hash256::from_low_u64_be(0),
13431347
head_slot: Slot::new(1),
1348+
earliest_available_slot: Slot::new(0),
13441349
}));
13451350

13461351
// Dummy STATUS RPC response.
1347-
let rpc_response = Response::Status(StatusMessage::V1(StatusMessageV1 {
1352+
let rpc_response = Response::Status(StatusMessage::V2(StatusMessageV2 {
13481353
fork_digest: [0; 4],
13491354
finalized_root: Hash256::zero(),
13501355
finalized_epoch: Epoch::new(1),
13511356
head_root: Hash256::zero(),
13521357
head_slot: Slot::new(1),
1358+
earliest_available_slot: Slot::new(0),
13531359
}));
13541360

13551361
// Number of requests.

beacon_node/network/src/network_beacon_processor/rpc_methods.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
212212
send_block_count += 1;
213213
}
214214
Ok(None) => {
215-
debug!(
215+
tracing::trace!(
216216
%peer_id,
217217
request_root = ?root,
218218
"Peer requested unknown block"

beacon_node/network/src/network_beacon_processor/sync_methods.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ use beacon_chain::data_availability_checker::AvailabilityCheckError;
1010
use beacon_chain::data_availability_checker::MaybeAvailableBlock;
1111
use beacon_chain::{
1212
AvailabilityProcessingStatus, BeaconChainTypes, BlockError, ChainSegmentResult,
13-
HistoricalBlockError, NotifyExecutionLayer, validator_monitor::get_slot_delay_ms,
13+
ExecutionPayloadError, HistoricalBlockError, NotifyExecutionLayer,
14+
validator_monitor::get_slot_delay_ms,
1415
};
1516
use beacon_processor::{
1617
AsyncFn, BlockingFn, DuplicateCache,
@@ -774,7 +775,19 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
774775
})
775776
}
776777
ref err @ BlockError::ExecutionPayloadError(ref epe) => {
777-
if !epe.penalize_peer() {
778+
if matches!(epe, ExecutionPayloadError::RejectedByExecutionEngine { .. }) {
779+
debug!(
780+
error = ?err,
781+
"Invalid execution payload rejected by EE"
782+
);
783+
Err(ChainSegmentFailed {
784+
message: format!(
785+
"Peer sent a block containing invalid execution payload. Reason: {:?}",
786+
err
787+
),
788+
peer_action: Some(PeerAction::LowToleranceError),
789+
})
790+
} else if !epe.penalize_peer() {
778791
// These errors indicate an issue with the EL and not the `ChainSegment`.
779792
// Pause the syncing while the EL recovers
780793
debug!(

beacon_node/network/src/sync/backfill_sync/mod.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,6 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
313313
CouplingError::DataColumnPeerFailure {
314314
error,
315315
faulty_peers,
316-
action,
317316
exceeded_retries,
318317
} => {
319318
debug!(?batch_id, error, "Block components coupling error");
@@ -325,9 +324,6 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
325324
failed_columns.insert(*column);
326325
failed_peers.insert(*peer);
327326
}
328-
for peer in failed_peers.iter() {
329-
network.report_peer(*peer, *action, "failed to return columns");
330-
}
331327

332328
// Only retry if peer failure **and** retries have been exceeded
333329
if !*exceeded_retries {
@@ -1120,13 +1116,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
11201116
.sampling_subnets()
11211117
.iter()
11221118
.all(|subnet_id| {
1123-
let peer_count = network
1119+
let min_peer_count = 1;
1120+
network
11241121
.network_globals()
11251122
.peers
11261123
.read()
1127-
.good_range_sync_custody_subnet_peers(*subnet_id)
1128-
.count();
1129-
peer_count > 0
1124+
.has_good_peers_in_custody_subnet(subnet_id, min_peer_count)
11301125
})
11311126
} else {
11321127
true

0 commit comments

Comments
 (0)