From fd10b632740e441354c4547a12727c29dd288239 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Fri, 5 Sep 2025 07:54:30 +1000 Subject: [PATCH 01/45] Add co-author to mergify commits (#7993) * Add co-author to mergify commits. * Remove unnecessary pull request rules from mergify config. * Revert automation removals --- .github/mergify.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/mergify.yml b/.github/mergify.yml index 4ab73bcf079..0b917b25467 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -105,6 +105,10 @@ queue_rules: {{ body | get_section("## Proposed Changes", "") }} + + {% for commit in commits | unique(attribute='email_author') %} + Co-Authored-By: {{ commit.author }} <{{ commit.email_author }}> + {% endfor %} queue_conditions: - "#approved-reviews-by >= 1" - "check-success=license/cla" From 148997b39ea79c0f1788c1adcfa7bee2a1b926ca Mon Sep 17 00:00:00 2001 From: sashaodessa <140454972+sashaodessa@users.noreply.github.com> Date: Tue, 9 Sep 2025 17:23:06 +0200 Subject: [PATCH 02/45] fix TOCTOU vulnerability in unused_port module --- .../lighthouse_network/src/listen_addr.rs | 30 ++++- beacon_node/src/config.rs | 110 +++++++----------- common/unused_port/src/lib.rs | 33 ++++++ lighthouse/tests/beacon_node.rs | 23 ++-- lighthouse/tests/boot_node.rs | 8 +- 5 files changed, 121 insertions(+), 83 deletions(-) diff --git a/beacon_node/lighthouse_network/src/listen_addr.rs b/beacon_node/lighthouse_network/src/listen_addr.rs index 3b0ff98b34f..97646b2b49b 100644 --- a/beacon_node/lighthouse_network/src/listen_addr.rs +++ b/beacon_node/lighthouse_network/src/listen_addr.rs @@ -88,9 +88,9 @@ impl ListenAddress { pub fn unused_v4_ports() -> Self { ListenAddress::V4(ListenAddr { addr: Ipv4Addr::UNSPECIFIED, - disc_port: unused_port::unused_udp4_port().unwrap(), - quic_port: unused_port::unused_udp4_port().unwrap(), - tcp_port: unused_port::unused_tcp4_port().unwrap(), + disc_port: 0, + quic_port: 0, + tcp_port: 0, }) } @@ -98,9 +98,27 @@ impl ListenAddress { pub fn unused_v6_ports() -> Self { ListenAddress::V6(ListenAddr { addr: Ipv6Addr::UNSPECIFIED, - disc_port: unused_port::unused_udp6_port().unwrap(), - quic_port: unused_port::unused_udp6_port().unwrap(), - tcp_port: unused_port::unused_tcp6_port().unwrap(), + disc_port: 0, + quic_port: 0, + tcp_port: 0, }) } } + +/// Compute the UDP discovery port given flags and TCP port. +pub fn compute_discovery_port(use_zero_ports: bool, tcp_port: u16, maybe_disc_port: Option) -> u16 { + if use_zero_ports { + 0 + } else { + maybe_disc_port.unwrap_or(tcp_port) + } +} + +/// Compute the UDP QUIC port given flags and TCP port. +pub fn compute_quic_port(use_zero_ports: bool, tcp_port: u16, maybe_quic_port: Option) -> u16 { + if use_zero_ports { + 0 + } else { + maybe_quic_port.unwrap_or(if tcp_port == 0 { 0 } else { tcp_port + 1 }) + } +} diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index f55b91d58c3..878bfc68655 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -1020,10 +1020,7 @@ pub fn parse_listening_addresses(cli_args: &ArgMatches) -> Result Result Result Result>> = LazyLock::new(|| Mutex::new(LRUTimeCache::new(CACHED_PORTS_TTL))); /// A convenience wrapper over [`zero_port`]. +#[deprecated(note = "Use bind_tcp4_any() which returns a bound socket to avoid TOCTOU")] pub fn unused_tcp4_port() -> Result { zero_port(Transport::Tcp, IpVersion::Ipv4) } /// A convenience wrapper over [`zero_port`]. 
+#[deprecated(note = "Use bind_udp4_any() which returns a bound socket to avoid TOCTOU")] pub fn unused_udp4_port() -> Result { zero_port(Transport::Udp, IpVersion::Ipv4) } /// A convenience wrapper over [`zero_port`]. +#[deprecated(note = "Use bind_tcp6_any() which returns a bound socket to avoid TOCTOU")] pub fn unused_tcp6_port() -> Result { zero_port(Transport::Tcp, IpVersion::Ipv6) } /// A convenience wrapper over [`zero_port`]. +#[deprecated(note = "Use bind_udp6_any() which returns a bound socket to avoid TOCTOU")] pub fn unused_udp6_port() -> Result { zero_port(Transport::Udp, IpVersion::Ipv6) } +/// Bind a TCPv4 listener on localhost with an ephemeral port (port 0) and return it. +/// Safe against TOCTOU: the socket remains open and reserved by the OS. +pub fn bind_tcp4_any() -> Result { + let addr = std::net::SocketAddr::new(std::net::Ipv4Addr::LOCALHOST.into(), 0); + TcpListener::bind(addr).map_err(|e| format!("Failed to bind TCPv4 listener: {:?}", e)) +} + +/// Bind a TCPv6 listener on localhost with an ephemeral port (port 0) and return it. +/// Safe against TOCTOU: the socket remains open and reserved by the OS. +pub fn bind_tcp6_any() -> Result { + let addr = std::net::SocketAddr::new(std::net::Ipv6Addr::LOCALHOST.into(), 0); + TcpListener::bind(addr).map_err(|e| format!("Failed to bind TCPv6 listener: {:?}", e)) +} + +/// Bind a UDPv4 socket on localhost with an ephemeral port (port 0) and return it. +/// Safe against TOCTOU: the socket remains open and reserved by the OS. +pub fn bind_udp4_any() -> Result { + let addr = std::net::SocketAddr::new(std::net::Ipv4Addr::LOCALHOST.into(), 0); + UdpSocket::bind(addr).map_err(|e| format!("Failed to bind UDPv4 socket: {:?}", e)) +} + +/// Bind a UDPv6 socket on localhost with an ephemeral port (port 0) and return it. +/// Safe against TOCTOU: the socket remains open and reserved by the OS. +pub fn bind_udp6_any() -> Result { + let addr = std::net::SocketAddr::new(std::net::Ipv6Addr::LOCALHOST.into(), 0); + UdpSocket::bind(addr).map_err(|e| format!("Failed to bind UDPv6 socket: {:?}", e)) +} + /// A bit of hack to find an unused port. /// /// Does not guarantee that the given port is unused after the function exits, just that it was @@ -51,6 +83,7 @@ pub fn unused_udp6_port() -> Result { /// It is possible that users are unable to bind to the ports returned by this function as the OS /// has a buffer period where it doesn't allow binding to the same port even after the socket is /// closed. We might have to use SO_REUSEADDR socket option from `std::net2` crate in that case. 
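The new `bind_*_any` helpers close the TOCTOU window by handing back the bound socket itself rather than a bare port number. A minimal usage sketch (the wrapper function below is illustrative, not part of the patch):

```rust
// Illustrative only: reserve an ephemeral UDP port without a check-then-bind race.
// The OS keeps the port reserved for exactly as long as the returned socket lives.
use std::net::UdpSocket;

fn reserve_udp_port() -> Result<(UdpSocket, u16), String> {
    let sock = unused_port::bind_udp4_any()?;
    let port = sock
        .local_addr()
        .map_err(|e| format!("Failed to read local addr: {e:?}"))?
        .port();
    // Hand `sock` (not just `port`) to the component under test; dropping the socket
    // and re-binding the port later would reintroduce the race this patch removes.
    Ok((sock, port))
}
```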
+#[deprecated(note = "Use bind_*_any() functions that return a bound socket to avoid TOCTOU")] pub fn zero_port(transport: Transport, ipv: IpVersion) -> Result { let localhost = match ipv { IpVersion::Ipv4 => std::net::Ipv4Addr::LOCALHOST.into(), diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 884e5eddeba..79afcb6d5b9 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -21,7 +21,6 @@ use std::time::Duration; use tempfile::TempDir; use types::non_zero_usize::new_non_zero_usize; use types::{Address, Checkpoint, Epoch, Hash256, MainnetEthSpec}; -use unused_port::{unused_tcp4_port, unused_tcp6_port, unused_udp4_port, unused_udp6_port}; const DEFAULT_EXECUTION_ENDPOINT: &str = "http://localhost:8551/"; const DEFAULT_EXECUTION_JWT_SECRET_KEY: &str = @@ -32,6 +31,12 @@ const DUMMY_ENR_TCP_PORT: u16 = 7777; const DUMMY_ENR_UDP_PORT: u16 = 8888; const DUMMY_ENR_QUIC_PORT: u16 = 9999; +// Fixed test ports for config-only assertions (no actual bind occurs in these tests). +const TEST_TCP4_PORT: u16 = 39001; +const TEST_TCP6_PORT: u16 = 39011; +const TEST_UDP4_PORT: u16 = 39002; +const TEST_UDP6_PORT: u16 = 39012; + const _: () = assert!(DUMMY_ENR_QUIC_PORT != 0 && DUMMY_ENR_TCP_PORT != 0 && DUMMY_ENR_UDP_PORT != 0); @@ -1039,8 +1044,8 @@ fn network_port_flag_over_ipv4_and_ipv6() { ); }); - let port = unused_tcp4_port().expect("Unable to find unused port."); - let port6 = unused_tcp6_port().expect("Unable to find unused port."); + let port = TEST_TCP4_PORT; + let port6 = TEST_TCP6_PORT; CommandLineTest::new() .flag("listen-address", Some("127.0.0.1")) .flag("listen-address", Some("::1")) @@ -1422,8 +1427,8 @@ fn enr_match_flag_over_ipv6() { const ADDR: &str = "::1"; let addr = ADDR.parse::().unwrap(); - let udp6_port = unused_udp6_port().expect("Unable to find unused port."); - let tcp6_port = unused_tcp6_port().expect("Unable to find unused port."); + let udp6_port = TEST_UDP6_PORT; + let tcp6_port = TEST_TCP6_PORT; CommandLineTest::new() .flag("enr-match", None) @@ -1452,13 +1457,13 @@ fn enr_match_flag_over_ipv4_and_ipv6() { const IPV6_ADDR: &str = "::1"; - let udp6_port = unused_udp6_port().expect("Unable to find unused port."); - let tcp6_port = unused_tcp6_port().expect("Unable to find unused port."); + let udp6_port = TEST_UDP6_PORT; + let tcp6_port = TEST_TCP6_PORT; let ipv6_addr = IPV6_ADDR.parse::().unwrap(); const IPV4_ADDR: &str = "127.0.0.1"; - let udp4_port = unused_udp4_port().expect("Unable to find unused port."); - let tcp4_port = unused_tcp4_port().expect("Unable to find unused port."); + let udp4_port = TEST_UDP4_PORT; + let tcp4_port = TEST_TCP4_PORT; let ipv4_addr = IPV4_ADDR.parse::().unwrap(); CommandLineTest::new() diff --git a/lighthouse/tests/boot_node.rs b/lighthouse/tests/boot_node.rs index b243cd6001e..cb6e645d49d 100644 --- a/lighthouse/tests/boot_node.rs +++ b/lighthouse/tests/boot_node.rs @@ -12,10 +12,12 @@ use std::path::{Path, PathBuf}; use std::process::Command; use std::str::FromStr; use tempfile::TempDir; -use unused_port::unused_udp4_port; const IP_ADDRESS: &str = "192.168.2.108"; +// Fixed test port for config-only assertions (no actual bind occurs in these tests). +const TEST_UDP4_PORT: u16 = 39102; + /// Returns the `lighthouse boot_node` command. 
fn base_cmd() -> Command { let lighthouse_bin = env!("CARGO_BIN_EXE_lighthouse"); @@ -62,7 +64,7 @@ fn enr_address_arg() { #[test] fn port_flag() { - let port = unused_udp4_port().unwrap(); + let port = TEST_UDP4_PORT; CommandLineTest::new() .flag("port", Some(port.to_string().as_str())) .run_with_ip() @@ -134,7 +136,7 @@ fn boot_nodes_flag() { #[test] fn enr_port_flag() { - let port = unused_udp4_port().unwrap(); + let port = TEST_UDP4_PORT; CommandLineTest::new() .flag("enr-port", Some(port.to_string().as_str())) .run_with_ip() From b8178515cd1b844d9af3bbab55455753b9949242 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Mon, 15 Sep 2025 09:41:12 +1000 Subject: [PATCH 03/45] Update engine methods in notifier (#8038) Fulu uses `getPayloadV5`, this PR updates the notifier logging prior to the fork. Co-Authored-By: Jimmy Chen --- beacon_node/client/src/notifier.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/beacon_node/client/src/notifier.rs b/beacon_node/client/src/notifier.rs index 1e58c210daa..c83cdad7e01 100644 --- a/beacon_node/client/src/notifier.rs +++ b/beacon_node/client/src/notifier.rs @@ -9,8 +9,8 @@ use execution_layer::{ EngineCapabilities, http::{ ENGINE_FORKCHOICE_UPDATED_V2, ENGINE_FORKCHOICE_UPDATED_V3, ENGINE_GET_PAYLOAD_V2, - ENGINE_GET_PAYLOAD_V3, ENGINE_GET_PAYLOAD_V4, ENGINE_NEW_PAYLOAD_V2, ENGINE_NEW_PAYLOAD_V3, - ENGINE_NEW_PAYLOAD_V4, + ENGINE_GET_PAYLOAD_V3, ENGINE_GET_PAYLOAD_V4, ENGINE_GET_PAYLOAD_V5, ENGINE_NEW_PAYLOAD_V2, + ENGINE_NEW_PAYLOAD_V3, ENGINE_NEW_PAYLOAD_V4, }, }; use lighthouse_network::{NetworkGlobals, types::SyncState}; @@ -524,18 +524,16 @@ fn methods_required_for_fork( } } ForkName::Fulu => { - // TODO(fulu) switch to v5 when the EL is ready - if !capabilities.get_payload_v4 { - missing_methods.push(ENGINE_GET_PAYLOAD_V4); + if !capabilities.get_payload_v5 { + missing_methods.push(ENGINE_GET_PAYLOAD_V5); } if !capabilities.new_payload_v4 { missing_methods.push(ENGINE_NEW_PAYLOAD_V4); } } ForkName::Gloas => { - // TODO(gloas) switch to v5/v6 when the EL is ready - if !capabilities.get_payload_v4 { - missing_methods.push(ENGINE_GET_PAYLOAD_V4); + if !capabilities.get_payload_v5 { + missing_methods.push(ENGINE_GET_PAYLOAD_V5); } if !capabilities.new_payload_v4 { missing_methods.push(ENGINE_NEW_PAYLOAD_V4); From f04d5ecddd976646d1a07add33ce74eff1bf2a3c Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Tue, 16 Sep 2025 14:10:42 +1000 Subject: [PATCH 04/45] Another check to prevent duplicate block imports (#8050) Attempt to address performance issues caused by importing the same block multiple times. - Check fork choice "after" obtaining the fork choice write lock in `BeaconChain::import_block`. We actually use an upgradable read lock, but this is semantically equivalent (the upgradable read has the advantage of not excluding regular reads). The hope is that this change has several benefits: 1. By preventing duplicate block imports we save time repeating work inside `import_block` that is unnecessary, e.g. writing the state to disk. Although the store itself now takes some measures to avoid re-writing diffs, it is even better if we avoid a disk write entirely. 2. By returning `DuplicateFullyImported`, we reduce some duplicated work downstream. E.g. if multiple threads importing columns trigger `import_block`, now only _one_ of them will get a notification of the block import completing successfully, and only this one will run `recompute_head`. 
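The core of the change is a check-then-upgrade locking pattern: the duplicate check runs under an upgradable read lock, and the exclusive lock is only taken when the block really needs importing. A standalone sketch with simplified types (not the actual `BeaconChain` code):

```rust
// Sketch of the pattern: plain readers stay unblocked while we hold the upgradable
// read guard, and the upgrade to a write guard happens atomically.
use parking_lot::{RwLock, RwLockUpgradableReadGuard};
use std::collections::HashSet;

fn import_once(imported: &RwLock<HashSet<u64>>, block_root: u64) -> bool {
    let guard = imported.upgradable_read();
    if guard.contains(&block_root) {
        // Duplicate import: bail out without ever taking the write lock.
        return false;
    }
    let mut write_guard = RwLockUpgradableReadGuard::upgrade(guard);
    write_guard.insert(block_root); // stands in for the expensive import work
    true
}
```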
This should help avoid a situation where multiple beacon processor workers are consumed by threads blocking on the `recompute_head_lock`. However, a similar block-fest is still possible with the upgradable fork choice lock (a large number of threads can be blocked waiting for the first thread to complete block import). Co-Authored-By: Michael Sproul --- beacon_node/beacon_chain/src/beacon_chain.rs | 9 ++++++++- beacon_node/beacon_chain/src/canonical_head.rs | 14 +++++++++++++- beacon_node/beacon_chain/src/metrics.rs | 8 ++++++++ .../beacon_chain/tests/block_verification.rs | 13 +++++++++---- 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 6e11b666102..eeafefdff84 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -3889,9 +3889,16 @@ impl BeaconChain { .map_err(BeaconChainError::from)?; } + // Take an upgradable read lock on fork choice so we can check if this block has already + // been imported. We don't want to repeat work importing a block that is already imported. + let fork_choice_reader = self.canonical_head.fork_choice_upgradable_read_lock(); + if fork_choice_reader.contains_block(&block_root) { + return Err(BlockError::DuplicateFullyImported(block_root)); + } + // Take an exclusive write-lock on fork choice. It's very important to prevent deadlocks by // avoiding taking other locks whilst holding this lock. - let mut fork_choice = self.canonical_head.fork_choice_write_lock(); + let mut fork_choice = parking_lot::RwLockUpgradableReadGuard::upgrade(fork_choice_reader); // Do not import a block that doesn't descend from the finalized root. let signed_block = diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index 56d19759725..3dbe8bf5c40 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -48,7 +48,7 @@ use fork_choice::{ }; use itertools::process_results; use logging::crit; -use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockUpgradableReadGuard, RwLockWriteGuard}; use slot_clock::SlotClock; use state_processing::AllCaches; use std::sync::Arc; @@ -79,6 +79,10 @@ impl CanonicalHeadRwLock { self.0.read() } + fn upgradable_read(&self) -> RwLockUpgradableReadGuard<'_, T> { + self.0.upgradable_read() + } + fn write(&self) -> RwLockWriteGuard<'_, T> { self.0.write() } @@ -389,6 +393,14 @@ impl CanonicalHead { self.fork_choice.read() } + /// Access an upgradable read-lock for fork choice. + pub fn fork_choice_upgradable_read_lock( + &self, + ) -> RwLockUpgradableReadGuard<'_, BeaconForkChoice> { + let _timer = metrics::start_timer(&metrics::FORK_CHOICE_UPGRADABLE_READ_LOCK_AQUIRE_TIMES); + self.fork_choice.upgradable_read() + } + /// Access a write-lock for fork choice. 
pub fn fork_choice_write_lock(&self) -> RwLockWriteGuard<'_, BeaconForkChoice> { let _timer = metrics::start_timer(&metrics::FORK_CHOICE_WRITE_LOCK_AQUIRE_TIMES); diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 1b57bad1049..3da3cf163a4 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -578,6 +578,14 @@ pub static FORK_CHOICE_READ_LOCK_AQUIRE_TIMES: LazyLock> = Laz exponential_buckets(1e-4, 4.0, 7), ) }); +pub static FORK_CHOICE_UPGRADABLE_READ_LOCK_AQUIRE_TIMES: LazyLock> = + LazyLock::new(|| { + try_create_histogram_with_buckets( + "beacon_fork_choice_upgradable_read_lock_aquire_seconds", + "Time taken to aquire the fork-choice upgradable read lock", + exponential_buckets(1e-4, 4.0, 7), + ) + }); pub static FORK_CHOICE_WRITE_LOCK_AQUIRE_TIMES: LazyLock> = LazyLock::new(|| { try_create_histogram_with_buckets( "beacon_fork_choice_write_lock_aquire_seconds", diff --git a/beacon_node/beacon_chain/tests/block_verification.rs b/beacon_node/beacon_chain/tests/block_verification.rs index 58ca4a032ed..b27295751ec 100644 --- a/beacon_node/beacon_chain/tests/block_verification.rs +++ b/beacon_node/beacon_chain/tests/block_verification.rs @@ -1730,6 +1730,8 @@ async fn add_altair_block_to_base_chain() { )); } +// This is a regression test for this bug: +// https://github.com/sigp/lighthouse/issues/4332#issuecomment-1565092279 #[tokio::test] async fn import_duplicate_block_unrealized_justification() { let spec = MainnetEthSpec::default_spec(); @@ -1791,7 +1793,7 @@ async fn import_duplicate_block_unrealized_justification() { .await .unwrap(); - // Unrealized justification should NOT have updated. + // The store's global unrealized justification should update immediately and match the block. let unrealized_justification = { let fc = chain.canonical_head.fork_choice_read_lock(); assert_eq!(fc.justified_checkpoint().epoch, 0); @@ -1808,9 +1810,12 @@ async fn import_duplicate_block_unrealized_justification() { }; // Import the second verified block, simulating a block processed via RPC. - import_execution_pending_block(chain.clone(), verified_block2) - .await - .unwrap(); + assert_eq!( + import_execution_pending_block(chain.clone(), verified_block2) + .await + .unwrap_err(), + format!("DuplicateFullyImported({block_root})") + ); // Unrealized justification should still be updated. 
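For reference, the bucket layout reused for the new upgradable-lock histogram is easy to expand by hand; `exponential_buckets(1e-4, 4.0, 7)` yields upper bounds from 0.1 ms up to roughly 410 ms:

```rust
// The seven upper bounds, in seconds:
// 0.0001, 0.0004, 0.0016, 0.0064, 0.0256, 0.1024, 0.4096
fn lock_acquire_bucket_bounds() -> Vec<f64> {
    (0..7).map(|i| 1e-4 * 4f64.powi(i)).collect()
}
```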
let fc3 = chain.canonical_head.fork_choice_read_lock(); From 4409500f63007f98bc901924cee536cfad42f677 Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Mon, 15 Sep 2025 22:18:25 -0700 Subject: [PATCH 05/45] Remove column reconstruction when processing rpc requests (#8051) Co-Authored-By: Eitan Seri- Levi --- .../src/network_beacon_processor/gossip_methods.rs | 2 +- .../network/src/network_beacon_processor/mod.rs | 5 +---- .../src/network_beacon_processor/sync_methods.rs | 13 +------------ 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index 1f1a3427e78..bc44db40e9e 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -1067,7 +1067,7 @@ impl NetworkBeaconProcessor { slot: *slot, process_fn: Box::pin(async move { cloned_self - .attempt_data_column_reconstruction(block_root, true) + .attempt_data_column_reconstruction(block_root) .await; }), }, diff --git a/beacon_node/network/src/network_beacon_processor/mod.rs b/beacon_node/network/src/network_beacon_processor/mod.rs index 73349cd4314..030f77be371 100644 --- a/beacon_node/network/src/network_beacon_processor/mod.rs +++ b/beacon_node/network/src/network_beacon_processor/mod.rs @@ -837,7 +837,6 @@ impl NetworkBeaconProcessor { async fn attempt_data_column_reconstruction( self: &Arc, block_root: Hash256, - publish_columns: bool, ) -> Option { // Only supernodes attempt reconstruction if !self @@ -852,9 +851,7 @@ impl NetworkBeaconProcessor { let result = self.chain.reconstruct_data_columns(block_root).await; match result { Ok(Some((availability_processing_status, data_columns_to_publish))) => { - if publish_columns { - self.publish_data_columns_gradually(data_columns_to_publish, block_root); - } + self.publish_data_columns_gradually(data_columns_to_publish, block_root); match &availability_processing_status { AvailabilityProcessingStatus::Imported(hash) => { debug!( diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index f24495cc54c..edeed7e98cf 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -383,7 +383,7 @@ impl NetworkBeaconProcessor { "RPC custody data columns received" ); - let mut result = self + let result = self .chain .process_rpc_custody_columns(custody_columns) .await; @@ -404,17 +404,6 @@ impl NetworkBeaconProcessor { block_hash = %block_root, "Missing components over rpc" ); - // Attempt reconstruction here before notifying sync, to avoid sending out more requests - // that we may no longer need. - // We don't publish columns reconstructed from rpc columns to the gossip network, - // as these are likely historic columns. 
- let publish_columns = false; - if let Some(availability) = self - .attempt_data_column_reconstruction(block_root, publish_columns) - .await - { - result = Ok(availability) - } } }, Err(BlockError::DuplicateFullyImported(_)) => { From aba362709990d7ec7f4a880bcd1e60114d375450 Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Mon, 15 Sep 2025 22:18:28 -0700 Subject: [PATCH 06/45] Reduce reconstruction queue capacity (#8053) Co-Authored-By: Eitan Seri- Levi --- beacon_node/beacon_processor/src/lib.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/beacon_node/beacon_processor/src/lib.rs b/beacon_node/beacon_processor/src/lib.rs index ab9ab045f4e..84723fb6a09 100644 --- a/beacon_node/beacon_processor/src/lib.rs +++ b/beacon_node/beacon_processor/src/lib.rs @@ -181,7 +181,7 @@ impl BeaconProcessorQueueLengths { // We don't request more than `PARENT_DEPTH_TOLERANCE` (32) lookups, so we can limit // this queue size. With 48 max blobs per block, each column sidecar list could be up to 12MB. rpc_custody_column_queue: 64, - column_reconstruction_queue: 64, + column_reconstruction_queue: 1, chain_segment_queue: 64, backfill_chain_segment: 64, gossip_block_queue: 1024, @@ -867,7 +867,7 @@ impl BeaconProcessor { let mut rpc_blob_queue = FifoQueue::new(queue_lengths.rpc_blob_queue); let mut rpc_custody_column_queue = FifoQueue::new(queue_lengths.rpc_custody_column_queue); let mut column_reconstruction_queue = - FifoQueue::new(queue_lengths.column_reconstruction_queue); + LifoQueue::new(queue_lengths.column_reconstruction_queue); let mut chain_segment_queue = FifoQueue::new(queue_lengths.chain_segment_queue); let mut backfill_chain_segment = FifoQueue::new(queue_lengths.backfill_chain_segment); let mut gossip_block_queue = FifoQueue::new(queue_lengths.gossip_block_queue); @@ -1354,9 +1354,7 @@ impl BeaconProcessor { Work::RpcCustodyColumn { .. } => { rpc_custody_column_queue.push(work, work_id) } - Work::ColumnReconstruction(_) => { - column_reconstruction_queue.push(work, work_id) - } + Work::ColumnReconstruction(_) => column_reconstruction_queue.push(work), Work::ChainSegment { .. } => chain_segment_queue.push(work, work_id), Work::ChainSegmentBackfill { .. 
} => { backfill_chain_segment.push(work, work_id) From 242bdfcf1229254ac792039d8ae13b703bd1ab6b Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Mon, 15 Sep 2025 22:18:31 -0700 Subject: [PATCH 07/45] Add instrumentation to `recompute_head_at_slot` (#8049) Co-Authored-By: Eitan Seri- Levi --- beacon_node/beacon_chain/src/canonical_head.rs | 18 +++++++++++++++++- beacon_node/lighthouse_tracing/src/lib.rs | 3 +++ beacon_node/store/src/hot_cold_store.rs | 1 + consensus/state_processing/src/all_caches.rs | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index 3dbe8bf5c40..78005bf7995 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -47,6 +47,7 @@ use fork_choice::{ ResetPayloadStatuses, }; use itertools::process_results; +use lighthouse_tracing::SPAN_RECOMPUTE_HEAD; use logging::crit; use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockUpgradableReadGuard, RwLockWriteGuard}; use slot_clock::SlotClock; @@ -57,6 +58,7 @@ use store::{ Error as StoreError, KeyValueStore, KeyValueStoreOp, StoreConfig, iter::StateRootsIterator, }; use task_executor::{JoinHandle, ShutdownReason}; +use tracing::info_span; use tracing::{debug, error, info, instrument, warn}; use types::*; @@ -383,6 +385,7 @@ impl CanonicalHead { /// /// This function is **not safe** to be public. See the module-level documentation for more /// information about protecting from deadlocks. + #[instrument(skip_all)] fn cached_head_write_lock(&self) -> RwLockWriteGuard<'_, CachedHead> { self.cached_head.write() } @@ -402,6 +405,7 @@ impl CanonicalHead { } /// Access a write-lock for fork choice. + #[instrument(skip_all)] pub fn fork_choice_write_lock(&self) -> RwLockWriteGuard<'_, BeaconForkChoice> { let _timer = metrics::start_timer(&metrics::FORK_CHOICE_WRITE_LOCK_AQUIRE_TIMES); self.fork_choice.write() @@ -509,13 +513,21 @@ impl BeaconChain { /// situation can be rectified. We avoid returning an error here so that calling functions /// can't abort block import because an error is returned here. pub async fn recompute_head_at_slot(self: &Arc, current_slot: Slot) { + let span = info_span!( + SPAN_RECOMPUTE_HEAD, + slot = %current_slot + ); + metrics::inc_counter(&metrics::FORK_CHOICE_REQUESTS); let _timer = metrics::start_timer(&metrics::FORK_CHOICE_TIMES); let chain = self.clone(); match self .spawn_blocking_handle( - move || chain.recompute_head_at_slot_internal(current_slot), + move || { + let _guard = span.enter(); + chain.recompute_head_at_slot_internal(current_slot) + }, "recompute_head_internal", ) .await @@ -773,6 +785,7 @@ impl BeaconChain { } /// Perform updates to caches and other components after the canonical head has been changed. + #[instrument(skip_all)] fn after_new_head( self: &Arc, old_cached_head: &CachedHead, @@ -911,6 +924,7 @@ impl BeaconChain { /// /// This function will take a write-lock on `canonical_head.fork_choice`, therefore it would be /// unwise to hold any lock on fork choice while calling this function. + #[instrument(skip_all)] fn after_finalization( self: &Arc, new_cached_head: &CachedHead, @@ -1046,6 +1060,7 @@ impl BeaconChain { /// /// This function is called whilst holding a write-lock on the `canonical_head`. To ensure dead-lock /// safety, **do not take any other locks inside this function**. 
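One detail worth noting in `recompute_head_at_slot`: `#[instrument]` on its own does not carry the trace across `spawn_blocking_handle`, so the span is created up front, moved into the blocking closure, and entered there. A reduced sketch of the same pattern using a plain Tokio handle instead of Lighthouse's `TaskExecutor`:

```rust
// Sketch only: create the span on the async side, enter it inside the blocking
// closure, and everything emitted by the blocking work lands under the caller's trace.
use tracing::info_span;

fn spawn_recompute(handle: &tokio::runtime::Handle, current_slot: u64) {
    let span = info_span!("recompute_head_at_slot", slot = current_slot);
    handle.spawn_blocking(move || {
        let _guard = span.enter();
        tracing::debug!("recomputing head");
        // ... expensive fork choice work ...
    });
}
```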
+#[instrument(skip_all)] fn check_finalized_payload_validity( chain: &BeaconChain, finalized_proto_block: &ProtoBlock, @@ -1129,6 +1144,7 @@ fn perform_debug_logging( } } +#[instrument(skip_all)] fn spawn_execution_layer_updates( chain: Arc>, forkchoice_update_params: ForkchoiceUpdateParameters, diff --git a/beacon_node/lighthouse_tracing/src/lib.rs b/beacon_node/lighthouse_tracing/src/lib.rs index 1787399761b..60fda12cc20 100644 --- a/beacon_node/lighthouse_tracing/src/lib.rs +++ b/beacon_node/lighthouse_tracing/src/lib.rs @@ -27,6 +27,9 @@ pub const SPAN_PROCESS_RPC_BLOBS: &str = "process_rpc_blobs"; pub const SPAN_PROCESS_RPC_CUSTODY_COLUMNS: &str = "process_rpc_custody_columns"; pub const SPAN_PROCESS_CHAIN_SEGMENT: &str = "process_chain_segment"; +/// Fork choice root spans +pub const SPAN_RECOMPUTE_HEAD: &str = "recompute_head_at_slot"; + /// RPC methods root spans pub const SPAN_HANDLE_BLOCKS_BY_RANGE_REQUEST: &str = "handle_blocks_by_range_request"; pub const SPAN_HANDLE_BLOBS_BY_RANGE_REQUEST: &str = "handle_blobs_by_range_request"; diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 7156c75f114..52e52fe7ce5 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -656,6 +656,7 @@ impl, Cold: ItemStore> HotColdDB } /// Fetch a full block with execution payload from the store. + #[instrument(skip_all)] pub fn get_full_block( &self, block_root: &Hash256, diff --git a/consensus/state_processing/src/all_caches.rs b/consensus/state_processing/src/all_caches.rs index e49eb395c40..d6c4fd3f880 100644 --- a/consensus/state_processing/src/all_caches.rs +++ b/consensus/state_processing/src/all_caches.rs @@ -1,5 +1,6 @@ use crate::common::update_progressive_balances_cache::initialize_progressive_balances_cache; use crate::epoch_cache::initialize_epoch_cache; +use tracing::instrument; use types::{ BeaconState, ChainSpec, EpochCacheError, EthSpec, FixedBytesExtended, Hash256, RelativeEpoch, }; @@ -23,6 +24,7 @@ pub trait AllCaches { } impl AllCaches for BeaconState { + #[instrument(skip_all)] fn build_all_caches(&mut self, spec: &ChainSpec) -> Result<(), EpochCacheError> { self.build_caches(spec)?; initialize_epoch_cache(self, spec)?; From 3de646c8b32b6da7d2ace48aab9ceb2e52bbe8a5 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Tue, 16 Sep 2025 18:17:43 +1000 Subject: [PATCH 08/45] Enable reconstruction for nodes custodying more than 50% of columns and instrument tracing (#8052) Co-Authored-By: Jimmy Chen Co-Authored-By: Jimmy Chen --- beacon_node/beacon_chain/src/beacon_chain.rs | 6 +- .../src/data_availability_checker.rs | 1 + .../overflow_lru_cache.rs | 22 +++++-- .../beacon_chain/src/validator_custody.rs | 10 ++- .../gossip_methods.rs | 66 ++++++++++--------- .../src/network_beacon_processor/mod.rs | 39 +++-------- .../src/network_beacon_processor/tests.rs | 4 -- 7 files changed, 76 insertions(+), 72 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index eeafefdff84..084a68bfeab 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -3299,10 +3299,14 @@ impl BeaconChain { let data_availability_checker = self.data_availability_checker.clone(); + let current_span = Span::current(); let result = self .task_executor .spawn_blocking_handle( - move || data_availability_checker.reconstruct_data_columns(&block_root), + move || { + let _guard = current_span.enter(); + 
data_availability_checker.reconstruct_data_columns(&block_root) + }, "reconstruct_data_columns", ) .ok_or(BeaconChainError::RuntimeShutdown)? diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 9225ed6b47b..307dc0e227a 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -547,6 +547,7 @@ impl DataAvailabilityChecker { } } + #[instrument(skip_all, level = "debug")] pub fn reconstruct_data_columns( &self, block_root: &Hash256, diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 9de63f61261..6afb680ddb8 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -592,9 +592,9 @@ impl DataAvailabilityCheckerInner { /// Check whether data column reconstruction should be attempted. /// - /// Potentially trigger reconstruction if: - /// - Our custody requirement is all columns (supernode), and we haven't got all columns - /// - We have >= 50% of columns, but not all columns + /// Potentially trigger reconstruction if all the following satisfy: + /// - Our custody requirement is more than 50% of total columns, + /// - We haven't received all required columns /// - Reconstruction hasn't been started for the block /// /// If reconstruction is required, returns `PendingComponents` which contains the @@ -609,15 +609,25 @@ impl DataAvailabilityCheckerInner { return ReconstructColumnsDecision::No("block already imported"); }; - // If we're sampling all columns, it means we must be custodying all columns. + let Some(epoch) = pending_components + .verified_data_columns + .first() + .map(|c| c.as_data_column().epoch()) + else { + return ReconstructColumnsDecision::No("not enough columns"); + }; + let total_column_count = T::EthSpec::number_of_columns(); + let sampling_column_count = self + .custody_context + .num_of_data_columns_to_sample(epoch, &self.spec); let received_column_count = pending_components.verified_data_columns.len(); if pending_components.reconstruction_started { return ReconstructColumnsDecision::No("already started"); } - if received_column_count >= total_column_count { - return ReconstructColumnsDecision::No("all columns received"); + if received_column_count >= sampling_column_count { + return ReconstructColumnsDecision::No("all sampling columns received"); } if received_column_count < total_column_count / 2 { return ReconstructColumnsDecision::No("not enough columns"); diff --git a/beacon_node/beacon_chain/src/validator_custody.rs b/beacon_node/beacon_chain/src/validator_custody.rs index 1c89624f9d7..3ab76828c9c 100644 --- a/beacon_node/beacon_chain/src/validator_custody.rs +++ b/beacon_node/beacon_chain/src/validator_custody.rs @@ -130,7 +130,7 @@ pub struct CustodyContext { /// and enr values. validator_custody_count: AtomicU64, /// Is the node run as a supernode based on current cli parameters. - pub current_is_supernode: bool, + current_is_supernode: bool, /// The persisted value for `is_supernode` based on the previous run of this node. 
/// /// Note: We require this value because if a user restarts the node with a higher cli custody @@ -307,6 +307,14 @@ impl CustodyContext { .expect("should compute node sampling size from valid chain spec") } + /// Returns whether the node should attempt reconstruction at a given epoch. + pub fn should_attempt_reconstruction(&self, epoch: Epoch, spec: &ChainSpec) -> bool { + let min_columns_for_reconstruction = E::number_of_columns() / 2; + // performing reconstruction is not necessary if sampling column count is exactly 50%, + // because the node doesn't need the remaining columns. + self.num_of_data_columns_to_sample(epoch, spec) > min_columns_for_reconstruction + } + /// Returns the ordered list of column indices that should be sampled for data availability checking at the given epoch. /// /// # Parameters diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index bc44db40e9e..b3d717142f5 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -34,7 +34,6 @@ use std::path::PathBuf; use std::sync::Arc; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use store::hot_cold_store::HotColdDBError; -use tokio::sync::mpsc::error::TrySendError; use tracing::{Instrument, Span, debug, error, info, instrument, trace, warn}; use types::{ Attestation, AttestationData, AttestationRef, AttesterSlashing, BlobSidecar, DataColumnSidecar, @@ -1054,36 +1053,43 @@ impl NetworkBeaconProcessor { "Processed data column, waiting for other components" ); - // Instead of triggering reconstruction immediately, schedule it to be run. If - // another column arrives it either completes availability or pushes - // reconstruction back a bit. - let cloned_self = Arc::clone(self); - let block_root = *block_root; - let send_result = self.beacon_processor_send.try_send(WorkEvent { - drop_during_sync: false, - work: Work::Reprocess(ReprocessQueueMessage::DelayColumnReconstruction( - QueuedColumnReconstruction { - block_root, - slot: *slot, - process_fn: Box::pin(async move { - cloned_self - .attempt_data_column_reconstruction(block_root) - .await; - }), - }, - )), - }); - if let Err(TrySendError::Full(WorkEvent { - work: - Work::Reprocess(ReprocessQueueMessage::DelayColumnReconstruction( - reconstruction, - )), - .. - })) = send_result + if self + .chain + .data_availability_checker + .custody_context() + .should_attempt_reconstruction( + slot.epoch(T::EthSpec::slots_per_epoch()), + &self.chain.spec, + ) { - warn!("Unable to send reconstruction to reprocessing"); - // Execute it immediately instead. - reconstruction.process_fn.await; + // Instead of triggering reconstruction immediately, schedule it to be run. If + // another column arrives, it either completes availability or pushes + // reconstruction back a bit. 
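Concretely, assuming the PeerDAS total of 128 columns, the new threshold in `should_attempt_reconstruction` works out as follows:

```rust
// Worked example (128 total columns assumed).
fn should_attempt_reconstruction(sampling_column_count: usize) -> bool {
    let number_of_columns = 128;
    let min_columns_for_reconstruction = number_of_columns / 2; // 64
    // Sampling exactly 64 => false: once the sampling set arrives the node needs
    // nothing else, so reconstructing the other half is wasted work.
    // Sampling 65..=128 (e.g. a supernode, or high validator custody) => true.
    sampling_column_count > min_columns_for_reconstruction
}
```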
+ let cloned_self = Arc::clone(self); + let block_root = *block_root; + + if self + .beacon_processor_send + .try_send(WorkEvent { + drop_during_sync: false, + work: Work::Reprocess( + ReprocessQueueMessage::DelayColumnReconstruction( + QueuedColumnReconstruction { + block_root, + slot: *slot, + process_fn: Box::pin(async move { + cloned_self + .attempt_data_column_reconstruction(block_root) + .await; + }), + }, + ), + ), + }) + .is_err() + { + warn!("Unable to send reconstruction to reprocessing"); + } } } }, diff --git a/beacon_node/network/src/network_beacon_processor/mod.rs b/beacon_node/network/src/network_beacon_processor/mod.rs index 030f77be371..691c06f2687 100644 --- a/beacon_node/network/src/network_beacon_processor/mod.rs +++ b/beacon_node/network/src/network_beacon_processor/mod.rs @@ -28,7 +28,7 @@ use std::sync::Arc; use std::time::Duration; use task_executor::TaskExecutor; use tokio::sync::mpsc::{self, error::TrySendError}; -use tracing::{debug, error, trace, warn}; +use tracing::{debug, error, instrument, trace, warn}; use types::*; pub use sync_methods::ChainSegmentProcessId; @@ -825,30 +825,12 @@ impl NetworkBeaconProcessor { } } - /// Attempt to reconstruct all data columns if the following conditions satisfies: - /// - Our custody requirement is all columns - /// - We have >= 50% of columns, but not all columns - /// - /// Returns `Some(AvailabilityProcessingStatus)` if reconstruction is successfully performed, - /// otherwise returns `None`. - /// - /// The `publish_columns` parameter controls whether reconstructed columns should be published - /// to the gossip network. - async fn attempt_data_column_reconstruction( - self: &Arc, - block_root: Hash256, - ) -> Option { - // Only supernodes attempt reconstruction - if !self - .chain - .data_availability_checker - .custody_context() - .current_is_supernode - { - return None; - } - + /// Attempts to reconstruct all data columns if the conditions checked in + /// [`DataAvailabilityCheckerInner::check_and_set_reconstruction_started`] are satisfied. + #[instrument(level = "debug", skip_all, fields(?block_root))] + async fn attempt_data_column_reconstruction(self: &Arc, block_root: Hash256) { let result = self.chain.reconstruct_data_columns(block_root).await; + match result { Ok(Some((availability_processing_status, data_columns_to_publish))) => { self.publish_data_columns_gradually(data_columns_to_publish, block_root); @@ -864,21 +846,18 @@ impl NetworkBeaconProcessor { AvailabilityProcessingStatus::MissingComponents(_, _) => { debug!( result = "imported all custody columns", - block_hash = %block_root, + %block_root, "Block components still missing block after reconstruction" ); } } - - Some(availability_processing_status) } Ok(None) => { // reason is tracked via the `KZG_DATA_COLUMN_RECONSTRUCTION_INCOMPLETE_TOTAL` metric trace!( - block_hash = %block_root, + %block_root, "Reconstruction not required for block" ); - None } Err(e) => { error!( @@ -886,7 +865,6 @@ impl NetworkBeaconProcessor { error = ?e, "Error during data column reconstruction" ); - None } } } @@ -975,6 +953,7 @@ impl NetworkBeaconProcessor { /// by some nodes on the network as soon as possible. Our hope is that some columns arrive from /// other nodes in the meantime, obviating the need for us to publish them. If no other /// publisher exists for a column, it will eventually get published here. 
+ #[instrument(level="debug", skip_all, fields(?block_root, data_column_count=data_columns_to_publish.len()))] fn publish_data_columns_gradually( self: &Arc, mut data_columns_to_publish: DataColumnSidecarList, diff --git a/beacon_node/network/src/network_beacon_processor/tests.rs b/beacon_node/network/src/network_beacon_processor/tests.rs index 2935c2d2132..d3a93d48637 100644 --- a/beacon_node/network/src/network_beacon_processor/tests.rs +++ b/beacon_node/network/src/network_beacon_processor/tests.rs @@ -1009,10 +1009,6 @@ async fn import_gossip_block_acceptably_early() { rig.assert_event_journal_completes(&[WorkType::GossipDataColumnSidecar]) .await; } - if num_data_columns > 0 { - rig.assert_event_journal_completes(&[WorkType::ColumnReconstruction]) - .await; - } // Note: this section of the code is a bit race-y. We're assuming that we can set the slot clock // and check the head in the time between the block arrived early and when its due for From 191570e4a162202df72713c177db1386464420dd Mon Sep 17 00:00:00 2001 From: jking-aus <72330194+jking-aus@users.noreply.github.com> Date: Wed, 17 Sep 2025 04:27:37 +1000 Subject: [PATCH 09/45] chore: Bump discv5 and remove generic DefaultProtocolId in metrics (#8056) Bump discv5 version Co-Authored-By: Josh King --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- common/network_utils/src/discovery_metrics.rs | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 88b5b7b57d2..ba6a4587b6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2163,7 +2163,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18e4fdb82bd54a12e42fb58a800dcae6b9e13982238ce2296dc3570b92148e1f" dependencies = [ "data-encoding", - "syn 2.0.100", + "syn 1.0.109", ] [[package]] @@ -2395,9 +2395,9 @@ dependencies = [ [[package]] name = "discv5" -version = "0.9.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4b4e7798d2ff74e29cee344dc490af947ae657d6ab5273dde35d58ce06a4d71" +checksum = "a20b702c8491b3325866a4935d0b5101e49144d74540384243b6293794aad6fa" dependencies = [ "aes 0.8.4", "aes-gcm", @@ -5122,7 +5122,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0b930b605d6..99543dbfb49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -134,7 +134,7 @@ deposit_contract = { path = "common/deposit_contract" } derivative = "2" directory = { path = "common/directory" } dirs = "3" -discv5 = { version = "0.9", features = ["libp2p"] } +discv5 = { version = "0.10", features = ["libp2p"] } doppelganger_service = { path = "validator_client/doppelganger_service" } either = "1.9" environment = { path = "lighthouse/environment" } diff --git a/common/network_utils/src/discovery_metrics.rs b/common/network_utils/src/discovery_metrics.rs index d105dee57af..26a9e8a45f5 100644 --- a/common/network_utils/src/discovery_metrics.rs +++ b/common/network_utils/src/discovery_metrics.rs @@ -35,8 +35,7 @@ pub static DISCOVERY_SESSIONS: LazyLock> = LazyLock::new(|| { }); pub fn scrape_discovery_metrics() { - let metrics = - discv5::metrics::Metrics::from(discv5::Discv5::::raw_metrics()); + let metrics = discv5::metrics::Metrics::from(discv5::Discv5::raw_metrics()); set_float_gauge(&DISCOVERY_REQS, metrics.unsolicited_requests_per_second); set_gauge(&DISCOVERY_SESSIONS, 
metrics.active_sessions as i64); set_gauge_vec(&DISCOVERY_BYTES, &["inbound"], metrics.bytes_recv as i64); From b7d78a91e03d4b3975806e2460bded01825f5a92 Mon Sep 17 00:00:00 2001 From: Lion - dapplion <35266934+dapplion@users.noreply.github.com> Date: Wed, 17 Sep 2025 03:02:29 +0200 Subject: [PATCH 10/45] Don't penalize peers for extending ignored chains (#8042) Lookup sync has a cache of block roots "failed_chains". If a peer triggers a lookup for a block or descendant of a root in failed_chains the lookup is dropped and the peer penalized. However blocks are inserted into failed_chains for a single reason: - If a chain is longer than 32 blocks the lookup is dropped to prevent OOM risks. However the peer is not at fault, since discovering an unknown chain longer than 32 blocks is not malicious. We just drop the lookup to sync the blocks from range forward sync. This discrepancy is probably an oversight when changing old code. Before we used to add blocks that failed too many times to process to that cache. However, we don't do that anymore. Adding a block that fails too many times to process is an optimization to save resources in rare cases where peers keep sending us invalid blocks. In case that happens, today we keep trying to process the block, downscoring the peers and eventually disconnecting them. _IF_ we found that optimization to be necessary we should merge this PR (_Stricter match of BlockError in lookup sync_) first. IMO we are fine without the failed_chains cache and the ignored_chains cache will be obsolete with [tree sync](https://github.com/sigp/lighthouse/issues/7678) as the OOM risk of long lookup chains does not exist anymore. Closes https://github.com/sigp/lighthouse/issues/7577 Rename `failed_chains` for `ignored_chains` and don't penalize peers that trigger lookups for those blocks Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com> --- .../network/src/sync/block_lookups/mod.rs | 35 +++++------ beacon_node/network/src/sync/manager.rs | 8 +-- beacon_node/network/src/sync/tests/lookups.rs | 62 +++++++++---------- 3 files changed, 50 insertions(+), 55 deletions(-) diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index b60c21972fb..f8ffd298caf 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -59,7 +59,7 @@ mod single_block_lookup; /// reaches the maximum depth it will force trigger range sync. pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE; -const FAILED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60; +const IGNORED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60; pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 4; /// Maximum time we allow a lookup to exist before assuming it is stuck and will never make @@ -110,8 +110,10 @@ enum Action { } pub struct BlockLookups { - /// A cache of failed chain lookups to prevent duplicate searches. - failed_chains: LRUTimeCache, + /// A cache of block roots that must be ignored for some time to prevent useless searches. For + /// example if a chain is too long, its lookup chain is dropped, and range sync is expected to + /// eventually sync those blocks + ignored_chains: LRUTimeCache, // TODO: Why not index lookups by block_root? 
single_block_lookups: FnvHashMap>, @@ -128,21 +130,21 @@ pub(crate) type BlockLookupSummary = (Id, Hash256, Option, Vec) impl BlockLookups { pub fn new() -> Self { Self { - failed_chains: LRUTimeCache::new(Duration::from_secs( - FAILED_CHAINS_CACHE_EXPIRY_SECONDS, + ignored_chains: LRUTimeCache::new(Duration::from_secs( + IGNORED_CHAINS_CACHE_EXPIRY_SECONDS, )), single_block_lookups: Default::default(), } } #[cfg(test)] - pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) { - self.failed_chains.insert(block_root); + pub(crate) fn insert_ignored_chain(&mut self, block_root: Hash256) { + self.ignored_chains.insert(block_root); } #[cfg(test)] - pub(crate) fn get_failed_chains(&mut self) -> Vec { - self.failed_chains.keys().cloned().collect() + pub(crate) fn get_ignored_chains(&mut self) -> Vec { + self.ignored_chains.keys().cloned().collect() } #[cfg(test)] @@ -184,7 +186,7 @@ impl BlockLookups { self.search_parent_of_child(parent_root, block_root, &[peer_id], cx); // Only create the child lookup if the parent exists if parent_lookup_exists { - // `search_parent_of_child` ensures that parent root is not a failed chain + // `search_parent_of_child` ensures that the parent lookup exists so we can safely wait for it self.new_current_lookup( block_root, Some(block_component), @@ -244,8 +246,8 @@ impl BlockLookups { debug!(block_root = ?block_root_to_search, "Parent lookup chain too long"); // Searching for this parent would extend a parent chain over the max - // Insert the tip only to failed chains - self.failed_chains.insert(parent_chain.tip); + // Insert the tip only to chains to ignore + self.ignored_chains.insert(parent_chain.tip); // Note: Drop only the chain that's too long until it merges with another chain // that's not too long. Consider this attack: there's a chain of valid unknown @@ -330,12 +332,9 @@ impl BlockLookups { peers: &[PeerId], cx: &mut SyncNetworkContext, ) -> bool { - // If this block or it's parent is part of a known failed chain, ignore it. - if self.failed_chains.contains(&block_root) { - debug!(?block_root, "Block is from a past failed chain. Dropping"); - for peer_id in peers { - cx.report_peer(*peer_id, PeerAction::MidToleranceError, "failed_chain"); - } + // If this block or it's parent is part of a known ignored chain, ignore it. 
+ if self.ignored_chains.contains(&block_root) { + debug!(?block_root, "Dropping lookup for block marked ignored"); return false; } diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 448e784ab6d..d7ba0280542 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -328,13 +328,13 @@ impl SyncManager { } #[cfg(test)] - pub(crate) fn get_failed_chains(&mut self) -> Vec { - self.block_lookups.get_failed_chains() + pub(crate) fn get_ignored_chains(&mut self) -> Vec { + self.block_lookups.get_ignored_chains() } #[cfg(test)] - pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) { - self.block_lookups.insert_failed_chain(block_root); + pub(crate) fn insert_ignored_chain(&mut self, block_root: Hash256) { + self.block_lookups.insert_ignored_chain(block_root); } #[cfg(test)] diff --git a/beacon_node/network/src/sync/tests/lookups.rs b/beacon_node/network/src/sync/tests/lookups.rs index b5bc10851dc..2edcd12f019 100644 --- a/beacon_node/network/src/sync/tests/lookups.rs +++ b/beacon_node/network/src/sync/tests/lookups.rs @@ -285,21 +285,21 @@ impl TestRig { ); } - fn insert_failed_chain(&mut self, block_root: Hash256) { - self.sync_manager.insert_failed_chain(block_root); + fn insert_ignored_chain(&mut self, block_root: Hash256) { + self.sync_manager.insert_ignored_chain(block_root); } - fn assert_not_failed_chain(&mut self, chain_hash: Hash256) { - let failed_chains = self.sync_manager.get_failed_chains(); - if failed_chains.contains(&chain_hash) { - panic!("failed chains contain {chain_hash:?}: {failed_chains:?}"); + fn assert_not_ignored_chain(&mut self, chain_hash: Hash256) { + let chains = self.sync_manager.get_ignored_chains(); + if chains.contains(&chain_hash) { + panic!("ignored chains contain {chain_hash:?}: {chains:?}"); } } - fn assert_failed_chain(&mut self, chain_hash: Hash256) { - let failed_chains = self.sync_manager.get_failed_chains(); - if !failed_chains.contains(&chain_hash) { - panic!("expected failed chains to contain {chain_hash:?}: {failed_chains:?}"); + fn assert_ignored_chain(&mut self, chain_hash: Hash256) { + let chains = self.sync_manager.get_ignored_chains(); + if !chains.contains(&chain_hash) { + panic!("expected ignored chains to contain {chain_hash:?}: {chains:?}"); } } @@ -1021,11 +1021,6 @@ impl TestRig { self.log(&format!("Found expected penalty {penalty_msg}")); } - pub fn expect_single_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) { - self.expect_penalty(peer_id, expect_penalty_msg); - self.expect_no_penalty_for(peer_id); - } - pub fn block_with_parent_and_blobs( &mut self, parent_root: Hash256, @@ -1461,7 +1456,7 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { // Trigger the request rig.trigger_unknown_parent_block(peer_id, block.into()); for i in 1..=PARENT_FAIL_TOLERANCE { - rig.assert_not_failed_chain(block_root); + rig.assert_not_ignored_chain(block_root); let id = rig.expect_block_parent_request(parent_root); if i % 2 != 0 { // The request fails. It should be tried again. 
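Taken together, the renamed cache gives lookup sync this control flow (condensed sketch with simplified types; the real code keys an LRU time cache by block root):

```rust
// Sketch: a chain that grows past PARENT_DEPTH_TOLERANCE has its tip recorded as
// ignored, and later lookups touching that tip are dropped without any peer
// penalty, leaving range sync to fetch the blocks.
use std::collections::HashSet;

const PARENT_DEPTH_TOLERANCE: usize = 32;

struct Lookups {
    ignored_chains: HashSet<u64>,
}

impl Lookups {
    fn on_parent_chain_too_long(&mut self, chain_len: usize, tip: u64) {
        if chain_len >= PARENT_DEPTH_TOLERANCE {
            self.ignored_chains.insert(tip);
        }
    }

    /// Whether a new lookup should be created; note there is no downscore branch.
    fn should_create_lookup(&self, block_root: u64) -> bool {
        !self.ignored_chains.contains(&block_root)
    }
}
```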
@@ -1474,8 +1469,8 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { } } - rig.assert_not_failed_chain(block_root); - rig.assert_not_failed_chain(parent.canonical_root()); + rig.assert_not_ignored_chain(block_root); + rig.assert_not_ignored_chain(parent.canonical_root()); rig.expect_no_active_lookups_empty_network(); } @@ -1500,7 +1495,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { for _ in 0..PROCESSING_FAILURES { let id = rig.expect_block_parent_request(parent_root); // Blobs are only requested in the previous first iteration as this test only retries blocks - rig.assert_not_failed_chain(block_root); + rig.assert_not_ignored_chain(block_root); // send the right parent but fail processing rig.parent_lookup_block_response(id, peer_id, Some(parent.clone().into())); rig.parent_block_processed(block_root, BlockError::BlockSlotLimitReached.into()); @@ -1508,7 +1503,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { rig.expect_penalty(peer_id, "lookup_block_processing_failure"); } - rig.assert_not_failed_chain(block_root); + rig.assert_not_ignored_chain(block_root); rig.expect_no_active_lookups_empty_network(); } @@ -1551,12 +1546,14 @@ fn test_parent_lookup_too_deep_grow_ancestor() { ); // Should not penalize peer, but network is not clear because of the blocks_by_range requests rig.expect_no_penalty_for(peer_id); - rig.assert_failed_chain(chain_hash); + rig.assert_ignored_chain(chain_hash); } // Regression test for https://github.com/sigp/lighthouse/pull/7118 +// 8042 UPDATE: block was previously added to the failed_chains cache, now it's inserted into the +// ignored chains cache. The regression test still applies as the chaild lookup is not created #[test] -fn test_child_lookup_not_created_for_failed_chain_parent_after_processing() { +fn test_child_lookup_not_created_for_ignored_chain_parent_after_processing() { // GIVEN: A parent chain longer than PARENT_DEPTH_TOLERANCE. let mut rig = TestRig::test_setup(); let mut blocks = rig.rand_blockchain(PARENT_DEPTH_TOLERANCE + 1); @@ -1586,8 +1583,8 @@ fn test_child_lookup_not_created_for_failed_chain_parent_after_processing() { } // At this point, the chain should have been deemed too deep and pruned. - // The tip root should have been inserted into failed chains. - rig.assert_failed_chain(tip_root); + // The tip root should have been inserted into ignored chains. + rig.assert_ignored_chain(tip_root); rig.expect_no_penalty_for(peer_id); // WHEN: Trigger the extending block that points to the tip. @@ -1604,10 +1601,10 @@ fn test_child_lookup_not_created_for_failed_chain_parent_after_processing() { }), ); - // THEN: The extending block should not create a lookup because the tip was inserted into failed chains. + // THEN: The extending block should not create a lookup because the tip was inserted into + // ignored chains. rig.expect_no_active_lookups(); - // AND: The peer should be penalized for extending a failed chain. 
- rig.expect_single_penalty(peer_id, "failed_chain"); + rig.expect_no_penalty_for(peer_id); rig.expect_empty_network(); } @@ -1646,7 +1643,7 @@ fn test_parent_lookup_too_deep_grow_tip() { ); // Should not penalize peer, but network is not clear because of the blocks_by_range requests rig.expect_no_penalty_for(peer_id); - rig.assert_failed_chain(tip.canonical_root()); + rig.assert_ignored_chain(tip.canonical_root()); } #[test] @@ -1699,15 +1696,14 @@ fn test_lookup_add_peers_to_parent() { } #[test] -fn test_skip_creating_failed_parent_lookup() { +fn test_skip_creating_ignored_parent_lookup() { let mut rig = TestRig::test_setup(); let (_, block, parent_root, _) = rig.rand_block_and_parent(); let peer_id = rig.new_connected_peer(); - rig.insert_failed_chain(parent_root); + rig.insert_ignored_chain(parent_root); rig.trigger_unknown_parent_block(peer_id, block.into()); - // Expect single penalty for peer, despite dropping two lookups - rig.expect_single_penalty(peer_id, "failed_chain"); - // Both current and parent lookup should be rejected + rig.expect_no_penalty_for(peer_id); + // Both current and parent lookup should not be created rig.expect_no_active_lookups(); } From 5928407ce45b539082874ca1f9c5e3e0704f5d85 Mon Sep 17 00:00:00 2001 From: Toki <105550481+gitToki@users.noreply.github.com> Date: Wed, 17 Sep 2025 06:51:43 +0200 Subject: [PATCH 11/45] fix(rate_limiter): add missing prune calls for light client protocols (#8058) Co-Authored-By: Jimmy Chen Co-Authored-By: gitToki --- .github/mergify.yml | 4 ++ .../src/rpc/rate_limiter.rs | 45 ++++++++++++++----- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/.github/mergify.yml b/.github/mergify.yml index 4ab73bcf079..0b917b25467 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -105,6 +105,10 @@ queue_rules: {{ body | get_section("## Proposed Changes", "") }} + + {% for commit in commits | unique(attribute='email_author') %} + Co-Authored-By: {{ commit.author }} <{{ commit.email_author }}> + {% endfor %} queue_conditions: - "#approved-reviews-by >= 1" - "check-success=license/cla" diff --git a/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs b/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs index 65cd1c2e61e..8b364f506cc 100644 --- a/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs +++ b/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs @@ -382,16 +382,41 @@ impl RPCRateLimiter { pub fn prune(&mut self) { let time_since_start = self.init_time.elapsed(); - self.ping_rl.prune(time_since_start); - self.status_rl.prune(time_since_start); - self.metadata_rl.prune(time_since_start); - self.goodbye_rl.prune(time_since_start); - self.bbrange_rl.prune(time_since_start); - self.bbroots_rl.prune(time_since_start); - self.blbrange_rl.prune(time_since_start); - self.blbroot_rl.prune(time_since_start); - self.dcbrange_rl.prune(time_since_start); - self.dcbroot_rl.prune(time_since_start); + + let Self { + prune_interval: _, + init_time: _, + goodbye_rl, + ping_rl, + metadata_rl, + status_rl, + bbrange_rl, + bbroots_rl, + blbrange_rl, + blbroot_rl, + dcbroot_rl, + dcbrange_rl, + lc_bootstrap_rl, + lc_optimistic_update_rl, + lc_finality_update_rl, + lc_updates_by_range_rl, + fork_context: _, + } = self; + + goodbye_rl.prune(time_since_start); + ping_rl.prune(time_since_start); + metadata_rl.prune(time_since_start); + status_rl.prune(time_since_start); + bbrange_rl.prune(time_since_start); + bbroots_rl.prune(time_since_start); + blbrange_rl.prune(time_since_start); + blbroot_rl.prune(time_since_start); + 
dcbrange_rl.prune(time_since_start); + dcbroot_rl.prune(time_since_start); + lc_bootstrap_rl.prune(time_since_start); + lc_optimistic_update_rl.prune(time_since_start); + lc_finality_update_rl.prune(time_since_start); + lc_updates_by_range_rl.prune(time_since_start); } } From 3cb7e59be2ebcf66836dabae2c771b455822f654 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 18 Sep 2025 11:17:31 +1000 Subject: [PATCH 12/45] Update issue template (#7938) * Update issue template * Delete old issue template --- .../default-issue-template.md} | 9 +++++++++ 1 file changed, 9 insertions(+) rename .github/{ISSUE_TEMPLATE.md => ISSUE_TEMPLATE/default-issue-template.md} (79%) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE/default-issue-template.md similarity index 79% rename from .github/ISSUE_TEMPLATE.md rename to .github/ISSUE_TEMPLATE/default-issue-template.md index d73b9ff6f04..784add20f35 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE/default-issue-template.md @@ -1,3 +1,12 @@ +--- +name: Default issue template +about: Use this template for all issues +title: '' +labels: '' +assignees: '' + +--- + ## Description Please provide a brief description of the issue. From 521be2b7576e94a0ca01107cc08d0b3a35a96dee Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Wed, 17 Sep 2025 18:33:42 -0700 Subject: [PATCH 13/45] Prevent silently dropping cell proof chunks (#8023) Co-Authored-By: Eitan Seri- Levi --- beacon_node/beacon_chain/src/kzg_utils.rs | 7 +++++++ beacon_node/http_api/src/publish_blocks.rs | 2 +- consensus/types/src/data_column_sidecar.rs | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/beacon_node/beacon_chain/src/kzg_utils.rs b/beacon_node/beacon_chain/src/kzg_utils.rs index 2147ed59663..ad669e17291 100644 --- a/beacon_node/beacon_chain/src/kzg_utils.rs +++ b/beacon_node/beacon_chain/src/kzg_utils.rs @@ -174,6 +174,13 @@ pub fn blobs_to_data_column_sidecars( let kzg_commitments_inclusion_proof = block.message().body().kzg_commitments_merkle_proof()?; let signed_block_header = block.signed_block_header(); + if cell_proofs.len() != blobs.len() * E::number_of_columns() { + return Err(DataColumnSidecarError::InvalidCellProofLength { + expected: blobs.len() * E::number_of_columns(), + actual: cell_proofs.len(), + }); + } + let proof_chunks = cell_proofs .chunks_exact(E::number_of_columns()) .collect::>(); diff --git a/beacon_node/http_api/src/publish_blocks.rs b/beacon_node/http_api/src/publish_blocks.rs index b6411167d92..05a4a4b7a4a 100644 --- a/beacon_node/http_api/src/publish_blocks.rs +++ b/beacon_node/http_api/src/publish_blocks.rs @@ -412,7 +412,7 @@ fn build_data_columns( error!( error = ?e, %slot, - "Invalid data column - not publishing block" + "Invalid data column - not publishing data columns" ); warp_utils::reject::custom_bad_request(format!("{e:?}")) })?; diff --git a/consensus/types/src/data_column_sidecar.rs b/consensus/types/src/data_column_sidecar.rs index 57f7a88e193..2272b1695c9 100644 --- a/consensus/types/src/data_column_sidecar.rs +++ b/consensus/types/src/data_column_sidecar.rs @@ -143,6 +143,7 @@ pub enum DataColumnSidecarError { PreDeneb, SszError(SszError), BuildSidecarFailed(String), + InvalidCellProofLength { expected: usize, actual: usize }, } impl From for DataColumnSidecarError { From 684632df731a69d6e42531bc1c323557a7b45d7e Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Thu, 18 Sep 2025 15:16:59 +1000 Subject: [PATCH 14/45] Fix reprocess queue memory leak (#8065) Fix a memory leak in the reprocess queue. 
If the vec of attestation IDs for a block is never evicted from the reprocess queue by a `BlockImported` event, then it stays in the map forever consuming memory. The fix is to remove the entry when its last attestation times out. We do similarly for light client updates. In practice this will only occur if there is a race between adding an attestation to the queue and processing the `BlockImported` event, or if there are attestations for block roots that we never import (e.g. random block roots, block roots of invalid blocks). Co-Authored-By: Michael Sproul --- .../src/scheduler/work_reprocessing_queue.rs | 139 ++++++++++++++++-- 1 file changed, 130 insertions(+), 9 deletions(-) diff --git a/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs b/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs index 9565e57589d..3e755f08302 100644 --- a/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs +++ b/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs @@ -37,7 +37,9 @@ const TASK_NAME: &str = "beacon_processor_reprocess_queue"; const GOSSIP_BLOCKS: &str = "gossip_blocks"; const RPC_BLOCKS: &str = "rpc_blocks"; const ATTESTATIONS: &str = "attestations"; +const ATTESTATIONS_PER_ROOT: &str = "attestations_per_root"; const LIGHT_CLIENT_UPDATES: &str = "lc_updates"; +const LIGHT_CLIENT_UPDATES_PER_PARENT_ROOT: &str = "lc_updates_per_parent_root"; /// Queue blocks for re-processing with an `ADDITIONAL_QUEUED_BLOCK_DELAY` after the slot starts. /// This is to account for any slight drift in the system clock. @@ -829,10 +831,19 @@ impl ReprocessQueue { ); } - if let Some(queued_atts) = self.awaiting_attestations_per_root.get_mut(&root) - && let Some(index) = queued_atts.iter().position(|&id| id == queued_id) + if let Entry::Occupied(mut queued_atts) = + self.awaiting_attestations_per_root.entry(root) + && let Some(index) = + queued_atts.get().iter().position(|&id| id == queued_id) { - queued_atts.swap_remove(index); + let queued_atts_mut = queued_atts.get_mut(); + queued_atts_mut.swap_remove(index); + + // If the vec is empty after this attestation's removal, we need to delete + // the entry to prevent bloating the hashmap indefinitely. 
+ if queued_atts_mut.is_empty() { + queued_atts.remove_entry(); + } } } } @@ -853,13 +864,19 @@ impl ReprocessQueue { error!("Failed to send scheduled light client optimistic update"); } - if let Some(queued_lc_updates) = self - .awaiting_lc_updates_per_parent_root - .get_mut(&parent_root) - && let Some(index) = - queued_lc_updates.iter().position(|&id| id == queued_id) + if let Entry::Occupied(mut queued_lc_updates) = + self.awaiting_lc_updates_per_parent_root.entry(parent_root) + && let Some(index) = queued_lc_updates + .get() + .iter() + .position(|&id| id == queued_id) { - queued_lc_updates.swap_remove(index); + let queued_lc_updates_mut = queued_lc_updates.get_mut(); + queued_lc_updates_mut.swap_remove(index); + + if queued_lc_updates_mut.is_empty() { + queued_lc_updates.remove_entry(); + } } } } @@ -929,11 +946,21 @@ impl ReprocessQueue { &[ATTESTATIONS], self.attestations_delay_queue.len() as i64, ); + metrics::set_gauge_vec( + &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_TOTAL, + &[ATTESTATIONS_PER_ROOT], + self.awaiting_attestations_per_root.len() as i64, + ); metrics::set_gauge_vec( &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_TOTAL, &[LIGHT_CLIENT_UPDATES], self.lc_updates_delay_queue.len() as i64, ); + metrics::set_gauge_vec( + &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_TOTAL, + &[LIGHT_CLIENT_UPDATES_PER_PARENT_ROOT], + self.awaiting_lc_updates_per_parent_root.len() as i64, + ); } fn recompute_next_backfill_batch_event(&mut self) { @@ -979,6 +1006,7 @@ impl ReprocessQueue { #[cfg(test)] mod tests { use super::*; + use crate::BeaconProcessorConfig; use logging::create_test_tracing_subscriber; use slot_clock::{ManualSlotClock, TestingSlotClock}; use std::ops::Add; @@ -1101,4 +1129,97 @@ mod tests { Duration::from_secs(slot_duration), ) } + + fn test_queue() -> ReprocessQueue { + create_test_tracing_subscriber(); + + let config = BeaconProcessorConfig::default(); + let (ready_work_tx, _) = mpsc::channel::(config.max_scheduled_work_queue_len); + let (_, reprocess_work_rx) = + mpsc::channel::(config.max_scheduled_work_queue_len); + let slot_clock = Arc::new(testing_slot_clock(12)); + + ReprocessQueue::new(ready_work_tx, reprocess_work_rx, slot_clock) + } + + // This is a regression test for a memory leak in `awaiting_attestations_per_root`. + // See: https://github.com/sigp/lighthouse/pull/8065 + #[tokio::test] + async fn prune_awaiting_attestations_per_root() { + create_test_tracing_subscriber(); + + let mut queue = test_queue(); + + // Pause time so it only advances manually + tokio::time::pause(); + + let beacon_block_root = Hash256::repeat_byte(0xaf); + + // Insert an attestation. + let att = ReprocessQueueMessage::UnknownBlockUnaggregate(QueuedUnaggregate { + beacon_block_root, + process_fn: Box::new(|| {}), + }); + + // Process the event to enter it into the delay queue. + queue.handle_message(InboundEvent::Msg(att)); + + // Check that it is queued. + assert_eq!(queue.awaiting_attestations_per_root.len(), 1); + assert!( + queue + .awaiting_attestations_per_root + .contains_key(&beacon_block_root) + ); + + // Advance time to expire the attestation. + advance_time(&queue.slot_clock, 2 * QUEUED_ATTESTATION_DELAY).await; + let ready_msg = queue.next().await.unwrap(); + assert!(matches!(ready_msg, InboundEvent::ReadyAttestation(_))); + queue.handle_message(ready_msg); + + // The entry for the block root should be gone. + assert!(queue.awaiting_attestations_per_root.is_empty()); + } + + // This is a regression test for a memory leak in `awaiting_lc_updates_per_parent_root`. 
+ // See: https://github.com/sigp/lighthouse/pull/8065 + #[tokio::test] + async fn prune_awaiting_lc_updates_per_parent_root() { + create_test_tracing_subscriber(); + + let mut queue = test_queue(); + + // Pause time so it only advances manually + tokio::time::pause(); + + let parent_root = Hash256::repeat_byte(0xaf); + + // Insert a light client update. + let msg = + ReprocessQueueMessage::UnknownLightClientOptimisticUpdate(QueuedLightClientUpdate { + parent_root, + process_fn: Box::new(|| {}), + }); + + // Process the event to enter it into the delay queue. + queue.handle_message(InboundEvent::Msg(msg)); + + // Check that it is queued. + assert_eq!(queue.awaiting_lc_updates_per_parent_root.len(), 1); + assert!( + queue + .awaiting_lc_updates_per_parent_root + .contains_key(&parent_root) + ); + + // Advance time to expire the update. + advance_time(&queue.slot_clock, 2 * QUEUED_LIGHT_CLIENT_UPDATE_DELAY).await; + let ready_msg = queue.next().await.unwrap(); + assert!(matches!(ready_msg, InboundEvent::ReadyLightClientUpdate(_))); + queue.handle_message(ready_msg); + + // The entry for the parent root should be gone. + assert!(queue.awaiting_lc_updates_per_parent_root.is_empty()); + } } From 3543a20192bb67190855200d8e2203c1e6a03b3c Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Thu, 18 Sep 2025 15:17:03 +1000 Subject: [PATCH 15/45] Add experimental complete-blob-backfill flag (#7751) A different (and complementary) approach for: - https://github.com/sigp/lighthouse/issues/5391 This PR adds a flag to set the DA boundary to the Deneb fork. The effect of this change is that Lighthouse will try to backfill _all_ blobs. Most peers do not have this data, but I'm thinking that combined with `trusted-peers` this could be quite effective. Co-Authored-By: Michael Sproul --- beacon_node/beacon_chain/src/builder.rs | 2 ++ beacon_node/beacon_chain/src/chain_config.rs | 3 ++ .../src/data_availability_checker.rs | 27 +++++++++++++--- beacon_node/src/cli.rs | 10 ++++++ beacon_node/src/config.rs | 8 +++++ lighthouse/tests/beacon_node.rs | 31 +++++++++++++++++++ 6 files changed, 76 insertions(+), 5 deletions(-) diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 5e7aa7d4f87..35432632cc2 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -899,6 +899,7 @@ where let genesis_time = head_snapshot.beacon_state.genesis_time(); let canonical_head = CanonicalHead::new(fork_choice, Arc::new(head_snapshot)); let shuffling_cache_size = self.chain_config.shuffling_cache_size; + let complete_blob_backfill = self.chain_config.complete_blob_backfill; // Calculate the weak subjectivity point in which to backfill blocks to. let genesis_backfill_slot = if self.chain_config.genesis_backfill { @@ -1013,6 +1014,7 @@ where genesis_backfill_slot, data_availability_checker: Arc::new( DataAvailabilityChecker::new( + complete_blob_backfill, slot_clock, self.kzg.clone(), store, diff --git a/beacon_node/beacon_chain/src/chain_config.rs b/beacon_node/beacon_chain/src/chain_config.rs index d6be96afe94..a7defa9fa2a 100644 --- a/beacon_node/beacon_chain/src/chain_config.rs +++ b/beacon_node/beacon_chain/src/chain_config.rs @@ -86,6 +86,8 @@ pub struct ChainConfig { /// If using a weak-subjectivity sync, whether we should download blocks all the way back to /// genesis. pub genesis_backfill: bool, + /// EXPERIMENTAL: backfill blobs and data columns beyond the data availability window.
+ pub complete_blob_backfill: bool, /// Whether to send payload attributes every slot, regardless of connected proposers. /// /// This is useful for block builders and testing. @@ -144,6 +146,7 @@ impl Default for ChainConfig { optimistic_finalized_sync: true, shuffling_cache_size: crate::shuffling_cache::DEFAULT_CACHE_SIZE, genesis_backfill: false, + complete_blob_backfill: false, always_prepare_payload: false, epochs_per_migration: crate::migrate::DEFAULT_EPOCHS_PER_MIGRATION, enable_light_client_server: true, diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 307dc0e227a..88cd8f3aab4 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -78,6 +78,7 @@ pub const STATE_LRU_CAPACITY: usize = STATE_LRU_CAPACITY_NON_ZERO.get(); /// proposer. Having a capacity > 1 is an optimization to prevent sync lookup from having re-fetch /// data during moments of unstable network conditions. pub struct DataAvailabilityChecker { + complete_blob_backfill: bool, availability_cache: Arc>, slot_clock: T::SlotClock, kzg: Arc, @@ -116,6 +117,7 @@ impl Debug for Availability { impl DataAvailabilityChecker { pub fn new( + complete_blob_backfill: bool, slot_clock: T::SlotClock, kzg: Arc, store: BeaconStore, @@ -129,6 +131,7 @@ impl DataAvailabilityChecker { spec.clone(), )?; Ok(Self { + complete_blob_backfill, availability_cache: Arc::new(inner), slot_clock, kzg, @@ -518,9 +521,15 @@ impl DataAvailabilityChecker { /// The epoch at which we require a data availability check in block processing. /// `None` if the `Deneb` fork is disabled. pub fn data_availability_boundary(&self) -> Option { - let current_epoch = self.slot_clock.now()?.epoch(T::EthSpec::slots_per_epoch()); - self.spec - .min_epoch_data_availability_boundary(current_epoch) + let fork_epoch = self.spec.deneb_fork_epoch?; + + if self.complete_blob_backfill { + Some(fork_epoch) + } else { + let current_epoch = self.slot_clock.now()?.epoch(T::EthSpec::slots_per_epoch()); + self.spec + .min_epoch_data_availability_boundary(current_epoch) + } } /// Returns true if the given epoch lies within the da boundary and false otherwise. @@ -1076,7 +1085,15 @@ mod test { let kzg = get_kzg(&spec); let store = Arc::new(HotColdDB::open_ephemeral(<_>::default(), spec.clone()).unwrap()); let custody_context = Arc::new(CustodyContext::new(false)); - DataAvailabilityChecker::new(slot_clock, kzg, store, custody_context, spec) - .expect("should initialise data availability checker") + let complete_blob_backfill = false; + DataAvailabilityChecker::new( + complete_blob_backfill, + slot_clock, + kzg, + store, + custody_context, + spec, + ) + .expect("should initialise data availability checker") } } diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 386eb721a04..9a981c65812 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -401,6 +401,16 @@ pub fn cli_app() -> Command { .help_heading(FLAG_HEADER) .display_order(0) ) + .arg( + Arg::new("complete-blob-backfill") + .long("complete-blob-backfill") + .help("Download all blobs back to the Deneb fork epoch. 
This will likely result in \ + the node banning most of its peers.") + .action(ArgAction::SetTrue) + .help_heading(FLAG_HEADER) + .display_order(0) + .hide(true) + ) .arg( Arg::new("enable-private-discovery") .long("enable-private-discovery") diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 1b5f25b3175..3681556d11e 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -825,6 +825,14 @@ pub fn get_config( client_config.chain.genesis_backfill = true; } + client_config.chain.complete_blob_backfill = cli_args.get_flag("complete-blob-backfill"); + + // Ensure `prune_blobs` is false whenever complete-blob-backfill is set. This overrides any + // setting of `--prune-blobs true` applied earlier in flag parsing. + if client_config.chain.complete_blob_backfill { + client_config.store.prune_blobs = false; + } + // Backfill sync rate-limiting client_config.beacon_processor.enable_backfill_rate_limiting = !cli_args.get_flag("disable-backfill-rate-limiting"); diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 1fd3cc1b792..0660073bbc5 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -392,6 +392,37 @@ fn genesis_backfill_with_historic_flag() { .with_config(|config| assert!(config.chain.genesis_backfill)); } +#[test] +fn complete_blob_backfill_default() { + CommandLineTest::new() + .run_with_zero_port() + .with_config(|config| assert!(!config.chain.complete_blob_backfill)); +} + +#[test] +fn complete_blob_backfill_flag() { + CommandLineTest::new() + .flag("complete-blob-backfill", None) + .run_with_zero_port() + .with_config(|config| { + assert!(config.chain.complete_blob_backfill); + assert!(!config.store.prune_blobs); + }); +} + +// Even if `--prune-blobs true` is provided, `--complete-blob-backfill` should override it to false. +#[test] +fn complete_blob_backfill_and_prune_blobs_true() { + CommandLineTest::new() + .flag("complete-blob-backfill", None) + .flag("prune-blobs", Some("true")) + .run_with_zero_port() + .with_config(|config| { + assert!(config.chain.complete_blob_backfill); + assert!(!config.store.prune_blobs); + }); +} + // Tests for Eth1 flags. 
// DEPRECATED but should not crash #[test] From 92f60b8fd2a9b62a7999da2fc91043e3c87fd4b8 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 18 Sep 2025 16:13:27 +1000 Subject: [PATCH 16/45] Add release helper script to list PRs and breaking changes (#7737) Output for 7.1.0 release: ``` # Commit SHA PR Number Has backwards-incompat Label PR Title --- ------------ ----------- ------------------------------ -------------------------------------------- 1 d5a03c9d86bf 6872 False Add more range sync tests (#6872) 2 ec2fe3812edc - - [NO PR MATCH]: Merge remote-tracking branch 'origin/release-v7.0.0-beta.0' into unstable 3 3992d6ba74c9 6862 False Fix misc PeerDAS todos (#6862) 4 d60388134d07 6928 False Add PeerDAS metrics to track subnets without peers (#6928) 5 431dd7c39828 6917 False Remove un-used batch sync error condition (#6917) 6 0055af56b685 6932 False Unsubscribe blob topics at Fulu fork (#6932) 7 6ab6eae40c0e - - [NO PR MATCH]: Merge remote-tracking branch 'origin/release-v7.0.0-beta.0' into unstable 8 193061ff7376 6634 False Use RpcSend on RPC::self_limiter::ready_requests (#6634) 9 e5e43ecd8129 - - [NO PR MATCH]: Merge remote-tracking branch 'origin/release-v7.0.0' into unstable 10 b4be5141823f 7012 False Add spamoor_blob in network_params.yaml (#7012) 11 01df433dfd02 7021 False update codeowners, to be more specific (#7021) 12 60964fc7b530 6829 False Expose blst internals (#6829) 13 3fab6a2c0ba7 6866 False Block availability data enum (#6866) 14 6e11bddd4bd0 6947 False feat: adds CLI flags to delay publishing for edge case testing on PeerDAS devnets (#6947) 15 454c7d05c40b 7017 False Remove LC server config from HTTP API (#7017) 16 54b4150a6220 7030 False Add test flag to override `SYNC_TOLERANCE_EPOCHS` for range sync testing (#7030) 17 cf4104abe5e2 - - [NO PR MATCH]: Merge remote-tracking branch 'origin/release-v7.0.0' into unstable 18 8a772520a50a 7034 False Cache validator registration only after successful publish (#7034) 19 1235d4480225 7048 False Remove `watch` (#7048) 20 3bc5f1f2a58b 7081 False Validator Registration ssz support (#7081) 21 b4e79edf2a09 - - [NO PR MATCH]: Merge remote-tracking branch 'origin/release-v7.0.0' into unstable 22 8d1abce26ed5 6915 False Bump SSZ version for larger bitfield `SmallVec` (#6915) 23 1916a2ac5ad3 7020 False chore: update to rust-eth-kzg to 0.5.4 (#7020) 24 1a08e6f0a090 7109 False Remove duplicate sync_tolerance_epochs config (#7109) 25 f23f984f8575 7057 False switch to upstream gossipsub (#7057) 26 d60c24ef1cc0 6339 True Integrate tracing (#6339) 27 a6bdc474db01 6991 False Log range sync download errors (#6991) 28 574b204bdb39 6680 False decouple `eth2` from `store` and `lighthouse_network` (#6680) 29 c095a0a58feb 7130 False update gossipsub to the latest upstream revision (#7130) 30 5cda1641ea2f 7137 False Log `file appender` initialization errors properly (#7137) 31 d96123b02882 7149 False Remove unnecessary `filter_layer` in logger builder (#7149) 32 a1b1d7ae589f 7150 False Remove `discv5` logs from logfile output (#7150) 33 ca237652f1da 6998 False Track request IDs in RangeBlockComponentsRequest (#6998) 34 d323699fde01 7183 False Add missing `osaka-time` lcli param (#7183) 35 cbf1c04a1486 - - [NO PR MATCH]: resolve merge conflicts between untstable and release-v7.0.0 36 2f37bf4de5e3 - - [NO PR MATCH]: Fix more merge conflicts between unstable and release-v7.0.0 37 3f6c11db0eb6 6995 False Some updates to Lighthouse book (#6995) 38 9dce729cb6a0 7182 False Ensure sqlite and rusqlite are optional in `consensus/types` (#7182) 39 
6f31d4434308 7033 False Remove CGC from data_availability checker (#7033) 40 ca8eaea11677 7169 True Remove `crit` as an option from the CLI entirely (#7169) 41 bde0f1ef0b29 - - [NO PR MATCH]: Merge remote-tracking branch 'origin/release-v7.0.0' into unstable 42 fb7ec0d151d4 7112 False Change `genesis-state-url-timeout` (#7112) 43 4839ed620fa9 7168 False Tracing cleanup (#7168) 44 578db67755cb - - [NO PR MATCH]: Merge remote-tracking branch 'origin/release-v7.0.0' into backmerge-apr-2 45 80626e58d224 7244 False Attempt to fix flaky network tests (#7244) 46 d6cd049a453b 7238 False RPC RequestId Cleanup (#7238) 47 0e6da0fcafe2 - - [NO PR MATCH]: Merge branch 'release-v7.0.0' into v7-backmerge 48 57abffcd997f 7240 False Disable log color when running in non-interactive mode (#7240) 49 6a75f24ab13e 7188 False Fix the `getBlobs` metric and ensure it is recorded promptly to prevent miscounts (#7188) 50 7cc64cab8352 6990 False Add missing error log and remove redundant id field from lookup logs (#6990) 51 591fb7df141d - - [NO PR MATCH]: Merge branch 'release-v7.0.0' into backmerge-for-openssl 52 e77fb01a063c 7265 False Remove CLI conflict for secrets-dir and datadir (#7265) 53 b5d40e3db06d 7256 False Align logs (#7256) 54 70850fe58d56 6744 True Drop head tracker for summaries DAG (#6744) 55 47a85cd1186d 7269 False Bump version to v7.1.0-beta.0 (not a release) (#7269) 56 e924264e17b8 7258 False Fullnodes to publish data columns from EL `getBlobs` (#7258) 57 759b0612b37f 7117 False Offloading KZG Proof Computation from the beacon node (#7117) 58 d96b73152e0e 7192 False Fix for #6296: Deterministic RNG in peer DAS publish block tests (#7192) 59 39eb8145f89e - - [NO PR MATCH]: Merge branch 'release-v7.0.0' into unstable 60 70f8ab9a6fc2 7309 False Add riscv64 build support (#7309) 61 be68dd24d05f 7281 False Fix wrong custody column count for lookup blocks (#7281) 62 08882c64cae5 6996 False Fix execution engine integration tests with latest geth version (#6996) 63 476f3a593c20 7161 False Add `MAX_BLOBS_PER_BLOCK_FULU` config (#7161) 64 c32569ab83bb 7225 False Restore HTTP API logging and add more metrics (#7225) 65 410af7c5f5dc 7279 False feat: update mainnet bootnodes (#7279) 66 80fe133d2c4c 7280 False Update Lighthouse Book for Electra features (#7280) 67 9f4b0cdc2855 7343 False Fix Kurtosis doppelganger CI (#7343) 68 e61e92b926d5 - - [NO PR MATCH]: Merge remote-tracking branch 'origin/stable' into unstable 69 5527125f5e13 7340 False Fix GitHub releases page looks bad in GitHub dark theme (#7340) 70 c13e069c9c63 7324 False Revise logging when `queue is full` (#7324) 71 1dd37048b9d1 7346 False Enable cross-compiling for riscv64 architecture (#7346) 72 402a81cdd78e 7350 False Fix Kurtosis testnet (#7350) 73 1324d3d3c4c2 5923 False Delayed RPC Send Using Tokens (#5923) 74 6fad18644bbe 6747 False feat: presign for validator account (#6747) 75 2e2b0d2176e0 7351 False Revise consolidation info in Lighthouse book (#7351) 76 63a10eaaea62 6956 True Changing `boot_enr.yaml` to expect `bootstap_nodes.yaml` for pectra devnet (#6956) 77 34a6c3a93029 6897 True vc: increase default gas limit (#6897) 78 94ccd7608ea8 6653 False Add documentation for VC API `/lighthouse/beacon/health` (#6653) 79 9779b4ba2c04 7326 False Optimize `validate_data_columns` (#7326) 80 93ec9df13760 7304 False Compute proposer shuffling only once in gossip verification (#7304) 81 2aa5d5c25e22 7359 False Make sure to log SyncingChain ID (#7359) 82 c8224c8d5e19 7387 False docs: fix broken link to voluntary exit guide (#7387) 83 43c38a6fa0cc 7378 
False Change slog to tracing in comments (#7378) 84 beb0ce68bdf6 6922 False Make range sync peer loadbalancing PeerDAS-friendly (#6922) 85 3d92e3663b74 6705 False Modularize validator store (#6705) 86 058dae064184 7405 False Add requires --http when using vc subcommands --http-port (#7405) 87 0f13029c7d51 7409 False Don't publish data columns reconstructed from RPC columns to the gossip network (#7409) 88 8dc3d23af083 7400 False Add a default timeout to all `BeaconNodeHttpClient` requests (#7400) 89 e90fcbe6577c 7416 False Add ARM binary for macOS in release (#7416) 90 4b9c16fc7175 7199 False Add Electra forks to basic sim tests (#7199) 91 a497ec601cae 6975 False Retry custody requests after peer metadata updates (#6975) 92 e0c1f27e1303 7394 False simulator: Persist beacon logs (#7394) 93 92391cdac665 7284 False update gossipsub to the latest upstream revision (#7284) 94 593390162f47 7399 False `peerdas-devnet-7`: update `DataColumnSidecarsByRoot` request to use `DataColumnsByRootIdentifier` (#7399) 95 5b25a48af34b 7404 False Siren installation improvement (#7404) 96 e051c7ca89c8 7396 False Siren Pectra Feature Updates (#7396) 97 0a917989b218 7370 False impl test random for some types (#7370) 98 807848bc7ac4 7443 False Next sync committee branch bug (#7443) 99 851ee2bcedfc 7454 False Extract get_domain for VoluntaryExit (#7454) 100 c2c7fb87a862 7460 False Make DAG construction more permissive (#7460) 101 b1138c28fb94 7451 False Add additional mergify rules to automate triaging (#7451) 102 cc6ae9d3f09c 7463 False Fix mergify infinite loop. (#7463) 103 1853d836b7e4 7458 False Added E::slots_per_epoch() to deneb time calculation (#7458) 104 c4182e362b8f 7433 False simulator: Write dependency logs to separate files (#7433) 105 e0ee148d6aca 7470 False Prevent mergify from updating labels while CI is still running. (#7470) 106 e21198c08baa 7472 False One more attempt to fix mergify condition. (#7472) 107 268809a53069 7471 False Rust clippy 1.87 lint fixes (#7471) 108 b051a5d6cc7b 7469 False Delete `at-most` in `lighthouse vm create` (#7469) 109 1d27855db7be 7369 False impl from hash256 for `ExecutionBlockHash` (#7369) 110 23ad833747b6 7417 False Change default EngineState to online (#7417) 111 fcfcbf9a11b3 7481 False Update mdlint to disable descriptive-link-text (#7481) 112 7684d1f866ab 7372 False ContextDeserialize and Beacon API Improvements (#7372) 113 5393d33af823 7411 False Silence `Uninitialized` warn log on start-up (#7411) 114 1e6cdeb88a6a 6799 False feat: Add docker reproducible builds (#6799) 115 50dbfdf61243 7455 False Some updates to Lighthouse book (#7455) 116 af87135e3020 7484 False Move MD059 rule to configuration file (#7484) 117 805c2dc831e6 5047 False Correct reward denominator in op pool (#5047) 118 7e2df6b602a1 7474 False Empty list `[]` to return all validators balances (#7474) 119 f06d1d034615 7495 False Fix blob download from checkpointz servers (#7495) 120 0688932de28d 7497 False Pass blobs into `ValidatorStore::sign_block` (#7497) 121 e29b607257d8 7427 False Move notifier and latency service to `validator_services` (#7427) 122 7759cb8f91c0 7494 False Update mergify rule to not evaluate PRs that are not ready for review - to reduce noise and avoid updating stale PRs. 
(#7494) 123 2e96e9769b99 7507 False Use slice.is_sorted now that it's stable (#7507) 124 a8035d7395ea 7506 False Enable stdout logging in rpc_tests (#7506) 125 817f14c3491a 7500 False Send execution_requests in fulu (#7500) 126 537fc5bde860 7459 False Revive network-test logs files in CI (#7459) 127 cf0f95985540 7180 False Improve log readability during rpc_tests (#7180) 128 ce8d0814ad71 7246 False Ensure logfile permissions are maintained after rotation (#7246) 129 6af8c187e0b7 7052 False Publish EL Info in Metrics (#7052) 130 a2797d4bbde9 7512 False Fix formatting errors from cargo-sort (#7512) 131 f01dc556d157 7505 False Update `engine_getBlobsV2` response type and add `getBlobsV2` tests (#7505) 132 e6ef644db4e8 7493 False Verify `getBlobsV2` response and avoid reprocessing imported data columns (#7493) 133 7c89b970afe2 7382 False Handle attestation validation errors (#7382) 134 8dde5bdb4413 - - [NO PR MATCH]: Update mergify rules so that I can add `waiting-on-author` on a PR that's passing CI. Remove noisy comments. 135 8989ef8fb11e 7025 False Enable arithmetic lint in rate-limiter (#7025) 136 b7fc03437bba - - [NO PR MATCH]: Fix condition 137 9e9c51be6fef - - [NO PR MATCH]: Remove redundant `and` 138 999b04517e35 - - [NO PR MATCH]: Merge pull request #7525 from jimmygchen/mergify-again 139 0ddf9a99d64a 7332 False Remove support for database migrations prior to schema version v22 (#7332) 140 5cda6a6f9e4b 7522 False Mitigate flakiness in test_delayed_rpc_response (#7522) 141 4d21846aba6b 7533 False Prevent `AvailabilityCheckError` when there's no new custody columns to import (#7533) 142 39744df93f0b 7393 False simulator: Fix `Failed to initialize dependency logging` (#7393) 143 38a5f338fad7 7529 False Add `console-subscriber` feature for debugging (#7529) 144 886ceb7e25e0 6882 False Run Assertoor tests in CI (#6882) 145 94a1446ac955 7541 False Fix unexpected blob error and duplicate import in fetch blobs (#7541) 146 ae30480926b6 7521 False Implement EIP-7892 BPO hardforks (#7521) 147 f67068e1ec53 7518 False Update `staking-deposit-cli` to `ethstaker-deposit-cli` (#7518) 148 cd83d8d95ddd 7544 False Add a name to the Tokio task (#7544) 149 357a8ccbb996 7549 False Checkpoint sync without the blobs from Fulu (#7549) 150 2d9fc34d4326 7540 False Fulu EF tests v1.6.0-alpha.0 (#7540) 151 dcee76c0dc88 7548 False Update key generation in validator manager (#7548) 152 9a4972053eb5 7530 False Add e2e sync tests to CI (#7530) 153 d457ceeaafae 7118 False Don't create child lookup if parent is faulty (#7118) 154 2f807e21bede 7538 False Add support for nightly tests (#7538) 155 e098f667380c 7570 False Update kurtosis config and EL images (#7570) 156 b2e8b67e3446 7566 False Reduce number of basic sim test nodes from 7 to 4 (#7566) 157 170cd0f5875d 7579 False Store the libp2p/discv5 logs when stopping local-testnet (#7579) 158 b08d49c4cb34 7559 False Changes for `fusaka-devnet-1` (#7559) 159 8c6abc0b69b7 7574 False Optimise parallelism in compute cells operations by zipping first (#7574) 160 7416d06dce8e 7561 False Add genesis sync test to CI (#7561) 161 076a1c3faead 7587 False Data column sidecar event (#7587) 162 5f208bb85829 7578 True Implement basic validator custody framework (no backfill) (#7578) 163 9803d69d8045 7590 False Implement status v2 version (#7590) 164 5472cb85008b 7582 False Batch verify KZG proofs for getBlobsV2 (#7582) 165 a65f78222d69 7594 False Drop stale registrations without reducing CGC (#7594) 166 ccd99c138c27 7588 False Wait before column reconstruction (#7588) 167 dc5f5af3eb53 
7595 False Fix flaky test_rpc_block_reprocessing (#7595) 168 4fc0665ccdd6 7592 False Add more context to Late Block Re-orgs (#7592) 169 6135f417a2f4 7591 False Add data columns sidecars debug beacon API (#7591) 170 3d2d65bf8d24 7593 False Advertise `--advertise-false-custody-group-count` for testing PeerDAS (#7593) 171 6786b9d12a6d 7444 True Single attestation "Full" implementation (#7444) 172 dd985341581f 6750 True Hierarchical state diffs in hot DB (#6750) 173 f67084a571d1 7437 False Remove reprocess channel (#7437) 174 d50924677a34 7620 False Remove instrumenting log level (#7620) 175 11bcccb353c0 7133 True Remove all prod eth1 related code (#7133) 176 e34a9a0c65d5 6551 False Allow the `--beacon-nodes` list to be updated at runtime (#6551) 177 3fefda68e5c1 7611 False Send byrange responses in the correct requested range (#7611) 178 cef04ee2ee48 7462 False Implement `validator_identities` Beacon API endpoint (#7462) 179 fd643c310c4e 7632 False Un-ignore EF test for v1.6.0-alpha.1 (#7632) 180 56b2d4b5253b 7636 False Remove instrumenting log level (#7636) 181 8e3c5d152413 7644 False Rust 1.89 compiler lint fix (#7644) 182 a0a6b9300f11 7551 False Do not compute sync selection proofs for the sync duty at the current slot (#7551) 183 9b1f3ed9d1a4 7652 False Add gossip check (#7652) 184 83cad25d9880 7657 False Fix Rust 1.88 clippy errors & execution engine tests (#7657) 185 522e00f48df7 7656 False Fix incorrect `waker` update condition (#7656) 186 6ea5f14b3988 7597 False feat: better error message for light_client/bootstrap endpoint (#7597) 187 2d759f78be6c 6576 False Fix beacon_chain metrics descriptions (#6576) 188 6be646ca1153 7666 True Bump DB schema to v25 (#7666) 189 e45ba846aef5 7673 False Increase http client default timeout to 2s in `http-api` tests. 
(#7673) 190 25ea8a83b77b 7667 False Add Michael as codeowner for store crate (#7667) 191 c1f94d9b7bf8 7669 False Test database schema stability (#7669) 192 257d2707182c 6612 False Add voluntary exit via validator manager (#6612) 193 e305cb1b921f 7661 True Custody persist fix (#7661) 194 41742ce2bde9 7683 False Update `SAMPLES_PER_SLOT` to be number of custody groups instead of data columns (#7683) 195 69c9c7038af7 7681 False Use prepare_beacon_proposer endpoint for validator custody registration (#7681) 196 fcc602a7872a 7646 False Update fulu network configs and add `MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS` (#7646) 197 a459a9af98c9 7689 False Fix and test checkpoint sync from genesis (#7689) 198 b35854b71f04 7692 False Record v2 beacon blocks http api metrics separately (#7692) 199 c7bb3b00e409 7693 False Fix lookups of the block at `oldest_block_slot` (#7693) 200 0f895f3066a3 7695 False Bump default gas limit (#7695) 201 56485cc9865a 7707 False Remove unneeded spans that caused debug logs to appear when level is set to `info` (#7707) 202 bd8a2a8ffbaa 7023 False Gossip recently computed light client data (#7023) 203 7b2f138ca7e7 - - [NO PR MATCH]: Merge remote-tracking branch 'origin/stable' into release-v7.1.0 204 8e55684b066f 7723 False Reintroduce `--logfile` with deprecation warning (#7723) 205 8b5ccacac9c0 7663 False Error from RPC `send_response` when request doesn't exist on the active inbound requests (#7663) 206 cfb1f7331064 7609 False Release v7.1.0 (#7609) ``` Co-Authored-By: Jimmy Chen --- scripts/print_release_diffs.py | 72 ++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 scripts/print_release_diffs.py diff --git a/scripts/print_release_diffs.py b/scripts/print_release_diffs.py new file mode 100644 index 00000000000..d910b1be5bd --- /dev/null +++ b/scripts/print_release_diffs.py @@ -0,0 +1,72 @@ +""" +Summarise pull requests between two Lighthouse releases. + +Usage: + export GITHUB_TOKEN=your_token + python -m pip install requests==2.32.4 + python print_release_diffs.py --base v7.0.1 --head release-v7.1.0 + +Shows commit SHA, PR number, 'backwards-incompat' label status, and PR title. 
+""" + +import requests +import re +import argparse +import os + +GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN") +if not GITHUB_TOKEN: + raise SystemExit("Error: Please set the GITHUB_TOKEN environment variable.") + +parser = argparse.ArgumentParser(description="Summarise PRs between two Lighthouse versions.") +parser.add_argument("--base", required=True, help="Base tag or branch (older release)") +parser.add_argument("--head", required=True, help="Head tag or branch (newer release)") +args = parser.parse_args() + +BASE = args.base +HEAD = args.head +OWNER = 'sigp' +REPO = 'lighthouse' + +HEADERS = { + 'Authorization': f'token {GITHUB_TOKEN}', + 'Accept': 'application/vnd.github+json' +} + +def get_commits_between(base, head): + url = f'https://api.github.com/repos/{OWNER}/{REPO}/compare/{base}...{head}' + response = requests.get(url, headers=HEADERS) + response.raise_for_status() + return response.json()['commits'] + +def has_backwards_incompat_label(pr_number): + url = f'https://api.github.com/repos/{OWNER}/{REPO}/issues/{pr_number}' + response = requests.get(url, headers=HEADERS) + if response.status_code != 200: + raise Exception(f"Failed to fetch PR #{pr_number}") + labels = response.json().get('labels', []) + return any(label['name'] == 'backwards-incompat' for label in labels) + +def main(): + commits = get_commits_between(BASE, HEAD) + print(" # Commit SHA PR Number Has backwards-incompat Label PR Title") + print("--- ------------ ----------- ------------------------------ --------------------------------------------") + + for i, commit in enumerate(commits, 1): + sha = commit['sha'][:12] + message = commit['commit']['message'] + pr_match = re.search(r"\(#(\d+)\)", message) + + if not pr_match: + print(f"{i:<3} {sha} {'-':<11} {'-':<30} [NO PR MATCH]: {message.splitlines()[0]}") + continue + + pr_number = int(pr_match.group(1)) + try: + has_label = has_backwards_incompat_label(pr_number) + print(f"{i:<3} {sha} {pr_number:<11} {str(has_label):<30} {message.splitlines()[0]}") + except Exception as e: + print(f"{i:<3} {sha} {pr_number:<11} {'ERROR':<30} [ERROR FETCHING PR]: {e}") + +if __name__ == '__main__': + main() From 51321daabb5f0a401bff41d7f9b5d2f4e9646a75 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Thu, 18 Sep 2025 17:10:18 +1000 Subject: [PATCH 17/45] Make the block cache optional (#8066) Address contention on the store's `block_cache` by allowing it to be disabled when `--block-cache-size 0` is provided, and also making this the default. 
Co-Authored-By: Michael Sproul --- beacon_node/src/cli.rs | 2 +- beacon_node/store/src/config.rs | 4 +- beacon_node/store/src/hot_cold_store.rs | 194 ++++++++++++++---------- book/src/help_bn.md | 2 +- lighthouse/tests/beacon_node.rs | 15 +- 5 files changed, 133 insertions(+), 84 deletions(-) diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 9a981c65812..238907adce8 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -779,7 +779,7 @@ pub fn cli_app() -> Command { .long("block-cache-size") .value_name("SIZE") .help("Specifies how many blocks the database should cache in memory") - .default_value("5") + .default_value("0") .action(ArgAction::Set) .display_order(0) ) diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index ad81fa6076a..c0f15f2417b 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -19,7 +19,7 @@ pub const DEFAULT_BACKEND: DatabaseBackend = DatabaseBackend::LevelDb; pub const PREV_DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 2048; pub const DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 8192; pub const DEFAULT_EPOCHS_PER_STATE_DIFF: u64 = 8; -pub const DEFAULT_BLOCK_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(64); +pub const DEFAULT_BLOCK_CACHE_SIZE: usize = 0; pub const DEFAULT_STATE_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(128); pub const DEFAULT_STATE_CACHE_HEADROOM: NonZeroUsize = new_non_zero_usize(1); pub const DEFAULT_COMPRESSION_LEVEL: i32 = 1; @@ -34,7 +34,7 @@ pub const DEFAULT_BLOB_PUNE_MARGIN_EPOCHS: u64 = 0; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct StoreConfig { /// Maximum number of blocks to store in the in-memory block cache. - pub block_cache_size: NonZeroUsize, + pub block_cache_size: usize, /// Maximum number of states to store in the in-memory state cache. pub state_cache_size: NonZeroUsize, /// Minimum number of states to cull from the state cache upon fullness. diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 52e52fe7ce5..0d8a65e0644 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -70,7 +70,7 @@ pub struct HotColdDB, Cold: ItemStore> { /// The hot database also contains all blocks. pub hot_db: Hot, /// LRU cache of deserialized blocks and blobs. Updated whenever a block or blob is loaded. - block_cache: Mutex>, + block_cache: Option>>, /// Cache of beacon states. /// /// LOCK ORDERING: this lock must always be locked *after* the `split` if both are required. 
@@ -229,7 +229,9 @@ impl HotColdDB, MemoryStore> { cold_db: MemoryStore::open(), blobs_db: MemoryStore::open(), hot_db: MemoryStore::open(), - block_cache: Mutex::new(BlockCache::new(config.block_cache_size)), + block_cache: NonZeroUsize::new(config.block_cache_size) + .map(BlockCache::new) + .map(Mutex::new), state_cache: Mutex::new(StateCache::new( config.state_cache_size, config.state_cache_headroom, @@ -281,7 +283,9 @@ impl HotColdDB, BeaconNodeBackend> { blobs_db: BeaconNodeBackend::open(&config, blobs_db_path)?, cold_db: BeaconNodeBackend::open(&config, cold_path)?, hot_db, - block_cache: Mutex::new(BlockCache::new(config.block_cache_size)), + block_cache: NonZeroUsize::new(config.block_cache_size) + .map(BlockCache::new) + .map(Mutex::new), state_cache: Mutex::new(StateCache::new( config.state_cache_size, config.state_cache_headroom, @@ -488,14 +492,17 @@ impl, Cold: ItemStore> HotColdDB pub fn register_metrics(&self) { let hsc_metrics = self.historic_state_cache.lock().metrics(); - metrics::set_gauge( - &metrics::STORE_BEACON_BLOCK_CACHE_SIZE, - self.block_cache.lock().block_cache.len() as i64, - ); - metrics::set_gauge( - &metrics::STORE_BEACON_BLOB_CACHE_SIZE, - self.block_cache.lock().blob_cache.len() as i64, - ); + if let Some(block_cache) = &self.block_cache { + let cache = block_cache.lock(); + metrics::set_gauge( + &metrics::STORE_BEACON_BLOCK_CACHE_SIZE, + cache.block_cache.len() as i64, + ); + metrics::set_gauge( + &metrics::STORE_BEACON_BLOB_CACHE_SIZE, + cache.blob_cache.len() as i64, + ); + } let state_cache = self.state_cache.lock(); metrics::set_gauge( &metrics::STORE_BEACON_STATE_CACHE_SIZE, @@ -553,7 +560,9 @@ impl, Cold: ItemStore> HotColdDB let block = self.block_as_kv_store_ops(block_root, block, &mut ops)?; self.hot_db.do_atomically(ops)?; // Update cache. - self.block_cache.lock().put_block(*block_root, block); + self.block_cache + .as_ref() + .inspect(|cache| cache.lock().put_block(*block_root, block)); Ok(()) } @@ -605,7 +614,9 @@ impl, Cold: ItemStore> HotColdDB metrics::inc_counter(&metrics::BEACON_BLOCK_GET_COUNT); // Check the cache. - if let Some(block) = self.block_cache.lock().get_block(block_root) { + if let Some(cache) = &self.block_cache + && let Some(block) = cache.lock().get_block(block_root) + { metrics::inc_counter(&metrics::BEACON_BLOCK_CACHE_HIT_COUNT); return Ok(Some(DatabaseBlock::Full(block.clone()))); } @@ -630,8 +641,8 @@ impl, Cold: ItemStore> HotColdDB // Add to cache. self.block_cache - .lock() - .put_block(*block_root, full_block.clone()); + .as_ref() + .inspect(|cache| cache.lock().put_block(*block_root, full_block.clone())); DatabaseBlock::Full(full_block) } else if !self.config.prune_payloads { @@ -902,7 +913,9 @@ impl, Cold: ItemStore> HotColdDB /// Delete a block from the store and the block cache. 
pub fn delete_block(&self, block_root: &Hash256) -> Result<(), Error> { - self.block_cache.lock().delete(block_root); + self.block_cache + .as_ref() + .inspect(|cache| cache.lock().delete(block_root)); self.hot_db .key_delete(DBColumn::BeaconBlock, block_root.as_slice())?; self.hot_db @@ -917,7 +930,9 @@ impl, Cold: ItemStore> HotColdDB block_root.as_slice(), &blobs.as_ssz_bytes(), )?; - self.block_cache.lock().put_blobs(*block_root, blobs); + self.block_cache + .as_ref() + .inspect(|cache| cache.lock().put_blobs(*block_root, blobs)); Ok(()) } @@ -945,9 +960,11 @@ impl, Cold: ItemStore> HotColdDB self.blobs_db .put(&DATA_COLUMN_CUSTODY_INFO_KEY, &data_column_custody_info)?; - self.block_cache - .lock() - .put_data_column_custody_info(Some(data_column_custody_info)); + self.block_cache.as_ref().inspect(|cache| { + cache + .lock() + .put_data_column_custody_info(Some(data_column_custody_info)) + }); Ok(()) } @@ -964,8 +981,8 @@ impl, Cold: ItemStore> HotColdDB &data_column.as_ssz_bytes(), )?; self.block_cache - .lock() - .put_data_column(*block_root, data_column); + .as_ref() + .inspect(|cache| cache.lock().put_data_column(*block_root, data_column)); } Ok(()) } @@ -1399,7 +1416,7 @@ impl, Cold: ItemStore> HotColdDB // Update database whilst holding a lock on cache, to ensure that the cache updates // atomically with the database. - let mut guard = self.block_cache.lock(); + let guard = self.block_cache.as_ref().map(|cache| cache.lock()); let blob_cache_ops = blobs_ops.clone(); // Try to execute blobs store ops. @@ -1446,57 +1463,68 @@ impl, Cold: ItemStore> HotColdDB return Err(e); } - for op in hot_db_cache_ops { + // Delete from the state cache. + for op in &hot_db_cache_ops { match op { - StoreOp::PutBlock(block_root, block) => { - guard.put_block(block_root, (*block).clone()); + StoreOp::DeleteBlock(block_root) => { + self.state_cache.lock().delete_block_states(block_root); } + StoreOp::DeleteState(state_root, _) => { + self.state_cache.lock().delete_state(state_root) + } + _ => (), + } + } - StoreOp::PutBlobs(_, _) => (), + // If the block cache is enabled, also delete from the block cache. 
+ if let Some(mut guard) = guard { + for op in hot_db_cache_ops { + match op { + StoreOp::PutBlock(block_root, block) => { + guard.put_block(block_root, (*block).clone()); + } - StoreOp::PutDataColumns(_, _) => (), + StoreOp::PutBlobs(_, _) => (), - StoreOp::PutState(_, _) => (), + StoreOp::PutDataColumns(_, _) => (), - StoreOp::PutStateSummary(_, _) => (), + StoreOp::PutState(_, _) => (), - StoreOp::DeleteBlock(block_root) => { - guard.delete_block(&block_root); - self.state_cache.lock().delete_block_states(&block_root); - } + StoreOp::PutStateSummary(_, _) => (), - StoreOp::DeleteState(state_root, _) => { - self.state_cache.lock().delete_state(&state_root) - } + StoreOp::DeleteBlock(block_root) => { + guard.delete_block(&block_root); + } - StoreOp::DeleteBlobs(_) => (), + StoreOp::DeleteState(_, _) => (), - StoreOp::DeleteDataColumns(_, _) => (), + StoreOp::DeleteBlobs(_) => (), - StoreOp::DeleteExecutionPayload(_) => (), + StoreOp::DeleteDataColumns(_, _) => (), - StoreOp::DeleteSyncCommitteeBranch(_) => (), + StoreOp::DeleteExecutionPayload(_) => (), - StoreOp::KeyValueOp(_) => (), - } - } + StoreOp::DeleteSyncCommitteeBranch(_) => (), - for op in blob_cache_ops { - match op { - StoreOp::PutBlobs(block_root, blobs) => { - guard.put_blobs(block_root, blobs); + StoreOp::KeyValueOp(_) => (), } + } - StoreOp::DeleteBlobs(block_root) => { - guard.delete_blobs(&block_root); - } + for op in blob_cache_ops { + match op { + StoreOp::PutBlobs(block_root, blobs) => { + guard.put_blobs(block_root, blobs); + } - _ => (), + StoreOp::DeleteBlobs(block_root) => { + guard.delete_blobs(&block_root); + } + + _ => (), + } } } - drop(guard); - Ok(()) } @@ -2425,21 +2453,23 @@ impl, Cold: ItemStore> HotColdDB /// If custody info doesn't exist in the cache, /// try to fetch from the DB and prime the cache. pub fn get_data_column_custody_info(&self) -> Result, Error> { - let Some(data_column_custody_info) = self.block_cache.lock().get_data_column_custody_info() - else { - let data_column_custody_info = self - .blobs_db - .get::(&DATA_COLUMN_CUSTODY_INFO_KEY)?; + if let Some(cache) = &self.block_cache + && let Some(data_column_custody_info) = cache.lock().get_data_column_custody_info() + { + return Ok(Some(data_column_custody_info)); + } + let data_column_custody_info = self + .blobs_db + .get::(&DATA_COLUMN_CUSTODY_INFO_KEY)?; - // Update the cache - self.block_cache + // Update the cache + self.block_cache.as_ref().inspect(|cache| { + cache .lock() - .put_data_column_custody_info(data_column_custody_info.clone()); - - return Ok(data_column_custody_info); - }; + .put_data_column_custody_info(data_column_custody_info.clone()) + }); - Ok(Some(data_column_custody_info)) + Ok(data_column_custody_info) } /// Fetch all columns for a given block from the store. @@ -2460,9 +2490,13 @@ impl, Cold: ItemStore> HotColdDB /// Fetch blobs for a given block from the store. pub fn get_blobs(&self, block_root: &Hash256) -> Result, Error> { // Check the cache. 
- if let Some(blobs) = self.block_cache.lock().get_blobs(block_root) { + if let Some(blobs) = self + .block_cache + .as_ref() + .and_then(|cache| cache.lock().get_blobs(block_root).cloned()) + { metrics::inc_counter(&metrics::BEACON_BLOBS_CACHE_HIT_COUNT); - return Ok(blobs.clone().into()); + return Ok(blobs.into()); } match self @@ -2481,8 +2515,8 @@ impl, Cold: ItemStore> HotColdDB { let blobs = BlobSidecarList::new(blobs, max_blobs_per_block as usize)?; self.block_cache - .lock() - .put_blobs(*block_root, blobs.clone()); + .as_ref() + .inspect(|cache| cache.lock().put_blobs(*block_root, blobs.clone())); Ok(BlobSidecarListFromRoot::Blobs(blobs)) } else { @@ -2515,8 +2549,8 @@ impl, Cold: ItemStore> HotColdDB // Check the cache. if let Some(data_column) = self .block_cache - .lock() - .get_data_column(block_root, column_index) + .as_ref() + .and_then(|cache| cache.lock().get_data_column(block_root, column_index)) { metrics::inc_counter(&metrics::BEACON_DATA_COLUMNS_CACHE_HIT_COUNT); return Ok(Some(data_column)); @@ -2528,9 +2562,11 @@ impl, Cold: ItemStore> HotColdDB )? { Some(ref data_column_bytes) => { let data_column = Arc::new(DataColumnSidecar::from_ssz_bytes(data_column_bytes)?); - self.block_cache - .lock() - .put_data_column(*block_root, data_column.clone()); + self.block_cache.as_ref().inspect(|cache| { + cache + .lock() + .put_data_column(*block_root, data_column.clone()) + }); Ok(Some(data_column)) } None => Ok(None), @@ -3264,11 +3300,11 @@ impl, Cold: ItemStore> HotColdDB } // Remove deleted blobs from the cache. - let mut block_cache = self.block_cache.lock(); - for block_root in removed_block_roots { - block_cache.delete_blobs(&block_root); + if let Some(mut block_cache) = self.block_cache.as_ref().map(|cache| cache.lock()) { + for block_root in removed_block_roots { + block_cache.delete_blobs(&block_root); + } } - drop(block_cache); let new_blob_info = BlobInfo { oldest_blob_slot: Some(end_slot + 1), diff --git a/book/src/help_bn.md b/book/src/help_bn.md index ea02b39bee6..eba6814863f 100644 --- a/book/src/help_bn.md +++ b/book/src/help_bn.md @@ -22,7 +22,7 @@ Options: Data directory for the blobs database. --block-cache-size Specifies how many blocks the database should cache in memory - [default: 5] + [default: 0] --boot-nodes One or more comma-delimited base64-encoded ENR's to bootstrap the p2p network. Multiaddr is also supported. 
diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 0660073bbc5..629c2e1e9a1 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -1839,12 +1839,25 @@ fn slots_per_restore_point_flag() { .run_with_zero_port(); } +#[test] +fn block_cache_size_default() { + CommandLineTest::new() + .run_with_zero_port() + .with_config(|config| assert_eq!(config.store.block_cache_size, 0)); +} #[test] fn block_cache_size_flag() { CommandLineTest::new() .flag("block-cache-size", Some("4")) .run_with_zero_port() - .with_config(|config| assert_eq!(config.store.block_cache_size, new_non_zero_usize(4))); + .with_config(|config| assert_eq!(config.store.block_cache_size, 4)); +} +#[test] +fn block_cache_size_zero() { + CommandLineTest::new() + .flag("block-cache-size", Some("0")) + .run_with_zero_port() + .with_config(|config| assert_eq!(config.store.block_cache_size, 0)); } #[test] fn state_cache_size_default() { From 4111bcb39bb8edaacf3086c621bbc6a895c5433e Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 18 Sep 2025 17:10:23 +1000 Subject: [PATCH 18/45] Use scoped rayon pool for backfill chain segment processing (#7924) Part of #7866 - Continuation of #7921 In the above PR, we enabled rayon for batch KZG verification in chain segment processing. However, using the global rayon thread pool for backfill is likely to create resource contention with higher-priority beacon processor work. This PR introduces a dedicated low-priority rayon thread pool `LOW_PRIORITY_RAYON_POOL` and uses it for processing backfill chain segments. This prevents backfill KZG verification from using the global rayon thread pool and competing with high-priority beacon processor tasks for CPU resources. However, this PR by itself doesn't prevent CPU oversubscription because other tasks could still fill up the global rayon thread pool, and having an extra thread pool could make things worse. 
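As a rough sketch of the scoped-pool idea (illustrative names, `rayon` only; the roughly-25%-of-CPUs-with-a-minimum-of-one heuristic mirrors the new `RayonManager`), the dedicated pool is built once and low-priority work runs inside it via `install`, so any parallel iterators that work spawns stay off the global pool:

```rust
use rayon::prelude::*;
use rayon::{ThreadPool, ThreadPoolBuilder};
use std::sync::Arc;
use std::thread::available_parallelism;

/// Build a small, dedicated rayon pool (~25% of CPUs, at least one thread) for
/// low-priority, compute-heavy work such as backfill KZG verification.
fn low_priority_pool() -> Arc<ThreadPool> {
    let cpus = available_parallelism().map(|n| n.get()).unwrap_or(1);
    let threads = (cpus / 4).max(1);
    Arc::new(
        ThreadPoolBuilder::new()
            .num_threads(threads)
            .build()
            .expect("failed to build low-priority rayon pool"),
    )
}

fn main() {
    let pool = low_priority_pool();

    // `install` runs the closure on the scoped pool: parallel iterators used inside it
    // are serviced by this pool's threads, not by rayon's global pool.
    let sum: u64 = pool.install(|| (0..1_000u64).into_par_iter().map(|x| x * x).sum());
    println!("sum of squares = {sum}");
}
```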
To address this we need the beacon processor to coordinate total CPU allocation across all tasks, which is covered in: - #7789 Co-Authored-By: Jimmy Chen Co-Authored-By: Eitan Seri- Levi Co-Authored-By: Eitan Seri-Levi --- Cargo.lock | 1 + beacon_node/beacon_processor/Cargo.toml | 1 + beacon_node/beacon_processor/src/lib.rs | 34 ++- .../beacon_processor/src/rayon_manager.rs | 27 +++ .../src/scheduler/work_reprocessing_queue.rs | 4 +- beacon_node/client/src/builder.rs | 2 + beacon_node/http_api/src/test_utils.rs | 2 + beacon_node/lighthouse_tracing/src/lib.rs | 2 + .../src/network_beacon_processor/mod.rs | 38 ++-- .../network_beacon_processor/sync_methods.rs | 193 +++++++++++------- .../src/network_beacon_processor/tests.rs | 36 +++- 11 files changed, 230 insertions(+), 110 deletions(-) create mode 100644 beacon_node/beacon_processor/src/rayon_manager.rs diff --git a/Cargo.lock b/Cargo.lock index ba6a4587b6f..0e559182438 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -980,6 +980,7 @@ dependencies = [ "metrics", "num_cpus", "parking_lot 0.12.3", + "rayon", "serde", "slot_clock", "strum", diff --git a/beacon_node/beacon_processor/Cargo.toml b/beacon_node/beacon_processor/Cargo.toml index afd4660c9a3..262badf7f97 100644 --- a/beacon_node/beacon_processor/Cargo.toml +++ b/beacon_node/beacon_processor/Cargo.toml @@ -12,6 +12,7 @@ logging = { workspace = true } metrics = { workspace = true } num_cpus = { workspace = true } parking_lot = { workspace = true } +rayon = { workspace = true } serde = { workspace = true } slot_clock = { workspace = true } strum = { workspace = true } diff --git a/beacon_node/beacon_processor/src/lib.rs b/beacon_node/beacon_processor/src/lib.rs index 84723fb6a09..64aeb4ceaf2 100644 --- a/beacon_node/beacon_processor/src/lib.rs +++ b/beacon_node/beacon_processor/src/lib.rs @@ -38,6 +38,7 @@ //! checks the queues to see if there are more parcels of work that can be spawned in a new worker //! task. +use crate::rayon_manager::RayonManager; use crate::work_reprocessing_queue::{ QueuedBackfillBatch, QueuedColumnReconstruction, QueuedGossipBlock, ReprocessQueueMessage, }; @@ -47,6 +48,7 @@ use lighthouse_network::{MessageId, NetworkGlobals, PeerId}; use logging::TimeLatch; use logging::crit; use parking_lot::Mutex; +use rayon::ThreadPool; pub use scheduler::work_reprocessing_queue; use serde::{Deserialize, Serialize}; use slot_clock::SlotClock; @@ -74,6 +76,7 @@ use work_reprocessing_queue::{ }; mod metrics; +pub mod rayon_manager; pub mod scheduler; /// The maximum size of the channel for work events to the `BeaconProcessor`. @@ -603,7 +606,7 @@ pub enum Work { process_fn: BlockingFn, }, ChainSegment(AsyncFn), - ChainSegmentBackfill(AsyncFn), + ChainSegmentBackfill(BlockingFn), Status(BlockingFn), BlocksByRangeRequest(AsyncFn), BlocksByRootsRequest(AsyncFn), @@ -807,6 +810,7 @@ pub struct BeaconProcessor { pub network_globals: Arc>, pub executor: TaskExecutor, pub current_workers: usize, + pub rayon_manager: RayonManager, pub config: BeaconProcessorConfig, } @@ -1603,7 +1607,17 @@ impl BeaconProcessor { Work::BlocksByRangeRequest(work) | Work::BlocksByRootsRequest(work) => { task_spawner.spawn_async(work) } - Work::ChainSegmentBackfill(process_fn) => task_spawner.spawn_async(process_fn), + Work::ChainSegmentBackfill(process_fn) => { + if self.config.enable_backfill_rate_limiting { + task_spawner.spawn_blocking_with_rayon( + self.rayon_manager.low_priority_threadpool.clone(), + process_fn, + ) + } else { + // use the global rayon thread pool if backfill rate limiting is disabled. 
+ task_spawner.spawn_blocking(process_fn) + } + } Work::ApiRequestP0(process_fn) | Work::ApiRequestP1(process_fn) => match process_fn { BlockingOrAsync::Blocking(process_fn) => task_spawner.spawn_blocking(process_fn), BlockingOrAsync::Async(process_fn) => task_spawner.spawn_async(process_fn), @@ -1665,6 +1679,22 @@ impl TaskSpawner { WORKER_TASK_NAME, ) } + + /// Spawns a blocking task on a rayon thread pool, dropping the `SendOnDrop` after task completion. + fn spawn_blocking_with_rayon(self, thread_pool: Arc, task: F) + where + F: FnOnce() + Send + 'static, + { + self.executor.spawn_blocking( + move || { + thread_pool.install(|| { + task(); + }); + drop(self.send_idle_on_drop) + }, + WORKER_TASK_NAME, + ) + } } /// This struct will send a message on `self.tx` when it is dropped. An error will be logged diff --git a/beacon_node/beacon_processor/src/rayon_manager.rs b/beacon_node/beacon_processor/src/rayon_manager.rs new file mode 100644 index 00000000000..99fe32d5cc4 --- /dev/null +++ b/beacon_node/beacon_processor/src/rayon_manager.rs @@ -0,0 +1,27 @@ +use rayon::{ThreadPool, ThreadPoolBuilder}; +use std::sync::Arc; + +const DEFAULT_LOW_PRIORITY_DIVISOR: usize = 4; +const MINIMUM_LOW_PRIORITY_THREAD_COUNT: usize = 1; + +pub struct RayonManager { + /// Smaller rayon thread pool for lower-priority, compute-intensive tasks. + /// By default ~25% of CPUs or a minimum of 1 thread. + pub low_priority_threadpool: Arc, +} + +impl Default for RayonManager { + fn default() -> Self { + let low_prio_threads = + (num_cpus::get() / DEFAULT_LOW_PRIORITY_DIVISOR).max(MINIMUM_LOW_PRIORITY_THREAD_COUNT); + let low_priority_threadpool = Arc::new( + ThreadPoolBuilder::new() + .num_threads(low_prio_threads) + .build() + .expect("failed to build low-priority rayon pool"), + ); + Self { + low_priority_threadpool, + } + } +} diff --git a/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs b/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs index 3e755f08302..8c33cf58693 100644 --- a/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs +++ b/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs @@ -173,7 +173,7 @@ pub struct IgnoredRpcBlock { } /// A backfill batch work that has been queued for processing later. -pub struct QueuedBackfillBatch(pub AsyncFn); +pub struct QueuedBackfillBatch(pub BlockingFn); pub struct QueuedColumnReconstruction { pub block_root: Hash256, @@ -1084,7 +1084,7 @@ mod tests { // Now queue a backfill sync batch. 
work_reprocessing_tx .try_send(ReprocessQueueMessage::BackfillSync(QueuedBackfillBatch( - Box::pin(async {}), + Box::new(|| {}), ))) .unwrap(); tokio::task::yield_now().await; diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs index d984d5fedce..87cdcc45ef7 100644 --- a/beacon_node/client/src/builder.rs +++ b/beacon_node/client/src/builder.rs @@ -17,6 +17,7 @@ use beacon_chain::{ store::{HotColdDB, ItemStore, StoreConfig}, }; use beacon_chain::{Kzg, LightClientProducerEvent}; +use beacon_processor::rayon_manager::RayonManager; use beacon_processor::{BeaconProcessor, BeaconProcessorChannels}; use beacon_processor::{BeaconProcessorConfig, BeaconProcessorQueueLengths}; use environment::RuntimeContext; @@ -680,6 +681,7 @@ where executor: beacon_processor_context.executor.clone(), current_workers: 0, config: beacon_processor_config, + rayon_manager: RayonManager::default(), } .spawn_manager( beacon_processor_channels.beacon_processor_rx, diff --git a/beacon_node/http_api/src/test_utils.rs b/beacon_node/http_api/src/test_utils.rs index fe9e0dff704..7be8960e691 100644 --- a/beacon_node/http_api/src/test_utils.rs +++ b/beacon_node/http_api/src/test_utils.rs @@ -5,6 +5,7 @@ use beacon_chain::{ }; use beacon_processor::{ BeaconProcessor, BeaconProcessorChannels, BeaconProcessorConfig, BeaconProcessorQueueLengths, + rayon_manager::RayonManager, }; use directory::DEFAULT_ROOT_DIR; use eth2::{BeaconNodeHttpClient, Timeouts}; @@ -247,6 +248,7 @@ pub async fn create_api_server_with_config( executor: test_runtime.task_executor.clone(), current_workers: 0, config: beacon_processor_config, + rayon_manager: RayonManager::default(), } .spawn_manager( beacon_processor_rx, diff --git a/beacon_node/lighthouse_tracing/src/lib.rs b/beacon_node/lighthouse_tracing/src/lib.rs index 60fda12cc20..18a9874252a 100644 --- a/beacon_node/lighthouse_tracing/src/lib.rs +++ b/beacon_node/lighthouse_tracing/src/lib.rs @@ -26,6 +26,7 @@ pub const SPAN_PROCESS_RPC_BLOCK: &str = "process_rpc_block"; pub const SPAN_PROCESS_RPC_BLOBS: &str = "process_rpc_blobs"; pub const SPAN_PROCESS_RPC_CUSTODY_COLUMNS: &str = "process_rpc_custody_columns"; pub const SPAN_PROCESS_CHAIN_SEGMENT: &str = "process_chain_segment"; +pub const SPAN_PROCESS_CHAIN_SEGMENT_BACKFILL: &str = "process_chain_segment_backfill"; /// Fork choice root spans pub const SPAN_RECOMPUTE_HEAD: &str = "recompute_head_at_slot"; @@ -61,6 +62,7 @@ pub const LH_BN_ROOT_SPAN_NAMES: &[&str] = &[ SPAN_PROCESS_RPC_BLOBS, SPAN_PROCESS_RPC_CUSTODY_COLUMNS, SPAN_PROCESS_CHAIN_SEGMENT, + SPAN_PROCESS_CHAIN_SEGMENT_BACKFILL, SPAN_HANDLE_BLOCKS_BY_RANGE_REQUEST, SPAN_HANDLE_BLOBS_BY_RANGE_REQUEST, SPAN_HANDLE_DATA_COLUMNS_BY_RANGE_REQUEST, diff --git a/beacon_node/network/src/network_beacon_processor/mod.rs b/beacon_node/network/src/network_beacon_processor/mod.rs index 691c06f2687..85ccde1d591 100644 --- a/beacon_node/network/src/network_beacon_processor/mod.rs +++ b/beacon_node/network/src/network_beacon_processor/mod.rs @@ -6,9 +6,7 @@ use beacon_chain::data_column_verification::{GossipDataColumnError, observe_goss use beacon_chain::fetch_blobs::{ EngineGetBlobsOutput, FetchEngineBlobError, fetch_and_process_engine_blobs, }; -use beacon_chain::{ - AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError, NotifyExecutionLayer, -}; +use beacon_chain::{AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError}; use beacon_processor::{ BeaconProcessorSend, DuplicateCache, GossipAggregatePackage, GossipAttestationPackage, Work, 
WorkEvent as BeaconWorkEvent, @@ -500,33 +498,23 @@ impl NetworkBeaconProcessor { process_id: ChainSegmentProcessId, blocks: Vec>, ) -> Result<(), Error> { - let is_backfill = matches!(&process_id, ChainSegmentProcessId::BackSyncBatchId { .. }); debug!(blocks = blocks.len(), id = ?process_id, "Batch sending for process"); - let processor = self.clone(); - let process_fn = async move { - let notify_execution_layer = if processor - .network_globals - .sync_state - .read() - .is_syncing_finalized() - { - NotifyExecutionLayer::No - } else { - NotifyExecutionLayer::Yes - }; - processor - .process_chain_segment(process_id, blocks, notify_execution_layer) - .await; - }; - let process_fn = Box::pin(process_fn); // Back-sync batches are dispatched with a different `Work` variant so // they can be rate-limited. - let work = if is_backfill { - Work::ChainSegmentBackfill(process_fn) - } else { - Work::ChainSegment(process_fn) + let work = match process_id { + ChainSegmentProcessId::RangeBatchId(_, _) => { + let process_fn = async move { + processor.process_chain_segment(process_id, blocks).await; + }; + Work::ChainSegment(Box::pin(process_fn)) + } + ChainSegmentProcessId::BackSyncBatchId(_) => { + let process_fn = + move || processor.process_chain_segment_backfill(process_id, blocks); + Work::ChainSegmentBackfill(Box::new(process_fn)) + } }; self.try_send(BeaconWorkEvent { diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index edeed7e98cf..b61a6e25c50 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -19,9 +19,10 @@ use beacon_processor::{ use beacon_processor::{Work, WorkEvent}; use lighthouse_network::PeerAction; use lighthouse_tracing::{ - SPAN_PROCESS_CHAIN_SEGMENT, SPAN_PROCESS_RPC_BLOBS, SPAN_PROCESS_RPC_BLOCK, - SPAN_PROCESS_RPC_CUSTODY_COLUMNS, + SPAN_PROCESS_CHAIN_SEGMENT, SPAN_PROCESS_CHAIN_SEGMENT_BACKFILL, SPAN_PROCESS_RPC_BLOBS, + SPAN_PROCESS_RPC_BLOCK, SPAN_PROCESS_RPC_CUSTODY_COLUMNS, }; +use logging::crit; use std::sync::Arc; use std::time::Duration; use store::KzgCommitment; @@ -434,27 +435,42 @@ impl NetworkBeaconProcessor { parent = None, level = "debug", skip_all, - fields(sync_type = ?sync_type, downloaded_blocks = downloaded_blocks.len()) + fields(process_id = ?process_id, downloaded_blocks = downloaded_blocks.len()) )] pub async fn process_chain_segment( &self, - sync_type: ChainSegmentProcessId, + process_id: ChainSegmentProcessId, downloaded_blocks: Vec>, - notify_execution_layer: NotifyExecutionLayer, ) { - let result = match sync_type { - // this a request from the range sync - ChainSegmentProcessId::RangeBatchId(chain_id, epoch) => { - let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64()); - let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64()); - let sent_blocks = downloaded_blocks.len(); - - match self - .process_blocks(downloaded_blocks.iter(), notify_execution_layer) - .await - { - (imported_blocks, Ok(_)) => { - debug!( + let ChainSegmentProcessId::RangeBatchId(chain_id, epoch) = process_id else { + // This is a request from range sync, this should _never_ happen + crit!( + error = "process_chain_segment called on a variant other than RangeBatchId", + "Please notify the devs" + ); + return; + }; + + let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64()); + let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64()); + let 
sent_blocks = downloaded_blocks.len(); + let notify_execution_layer = if self + .network_globals + .sync_state + .read() + .is_syncing_finalized() + { + NotifyExecutionLayer::No + } else { + NotifyExecutionLayer::Yes + }; + + let result = match self + .process_blocks(downloaded_blocks.iter(), notify_execution_layer) + .await + { + (imported_blocks, Ok(_)) => { + debug!( batch_epoch = %epoch, first_block_slot = start_slot, chain = chain_id, @@ -462,13 +478,13 @@ impl NetworkBeaconProcessor { processed_blocks = sent_blocks, service= "sync", "Batch processed"); - BatchProcessResult::Success { - sent_blocks, - imported_blocks, - } - } - (imported_blocks, Err(e)) => { - debug!( + BatchProcessResult::Success { + sent_blocks, + imported_blocks, + } + } + (imported_blocks, Err(e)) => { + debug!( batch_epoch = %epoch, first_block_slot = start_slot, chain = chain_id, @@ -477,33 +493,61 @@ impl NetworkBeaconProcessor { error = %e.message, service = "sync", "Batch processing failed"); - match e.peer_action { - Some(penalty) => BatchProcessResult::FaultyFailure { - imported_blocks, - penalty, - }, - None => BatchProcessResult::NonFaultyFailure, - } - } + match e.peer_action { + Some(penalty) => BatchProcessResult::FaultyFailure { + imported_blocks, + penalty, + }, + None => BatchProcessResult::NonFaultyFailure, } } - // this a request from the Backfill sync - ChainSegmentProcessId::BackSyncBatchId(epoch) => { - let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64()); - let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64()); - let sent_blocks = downloaded_blocks.len(); - let n_blobs = downloaded_blocks - .iter() - .map(|wrapped| wrapped.n_blobs()) - .sum::(); - let n_data_columns = downloaded_blocks - .iter() - .map(|wrapped| wrapped.n_data_columns()) - .sum::(); - - match self.process_backfill_blocks(downloaded_blocks) { - (imported_blocks, Ok(_)) => { - debug!( + }; + + self.send_sync_message(SyncMessage::BatchProcessed { + sync_type: process_id, + result, + }); + } + + /// Attempt to import the chain segment (`blocks`) to the beacon chain, informing the sync + /// thread if more blocks are needed to process it. 
+ #[instrument( + name = SPAN_PROCESS_CHAIN_SEGMENT_BACKFILL, + parent = None, + level = "debug", + skip_all, + fields(downloaded_blocks = downloaded_blocks.len()) + )] + pub fn process_chain_segment_backfill( + &self, + process_id: ChainSegmentProcessId, + downloaded_blocks: Vec>, + ) { + let ChainSegmentProcessId::BackSyncBatchId(epoch) = process_id else { + // this a request from RangeSync, this should _never_ happen + crit!( + error = + "process_chain_segment_backfill called on a variant other than BackSyncBatchId", + "Please notify the devs" + ); + return; + }; + + let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64()); + let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64()); + let sent_blocks = downloaded_blocks.len(); + let n_blobs = downloaded_blocks + .iter() + .map(|wrapped| wrapped.n_blobs()) + .sum::(); + let n_data_columns = downloaded_blocks + .iter() + .map(|wrapped| wrapped.n_data_columns()) + .sum::(); + + let result = match self.process_backfill_blocks(downloaded_blocks) { + (imported_blocks, Ok(_)) => { + debug!( batch_epoch = %epoch, first_block_slot = start_slot, keep_execution_payload = !self.chain.store.get_config().prune_payloads, @@ -513,34 +557,35 @@ impl NetworkBeaconProcessor { processed_data_columns = n_data_columns, service= "sync", "Backfill batch processed"); - BatchProcessResult::Success { - sent_blocks, - imported_blocks, - } - } - (_, Err(e)) => { - debug!( - batch_epoch = %epoch, - first_block_slot = start_slot, - last_block_slot = end_slot, - processed_blobs = n_blobs, - error = %e.message, - service = "sync", - "Backfill batch processing failed" - ); - match e.peer_action { - Some(penalty) => BatchProcessResult::FaultyFailure { - imported_blocks: 0, - penalty, - }, - None => BatchProcessResult::NonFaultyFailure, - } - } + BatchProcessResult::Success { + sent_blocks, + imported_blocks, + } + } + (_, Err(e)) => { + debug!( + batch_epoch = %epoch, + first_block_slot = start_slot, + last_block_slot = end_slot, + processed_blobs = n_blobs, + error = %e.message, + service = "sync", + "Backfill batch processing failed" + ); + match e.peer_action { + Some(penalty) => BatchProcessResult::FaultyFailure { + imported_blocks: 0, + penalty, + }, + None => BatchProcessResult::NonFaultyFailure, } } }; - self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result }); + self.send_sync_message(SyncMessage::BatchProcessed { + sync_type: process_id, + result, + }); } /// Helper function to process blocks batches which only consumes the chain and blocks to process. 
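As a rough standalone illustration of the scoped-pool idea this patch introduces (this is not Lighthouse code; the divisor, names, and workload below are placeholders, and it assumes only the public `rayon` and `num_cpus` crates): work executed via `ThreadPool::install`, including any nested parallel iterators, stays on that pool's threads, so low-priority backfill processing can never occupy more than its share of cores.

use rayon::ThreadPoolBuilder;
use rayon::prelude::*;

fn main() {
    // Cap the low-priority pool at roughly a quarter of the logical cores, minimum 1.
    let threads = (num_cpus::get() / 4).max(1);
    let low_priority_pool = ThreadPoolBuilder::new()
        .num_threads(threads)
        .build()
        .expect("failed to build low-priority rayon pool");

    // Everything run via `install`, including the nested `into_par_iter`, is scheduled
    // on this pool's threads only, leaving the remaining cores for higher-priority work.
    let checksum: u64 = low_priority_pool.install(|| {
        (0..1_000_000u64).into_par_iter().map(|x| x % 7).sum()
    });
    println!("processed batch, checksum = {checksum}");
}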
diff --git a/beacon_node/network/src/network_beacon_processor/tests.rs b/beacon_node/network/src/network_beacon_processor/tests.rs index d3a93d48637..99410bc5e51 100644 --- a/beacon_node/network/src/network_beacon_processor/tests.rs +++ b/beacon_node/network/src/network_beacon_processor/tests.rs @@ -17,6 +17,7 @@ use beacon_chain::test_utils::{ test_spec, }; use beacon_chain::{BeaconChain, WhenSlotSkipped}; +use beacon_processor::rayon_manager::RayonManager; use beacon_processor::{work_reprocessing_queue::*, *}; use gossipsub::MessageAcceptance; use itertools::Itertools; @@ -266,6 +267,7 @@ impl TestRig { executor, current_workers: 0, config: beacon_processor_config, + rayon_manager: RayonManager::default(), } .spawn_manager( beacon_processor_rx, @@ -458,10 +460,10 @@ impl TestRig { .unwrap(); } - pub fn enqueue_backfill_batch(&self) { + pub fn enqueue_backfill_batch(&self, epoch: Epoch) { self.network_beacon_processor .send_chain_segment( - ChainSegmentProcessId::BackSyncBatchId(Epoch::default()), + ChainSegmentProcessId::BackSyncBatchId(epoch), Vec::default(), ) .unwrap(); @@ -606,7 +608,7 @@ impl TestRig { } pub async fn assert_event_journal(&mut self, expected: &[&str]) { - self.assert_event_journal_with_timeout(expected, STANDARD_TIMEOUT) + self.assert_event_journal_with_timeout(expected, STANDARD_TIMEOUT, false, false) .await } @@ -623,6 +625,8 @@ impl TestRig { .chain(std::iter::once(NOTHING_TO_DO)) .collect::>(), timeout, + false, + false, ) .await } @@ -666,11 +670,21 @@ impl TestRig { &mut self, expected: &[&str], timeout: Duration, + ignore_worker_freed: bool, + ignore_nothing_to_do: bool, ) { let mut events = Vec::with_capacity(expected.len()); let drain_future = async { while let Some(event) = self.work_journal_rx.recv().await { + if event == WORKER_FREED && ignore_worker_freed { + continue; + } + + if event == NOTHING_TO_DO && ignore_nothing_to_do { + continue; + } + events.push(event); // Break as soon as we collect the desired number of events. @@ -1384,6 +1398,8 @@ async fn requeue_unknown_block_gossip_attestation_without_import() { NOTHING_TO_DO, ], Duration::from_secs(1) + QUEUED_ATTESTATION_DELAY, + false, + false, ) .await; @@ -1424,6 +1440,8 @@ async fn requeue_unknown_block_gossip_aggregated_attestation_without_import() { NOTHING_TO_DO, ], Duration::from_secs(1) + QUEUED_ATTESTATION_DELAY, + false, + false, ) .await; @@ -1558,8 +1576,8 @@ async fn test_backfill_sync_processing() { // (not straight forward to manipulate `TestingSlotClock` due to cloning of `SlotClock` in code) // and makes the test very slow, hence timing calculation is unit tested separately in // `work_reprocessing_queue`. - for _ in 0..1 { - rig.enqueue_backfill_batch(); + for i in 0..1 { + rig.enqueue_backfill_batch(Epoch::new(i)); // ensure queued batch is not processed until later rig.assert_no_events_for(Duration::from_millis(100)).await; // A new batch should be processed within a slot. 
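A minimal sketch of the pacing behaviour these tests assert, assuming only the tokio runtime (the slot duration, batch type, and names are placeholders, not the `work_reprocessing_queue` types): queued backfill batches are released one per slot tick rather than as fast as they arrive.

use std::collections::VecDeque;
use std::time::Duration;

#[tokio::main]
async fn main() {
    // Three fake backfill batches queued up front.
    let mut queue: VecDeque<u64> = (0..3).collect();
    // Pretend a slot lasts 100 ms for the purposes of the sketch.
    let mut slot_ticks = tokio::time::interval(Duration::from_millis(100));
    while let Some(batch) = queue.pop_front() {
        // Release at most one batch per slot boundary.
        slot_ticks.tick().await;
        println!("processing backfill batch {batch}");
    }
}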
@@ -1570,6 +1588,8 @@ async fn test_backfill_sync_processing() { NOTHING_TO_DO, ], rig.chain.slot_clock.slot_duration(), + false, + false, ) .await; } @@ -1590,8 +1610,8 @@ async fn test_backfill_sync_processing_rate_limiting_disabled() { ) .await; - for _ in 0..3 { - rig.enqueue_backfill_batch(); + for i in 0..3 { + rig.enqueue_backfill_batch(Epoch::new(i)); } // ensure all batches are processed @@ -1602,6 +1622,8 @@ async fn test_backfill_sync_processing_rate_limiting_disabled() { WorkType::ChainSegmentBackfill.into(), ], Duration::from_millis(100), + true, + true, ) .await; } From 78d330e4b7e2b76ab503cd88f4a365a6d7a0bcf0 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Fri, 19 Sep 2025 17:01:13 +1000 Subject: [PATCH 19/45] Consolidate `reqresp_pre_import_cache` into `data_availability_checker` (#8045) This PR consolidates the `reqresp_pre_import_cache` into the `data_availability_checker` for the following reasons: - the `reqresp_pre_import_cache` suffers from the same TOCTOU bug we had with `data_availability_checker` earlier, and leads to unbounded memory leak, which we have observed over the last 6 months on some nodes. - the `reqresp_pre_import_cache` is no longer necessary, because we now hold blocks in the `data_availability_checker` for longer since (#7961), and recent blocks can be served from the DA checker. This PR also maintains the following functionalities - Serving pre-executed blocks over RPC, and they're now served from the `data_availability_checker` instead. - Using the cache for de-duplicating lookup requests. Co-Authored-By: Jimmy Chen Co-Authored-By: Jimmy Chen --- beacon_node/beacon_chain/src/beacon_chain.rs | 144 ++++-------- beacon_node/beacon_chain/src/builder.rs | 1 - .../src/data_availability_checker.rs | 38 +++- .../overflow_lru_cache.rs | 208 +++++++++++++----- .../state_lru_cache.rs | 10 - beacon_node/beacon_chain/src/metrics.rs | 12 - .../gossip_methods.rs | 5 +- .../network_beacon_processor/sync_methods.rs | 5 +- beacon_node/network/src/sync/tests/lookups.rs | 24 +- 9 files changed, 239 insertions(+), 208 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 084a68bfeab..ef3c2f52e0f 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -340,10 +340,6 @@ pub enum BlockProcessStatus { ExecutionValidated(Arc>), } -pub struct BeaconChainMetrics { - pub reqresp_pre_import_cache_len: usize, -} - pub type LightClientProducerEvent = (Hash256, Slot, SyncAggregate); pub type BeaconForkChoice = ForkChoice< @@ -363,9 +359,6 @@ pub type BeaconStore = Arc< >, >; -/// Cache gossip verified blocks to serve over ReqResp before they are imported -type ReqRespPreImportCache = HashMap>>; - /// Represents the "Beacon Chain" component of Ethereum 2.0. Allows import of blocks and block /// operations and chooses a canonical head. pub struct BeaconChain { @@ -462,8 +455,6 @@ pub struct BeaconChain { pub(crate) attester_cache: Arc, /// A cache used when producing attestations whilst the head block is still being imported. pub early_attester_cache: EarlyAttesterCache, - /// Cache gossip verified blocks to serve over ReqResp before they are imported - pub reqresp_pre_import_cache: Arc>>, /// A cache used to keep track of various block timings. pub block_times_cache: Arc>, /// A cache used to track pre-finalization block roots for quick rejection. @@ -1289,18 +1280,8 @@ impl BeaconChain { /// chain. 
Used by sync to learn the status of a block and prevent repeated downloads / /// processing attempts. pub fn get_block_process_status(&self, block_root: &Hash256) -> BlockProcessStatus { - if let Some(block) = self - .data_availability_checker - .get_execution_valid_block(block_root) - { - return BlockProcessStatus::ExecutionValidated(block); - } - - if let Some(block) = self.reqresp_pre_import_cache.read().get(block_root) { - // A block is on the `reqresp_pre_import_cache` but NOT in the - // `data_availability_checker` only if it is actively processing. We can expect a future - // event with the result of processing - return BlockProcessStatus::NotValidated(block.clone()); + if let Some(cached_block) = self.data_availability_checker.get_cached_block(block_root) { + return cached_block; } BlockProcessStatus::Unknown @@ -3054,8 +3035,7 @@ impl BeaconChain { self.emit_sse_blob_sidecar_events(&block_root, std::iter::once(blob.as_blob())); - let r = self.check_gossip_blob_availability_and_import(blob).await; - self.remove_notified(&block_root, r) + self.check_gossip_blob_availability_and_import(blob).await } /// Cache the data columns in the processing cache, process it, then evict it from the cache if it was @@ -3092,15 +3072,13 @@ impl BeaconChain { data_columns.iter().map(|column| column.as_data_column()), ); - let r = self - .check_gossip_data_columns_availability_and_import( - slot, - block_root, - data_columns, - publish_fn, - ) - .await; - self.remove_notified(&block_root, r) + self.check_gossip_data_columns_availability_and_import( + slot, + block_root, + data_columns, + publish_fn, + ) + .await } /// Cache the blobs in the processing cache, process it, then evict it from the cache if it was @@ -3139,10 +3117,8 @@ impl BeaconChain { self.emit_sse_blob_sidecar_events(&block_root, blobs.iter().flatten().map(Arc::as_ref)); - let r = self - .check_rpc_blob_availability_and_import(slot, block_root, blobs) - .await; - self.remove_notified(&block_root, r) + self.check_rpc_blob_availability_and_import(slot, block_root, blobs) + .await } /// Process blobs retrieved from the EL and returns the `AvailabilityProcessingStatus`. @@ -3174,10 +3150,8 @@ impl BeaconChain { } } - let r = self - .check_engine_blobs_availability_and_import(slot, block_root, engine_get_blobs_output) - .await; - self.remove_notified(&block_root, r) + self.check_engine_blobs_availability_and_import(slot, block_root, engine_get_blobs_output) + .await } fn emit_sse_blob_sidecar_events<'a, I>(self: &Arc, block_root: &Hash256, blobs_iter: I) @@ -3270,10 +3244,8 @@ impl BeaconChain { custody_columns.iter().map(|column| column.as_ref()), ); - let r = self - .check_rpc_custody_columns_availability_and_import(slot, block_root, custody_columns) - .await; - self.remove_notified(&block_root, r) + self.check_rpc_custody_columns_availability_and_import(slot, block_root, custody_columns) + .await } pub async fn reconstruct_data_columns( @@ -3320,10 +3292,8 @@ impl BeaconChain { return Ok(None); }; - let r = self - .process_availability(slot, availability, || Ok(())) - .await; - self.remove_notified(&block_root, r) + self.process_availability(slot, availability, || Ok(())) + .await .map(|availability_processing_status| { Some((availability_processing_status, data_columns_to_publish)) }) @@ -3340,46 +3310,6 @@ impl BeaconChain { } } - /// Remove any block components from the *processing cache* if we no longer require them. If the - /// block was imported full or erred, we no longer require them. 
- fn remove_notified( - &self, - block_root: &Hash256, - r: Result, - ) -> Result { - let has_missing_components = - matches!(r, Ok(AvailabilityProcessingStatus::MissingComponents(_, _))); - if !has_missing_components { - self.reqresp_pre_import_cache.write().remove(block_root); - } - r - } - - /// Wraps `process_block` in logic to cache the block's commitments in the processing cache - /// and evict if the block was imported or errored. - pub async fn process_block_with_early_caching>( - self: &Arc, - block_root: Hash256, - unverified_block: B, - block_source: BlockImportSource, - notify_execution_layer: NotifyExecutionLayer, - ) -> Result { - self.reqresp_pre_import_cache - .write() - .insert(block_root, unverified_block.block_cloned()); - - let r = self - .process_block( - block_root, - unverified_block, - notify_execution_layer, - block_source, - || Ok(()), - ) - .await; - self.remove_notified(&block_root, r) - } - /// Check for known and configured invalid block roots before processing. pub fn check_invalid_block_roots(&self, block_root: Hash256) -> Result<(), BlockError> { if self.config.invalid_block_roots.contains(&block_root) { @@ -3411,12 +3341,6 @@ impl BeaconChain { block_source: BlockImportSource, publish_fn: impl FnOnce() -> Result<(), BlockError>, ) -> Result { - // Start the Prometheus timer. - let _full_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); - - // Increment the Prometheus counter for block processing requests. - metrics::inc_counter(&metrics::BLOCK_PROCESSING_REQUESTS); - let block_slot = unverified_block.block().slot(); // Set observed time if not already set. Usually this should be set by gossip or RPC, @@ -3431,6 +3355,15 @@ impl BeaconChain { ); } + self.data_availability_checker + .put_pre_execution_block(block_root, unverified_block.block_cloned())?; + + // Start the Prometheus timer. + let _full_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); + + // Increment the Prometheus counter for block processing requests. + metrics::inc_counter(&metrics::BLOCK_PROCESSING_REQUESTS); + // A small closure to group the verification and import errors. let chain = self.clone(); let import_block = async move { @@ -3448,7 +3381,18 @@ impl BeaconChain { .set_time_consensus_verified(block_root, block_slot, timestamp) } - let executed_block = chain.into_executed_block(execution_pending).await?; + let executed_block = chain + .into_executed_block(execution_pending) + .await + .inspect_err(|_| { + // If the block fails execution for whatever reason (e.g. engine offline), + // and we keep it in the cache, then the node will NOT perform lookup and + // reprocess this block until the block is evicted from DA checker, causing the + // chain to get stuck temporarily if the block is canonical. Therefore we remove + // it from the cache if execution fails. + self.data_availability_checker + .remove_block_on_execution_error(&block_root); + })?; // Record the *additional* time it took to wait for execution layer verification. 
if let Some(timestamp) = self.slot_clock.now_duration() { @@ -3574,9 +3518,7 @@ impl BeaconChain { block: AvailabilityPendingExecutedBlock, ) -> Result { let slot = block.block.slot(); - let availability = self - .data_availability_checker - .put_pending_executed_block(block)?; + let availability = self.data_availability_checker.put_executed_block(block)?; self.process_availability(slot, availability, || Ok(())) .await } @@ -7156,12 +7098,6 @@ impl BeaconChain { ) } - pub fn metrics(&self) -> BeaconChainMetrics { - BeaconChainMetrics { - reqresp_pre_import_cache_len: self.reqresp_pre_import_cache.read().len(), - } - } - pub(crate) fn get_blobs_or_columns_store_op( &self, block_root: Hash256, diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 35432632cc2..5564c7916fa 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -998,7 +998,6 @@ where validator_pubkey_cache: RwLock::new(validator_pubkey_cache), attester_cache: <_>::default(), early_attester_cache: <_>::default(), - reqresp_pre_import_cache: <_>::default(), light_client_server_cache: LightClientServerCache::new(), light_client_server_tx: self.light_client_server_tx, shutdown_sender: self diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 88cd8f3aab4..a0ad1c2112d 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -7,7 +7,9 @@ use crate::block_verification_types::{ use crate::data_availability_checker::overflow_lru_cache::{ DataAvailabilityCheckerInner, ReconstructColumnsDecision, }; -use crate::{BeaconChain, BeaconChainTypes, BeaconStore, CustodyContext, metrics}; +use crate::{ + BeaconChain, BeaconChainTypes, BeaconStore, BlockProcessStatus, CustodyContext, metrics, +}; use kzg::Kzg; use slot_clock::SlotClock; use std::fmt; @@ -27,6 +29,7 @@ mod error; mod overflow_lru_cache; mod state_lru_cache; +use crate::data_availability_checker::error::Error; use crate::data_column_verification::{ CustodyDataColumn, GossipVerifiedDataColumn, KzgVerifiedCustodyDataColumn, KzgVerifiedDataColumn, verify_kzg_for_data_column_list, @@ -144,14 +147,12 @@ impl DataAvailabilityChecker { &self.custody_context } - /// Checks if the block root is currenlty in the availability cache awaiting import because + /// Checks if the block root is currently in the availability cache awaiting import because /// of missing components. - pub fn get_execution_valid_block( - &self, - block_root: &Hash256, - ) -> Option>> { - self.availability_cache - .get_execution_valid_block(block_root) + /// + /// Returns the cache block wrapped in a `BlockProcessStatus` enum if it exists. + pub fn get_cached_block(&self, block_root: &Hash256) -> Option> { + self.availability_cache.get_cached_block(block_root) } /// Return the set of cached blob indexes for `block_root`. Returns None if there is no block @@ -340,12 +341,29 @@ impl DataAvailabilityChecker { /// Check if we have all the blobs for a block. Returns `Availability` which has information /// about whether all components have been received or more are required. - pub fn put_pending_executed_block( + pub fn put_executed_block( &self, executed_block: AvailabilityPendingExecutedBlock, ) -> Result, AvailabilityCheckError> { + self.availability_cache.put_executed_block(executed_block) + } + + /// Inserts a pre-execution block into the cache. 
+ /// This does NOT override an existing executed block. + pub fn put_pre_execution_block( + &self, + block_root: Hash256, + block: Arc>, + ) -> Result<(), Error> { + self.availability_cache + .put_pre_execution_block(block_root, block) + } + + /// Removes a pre-execution block from the cache. + /// This does NOT remove an existing executed block. + pub fn remove_block_on_execution_error(&self, block_root: &Hash256) { self.availability_cache - .put_pending_executed_block(executed_block) + .remove_pre_execution_block(block_root); } /// Verifies kzg commitments for an RpcBlock, returns a `MaybeAvailableBlock` that may diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 6afb680ddb8..bb440096627 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -1,6 +1,5 @@ use super::AvailableBlockData; use super::state_lru_cache::{DietAvailabilityPendingExecutedBlock, StateLRUCache}; -use crate::BeaconChainTypes; use crate::CustodyContext; use crate::beacon_chain::BeaconStore; use crate::blob_verification::KzgVerifiedBlob; @@ -9,6 +8,7 @@ use crate::block_verification_types::{ }; use crate::data_availability_checker::{Availability, AvailabilityCheckError}; use crate::data_column_verification::KzgVerifiedCustodyDataColumn; +use crate::{BeaconChainTypes, BlockProcessStatus}; use lighthouse_tracing::SPAN_PENDING_COMPONENTS; use lru::LruCache; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -16,12 +16,46 @@ use std::cmp::Ordering; use std::num::NonZeroUsize; use std::sync::Arc; use tracing::{Span, debug, debug_span}; +use types::beacon_block_body::KzgCommitments; use types::blob_sidecar::BlobIdentifier; use types::{ BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256, RuntimeFixedVector, RuntimeVariableList, SignedBeaconBlock, }; +#[derive(Clone)] +pub enum CachedBlock { + PreExecution(Arc>), + Executed(Box>), +} + +impl CachedBlock { + pub fn get_commitments(&self) -> KzgCommitments { + let block = self.as_block(); + block + .message() + .body() + .blob_kzg_commitments() + .cloned() + .unwrap_or_default() + } + + fn as_block(&self) -> &SignedBeaconBlock { + match self { + CachedBlock::PreExecution(b) => b, + CachedBlock::Executed(b) => b.as_block(), + } + } + + pub fn num_blobs_expected(&self) -> usize { + self.as_block() + .message() + .body() + .blob_kzg_commitments() + .map_or(0, |commitments| commitments.len()) + } +} + /// This represents the components of a partially available block /// /// The blobs are all gossip and kzg verified. @@ -39,22 +73,25 @@ pub struct PendingComponents { pub block_root: Hash256, pub verified_blobs: RuntimeFixedVector>>, pub verified_data_columns: Vec>, - pub executed_block: Option>, + pub block: Option>, pub reconstruction_started: bool, span: Span, } impl PendingComponents { - /// Returns an immutable reference to the cached block. - pub fn get_cached_block(&self) -> &Option> { - &self.executed_block - } - /// Returns an immutable reference to the fixed vector of cached blobs. 
pub fn get_cached_blobs(&self) -> &RuntimeFixedVector>> { &self.verified_blobs } + #[cfg(test)] + fn get_diet_block(&self) -> Option<&DietAvailabilityPendingExecutedBlock> { + self.block.as_ref().and_then(|block| match block { + CachedBlock::Executed(block) => Some(block.as_ref()), + _ => None, + }) + } + /// Returns an immutable reference to the cached data column. pub fn get_cached_data_column( &self, @@ -66,11 +103,6 @@ impl PendingComponents { .map(|d| d.clone_arc()) } - /// Returns a mutable reference to the cached block. - pub fn get_cached_block_mut(&mut self) -> &mut Option> { - &mut self.executed_block - } - /// Returns a mutable reference to the fixed vector of cached blobs. pub fn get_cached_blobs_mut(&mut self) -> &mut RuntimeFixedVector>> { &mut self.verified_blobs @@ -96,9 +128,17 @@ impl PendingComponents { .collect() } - /// Inserts a block into the cache. - pub fn insert_block(&mut self, block: DietAvailabilityPendingExecutedBlock) { - *self.get_cached_block_mut() = Some(block) + /// Inserts an executed block into the cache. + pub fn insert_executed_block(&mut self, block: DietAvailabilityPendingExecutedBlock) { + self.block = Some(CachedBlock::Executed(Box::new(block))) + } + + /// Inserts a pre-execution block into the cache. + /// This does NOT override an existing executed block. + pub fn insert_pre_execution_block(&mut self, block: Arc>) { + if self.block.is_none() { + self.block = Some(CachedBlock::PreExecution(block)) + } } /// Inserts a blob at a specific index in the cache. @@ -128,7 +168,7 @@ impl PendingComponents { /// 1. The blob entry at the index is empty and no block exists, or /// 2. The block exists and its commitment matches the blob's commitment. pub fn merge_single_blob(&mut self, index: usize, blob: KzgVerifiedBlob) { - if let Some(cached_block) = self.get_cached_block() { + if let Some(cached_block) = &self.block { let block_commitment_opt = cached_block.get_commitments().get(index).copied(); if let Some(block_commitment) = block_commitment_opt && block_commitment == *blob.get_commitment() @@ -158,7 +198,7 @@ impl PendingComponents { /// /// Blobs that don't match the new block's commitments are evicted. 
pub fn merge_block(&mut self, block: DietAvailabilityPendingExecutedBlock) { - self.insert_block(block); + self.insert_executed_block(block); let reinsert = self.get_cached_blobs_mut().take(); self.merge_blobs(reinsert); } @@ -180,7 +220,7 @@ impl PendingComponents { &Span, ) -> Result, AvailabilityCheckError>, { - let Some(block) = &self.executed_block else { + let Some(CachedBlock::Executed(block)) = &self.block else { // Block not available yet return Ok(None); }; @@ -267,7 +307,7 @@ impl PendingComponents { block, import_data, payload_verification_outcome, - } = recover(block.clone(), &self.span)?; + } = recover(*block.clone(), &self.span)?; let available_block = AvailableBlock { block_root: self.block_root, @@ -295,7 +335,7 @@ impl PendingComponents { block_root, verified_blobs: RuntimeFixedVector::new(vec![None; max_len]), verified_data_columns: vec![], - executed_block: None, + block: None, reconstruction_started: false, span, } @@ -307,9 +347,9 @@ impl PendingComponents { /// - The first data column /// Otherwise, returns None pub fn epoch(&self) -> Option { - // Get epoch from cached executed block - if let Some(executed_block) = &self.executed_block { - return Some(executed_block.as_block().epoch()); + // Get epoch from cached block + if let Some(block) = &self.block { + return Some(block.as_block().epoch()); } // Or, get epoch from first available blob @@ -326,7 +366,7 @@ impl PendingComponents { } pub fn status_str(&self, num_expected_columns_opt: Option) -> String { - let block_count = if self.executed_block.is_some() { 1 } else { 0 }; + let block_count = if self.block.is_some() { 1 } else { 0 }; if let Some(num_expected_columns) = num_expected_columns_opt { format!( "block {} data_columns {}/{}", @@ -335,7 +375,7 @@ impl PendingComponents { num_expected_columns ) } else { - let num_expected_blobs = if let Some(block) = self.get_cached_block() { + let num_expected_blobs = if let Some(block) = &self.block { &block.num_blobs_expected().to_string() } else { "?" @@ -387,18 +427,17 @@ impl DataAvailabilityCheckerInner { } /// Returns true if the block root is known, without altering the LRU ordering - pub fn get_execution_valid_block( - &self, - block_root: &Hash256, - ) -> Option>> { + pub fn get_cached_block(&self, block_root: &Hash256) -> Option> { self.critical .read() .peek(block_root) .and_then(|pending_components| { - pending_components - .executed_block - .as_ref() - .map(|block| block.block_cloned()) + pending_components.block.as_ref().map(|block| match block { + CachedBlock::PreExecution(b) => BlockProcessStatus::NotValidated(b.clone()), + CachedBlock::Executed(b) => { + BlockProcessStatus::ExecutionValidated(b.block_cloned()) + } + }) }) } @@ -647,9 +686,46 @@ impl DataAvailabilityCheckerInner { } } + /// Inserts a pre executed block into the cache. + /// - This does NOT trigger the availability check as the block still needs to be executed. + /// - This does NOT override an existing cached block to avoid overwriting an executed block. 
+ pub fn put_pre_execution_block( + &self, + block_root: Hash256, + block: Arc>, + ) -> Result<(), AvailabilityCheckError> { + let epoch = block.epoch(); + let pending_components = + self.update_or_insert_pending_components(block_root, epoch, |pending_components| { + pending_components.insert_pre_execution_block(block); + Ok(()) + })?; + + let num_expected_columns_opt = self.get_num_expected_columns(epoch); + + pending_components.span.in_scope(|| { + debug!( + component = "pre execution block", + status = pending_components.status_str(num_expected_columns_opt), + "Component added to data availability checker" + ); + }); + + Ok(()) + } + + /// Removes a pre-execution block from the cache. + /// This does NOT remove an existing executed block. + pub fn remove_pre_execution_block(&self, block_root: &Hash256) { + // The read lock is immediately dropped so we can safely remove the block from the cache. + if let Some(BlockProcessStatus::NotValidated(_)) = self.get_cached_block(block_root) { + self.critical.write().pop(block_root); + } + } + /// Check if we have all the blobs for a block. If we do, return the Availability variant that /// triggers import of the block. - pub fn put_pending_executed_block( + pub fn put_executed_block( &self, executed_block: AvailabilityPendingExecutedBlock, ) -> Result, AvailabilityCheckError> { @@ -667,14 +743,7 @@ impl DataAvailabilityCheckerInner { Ok(()) })?; - let num_expected_columns_opt = if self.spec.is_peer_das_enabled_for_epoch(epoch) { - let num_of_column_samples = self - .custody_context - .num_of_data_columns_to_sample(epoch, &self.spec); - Some(num_of_column_samples) - } else { - None - }; + let num_expected_columns_opt = self.get_num_expected_columns(epoch); pending_components.span.in_scope(|| { debug!( @@ -691,6 +760,17 @@ impl DataAvailabilityCheckerInner { ) } + fn get_num_expected_columns(&self, epoch: Epoch) -> Option { + if self.spec.is_peer_das_enabled_for_epoch(epoch) { + let num_of_column_samples = self + .custody_context + .num_of_data_columns_to_sample(epoch, &self.spec); + Some(num_of_column_samples) + } else { + None + } + } + /// maintain the cache pub fn do_maintenance(&self, cutoff_epoch: Epoch) -> Result<(), AvailabilityCheckError> { // clean up any lingering states in the state cache @@ -964,7 +1044,7 @@ mod test { ); assert!(cache.critical.read().is_empty(), "cache should be empty"); let availability = cache - .put_pending_executed_block(pending_block) + .put_executed_block(pending_block) .expect("should put block"); if blobs_expected == 0 { assert!( @@ -1031,7 +1111,7 @@ mod test { ); } let availability = cache - .put_pending_executed_block(pending_block) + .put_executed_block(pending_block) .expect("should put block"); assert!( matches!(availability, Availability::Available(_)), @@ -1093,7 +1173,7 @@ mod test { // put the block in the cache let availability = cache - .put_pending_executed_block(pending_block) + .put_executed_block(pending_block) .expect("should put block"); // grab the diet block from the cache for later testing @@ -1101,12 +1181,7 @@ mod test { .critical .read() .peek(&block_root) - .map(|pending_components| { - pending_components - .executed_block - .clone() - .expect("should exist") - }) + .and_then(|pending_components| pending_components.get_diet_block().cloned()) .expect("should exist"); pushed_diet_blocks.push_back(diet_block); @@ -1267,7 +1342,7 @@ mod pending_components_tests { } pub fn assert_cache_consistent(cache: PendingComponents, max_len: usize) { - if let Some(cached_block) = 
cache.get_cached_block() { + if let Some(cached_block) = &cache.block { let cached_block_commitments = cached_block.get_commitments(); for index in 0..max_len { let block_commitment = cached_block_commitments.get(index).copied(); @@ -1373,4 +1448,33 @@ mod pending_components_tests { assert_cache_consistent(cache, max_len); } + + #[test] + fn should_not_insert_pre_execution_block_if_executed_block_exists() { + let (pre_execution_block, blobs, random_blobs, max_len) = pre_setup(); + let (executed_block, _blobs, _random_blobs) = + setup_pending_components(pre_execution_block.clone(), blobs, random_blobs); + + let block_root = pre_execution_block.canonical_root(); + let mut pending_component = >::empty(block_root, max_len); + + let pre_execution_block = Arc::new(pre_execution_block); + pending_component.insert_pre_execution_block(pre_execution_block.clone()); + assert!( + matches!(pending_component.block, Some(CachedBlock::PreExecution(_))), + "pre execution block inserted" + ); + + pending_component.insert_executed_block(executed_block); + assert!( + matches!(pending_component.block, Some(CachedBlock::Executed(_))), + "executed block inserted" + ); + + pending_component.insert_pre_execution_block(pre_execution_block); + assert!( + matches!(pending_component.block, Some(CachedBlock::Executed(_))), + "executed block should remain" + ); + } } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs index 57c236efcf6..24f9237e3c9 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs @@ -10,7 +10,6 @@ use state_processing::BlockReplayer; use std::sync::Arc; use store::OnDiskConsensusContext; use tracing::{Span, debug_span, instrument}; -use types::beacon_block_body::KzgCommitments; use types::{BeaconState, BlindedPayload, ChainSpec, Epoch, EthSpec, Hash256, SignedBeaconBlock}; /// This mirrors everything in the `AvailabilityPendingExecutedBlock`, except @@ -43,15 +42,6 @@ impl DietAvailabilityPendingExecutedBlock { .map_or(0, |commitments| commitments.len()) } - pub fn get_commitments(&self) -> KzgCommitments { - self.as_block() - .message() - .body() - .blob_kzg_commitments() - .cloned() - .unwrap_or_default() - } - /// Returns the epoch corresponding to `self.slot()`. 
pub fn epoch(&self) -> Epoch { self.block.slot().epoch(E::slots_per_epoch()) diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 3da3cf163a4..0d34ffdcd15 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -458,12 +458,6 @@ pub static BEACON_EARLY_ATTESTER_CACHE_HITS: LazyLock> = Lazy ) }); -pub static BEACON_REQRESP_PRE_IMPORT_CACHE_SIZE: LazyLock> = LazyLock::new(|| { - try_create_int_gauge( - "beacon_reqresp_pre_import_cache_size", - "Current count of items of the reqresp pre import cache", - ) -}); pub static BEACON_REQRESP_PRE_IMPORT_CACHE_HITS: LazyLock> = LazyLock::new(|| { try_create_int_counter( @@ -1965,7 +1959,6 @@ pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { } let attestation_stats = beacon_chain.op_pool.attestation_stats(); - let chain_metrics = beacon_chain.metrics(); // Kept duplicated for backwards compatibility set_gauge_by_usize( @@ -1973,11 +1966,6 @@ pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { beacon_chain.store.state_cache_len(), ); - set_gauge_by_usize( - &BEACON_REQRESP_PRE_IMPORT_CACHE_SIZE, - chain_metrics.reqresp_pre_import_cache_len, - ); - let da_checker_metrics = beacon_chain.data_availability_checker.metrics(); set_gauge_by_usize( &DATA_AVAILABILITY_OVERFLOW_MEMORY_BLOCK_CACHE_SIZE, diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index b3d717142f5..5fc94c29587 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -1500,11 +1500,12 @@ impl NetworkBeaconProcessor { let result = self .chain - .process_block_with_early_caching( + .process_block( block_root, verified_block, - BlockImportSource::Gossip, NotifyExecutionLayer::Yes, + BlockImportSource::Gossip, + || Ok(()), ) .await; register_process_result_metrics(&result, metrics::BlockSource::Gossip, "block"); diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index b61a6e25c50..f139724702f 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -168,11 +168,12 @@ impl NetworkBeaconProcessor { let signed_beacon_block = block.block_cloned(); let result = self .chain - .process_block_with_early_caching( + .process_block( block_root, block, - BlockImportSource::Lookup, NotifyExecutionLayer::Yes, + BlockImportSource::Lookup, + || Ok(()), ) .await; register_process_result_metrics(&result, metrics::BlockSource::Rpc, "block"); diff --git a/beacon_node/network/src/sync/tests/lookups.rs b/beacon_node/network/src/sync/tests/lookups.rs index 2edcd12f019..27968a06351 100644 --- a/beacon_node/network/src/sync/tests/lookups.rs +++ b/beacon_node/network/src/sync/tests/lookups.rs @@ -1079,7 +1079,7 @@ impl TestRig { .harness .chain .data_availability_checker - .put_pending_executed_block(executed_block) + .put_executed_block(executed_block) .unwrap() { Availability::Available(_) => panic!("block removed from da_checker, available"), @@ -1109,20 +1109,19 @@ impl TestRig { }; } - fn insert_block_to_processing_cache(&mut self, block: Arc>) { + fn insert_block_to_availability_cache(&mut self, block: Arc>) { self.harness .chain - .reqresp_pre_import_cache - .write() - .insert(block.canonical_root(), block); + 
.data_availability_checker + .put_pre_execution_block(block.canonical_root(), block) + .unwrap(); } fn simulate_block_gossip_processing_becomes_invalid(&mut self, block_root: Hash256) { self.harness .chain - .reqresp_pre_import_cache - .write() - .remove(&block_root); + .data_availability_checker + .remove_block_on_execution_error(&block_root); self.send_sync_message(SyncMessage::GossipBlockProcessResult { block_root, @@ -1135,11 +1134,6 @@ impl TestRig { block: Arc>, ) { let block_root = block.canonical_root(); - self.harness - .chain - .reqresp_pre_import_cache - .write() - .remove(&block_root); self.insert_block_to_da_checker(block); @@ -1841,7 +1835,7 @@ fn block_in_processing_cache_becomes_invalid() { let (block, blobs) = r.rand_block_and_blobs(NumBlobs::Number(1)); let block_root = block.canonical_root(); let peer_id = r.new_connected_peer(); - r.insert_block_to_processing_cache(block.clone().into()); + r.insert_block_to_availability_cache(block.clone().into()); r.trigger_unknown_block_from_attestation(block_root, peer_id); // Should trigger blob request let id = r.expect_blob_lookup_request(block_root); @@ -1867,7 +1861,7 @@ fn block_in_processing_cache_becomes_valid_imported() { let (block, blobs) = r.rand_block_and_blobs(NumBlobs::Number(1)); let block_root = block.canonical_root(); let peer_id = r.new_connected_peer(); - r.insert_block_to_processing_cache(block.clone().into()); + r.insert_block_to_availability_cache(block.clone().into()); r.trigger_unknown_block_from_attestation(block_root, peer_id); // Should trigger blob request let id = r.expect_blob_lookup_request(block_root); From 4efe47b3c3ccf5bfe88cd76b6abe1ce7b080e0d0 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Fri, 19 Sep 2025 17:01:16 +1000 Subject: [PATCH 20/45] Rename `--subscribe-all-data-column-subnets` to `--supernode` and make it visible in help (#8083) Rename `--subscribe-all-data-column-subnets` to `--supernode` as it's now been officially accepted in the spec. Also make it visible in help in preparation for the fusaka release. https://github.com/ethereum/consensus-specs/blob/dev/specs/fulu/p2p-interface.md#supernodes Co-Authored-By: Jimmy Chen --- beacon_node/src/cli.rs | 15 ++++++++------- beacon_node/src/config.rs | 2 +- book/src/help_bn.md | 7 +++++++ lighthouse/tests/beacon_node.rs | 13 +++++++++++++ 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 238907adce8..569d1e4ad81 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -47,16 +47,17 @@ pub fn cli_app() -> Command { * Network parameters. */ .arg( - Arg::new("subscribe-all-data-column-subnets") - .long("subscribe-all-data-column-subnets") + Arg::new("supernode") + .long("supernode") + .alias("subscribe-all-data-column-subnets") .action(ArgAction::SetTrue) .help_heading(FLAG_HEADER) - .help("Subscribe to all data column subnets and participate in data custody for \ - all columns. This will also advertise the beacon node as being long-lived \ - subscribed to all data column subnets. \ - NOTE: this is an experimental flag and may change any time without notice!") + .help("Run as a voluntary supernode. This node will subscribe to all data column \ + subnets, custody all data columns, and perform reconstruction and cross-seeding. 
\ + This requires significantly more bandwidth, storage, and computation requirements but \ + the node will have direct access to all blobs via the beacon API and it \ + helps network resilience by serving all data columns to syncing peers.") .display_order(0) - .hide(true) ) .arg( // TODO(das): remove this before PeerDAS release diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 3681556d11e..230350fade4 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -1162,7 +1162,7 @@ pub fn set_network_config( config.network_dir = data_dir.join(DEFAULT_NETWORK_DIR); }; - if parse_flag(cli_args, "subscribe-all-data-column-subnets") { + if parse_flag(cli_args, "supernode") { config.subscribe_all_data_column_subnets = true; } diff --git a/book/src/help_bn.md b/book/src/help_bn.md index eba6814863f..d5396321f2c 100644 --- a/book/src/help_bn.md +++ b/book/src/help_bn.md @@ -571,6 +571,13 @@ Flags: Subscribe to all subnets regardless of validator count. This will also advertise the beacon node as being long-lived subscribed to all subnets. + --supernode + Run as a voluntary supernode. This node will subscribe to all data + column subnets, custody all data columns, and perform reconstruction + and cross-seeding. This requires significantly more bandwidth, + storage, and computation requirements but the node will have direct + access to all blobs via the beacon API and it helps network resilience + by serving all data columns to syncing peers. --validator-monitor-auto Enables the automatic detection and monitoring of validators connected to the HTTP API and using the subnet subscription endpoint. This diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 629c2e1e9a1..8f6d040b62a 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -833,6 +833,19 @@ fn network_subscribe_all_data_column_subnets_flag() { .with_config(|config| assert!(config.network.subscribe_all_data_column_subnets)); } #[test] +fn network_supernode_flag() { + CommandLineTest::new() + .flag("supernode", None) + .run_with_zero_port() + .with_config(|config| assert!(config.network.subscribe_all_data_column_subnets)); +} +#[test] +fn network_subscribe_all_data_column_subnets_default() { + CommandLineTest::new() + .run_with_zero_port() + .with_config(|config| assert!(!config.network.subscribe_all_data_column_subnets)); +} +#[test] fn blob_publication_batches() { CommandLineTest::new() .flag("blob-publication-batches", Some("3")) From 366fb0ee0dc3d87eeb6995847f05ecab8e48d11f Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Fri, 19 Sep 2025 22:58:46 +1000 Subject: [PATCH 21/45] Always upload sim test logs (#8082) This CI job failed https://github.com/sigp/lighthouse/actions/runs/17815533375/job/50647915897 But we lost the logs because they aren't uploaded when the job fails. This PR changes the step to always upload job, even in the case of failure. 
Co-Authored-By: Jimmy Chen --- .github/workflows/test-suite.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 59a045c7d3f..0201bf9ae30 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -225,6 +225,7 @@ jobs: TEST_FEATURES: portable CI_LOGGER_DIR: ${{ runner.temp }}/network_test_logs - name: Upload logs + if: always() uses: actions/upload-artifact@v4 with: name: network_test_logs @@ -328,6 +329,7 @@ jobs: - name: Run a basic beacon chain sim that starts from Deneb run: cargo run --release --bin simulator basic-sim --disable-stdout-logging --log-dir ${{ runner.temp }}/basic_simulator_logs - name: Upload logs + if: always() uses: actions/upload-artifact@v4 with: name: basic_simulator_logs @@ -349,6 +351,7 @@ jobs: - name: Run a beacon chain sim which tests VC fallback behaviour run: cargo run --release --bin simulator fallback-sim --disable-stdout-logging --log-dir ${{ runner.temp }}/fallback_simulator_logs - name: Upload logs + if: always() uses: actions/upload-artifact@v4 with: name: fallback_simulator_logs From 1dbc4f861b3f678516f6b3ba9cb448e3550b1b31 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Mon, 22 Sep 2025 15:03:47 +1000 Subject: [PATCH 22/45] Refine HTTP status logs (#8098) Ensure that we don't log a warning for HTTP 202s, which are expected on the blinded block endpoints after Fulu. Co-Authored-By: Michael Sproul --- beacon_node/http_api/src/lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index 5c6a9df7391..1b18ed50a3f 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -294,10 +294,7 @@ pub fn tracing_logging() -> warp::filters::log::Log Date: Mon, 22 Sep 2025 21:37:33 -0700 Subject: [PATCH 23/45] Reduce `TARGET_BACKFILL_SLOTS` in checkpoint sync test (#8102) Co-Authored-By: Eitan Seri- Levi --- scripts/tests/checkpoint-sync-config-devnet.yaml | 4 ++++ scripts/tests/checkpoint-sync.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/tests/checkpoint-sync-config-devnet.yaml b/scripts/tests/checkpoint-sync-config-devnet.yaml index f1b96dc9e52..2392011ed33 100644 --- a/scripts/tests/checkpoint-sync-config-devnet.yaml +++ b/scripts/tests/checkpoint-sync-config-devnet.yaml @@ -4,11 +4,15 @@ participants: cl_image: lighthouse:local el_type: geth el_image: ethpandaops/geth:master + cl_extra_params: + - --disable-backfill-rate-limiting supernode: true - cl_type: lighthouse cl_image: lighthouse:local el_type: geth el_image: ethpandaops/geth:master + cl_extra_params: + - --disable-backfill-rate-limiting supernode: false checkpoint_sync_enabled: true diff --git a/scripts/tests/checkpoint-sync.sh b/scripts/tests/checkpoint-sync.sh index a170d1e94dc..df03da042e5 100755 --- a/scripts/tests/checkpoint-sync.sh +++ b/scripts/tests/checkpoint-sync.sh @@ -15,7 +15,7 @@ CONFIG=${2:-$SCRIPT_DIR/checkpoint-sync-config-sepolia.yaml} # Interval for polling the /lighthouse/syncing endpoint for sync status POLL_INTERVAL_SECS=5 # Target number of slots to backfill to complete this test. -TARGET_BACKFILL_SLOTS=1024 +TARGET_BACKFILL_SLOTS=256 # Timeout for this test, if the node(s) fail to backfill `TARGET_BACKFILL_SLOTS` slots, fail the test. 
TIMEOUT_MINS=10 TIMEOUT_SECS=$((TIMEOUT_MINS * 60)) From d80c0ff5b57c043f60ee3cdc48730077fc484d75 Mon Sep 17 00:00:00 2001 From: Antonio Viggiano Date: Tue, 23 Sep 2025 22:20:10 -0300 Subject: [PATCH 24/45] Use HTTPS for xdelta3 in Cargo.toml (#8094) No issue Use HTTPS for dependency Co-Authored-By: Antonio Viggiano --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0e559182438..c100fa5ae24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11165,7 +11165,7 @@ dependencies = [ [[package]] name = "xdelta3" version = "0.1.5" -source = "git+http://github.com/sigp/xdelta3-rs?rev=4db64086bb02e9febb584ba93b9d16bb2ae3825a#4db64086bb02e9febb584ba93b9d16bb2ae3825a" +source = "git+https://github.com/sigp/xdelta3-rs?rev=4db64086bb02e9febb584ba93b9d16bb2ae3825a#4db64086bb02e9febb584ba93b9d16bb2ae3825a" dependencies = [ "bindgen", "cc", diff --git a/Cargo.toml b/Cargo.toml index 99543dbfb49..66378a16c46 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -279,7 +279,7 @@ validator_test_rig = { path = "testing/validator_test_rig" } warp = { version = "0.3.7", default-features = false, features = ["tls"] } warp_utils = { path = "common/warp_utils" } workspace_members = { path = "common/workspace_members" } -xdelta3 = { git = "http://github.com/sigp/xdelta3-rs", rev = "4db64086bb02e9febb584ba93b9d16bb2ae3825a" } +xdelta3 = { git = "https://github.com/sigp/xdelta3-rs", rev = "4db64086bb02e9febb584ba93b9d16bb2ae3825a" } zeroize = { version = "1", features = ["zeroize_derive", "serde"] } zip = "0.6" zstd = "0.13" From af274029e8c61fe01048105ba1f192cc762effeb Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Tue, 23 Sep 2025 23:37:34 -0700 Subject: [PATCH 25/45] Run reconstruction inside a scoped rayon pool (#8075) Co-Authored-By: Jimmy Chen Co-Authored-By: Eitan Seri- Levi Co-Authored-By: Eitan Seri-Levi --- Cargo.lock | 3 +- beacon_node/beacon_chain/src/beacon_chain.rs | 16 ++--- beacon_node/beacon_processor/Cargo.toml | 1 - beacon_node/beacon_processor/src/lib.rs | 20 ++----- .../beacon_processor/src/rayon_manager.rs | 27 --------- beacon_node/client/src/builder.rs | 2 - beacon_node/http_api/src/test_utils.rs | 2 - .../src/network_beacon_processor/tests.rs | 2 - common/task_executor/Cargo.toml | 2 + common/task_executor/src/lib.rs | 50 +++++++++++++++- .../task_executor/src/rayon_pool_provider.rs | 58 +++++++++++++++++++ 11 files changed, 123 insertions(+), 60 deletions(-) delete mode 100644 beacon_node/beacon_processor/src/rayon_manager.rs create mode 100644 common/task_executor/src/rayon_pool_provider.rs diff --git a/Cargo.lock b/Cargo.lock index c100fa5ae24..ee651080973 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -980,7 +980,6 @@ dependencies = [ "metrics", "num_cpus", "parking_lot 0.12.3", - "rayon", "serde", "slot_clock", "strum", @@ -9232,6 +9231,8 @@ dependencies = [ "async-channel 1.9.0", "futures", "metrics", + "num_cpus", + "rayon", "tokio", "tracing", ] diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index ef3c2f52e0f..4f0c6aada0a 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -124,7 +124,7 @@ use store::{ BlobSidecarListFromRoot, DBColumn, DatabaseBlock, Error as DBError, HotColdDB, HotStateSummary, KeyValueStore, KeyValueStoreOp, StoreItem, StoreOp, }; -use task_executor::{ShutdownReason, TaskExecutor}; +use task_executor::{RayonPoolType, ShutdownReason, TaskExecutor}; use tokio_stream::Stream; use 
tracing::{Span, debug, debug_span, error, info, info_span, instrument, trace, warn}; use tree_hash::TreeHash; @@ -3274,16 +3274,12 @@ impl BeaconChain { let current_span = Span::current(); let result = self .task_executor - .spawn_blocking_handle( - move || { - let _guard = current_span.enter(); - data_availability_checker.reconstruct_data_columns(&block_root) - }, - "reconstruct_data_columns", - ) - .ok_or(BeaconChainError::RuntimeShutdown)? + .spawn_blocking_with_rayon_async(RayonPoolType::HighPriority, move || { + let _guard = current_span.enter(); + data_availability_checker.reconstruct_data_columns(&block_root) + }) .await - .map_err(BeaconChainError::TokioJoin)??; + .map_err(|_| BeaconChainError::RuntimeShutdown)??; match result { DataColumnReconstructionResult::Success((availability, data_columns_to_publish)) => { diff --git a/beacon_node/beacon_processor/Cargo.toml b/beacon_node/beacon_processor/Cargo.toml index 262badf7f97..afd4660c9a3 100644 --- a/beacon_node/beacon_processor/Cargo.toml +++ b/beacon_node/beacon_processor/Cargo.toml @@ -12,7 +12,6 @@ logging = { workspace = true } metrics = { workspace = true } num_cpus = { workspace = true } parking_lot = { workspace = true } -rayon = { workspace = true } serde = { workspace = true } slot_clock = { workspace = true } strum = { workspace = true } diff --git a/beacon_node/beacon_processor/src/lib.rs b/beacon_node/beacon_processor/src/lib.rs index 64aeb4ceaf2..28ed0cca913 100644 --- a/beacon_node/beacon_processor/src/lib.rs +++ b/beacon_node/beacon_processor/src/lib.rs @@ -38,7 +38,6 @@ //! checks the queues to see if there are more parcels of work that can be spawned in a new worker //! task. -use crate::rayon_manager::RayonManager; use crate::work_reprocessing_queue::{ QueuedBackfillBatch, QueuedColumnReconstruction, QueuedGossipBlock, ReprocessQueueMessage, }; @@ -48,7 +47,6 @@ use lighthouse_network::{MessageId, NetworkGlobals, PeerId}; use logging::TimeLatch; use logging::crit; use parking_lot::Mutex; -use rayon::ThreadPool; pub use scheduler::work_reprocessing_queue; use serde::{Deserialize, Serialize}; use slot_clock::SlotClock; @@ -61,7 +59,7 @@ use std::sync::Arc; use std::task::Context; use std::time::{Duration, Instant}; use strum::IntoStaticStr; -use task_executor::TaskExecutor; +use task_executor::{RayonPoolType, TaskExecutor}; use tokio::sync::mpsc; use tokio::sync::mpsc::error::TrySendError; use tracing::{debug, error, trace, warn}; @@ -76,7 +74,6 @@ use work_reprocessing_queue::{ }; mod metrics; -pub mod rayon_manager; pub mod scheduler; /// The maximum size of the channel for work events to the `BeaconProcessor`. @@ -810,7 +807,6 @@ pub struct BeaconProcessor { pub network_globals: Arc>, pub executor: TaskExecutor, pub current_workers: usize, - pub rayon_manager: RayonManager, pub config: BeaconProcessorConfig, } @@ -1609,10 +1605,7 @@ impl BeaconProcessor { } Work::ChainSegmentBackfill(process_fn) => { if self.config.enable_backfill_rate_limiting { - task_spawner.spawn_blocking_with_rayon( - self.rayon_manager.low_priority_threadpool.clone(), - process_fn, - ) + task_spawner.spawn_blocking_with_rayon(RayonPoolType::LowPriority, process_fn) } else { // use the global rayon thread pool if backfill rate limiting is disabled. task_spawner.spawn_blocking(process_fn) @@ -1681,17 +1674,16 @@ impl TaskSpawner { } /// Spawns a blocking task on a rayon thread pool, dropping the `SendOnDrop` after task completion. 
- fn spawn_blocking_with_rayon(self, thread_pool: Arc, task: F) + fn spawn_blocking_with_rayon(self, rayon_pool_type: RayonPoolType, task: F) where F: FnOnce() + Send + 'static, { - self.executor.spawn_blocking( + self.executor.spawn_blocking_with_rayon( move || { - thread_pool.install(|| { - task(); - }); + task(); drop(self.send_idle_on_drop) }, + rayon_pool_type, WORKER_TASK_NAME, ) } diff --git a/beacon_node/beacon_processor/src/rayon_manager.rs b/beacon_node/beacon_processor/src/rayon_manager.rs deleted file mode 100644 index 99fe32d5cc4..00000000000 --- a/beacon_node/beacon_processor/src/rayon_manager.rs +++ /dev/null @@ -1,27 +0,0 @@ -use rayon::{ThreadPool, ThreadPoolBuilder}; -use std::sync::Arc; - -const DEFAULT_LOW_PRIORITY_DIVISOR: usize = 4; -const MINIMUM_LOW_PRIORITY_THREAD_COUNT: usize = 1; - -pub struct RayonManager { - /// Smaller rayon thread pool for lower-priority, compute-intensive tasks. - /// By default ~25% of CPUs or a minimum of 1 thread. - pub low_priority_threadpool: Arc, -} - -impl Default for RayonManager { - fn default() -> Self { - let low_prio_threads = - (num_cpus::get() / DEFAULT_LOW_PRIORITY_DIVISOR).max(MINIMUM_LOW_PRIORITY_THREAD_COUNT); - let low_priority_threadpool = Arc::new( - ThreadPoolBuilder::new() - .num_threads(low_prio_threads) - .build() - .expect("failed to build low-priority rayon pool"), - ); - Self { - low_priority_threadpool, - } - } -} diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs index 87cdcc45ef7..d984d5fedce 100644 --- a/beacon_node/client/src/builder.rs +++ b/beacon_node/client/src/builder.rs @@ -17,7 +17,6 @@ use beacon_chain::{ store::{HotColdDB, ItemStore, StoreConfig}, }; use beacon_chain::{Kzg, LightClientProducerEvent}; -use beacon_processor::rayon_manager::RayonManager; use beacon_processor::{BeaconProcessor, BeaconProcessorChannels}; use beacon_processor::{BeaconProcessorConfig, BeaconProcessorQueueLengths}; use environment::RuntimeContext; @@ -681,7 +680,6 @@ where executor: beacon_processor_context.executor.clone(), current_workers: 0, config: beacon_processor_config, - rayon_manager: RayonManager::default(), } .spawn_manager( beacon_processor_channels.beacon_processor_rx, diff --git a/beacon_node/http_api/src/test_utils.rs b/beacon_node/http_api/src/test_utils.rs index 7be8960e691..fe9e0dff704 100644 --- a/beacon_node/http_api/src/test_utils.rs +++ b/beacon_node/http_api/src/test_utils.rs @@ -5,7 +5,6 @@ use beacon_chain::{ }; use beacon_processor::{ BeaconProcessor, BeaconProcessorChannels, BeaconProcessorConfig, BeaconProcessorQueueLengths, - rayon_manager::RayonManager, }; use directory::DEFAULT_ROOT_DIR; use eth2::{BeaconNodeHttpClient, Timeouts}; @@ -248,7 +247,6 @@ pub async fn create_api_server_with_config( executor: test_runtime.task_executor.clone(), current_workers: 0, config: beacon_processor_config, - rayon_manager: RayonManager::default(), } .spawn_manager( beacon_processor_rx, diff --git a/beacon_node/network/src/network_beacon_processor/tests.rs b/beacon_node/network/src/network_beacon_processor/tests.rs index 99410bc5e51..4137c974bf3 100644 --- a/beacon_node/network/src/network_beacon_processor/tests.rs +++ b/beacon_node/network/src/network_beacon_processor/tests.rs @@ -17,7 +17,6 @@ use beacon_chain::test_utils::{ test_spec, }; use beacon_chain::{BeaconChain, WhenSlotSkipped}; -use beacon_processor::rayon_manager::RayonManager; use beacon_processor::{work_reprocessing_queue::*, *}; use gossipsub::MessageAcceptance; use itertools::Itertools; @@ -267,7 +266,6 @@ impl 
TestRig { executor, current_workers: 0, config: beacon_processor_config, - rayon_manager: RayonManager::default(), } .spawn_manager( beacon_processor_rx, diff --git a/common/task_executor/Cargo.toml b/common/task_executor/Cargo.toml index d4faf1e4b82..92a4fc4b596 100644 --- a/common/task_executor/Cargo.toml +++ b/common/task_executor/Cargo.toml @@ -8,6 +8,8 @@ edition = { workspace = true } async-channel = { workspace = true } futures = { workspace = true } metrics = { workspace = true } +num_cpus = { workspace = true } +rayon = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } tracing = { workspace = true } diff --git a/common/task_executor/src/lib.rs b/common/task_executor/src/lib.rs index 5f0c822b03f..0b8e9f8eba5 100644 --- a/common/task_executor/src/lib.rs +++ b/common/task_executor/src/lib.rs @@ -1,12 +1,15 @@ mod metrics; +mod rayon_pool_provider; pub mod test_utils; use futures::channel::mpsc::Sender; use futures::prelude::*; -use std::sync::Weak; +use std::sync::{Arc, Weak}; use tokio::runtime::{Handle, Runtime}; use tracing::debug; +use crate::rayon_pool_provider::RayonPoolProvider; +pub use crate::rayon_pool_provider::RayonPoolType; pub use tokio::task::JoinHandle; /// Provides a reason when Lighthouse is shut down. @@ -84,6 +87,8 @@ pub struct TaskExecutor { // FIXME(sproul): delete? #[allow(dead_code)] service_name: String, + + rayon_pool_provider: Arc, } impl TaskExecutor { @@ -105,6 +110,7 @@ impl TaskExecutor { exit, signal_tx, service_name, + rayon_pool_provider: Arc::new(RayonPoolProvider::default()), } } @@ -115,6 +121,7 @@ impl TaskExecutor { exit: self.exit.clone(), signal_tx: self.signal_tx.clone(), service_name, + rayon_pool_provider: self.rayon_pool_provider.clone(), } } @@ -226,6 +233,47 @@ impl TaskExecutor { } } + /// Spawns a blocking task on a dedicated tokio thread pool and installs a rayon context within it. + pub fn spawn_blocking_with_rayon( + self, + task: F, + rayon_pool_type: RayonPoolType, + name: &'static str, + ) where + F: FnOnce() + Send + 'static, + { + let thread_pool = self.rayon_pool_provider.get_thread_pool(rayon_pool_type); + self.spawn_blocking( + move || { + thread_pool.install(|| { + task(); + }); + }, + name, + ) + } + + /// Spawns a blocking computation on a rayon thread pool and awaits the result. + pub async fn spawn_blocking_with_rayon_async( + &self, + rayon_pool_type: RayonPoolType, + task: F, + ) -> Result + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + let thread_pool = self.rayon_pool_provider.get_thread_pool(rayon_pool_type); + let (tx, rx) = tokio::sync::oneshot::channel(); + + thread_pool.spawn(move || { + let result = task(); + let _ = tx.send(result); + }); + + rx.await + } + /// Spawn a future on the tokio runtime wrapped in an `async-channel::Receiver` returning an optional /// join handle to the future. /// The task is cancelled when the corresponding async-channel is dropped. 
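For reference, here is a minimal usage sketch of the two helpers added to `TaskExecutor` above. This is not part of the patch: the closure bodies and the task name are placeholders, and the signatures are assumed to be exactly those shown in the diff.

```rust
use task_executor::{RayonPoolType, TaskExecutor};

// Sketch only: run a CPU-bound computation on the high-priority scoped pool and
// await its result, then queue follow-up work on the low-priority pool.
async fn example(executor: TaskExecutor) -> Option<u64> {
    let sum = executor
        .spawn_blocking_with_rayon_async(RayonPoolType::HighPriority, || {
            // Any rayon parallelism used here stays inside the scoped pool.
            (0u64..1_000_000).sum::<u64>()
        })
        .await
        // `Err` means the worker was dropped before sending back a result.
        .ok()?;

    // Fire-and-forget variant; note that it consumes the executor handle.
    executor.spawn_blocking_with_rayon(
        move || {
            let _ = sum; // placeholder for CPU-heavy follow-up work
        },
        RayonPoolType::LowPriority,
        "example_low_priority_task",
    );

    Some(sum)
}
```

With the default percentages in the `RayonPoolProvider` below, a 16-core host would get a 4-thread low-priority pool (25% of CPUs) and a 12-thread high-priority pool (80% of CPUs), each with a minimum of one thread.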
diff --git a/common/task_executor/src/rayon_pool_provider.rs b/common/task_executor/src/rayon_pool_provider.rs new file mode 100644 index 00000000000..8e12f7eaa49 --- /dev/null +++ b/common/task_executor/src/rayon_pool_provider.rs @@ -0,0 +1,58 @@ +use rayon::{ThreadPool, ThreadPoolBuilder}; +use std::sync::Arc; + +const DEFAULT_LOW_PRIORITY_CPU_PERCENTAGE: usize = 25; +const DEFAULT_HIGH_PRIORITY_CPU_PERCENTAGE: usize = 80; +const MINIMUM_THREAD_COUNT: usize = 1; + +pub enum RayonPoolType { + HighPriority, + LowPriority, +} + +pub struct RayonPoolProvider { + /// Smaller rayon thread pool for lower-priority, compute-intensive tasks. + /// By default ~25% of CPUs or a minimum of 1 thread. + low_priority_thread_pool: Arc<ThreadPool>, + /// Larger rayon thread pool for high-priority, compute-intensive tasks. + /// By default ~80% of CPUs or a minimum of 1 thread. Critical/highest + /// priority tasks should use the global pool instead. + high_priority_thread_pool: Arc<ThreadPool>, +} + +impl Default for RayonPoolProvider { + fn default() -> Self { + let low_prio_threads = + (num_cpus::get() * DEFAULT_LOW_PRIORITY_CPU_PERCENTAGE / 100).max(MINIMUM_THREAD_COUNT); + let low_priority_thread_pool = Arc::new( + ThreadPoolBuilder::new() + .num_threads(low_prio_threads) + .build() + .expect("failed to build low-priority rayon pool"), + ); + + let high_prio_threads = (num_cpus::get() * DEFAULT_HIGH_PRIORITY_CPU_PERCENTAGE / 100) + .max(MINIMUM_THREAD_COUNT); + let high_priority_thread_pool = Arc::new( + ThreadPoolBuilder::new() + .num_threads(high_prio_threads) + .build() + .expect("failed to build high-priority rayon pool"), + ); + Self { + low_priority_thread_pool, + high_priority_thread_pool, + } + } +} + +impl RayonPoolProvider { + /// Get a scoped thread pool by priority level. + /// For critical/highest priority tasks, use the global pool instead. + pub fn get_thread_pool(&self, rayon_pool_type: RayonPoolType) -> Arc<ThreadPool> { + match rayon_pool_type { + RayonPoolType::HighPriority => self.high_priority_thread_pool.clone(), + RayonPoolType::LowPriority => self.low_priority_thread_pool.clone(), + } + } +} From 79b33214ea8e6838b426bd19d1c410e98182970e Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 25 Sep 2025 12:52:07 +1000 Subject: [PATCH 26/45] Only send data column subnet discovery requests after peerdas is scheduled (#8109) #8105 (to be confirmed). I noticed a large number of failed discovery requests after deploying the latest `unstable` to some of our testnet and mainnet nodes. This is because of a recent PeerDAS change to attempt to maintain sufficient peers across data column subnets - this shouldn't be enabled on networks without PeerDAS scheduled, otherwise it will keep retrying discovery on these subnets and never succeed. Also removed some unused files.
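The fix boils down to a single predicate over the node's current gossipsub subscriptions. A rough sketch of the check added in the peer manager diff below, written as a free-standing helper and assuming the same `NetworkGlobals` / `GossipKind` types from `lighthouse_network`:

```rust
// Sketch: true once the node is subscribed to at least one data column sidecar
// topic, which only happens on networks where PeerDAS is scheduled.
fn peerdas_enabled<E: EthSpec>(network_globals: &NetworkGlobals<E>) -> bool {
    network_globals
        .gossipsub_subscriptions
        .read()
        .iter()
        .any(|topic| matches!(topic.kind(), &GossipKind::DataColumnSidecar(_)))
}
```

`maintain_custody_peers` is then only called from the heartbeat when this predicate holds, so nodes on non-PeerDAS networks no longer issue data column subnet discovery queries that can never succeed.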
Co-Authored-By: Jimmy Chen Co-Authored-By: Jimmy Chen --- .../src/peer_manager/mod.rs | 69 +- .../src/subnet_service/attestation_subnets.rs | 681 ------------------ .../src/subnet_service/sync_subnets.rs | 345 --------- 3 files changed, 67 insertions(+), 1028 deletions(-) delete mode 100644 beacon_node/network/src/subnet_service/attestation_subnets.rs delete mode 100644 beacon_node/network/src/subnet_service/sync_subnets.rs diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 592fccdc741..ad16bb0421c 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -23,6 +23,7 @@ pub use libp2p::identity::Keypair; pub mod peerdb; use crate::peer_manager::peerdb::client::ClientKind; +use crate::types::GossipKind; use libp2p::multiaddr; use network_utils::discovery_metrics; use network_utils::enr_ext::{EnrExt, peer_id_to_node_id}; @@ -1434,8 +1435,16 @@ impl PeerManager { // Update peer score metrics; self.update_peer_score_metrics(); - // Maintain minimum count for custody peers. - self.maintain_custody_peers(); + // Maintain minimum count for custody peers if we are subscribed to any data column topics (i.e. PeerDAS activated) + let peerdas_enabled = self + .network_globals + .gossipsub_subscriptions + .read() + .iter() + .any(|topic| matches!(topic.kind(), &GossipKind::DataColumnSidecar(_))); + if peerdas_enabled { + self.maintain_custody_peers(); + } // Maintain minimum count for sync committee peers. self.maintain_sync_committee_peers(); @@ -3140,4 +3149,60 @@ mod tests { }) } } + + #[tokio::test] + async fn test_custody_peer_logic_only_runs_when_peerdas_enabled() { + use crate::types::{GossipEncoding, GossipTopic}; + + let mut peer_manager = build_peer_manager(5).await; + + // Set up sampling subnets so maintain_custody_peers would have work to do + *peer_manager.network_globals.sampling_subnets.write() = std::collections::HashSet::from([ + DataColumnSubnetId::new(0), + DataColumnSubnetId::new(1), + ]); + + // Test 1: No data column subscriptions - custody peer logic should NOT run + peer_manager.heartbeat(); + + // Should be no new DiscoverSubnetPeers events since PeerDAS is not enabled + let discovery_events: Vec<_> = peer_manager + .events + .iter() + .filter(|event| matches!(event, PeerManagerEvent::DiscoverSubnetPeers(_))) + .collect(); + assert!( + discovery_events.is_empty(), + "Should not generate discovery events when PeerDAS is disabled, but found: {:?}", + discovery_events + ); + + // Test 2: Add data column subscription - custody peer logic should run + let data_column_topic = GossipTopic::new( + GossipKind::DataColumnSidecar(DataColumnSubnetId::new(0)), + GossipEncoding::SSZSnappy, + [0, 0, 0, 0], // fork_digest + ); + peer_manager + .network_globals + .gossipsub_subscriptions + .write() + .insert(data_column_topic); + + // Clear any existing events to isolate the test + peer_manager.events.clear(); + + peer_manager.heartbeat(); + + // Should now have DiscoverSubnetPeers events since PeerDAS is enabled + let discovery_events: Vec<_> = peer_manager + .events + .iter() + .filter(|event| matches!(event, PeerManagerEvent::DiscoverSubnetPeers(_))) + .collect(); + assert!( + !discovery_events.is_empty(), + "Should generate discovery events when PeerDAS is enabled, but found no discovery events" + ); + } } diff --git a/beacon_node/network/src/subnet_service/attestation_subnets.rs 
b/beacon_node/network/src/subnet_service/attestation_subnets.rs deleted file mode 100644 index 0da27c6a21f..00000000000 --- a/beacon_node/network/src/subnet_service/attestation_subnets.rs +++ /dev/null @@ -1,681 +0,0 @@ -//! This service keeps track of which shard subnet the beacon node should be subscribed to at any -//! given time. It schedules subscriptions to shard subnets, requests peer discoveries and -//! determines whether attestations should be aggregated and/or passed to the beacon node. - -use super::SubnetServiceMessage; -use std::collections::HashSet; -use std::collections::{HashMap, VecDeque}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; -use std::time::Duration; - -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use delay_map::{HashMapDelay, HashSetDelay}; -use futures::prelude::*; -use lighthouse_network::{discv5::enr::NodeId, NetworkConfig, Subnet, SubnetDiscovery}; -use slot_clock::SlotClock; -use tracing::{debug, error, info, trace, warn}; -use types::{Attestation, EthSpec, Slot, SubnetId, ValidatorSubscription}; - -use crate::metrics; - -/// The minimum number of slots ahead that we attempt to discover peers for a subscription. If the -/// slot is less than this number, skip the peer discovery process. -/// Subnet discovery query takes at most 30 secs, 2 slots take 24s. -pub(crate) const MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD: u64 = 2; -/// The fraction of a slot that we subscribe to a subnet before the required slot. -/// -/// Currently a whole slot ahead. -const ADVANCE_SUBSCRIBE_SLOT_FRACTION: u32 = 1; - -/// The number of slots after an aggregator duty where we remove the entry from -/// `aggregate_validators_on_subnet` delay map. -const UNSUBSCRIBE_AFTER_AGGREGATOR_DUTY: u32 = 2; - -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub(crate) enum SubscriptionKind { - /// Long lived subscriptions. - /// - /// These have a longer duration and are advertised in our ENR. - LongLived, - /// Short lived subscriptions. - /// - /// Subscribing to these subnets has a short duration and we don't advertise it in our ENR. - ShortLived, -} - -/// A particular subnet at a given slot. -#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy)] -pub struct ExactSubnet { - /// The `SubnetId` associated with this subnet. - pub subnet_id: SubnetId, - /// The `Slot` associated with this subnet. - pub slot: Slot, -} - -pub struct AttestationService { - /// Queued events to return to the driving service. - events: VecDeque, - - /// A reference to the beacon chain to process received attestations. - pub(crate) beacon_chain: Arc>, - - /// Subnets we are currently subscribed to as short lived subscriptions. - /// - /// Once they expire, we unsubscribe from these. - /// We subscribe to subnets when we are an aggregator for an exact subnet. - short_lived_subscriptions: HashMapDelay, - - /// Subnets we are currently subscribed to as long lived subscriptions. - /// - /// We advertise these in our ENR. When these expire, the subnet is removed from our ENR. - /// These are required of all beacon nodes. The exact number is determined by the chain - /// specification. - long_lived_subscriptions: HashSet, - - /// Short lived subscriptions that need to be executed in the future. - scheduled_short_lived_subscriptions: HashSetDelay, - - /// A collection timeouts to track the existence of aggregate validator subscriptions at an - /// `ExactSubnet`. - aggregate_validators_on_subnet: Option>, - - /// The waker for the current thread. 
- waker: Option, - - /// The discovery mechanism of lighthouse is disabled. - discovery_disabled: bool, - - /// We are always subscribed to all subnets. - subscribe_all_subnets: bool, - - /// Our Discv5 node_id. - node_id: NodeId, - - /// Future used to manage subscribing and unsubscribing from long lived subnets. - next_long_lived_subscription_event: Pin>, - - /// Whether this node is a block proposer-only node. - proposer_only: bool, -} - -impl AttestationService { - /* Public functions */ - - /// Establish the service based on the passed configuration. - pub fn new(beacon_chain: Arc>, node_id: NodeId, config: &NetworkConfig) -> Self { - let slot_duration = beacon_chain.slot_clock.slot_duration(); - - if config.subscribe_all_subnets { - info!("Subscribing to all subnets"); - } else { - info!( - subnets_per_node = beacon_chain.spec.subnets_per_node, - subscription_duration_in_epochs = beacon_chain.spec.epochs_per_subnet_subscription, - "Deterministic long lived subnets enabled" - ); - } - - let track_validators = !config.import_all_attestations; - let aggregate_validators_on_subnet = - track_validators.then(|| HashSetDelay::new(slot_duration)); - let mut service = AttestationService { - events: VecDeque::with_capacity(10), - beacon_chain, - short_lived_subscriptions: HashMapDelay::new(slot_duration), - long_lived_subscriptions: HashSet::default(), - scheduled_short_lived_subscriptions: HashSetDelay::default(), - aggregate_validators_on_subnet, - waker: None, - discovery_disabled: config.disable_discovery, - subscribe_all_subnets: config.subscribe_all_subnets, - node_id, - next_long_lived_subscription_event: { - // Set a dummy sleep. Calculating the current subnet subscriptions will update this - // value with a smarter timing - Box::pin(tokio::time::sleep(Duration::from_secs(1))) - }, - proposer_only: config.proposer_only, - }; - - // If we are not subscribed to all subnets, handle the deterministic set of subnets - if !config.subscribe_all_subnets { - service.recompute_long_lived_subnets(); - } - - service - } - - /// Return count of all currently subscribed subnets (long-lived **and** short-lived). - #[cfg(test)] - pub fn subscription_count(&self) -> usize { - if self.subscribe_all_subnets { - self.beacon_chain.spec.attestation_subnet_count as usize - } else { - let count = self - .short_lived_subscriptions - .keys() - .chain(self.long_lived_subscriptions.iter()) - .collect::>() - .len(); - count - } - } - - /// Returns whether we are subscribed to a subnet for testing purposes. - #[cfg(test)] - pub(crate) fn is_subscribed( - &self, - subnet_id: &SubnetId, - subscription_kind: SubscriptionKind, - ) -> bool { - match subscription_kind { - SubscriptionKind::LongLived => self.long_lived_subscriptions.contains(subnet_id), - SubscriptionKind::ShortLived => self.short_lived_subscriptions.contains_key(subnet_id), - } - } - - #[cfg(test)] - pub(crate) fn long_lived_subscriptions(&self) -> &HashSet { - &self.long_lived_subscriptions - } - - /// Processes a list of validator subscriptions. - /// - /// This will: - /// - Register new validators as being known. - /// - Search for peers for required subnets. - /// - Request subscriptions for subnets on specific slots when required. - /// - Build the timeouts for each of these events. - /// - /// This returns a result simply for the ergonomics of using ?. The result can be - /// safely dropped. 
- pub fn validator_subscriptions( - &mut self, - subscriptions: impl Iterator, - ) -> Result<(), String> { - // If the node is in a proposer-only state, we ignore all subnet subscriptions. - if self.proposer_only { - return Ok(()); - } - - // Maps each subnet_id subscription to it's highest slot - let mut subnets_to_discover: HashMap = HashMap::new(); - - // Registers the validator with the attestation service. - for subscription in subscriptions { - metrics::inc_counter(&metrics::SUBNET_SUBSCRIPTION_REQUESTS); - - trace!(?subscription, "Validator subscription"); - - // Compute the subnet that is associated with this subscription - let subnet_id = match SubnetId::compute_subnet::( - subscription.slot, - subscription.attestation_committee_index, - subscription.committee_count_at_slot, - &self.beacon_chain.spec, - ) { - Ok(subnet_id) => subnet_id, - Err(e) => { - warn!( - error = ?e, - "Failed to compute subnet id for validator subscription" - ); - continue; - } - }; - // Ensure each subnet_id inserted into the map has the highest slot as it's value. - // Higher slot corresponds to higher min_ttl in the `SubnetDiscovery` entry. - if let Some(slot) = subnets_to_discover.get(&subnet_id) { - if subscription.slot > *slot { - subnets_to_discover.insert(subnet_id, subscription.slot); - } - } else if !self.discovery_disabled { - subnets_to_discover.insert(subnet_id, subscription.slot); - } - - let exact_subnet = ExactSubnet { - subnet_id, - slot: subscription.slot, - }; - - // Determine if the validator is an aggregator. If so, we subscribe to the subnet and - // if successful add the validator to a mapping of known aggregators for that exact - // subnet. - - if subscription.is_aggregator { - metrics::inc_counter(&metrics::SUBNET_SUBSCRIPTION_AGGREGATOR_REQUESTS); - if let Err(e) = self.subscribe_to_short_lived_subnet(exact_subnet) { - warn!(error = e, "Subscription to subnet error"); - } else { - trace!(?exact_subnet, "Subscribed to subnet for aggregator duties"); - } - } - } - - // If the discovery mechanism isn't disabled, attempt to set up a peer discovery for the - // required subnets. - if !self.discovery_disabled { - if let Err(e) = self.discover_peers_request( - subnets_to_discover - .into_iter() - .map(|(subnet_id, slot)| ExactSubnet { subnet_id, slot }), - ) { - warn!(error = e, "Discovery lookup request error"); - }; - } - - Ok(()) - } - - fn recompute_long_lived_subnets(&mut self) { - // Ensure the next computation is scheduled even if assigning subnets fails. - let next_subscription_event = self - .recompute_long_lived_subnets_inner() - .unwrap_or_else(|_| self.beacon_chain.slot_clock.slot_duration()); - - debug!("Recomputing deterministic long lived subnets"); - self.next_long_lived_subscription_event = - Box::pin(tokio::time::sleep(next_subscription_event)); - - if let Some(waker) = self.waker.as_ref() { - waker.wake_by_ref(); - } - } - - /// Gets the long lived subnets the node should be subscribed to during the current epoch and - /// the remaining duration for which they remain valid. 
- fn recompute_long_lived_subnets_inner(&mut self) -> Result { - let current_epoch = self.beacon_chain.epoch().map_err(|e| { - if !self - .beacon_chain - .slot_clock - .is_prior_to_genesis() - .unwrap_or(false) - { - error!(err = ?e,"Failed to get the current epoch from clock") - } - })?; - - let (subnets, next_subscription_epoch) = SubnetId::compute_subnets_for_epoch::( - self.node_id.raw(), - current_epoch, - &self.beacon_chain.spec, - ) - .map_err(|e| error!(err = e, "Could not compute subnets for current epoch"))?; - - let next_subscription_slot = - next_subscription_epoch.start_slot(T::EthSpec::slots_per_epoch()); - let next_subscription_event = self - .beacon_chain - .slot_clock - .duration_to_slot(next_subscription_slot) - .ok_or_else(|| { - error!("Failed to compute duration to next to long lived subscription event") - })?; - - self.update_long_lived_subnets(subnets.collect()); - - Ok(next_subscription_event) - } - - /// Updates the long lived subnets. - /// - /// New subnets are registered as subscribed, removed subnets as unsubscribed and the Enr - /// updated accordingly. - fn update_long_lived_subnets(&mut self, mut subnets: HashSet) { - info!(subnets = ?subnets.iter().collect::>(),"Subscribing to long-lived subnets"); - for subnet in &subnets { - // Add the events for those subnets that are new as long lived subscriptions. - if !self.long_lived_subscriptions.contains(subnet) { - // Check if this subnet is new and send the subscription event if needed. - if !self.short_lived_subscriptions.contains_key(subnet) { - debug!( - ?subnet, - subscription_kind = ?SubscriptionKind::LongLived, - "Subscribing to subnet" - ); - self.queue_event(SubnetServiceMessage::Subscribe(Subnet::Attestation( - *subnet, - ))); - } - self.queue_event(SubnetServiceMessage::EnrAdd(Subnet::Attestation(*subnet))); - if !self.discovery_disabled { - self.queue_event(SubnetServiceMessage::DiscoverPeers(vec![SubnetDiscovery { - subnet: Subnet::Attestation(*subnet), - min_ttl: None, - }])) - } - } - } - - // Update the long_lived_subnets set and check for subnets that are being removed - std::mem::swap(&mut self.long_lived_subscriptions, &mut subnets); - for subnet in subnets { - if !self.long_lived_subscriptions.contains(&subnet) { - self.handle_removed_subnet(subnet, SubscriptionKind::LongLived); - } - } - } - - /// Checks if we have subscribed aggregate validators for the subnet. If not, checks the gossip - /// verification, re-propagates and returns false. - pub fn should_process_attestation( - &self, - subnet: SubnetId, - attestation: &Attestation, - ) -> bool { - // Proposer-only mode does not need to process attestations - if self.proposer_only { - return false; - } - self.aggregate_validators_on_subnet - .as_ref() - .map(|tracked_vals| { - tracked_vals.contains_key(&ExactSubnet { - subnet_id: subnet, - slot: attestation.data().slot, - }) - }) - .unwrap_or(true) - } - - /* Internal private functions */ - - /// Adds an event to the event queue and notifies that this service is ready to be polled - /// again. - fn queue_event(&mut self, ev: SubnetServiceMessage) { - self.events.push_back(ev); - if let Some(waker) = &self.waker { - waker.wake_by_ref() - } - } - /// Checks if there are currently queued discovery requests and the time required to make the - /// request. - /// - /// If there is sufficient time, queues a peer discovery request for all the required subnets. 
- fn discover_peers_request( - &mut self, - exact_subnets: impl Iterator, - ) -> Result<(), &'static str> { - let current_slot = self - .beacon_chain - .slot_clock - .now() - .ok_or("Could not get the current slot")?; - - let discovery_subnets: Vec = exact_subnets - .filter_map(|exact_subnet| { - // Check if there is enough time to perform a discovery lookup. - if exact_subnet.slot - >= current_slot.saturating_add(MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD) - { - // Send out an event to start looking for peers. - // Require the peer for an additional slot to ensure we keep the peer for the - // duration of the subscription. - let min_ttl = self - .beacon_chain - .slot_clock - .duration_to_slot(exact_subnet.slot + 1) - .map(|duration| std::time::Instant::now() + duration); - Some(SubnetDiscovery { - subnet: Subnet::Attestation(exact_subnet.subnet_id), - min_ttl, - }) - } else { - // We may want to check the global PeerInfo to see estimated timeouts for each - // peer before they can be removed. - warn!( - subnet_id = ?exact_subnet, - "Not enough time for a discovery search" - ); - None - } - }) - .collect(); - - if !discovery_subnets.is_empty() { - self.queue_event(SubnetServiceMessage::DiscoverPeers(discovery_subnets)); - } - Ok(()) - } - - // Subscribes to the subnet if it should be done immediately, or schedules it if required. - fn subscribe_to_short_lived_subnet( - &mut self, - ExactSubnet { subnet_id, slot }: ExactSubnet, - ) -> Result<(), &'static str> { - let slot_duration = self.beacon_chain.slot_clock.slot_duration(); - - // The short time we schedule the subscription before it's actually required. This - // ensures we are subscribed on time, and allows consecutive subscriptions to the same - // subnet to overlap, reducing subnet churn. - let advance_subscription_duration = slot_duration / ADVANCE_SUBSCRIBE_SLOT_FRACTION; - // The time to the required slot. - let time_to_subscription_slot = self - .beacon_chain - .slot_clock - .duration_to_slot(slot) - .unwrap_or_default(); // If this is a past slot we will just get a 0 duration. - - // Calculate how long before we need to subscribe to the subnet. - let time_to_subscription_start = - time_to_subscription_slot.saturating_sub(advance_subscription_duration); - - // The time after a duty slot where we no longer need it in the `aggregate_validators_on_subnet` - // delay map. - let time_to_unsubscribe = - time_to_subscription_slot + UNSUBSCRIBE_AFTER_AGGREGATOR_DUTY * slot_duration; - if let Some(tracked_vals) = self.aggregate_validators_on_subnet.as_mut() { - tracked_vals.insert_at(ExactSubnet { subnet_id, slot }, time_to_unsubscribe); - } - - // If the subscription should be done in the future, schedule it. Otherwise subscribe - // immediately. - if time_to_subscription_start.is_zero() { - // This is a current or past slot, we subscribe immediately. - self.subscribe_to_short_lived_subnet_immediately(subnet_id, slot + 1)?; - } else { - // This is a future slot, schedule subscribing. - trace!(subnet = ?subnet_id, ?time_to_subscription_start,"Scheduling subnet subscription"); - self.scheduled_short_lived_subscriptions - .insert_at(ExactSubnet { subnet_id, slot }, time_to_subscription_start); - } - - Ok(()) - } - - /* A collection of functions that handle the various timeouts */ - - /// Registers a subnet as subscribed. - /// - /// Checks that the time in which the subscription would end is not in the past. If we are - /// already subscribed, extends the timeout if necessary. 
If this is a new subscription, we send - /// out the appropriate events. - /// - /// On determinist long lived subnets, this is only used for short lived subscriptions. - fn subscribe_to_short_lived_subnet_immediately( - &mut self, - subnet_id: SubnetId, - end_slot: Slot, - ) -> Result<(), &'static str> { - if self.subscribe_all_subnets { - // Case not handled by this service. - return Ok(()); - } - - let time_to_subscription_end = self - .beacon_chain - .slot_clock - .duration_to_slot(end_slot) - .unwrap_or_default(); - - // First check this is worth doing. - if time_to_subscription_end.is_zero() { - return Err("Time when subscription would end has already passed."); - } - - let subscription_kind = SubscriptionKind::ShortLived; - - // We need to check and add a subscription for the right kind, regardless of the presence - // of the subnet as a subscription of the other kind. This is mainly since long lived - // subscriptions can be removed at any time when a validator goes offline. - - let (subscriptions, already_subscribed_as_other_kind) = ( - &mut self.short_lived_subscriptions, - self.long_lived_subscriptions.contains(&subnet_id), - ); - - match subscriptions.get(&subnet_id) { - Some(current_end_slot) => { - // We are already subscribed. Check if we need to extend the subscription. - if &end_slot > current_end_slot { - trace!( - subnet = ?subnet_id, - prev_end_slot = %current_end_slot, - new_end_slot = %end_slot, - ?subscription_kind, - "Extending subscription to subnet" - ); - subscriptions.insert_at(subnet_id, end_slot, time_to_subscription_end); - } - } - None => { - // This is a new subscription. Add with the corresponding timeout and send the - // notification. - subscriptions.insert_at(subnet_id, end_slot, time_to_subscription_end); - - // Inform of the subscription. - if !already_subscribed_as_other_kind { - debug!( - subnet = ?subnet_id, - %end_slot, - ?subscription_kind, - "Subscribing to subnet" - ); - self.queue_event(SubnetServiceMessage::Subscribe(Subnet::Attestation( - subnet_id, - ))); - } - } - } - - Ok(()) - } - - // Unsubscribes from a subnet that was removed if it does not continue to exist as a - // subscription of the other kind. For long lived subscriptions, it also removes the - // advertisement from our ENR. - fn handle_removed_subnet(&mut self, subnet_id: SubnetId, subscription_kind: SubscriptionKind) { - let exists_in_other_subscriptions = match subscription_kind { - SubscriptionKind::LongLived => self.short_lived_subscriptions.contains_key(&subnet_id), - SubscriptionKind::ShortLived => self.long_lived_subscriptions.contains(&subnet_id), - }; - - if !exists_in_other_subscriptions { - // Subscription no longer exists as short lived or long lived. - debug!( - subnet = ?subnet_id, - ?subscription_kind, - "Unsubscribing from subnet" - ); - self.queue_event(SubnetServiceMessage::Unsubscribe(Subnet::Attestation( - subnet_id, - ))); - } - - if subscription_kind == SubscriptionKind::LongLived { - // Remove from our ENR even if we remain subscribed in other way. - self.queue_event(SubnetServiceMessage::EnrRemove(Subnet::Attestation( - subnet_id, - ))); - } - } -} - -impl Stream for AttestationService { - type Item = SubnetServiceMessage; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - // Update the waker if needed. - if let Some(waker) = &self.waker { - if !waker.will_wake(cx.waker()) { - self.waker = Some(cx.waker().clone()); - } - } else { - self.waker = Some(cx.waker().clone()); - } - - // Send out any generated events. 
- if let Some(event) = self.events.pop_front() { - return Poll::Ready(Some(event)); - } - - // If we aren't subscribed to all subnets, handle the deterministic long-lived subnets - if !self.subscribe_all_subnets { - match self.next_long_lived_subscription_event.as_mut().poll(cx) { - Poll::Ready(_) => { - self.recompute_long_lived_subnets(); - // We re-wake the task as there could be other subscriptions to process - self.waker - .as_ref() - .expect("Waker has been set") - .wake_by_ref(); - } - Poll::Pending => {} - } - } - - // Process scheduled subscriptions that might be ready, since those can extend a soon to - // expire subscription. - match self.scheduled_short_lived_subscriptions.poll_next_unpin(cx) { - Poll::Ready(Some(Ok(ExactSubnet { subnet_id, slot }))) => { - if let Err(e) = - self.subscribe_to_short_lived_subnet_immediately(subnet_id, slot + 1) - { - debug!(subnet = ?subnet_id, err = e,"Failed to subscribe to short lived subnet"); - } - self.waker - .as_ref() - .expect("Waker has been set") - .wake_by_ref(); - } - Poll::Ready(Some(Err(e))) => { - error!( - error = e, - "Failed to check for scheduled subnet subscriptions" - ); - } - Poll::Ready(None) | Poll::Pending => {} - } - - // Finally process any expired subscriptions. - match self.short_lived_subscriptions.poll_next_unpin(cx) { - Poll::Ready(Some(Ok((subnet_id, _end_slot)))) => { - self.handle_removed_subnet(subnet_id, SubscriptionKind::ShortLived); - // We re-wake the task as there could be other subscriptions to process - self.waker - .as_ref() - .expect("Waker has been set") - .wake_by_ref(); - } - Poll::Ready(Some(Err(e))) => { - error!(error = e, "Failed to check for subnet unsubscription times"); - } - Poll::Ready(None) | Poll::Pending => {} - } - - // Poll to remove entries on expiration, no need to act on expiration events. - if let Some(tracked_vals) = self.aggregate_validators_on_subnet.as_mut() { - if let Poll::Ready(Some(Err(e))) = tracked_vals.poll_next_unpin(cx) { - error!( - error = e, - "Failed to check for aggregate validator on subnet expirations" - ); - } - } - - Poll::Pending - } -} diff --git a/beacon_node/network/src/subnet_service/sync_subnets.rs b/beacon_node/network/src/subnet_service/sync_subnets.rs deleted file mode 100644 index 6b3834e1958..00000000000 --- a/beacon_node/network/src/subnet_service/sync_subnets.rs +++ /dev/null @@ -1,345 +0,0 @@ -//! This service keeps track of which sync committee subnet the beacon node should be subscribed to at any -//! given time. It schedules subscriptions to sync committee subnets and requests peer discoveries. - -use std::collections::{hash_map::Entry, HashMap, VecDeque}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; -use std::time::Duration; - -use futures::prelude::*; -use tracing::{debug, error, trace, warn}; - -use super::SubnetServiceMessage; -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use delay_map::HashSetDelay; -use lighthouse_network::{NetworkConfig, Subnet, SubnetDiscovery}; -use slot_clock::SlotClock; -use types::{Epoch, EthSpec, SyncCommitteeSubscription, SyncSubnetId}; - -use crate::metrics; - -/// The minimum number of slots ahead that we attempt to discover peers for a subscription. If the -/// slot is less than this number, skip the peer discovery process. -/// Subnet discovery query takes at most 30 secs, 2 slots take 24s. -const MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD: u64 = 2; - -/// A particular subnet at a given slot. 
-#[derive(PartialEq, Eq, Hash, Clone, Debug)] -pub struct ExactSubnet { - /// The `SyncSubnetId` associated with this subnet. - pub subnet_id: SyncSubnetId, - /// The epoch until which we need to stay subscribed to the subnet. - pub until_epoch: Epoch, -} -pub struct SyncCommitteeService { - /// Queued events to return to the driving service. - events: VecDeque, - - /// A reference to the beacon chain to process received attestations. - pub(crate) beacon_chain: Arc>, - - /// The collection of all currently subscribed subnets. - subscriptions: HashMap, - - /// A collection of timeouts for when to unsubscribe from a subnet. - unsubscriptions: HashSetDelay, - - /// The waker for the current thread. - waker: Option, - - /// The discovery mechanism of lighthouse is disabled. - discovery_disabled: bool, - - /// We are always subscribed to all subnets. - subscribe_all_subnets: bool, - - /// Whether this node is a block proposer-only node. - proposer_only: bool, -} - -impl SyncCommitteeService { - /* Public functions */ - - pub fn new(beacon_chain: Arc>, config: &NetworkConfig) -> Self { - let spec = &beacon_chain.spec; - let epoch_duration_secs = - beacon_chain.slot_clock.slot_duration().as_secs() * T::EthSpec::slots_per_epoch(); - let default_timeout = - epoch_duration_secs.saturating_mul(spec.epochs_per_sync_committee_period.as_u64()); - - SyncCommitteeService { - events: VecDeque::with_capacity(10), - beacon_chain, - subscriptions: HashMap::new(), - unsubscriptions: HashSetDelay::new(Duration::from_secs(default_timeout)), - waker: None, - subscribe_all_subnets: config.subscribe_all_subnets, - discovery_disabled: config.disable_discovery, - proposer_only: config.proposer_only, - } - } - - /// Return count of all currently subscribed subnets. - #[cfg(test)] - pub fn subscription_count(&self) -> usize { - use types::consts::altair::SYNC_COMMITTEE_SUBNET_COUNT; - if self.subscribe_all_subnets { - SYNC_COMMITTEE_SUBNET_COUNT as usize - } else { - self.subscriptions.len() - } - } - - /// Processes a list of sync committee subscriptions. - /// - /// This will: - /// - Search for peers for required subnets. - /// - Request subscriptions required subnets. - /// - Build the timeouts for each of these events. - /// - /// This returns a result simply for the ergonomics of using ?. The result can be - /// safely dropped. - pub fn validator_subscriptions( - &mut self, - subscriptions: Vec, - ) -> Result<(), String> { - // A proposer-only node does not subscribe to any sync-committees - if self.proposer_only { - return Ok(()); - } - - let mut subnets_to_discover = Vec::new(); - for subscription in subscriptions { - metrics::inc_counter(&metrics::SYNC_COMMITTEE_SUBSCRIPTION_REQUESTS); - //NOTE: We assume all subscriptions have been verified before reaching this service - - // Registers the validator with the subnet service. - // This will subscribe to long-lived random subnets if required. 
- trace!(?subscription, "Sync committee subscription"); - - let subnet_ids = match SyncSubnetId::compute_subnets_for_sync_committee::( - &subscription.sync_committee_indices, - ) { - Ok(subnet_ids) => subnet_ids, - Err(e) => { - warn!( - error = ?e, - validator_index = subscription.validator_index, - "Failed to compute subnet id for sync committee subscription" - ); - continue; - } - }; - - for subnet_id in subnet_ids { - let exact_subnet = ExactSubnet { - subnet_id, - until_epoch: subscription.until_epoch, - }; - subnets_to_discover.push(exact_subnet.clone()); - if let Err(e) = self.subscribe_to_subnet(exact_subnet.clone()) { - warn!( - error = e, - validator_index = subscription.validator_index, - "Subscription to sync subnet error" - ); - } else { - trace!( - ?exact_subnet, - validator_index = subscription.validator_index, - "Subscribed to subnet for sync committee duties" - ); - } - } - } - // If the discovery mechanism isn't disabled, attempt to set up a peer discovery for the - // required subnets. - if !self.discovery_disabled { - if let Err(e) = self.discover_peers_request(subnets_to_discover.iter()) { - warn!(error = e, "Discovery lookup request error"); - }; - } - - // pre-emptively wake the thread to check for new events - if let Some(waker) = &self.waker { - waker.wake_by_ref(); - } - Ok(()) - } - - /* Internal private functions */ - - /// Checks if there are currently queued discovery requests and the time required to make the - /// request. - /// - /// If there is sufficient time, queues a peer discovery request for all the required subnets. - fn discover_peers_request<'a>( - &mut self, - exact_subnets: impl Iterator, - ) -> Result<(), &'static str> { - let current_slot = self - .beacon_chain - .slot_clock - .now() - .ok_or("Could not get the current slot")?; - - let slots_per_epoch = T::EthSpec::slots_per_epoch(); - - let discovery_subnets: Vec = exact_subnets - .filter_map(|exact_subnet| { - let until_slot = exact_subnet.until_epoch.end_slot(slots_per_epoch); - // check if there is enough time to perform a discovery lookup - if until_slot >= current_slot.saturating_add(MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD) { - // if the slot is more than epoch away, add an event to start looking for peers - // add one slot to ensure we keep the peer for the subscription slot - let min_ttl = self - .beacon_chain - .slot_clock - .duration_to_slot(until_slot + 1) - .map(|duration| std::time::Instant::now() + duration); - Some(SubnetDiscovery { - subnet: Subnet::SyncCommittee(exact_subnet.subnet_id), - min_ttl, - }) - } else { - // We may want to check the global PeerInfo to see estimated timeouts for each - // peer before they can be removed. - warn!( - subnet_id = ?exact_subnet, - "Not enough time for a discovery search" - ); - None - } - }) - .collect(); - - if !discovery_subnets.is_empty() { - self.events - .push_back(SubnetServiceMessage::DiscoverPeers(discovery_subnets)); - } - Ok(()) - } - - /// Adds a subscription event and an associated unsubscription event if required. - fn subscribe_to_subnet(&mut self, exact_subnet: ExactSubnet) -> Result<(), &'static str> { - // Return if we have subscribed to all subnets - if self.subscribe_all_subnets { - return Ok(()); - } - - // Return if we already have a subscription for exact_subnet - if self.subscriptions.get(&exact_subnet.subnet_id) == Some(&exact_subnet.until_epoch) { - return Ok(()); - } - - // Return if we already have subscription set to expire later than the current request. 
- if let Some(until_epoch) = self.subscriptions.get(&exact_subnet.subnet_id) { - if *until_epoch >= exact_subnet.until_epoch { - return Ok(()); - } - } - - // initialise timing variables - let current_slot = self - .beacon_chain - .slot_clock - .now() - .ok_or("Could not get the current slot")?; - - let slots_per_epoch = T::EthSpec::slots_per_epoch(); - let until_slot = exact_subnet.until_epoch.end_slot(slots_per_epoch); - // Calculate the duration to the unsubscription event. - let expected_end_subscription_duration = if current_slot >= until_slot { - warn!( - %current_slot, - ?exact_subnet, - "Sync committee subscription is past expiration" - ); - return Ok(()); - } else { - let slot_duration = self.beacon_chain.slot_clock.slot_duration(); - - // the duration until we no longer need this subscription. We assume a single slot is - // sufficient. - self.beacon_chain - .slot_clock - .duration_to_slot(until_slot) - .ok_or("Unable to determine duration to unsubscription slot")? - + slot_duration - }; - - if let Entry::Vacant(e) = self.subscriptions.entry(exact_subnet.subnet_id) { - // We are not currently subscribed and have no waiting subscription, create one - debug!(subnet = *exact_subnet.subnet_id, until_epoch = ?exact_subnet.until_epoch, "Subscribing to subnet"); - e.insert(exact_subnet.until_epoch); - self.events - .push_back(SubnetServiceMessage::Subscribe(Subnet::SyncCommittee( - exact_subnet.subnet_id, - ))); - - // add the subnet to the ENR bitfield - self.events - .push_back(SubnetServiceMessage::EnrAdd(Subnet::SyncCommittee( - exact_subnet.subnet_id, - ))); - - // add an unsubscription event to remove ourselves from the subnet once completed - self.unsubscriptions - .insert_at(exact_subnet.subnet_id, expected_end_subscription_duration); - } else { - // We are already subscribed, extend the unsubscription duration - self.unsubscriptions - .update_timeout(&exact_subnet.subnet_id, expected_end_subscription_duration); - } - - Ok(()) - } - - /// A queued unsubscription is ready. 
- fn handle_unsubscriptions(&mut self, subnet_id: SyncSubnetId) { - debug!(subnet = *subnet_id, "Unsubscribing from subnet"); - - self.subscriptions.remove(&subnet_id); - self.events - .push_back(SubnetServiceMessage::Unsubscribe(Subnet::SyncCommittee( - subnet_id, - ))); - - self.events - .push_back(SubnetServiceMessage::EnrRemove(Subnet::SyncCommittee( - subnet_id, - ))); - } -} - -impl<T: BeaconChainTypes> Stream for SyncCommitteeService<T> { - type Item = SubnetServiceMessage; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { - // update the waker if needed - if let Some(waker) = &self.waker { - if !waker.will_wake(cx.waker()) { - self.waker = Some(cx.waker().clone()); - } - } else { - self.waker = Some(cx.waker().clone()); - } - - // process any un-subscription events - match self.unsubscriptions.poll_next_unpin(cx) { - Poll::Ready(Some(Ok(exact_subnet))) => self.handle_unsubscriptions(exact_subnet), - Poll::Ready(Some(Err(e))) => { - error!(error = e, "Failed to check for subnet unsubscription times"); - } - Poll::Ready(None) | Poll::Pending => {} - } - - // process any generated events - if let Some(event) = self.events.pop_front() { - return Poll::Ready(Some(event)); - } - - Poll::Pending - } -} From ffa7b2b2b9e3b4e70678e2c749b8bc45234febd7 Mon Sep 17 00:00:00 2001 From: Lion - dapplion <35266934+dapplion@users.noreply.github.com> Date: Thu, 25 Sep 2025 05:52:27 +0200 Subject: [PATCH 27/45] Only mark block lookups as pending if block is importing from gossip (#8112) - PR https://github.com/sigp/lighthouse/pull/8045 introduced a regression in how lookup sync interacts with the da_checker. Now in `unstable`, block import from the HTTP API also inserts the block into the da_checker while the block is being execution verified. If lookup sync finds the block in the da_checker in the `NotValidated` state, it expects a `GossipBlockProcessResult` message sometime later. That message is only sent after block import in gossip. I confirmed in our node's logs that 4/4 cases of stuck lookups were caused by this sequence of events: - Receive block through API, insert into da_checker in fn process_block in put_pre_execution_block - Create lookup and leave in AwaitingDownload(block in processing cache) state - Block from HTTP API finishes importing - Lookup is left stuck Closes https://github.com/sigp/lighthouse/issues/8104 - https://github.com/sigp/lighthouse/pull/8110 was my initial solution attempt but we can't send the `GossipBlockProcessResult` event from the `http_api` crate without adding new channels, which seems messy. For a given node it's rare that a lookup is created at the same time that a block is being published. This PR solves https://github.com/sigp/lighthouse/issues/8104 by allowing lookup sync to import the block twice in that case.
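Roughly, the intended gating on the lookup side looks like the following. This is a sketch only, not the literal hunk from `network_context.rs` below: the variant and helper names are taken from the diff, it is written as it would sit inside `SyncNetworkContext` (so `self` and `block_root` come from the surrounding function), and the elided arms keep their existing handling.

```rust
match self.chain.get_block_process_status(&block_root) {
    // A gossip import will later emit `GossipBlockProcessResult`, so the lookup
    // can safely be parked until that event arrives.
    BlockProcessStatus::NotValidated(_, BlockImportSource::Gossip) => {
        return Ok(LookupRequestResult::Pending("block in processing cache"));
    }
    // Imports from other sources (e.g. the HTTP API) never notify sync, so the
    // lookup proceeds with its own download/import, accepting a rare double
    // import instead of getting stuck.
    BlockProcessStatus::NotValidated(_, _) => {}
    // Unknown / execution-validated handling is unchanged and elided here.
    _ => {}
}
```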
Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com> --- .../beacon_chain/src/beacon_block_streamer.rs | 2 +- beacon_node/beacon_chain/src/beacon_chain.rs | 9 +++-- .../src/data_availability_checker.rs | 7 ++-- .../overflow_lru_cache.rs | 37 +++++++++++++------ .../sync/block_lookups/single_block_lookup.rs | 2 +- .../network/src/sync/network_context.rs | 32 +++++++++++----- beacon_node/network/src/sync/tests/lookups.rs | 6 +-- consensus/types/src/beacon_block.rs | 1 + 8 files changed, 63 insertions(+), 33 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_block_streamer.rs b/beacon_node/beacon_chain/src/beacon_block_streamer.rs index d4ce38927b2..c816a0b29f3 100644 --- a/beacon_node/beacon_chain/src/beacon_block_streamer.rs +++ b/beacon_node/beacon_chain/src/beacon_block_streamer.rs @@ -404,7 +404,7 @@ impl BeaconBlockStreamer { if self.check_caches == CheckCaches::Yes { match self.beacon_chain.get_block_process_status(&root) { BlockProcessStatus::Unknown => None, - BlockProcessStatus::NotValidated(block) + BlockProcessStatus::NotValidated(block, _) | BlockProcessStatus::ExecutionValidated(block) => { metrics::inc_counter(&metrics::BEACON_REQRESP_PRE_IMPORT_CACHE_HITS); Some(block) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 4f0c6aada0a..08e0d1c6745 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -334,7 +334,7 @@ pub enum BlockProcessStatus { /// Block is not in any pre-import cache. Block may be in the data-base or in the fork-choice. Unknown, /// Block is currently processing but not yet validated. - NotValidated(Arc>), + NotValidated(Arc>, BlockImportSource), /// Block is fully valid, but not yet imported. It's cached in the da_checker while awaiting /// missing block components. ExecutionValidated(Arc>), @@ -3351,8 +3351,11 @@ impl BeaconChain { ); } - self.data_availability_checker - .put_pre_execution_block(block_root, unverified_block.block_cloned())?; + self.data_availability_checker.put_pre_execution_block( + block_root, + unverified_block.block_cloned(), + block_source, + )?; // Start the Prometheus timer. let _full_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index a0ad1c2112d..43b7d8f7ea3 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -21,8 +21,8 @@ use task_executor::TaskExecutor; use tracing::{debug, error, instrument}; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ - BlobSidecarList, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256, - SignedBeaconBlock, Slot, + BlobSidecarList, BlockImportSource, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, + EthSpec, Hash256, SignedBeaconBlock, Slot, }; mod error; @@ -354,9 +354,10 @@ impl DataAvailabilityChecker { &self, block_root: Hash256, block: Arc>, + source: BlockImportSource, ) -> Result<(), Error> { self.availability_cache - .put_pre_execution_block(block_root, block) + .put_pre_execution_block(block_root, block, source) } /// Removes a pre-execution block from the cache. 
diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index bb440096627..42f6dbd8567 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -19,13 +19,14 @@ use tracing::{Span, debug, debug_span}; use types::beacon_block_body::KzgCommitments; use types::blob_sidecar::BlobIdentifier; use types::{ - BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, - Hash256, RuntimeFixedVector, RuntimeVariableList, SignedBeaconBlock, + BlobSidecar, BlockImportSource, ChainSpec, ColumnIndex, DataColumnSidecar, + DataColumnSidecarList, Epoch, EthSpec, Hash256, RuntimeFixedVector, RuntimeVariableList, + SignedBeaconBlock, }; #[derive(Clone)] pub enum CachedBlock { - PreExecution(Arc>), + PreExecution(Arc>, BlockImportSource), Executed(Box>), } @@ -42,7 +43,7 @@ impl CachedBlock { fn as_block(&self) -> &SignedBeaconBlock { match self { - CachedBlock::PreExecution(b) => b, + CachedBlock::PreExecution(b, _) => b, CachedBlock::Executed(b) => b.as_block(), } } @@ -135,9 +136,13 @@ impl PendingComponents { /// Inserts a pre-execution block into the cache. /// This does NOT override an existing executed block. - pub fn insert_pre_execution_block(&mut self, block: Arc>) { + pub fn insert_pre_execution_block( + &mut self, + block: Arc>, + source: BlockImportSource, + ) { if self.block.is_none() { - self.block = Some(CachedBlock::PreExecution(block)) + self.block = Some(CachedBlock::PreExecution(block, source)) } } @@ -433,7 +438,9 @@ impl DataAvailabilityCheckerInner { .peek(block_root) .and_then(|pending_components| { pending_components.block.as_ref().map(|block| match block { - CachedBlock::PreExecution(b) => BlockProcessStatus::NotValidated(b.clone()), + CachedBlock::PreExecution(b, source) => { + BlockProcessStatus::NotValidated(b.clone(), *source) + } CachedBlock::Executed(b) => { BlockProcessStatus::ExecutionValidated(b.block_cloned()) } @@ -693,11 +700,12 @@ impl DataAvailabilityCheckerInner { &self, block_root: Hash256, block: Arc>, + source: BlockImportSource, ) -> Result<(), AvailabilityCheckError> { let epoch = block.epoch(); let pending_components = self.update_or_insert_pending_components(block_root, epoch, |pending_components| { - pending_components.insert_pre_execution_block(block); + pending_components.insert_pre_execution_block(block, source); Ok(()) })?; @@ -718,7 +726,7 @@ impl DataAvailabilityCheckerInner { /// This does NOT remove an existing executed block. pub fn remove_pre_execution_block(&self, block_root: &Hash256) { // The read lock is immediately dropped so we can safely remove the block from the cache. 
- if let Some(BlockProcessStatus::NotValidated(_)) = self.get_cached_block(block_root) { + if let Some(BlockProcessStatus::NotValidated(_, _)) = self.get_cached_block(block_root) { self.critical.write().pop(block_root); } } @@ -1459,9 +1467,13 @@ mod pending_components_tests { let mut pending_component = >::empty(block_root, max_len); let pre_execution_block = Arc::new(pre_execution_block); - pending_component.insert_pre_execution_block(pre_execution_block.clone()); + pending_component + .insert_pre_execution_block(pre_execution_block.clone(), BlockImportSource::Gossip); assert!( - matches!(pending_component.block, Some(CachedBlock::PreExecution(_))), + matches!( + pending_component.block, + Some(CachedBlock::PreExecution(_, _)) + ), "pre execution block inserted" ); @@ -1471,7 +1483,8 @@ mod pending_components_tests { "executed block inserted" ); - pending_component.insert_pre_execution_block(pre_execution_block); + pending_component + .insert_pre_execution_block(pre_execution_block, BlockImportSource::Gossip); assert!( matches!(pending_component.block, Some(CachedBlock::Executed(_))), "executed block should remain" diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 36509d2563e..8fb3248a871 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -219,7 +219,7 @@ impl SingleBlockLookup { // can assert that this is the correct value of `blob_kzg_commitments_count`. match cx.chain.get_block_process_status(&self.block_root) { BlockProcessStatus::Unknown => None, - BlockProcessStatus::NotValidated(block) + BlockProcessStatus::NotValidated(block, _) | BlockProcessStatus::ExecutionValidated(block) => Some(block.clone()), } }) { diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 17a42957009..ac2991c1474 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -49,8 +49,8 @@ use tokio::sync::mpsc; use tracing::{Span, debug, debug_span, error, warn}; use types::blob_sidecar::FixedBlobSidecarList; use types::{ - BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, ForkContext, - Hash256, SignedBeaconBlock, Slot, + BlobSidecar, BlockImportSource, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, + ForkContext, Hash256, SignedBeaconBlock, Slot, }; pub mod custody; @@ -835,14 +835,26 @@ impl SyncNetworkContext { match self.chain.get_block_process_status(&block_root) { // Unknown block, continue request to download BlockProcessStatus::Unknown => {} - // Block is known are currently processing, expect a future event with the result of - // processing. - BlockProcessStatus::NotValidated { .. } => { - // Lookup sync event safety: If the block is currently in the processing cache, we - // are guaranteed to receive a `SyncMessage::GossipBlockProcessResult` that will - // make progress on this lookup - return Ok(LookupRequestResult::Pending("block in processing cache")); - } + // Block is known and currently processing. Imports from gossip and HTTP API insert the + // block in the da_cache. However, HTTP API is unable to notify sync when it completes + // block import. Returning `Pending` here will result in stuck lookups if the block is + // importing from sync. 
+ BlockProcessStatus::NotValidated(_, source) => match source { + BlockImportSource::Gossip => { + // Lookup sync event safety: If the block is currently in the processing cache, we + // are guaranteed to receive a `SyncMessage::GossipBlockProcessResult` that will + // make progress on this lookup + return Ok(LookupRequestResult::Pending("block in processing cache")); + } + BlockImportSource::Lookup + | BlockImportSource::RangeSync + | BlockImportSource::HttpApi => { + // Lookup, RangeSync or HttpApi block import don't emit the GossipBlockProcessResult + // event. If a lookup happens to be created during block import from one of + // those sources just import the block twice. Otherwise the lookup will get + // stuck. Double imports are fine, they just waste resources. + } + }, // Block is fully validated. If it's not yet imported it's waiting for missing block // components. Consider this request completed and do nothing. BlockProcessStatus::ExecutionValidated { .. } => { diff --git a/beacon_node/network/src/sync/tests/lookups.rs b/beacon_node/network/src/sync/tests/lookups.rs index 27968a06351..fc641861754 100644 --- a/beacon_node/network/src/sync/tests/lookups.rs +++ b/beacon_node/network/src/sync/tests/lookups.rs @@ -41,8 +41,8 @@ use slot_clock::{SlotClock, TestingSlotClock}; use tokio::sync::mpsc; use tracing::info; use types::{ - BeaconState, BeaconStateBase, BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, ForkName, - Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot, + BeaconState, BeaconStateBase, BlobSidecar, BlockImportSource, DataColumnSidecar, EthSpec, + ForkContext, ForkName, Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot, data_column_sidecar::ColumnIndex, test_utils::{SeedableRng, TestRandom, XorShiftRng}, }; @@ -1113,7 +1113,7 @@ impl TestRig { self.harness .chain .data_availability_checker - .put_pre_execution_block(block.canonical_root(), block) + .put_pre_execution_block(block.canonical_root(), block, BlockImportSource::Gossip) .unwrap(); } diff --git a/consensus/types/src/beacon_block.rs b/consensus/types/src/beacon_block.rs index f4e4e369661..61c32dd4ac9 100644 --- a/consensus/types/src/beacon_block.rs +++ b/consensus/types/src/beacon_block.rs @@ -843,6 +843,7 @@ impl<'de, E: EthSpec, Payload: AbstractExecPayload> ContextDeserialize<'de, F } } +#[derive(Clone, Copy)] pub enum BlockImportSource { Gossip, Lookup, From 20c6ce455300e26815540acc112a5a1f6094f61c Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Fri, 26 Sep 2025 02:12:47 -0700 Subject: [PATCH 28/45] Fulu testnet configs (#8117) Holesky - #8096 Hoodi - #8097 Sepolia - #8099 Testnet configs for Holesky, Hoodi and Sepolia Holesky - https://github.com/eth-clients/holesky/pull/132 Hoodi - https://github.com/eth-clients/hoodi/pull/21 Sepolia - https://github.com/eth-clients/sepolia/pull/111 Co-Authored-By: Eitan Seri- Levi --- .../holesky/config.yaml | 33 +++++++++++++++++- .../hoodi/config.yaml | 34 ++++++++++++++++++- .../sepolia/config.yaml | 34 ++++++++++++++++++- 3 files changed, 98 insertions(+), 3 deletions(-) diff --git a/common/eth2_network_config/built_in_network_configs/holesky/config.yaml b/common/eth2_network_config/built_in_network_configs/holesky/config.yaml index ab5f0f3bde0..b1e9faea1d6 100644 --- a/common/eth2_network_config/built_in_network_configs/holesky/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/holesky/config.yaml @@ -38,7 +38,7 @@ ELECTRA_FORK_VERSION: 0x06017000 ELECTRA_FORK_EPOCH: 115968 # Fulu FULU_FORK_VERSION: 0x07017000 -FULU_FORK_EPOCH: 
18446744073709551615 +FULU_FORK_EPOCH: 165120 # Gloas GLOAS_FORK_VERSION: 0x08017000 GLOAS_FORK_EPOCH: 18446744073709551615 @@ -47,6 +47,8 @@ GLOAS_FORK_EPOCH: 18446744073709551615 # --------------------------------------------------------------- # 12 seconds SECONDS_PER_SLOT: 12 +# 1200 milliseconds +SLOT_DURATION_MS: 12000 # 14 (estimate from Eth1 mainnet) SECONDS_PER_ETH1_BLOCK: 14 # 2**8 (= 256) epochs ~27 hours @@ -55,6 +57,18 @@ MIN_VALIDATOR_WITHDRAWABILITY_DELAY: 256 SHARD_COMMITTEE_PERIOD: 256 # 2**11 (= 2,048) Eth1 blocks ~8 hours ETH1_FOLLOW_DISTANCE: 2048 +# 1667 basis points, ~17% of SLOT_DURATION_MS +PROPOSER_REORG_CUTOFF_BPS: 1667 +# 3333 basis points, ~33% of SLOT_DURATION_MS +ATTESTATION_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +AGGREGATE_DUE_BPS: 6667 + +# Altair +# 3333 basis points, ~33% of SLOT_DURATION_MS +SYNC_MESSAGE_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +CONTRIBUTION_DUE_BPS: 6667 # Validator cycle # --------------------------------------------------------------- @@ -141,13 +155,30 @@ MAX_BLOBS_PER_BLOCK_ELECTRA: 9 MAX_REQUEST_BLOB_SIDECARS_ELECTRA: 1152 # Fulu +# 2**7 (= 128) groups NUMBER_OF_CUSTODY_GROUPS: 128 +# 2**7 (= 128) subnets DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 +# MAX_REQUEST_BLOCKS_DENEB * NUMBER_OF_COLUMNS (= 128 * 128) sidecars MAX_REQUEST_DATA_COLUMN_SIDECARS: 16384 +# 2**3 (= 8) samples SAMPLES_PER_SLOT: 8 +# 2**2 (= 4) sidecars CUSTODY_REQUIREMENT: 4 +# 2**3 (= 8) sidecars VALIDATOR_CUSTODY_REQUIREMENT: 8 +# 2**5 * 10**9 (= 32,000,000,000) Gwei BALANCE_PER_ADDITIONAL_CUSTODY_GROUP: 32000000000 +# 2**12 (= 4,096) epochs MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS: 4096 +# Blob Scheduling +# --------------------------------------------------------------- + +BLOB_SCHEDULE: + - EPOCH: 166400 + MAX_BLOBS_PER_BLOCK: 15 + - EPOCH: 167936 + MAX_BLOBS_PER_BLOCK: 21 + # Gloas \ No newline at end of file diff --git a/common/eth2_network_config/built_in_network_configs/hoodi/config.yaml b/common/eth2_network_config/built_in_network_configs/hoodi/config.yaml index 01322974c8e..256957e1197 100644 --- a/common/eth2_network_config/built_in_network_configs/hoodi/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/hoodi/config.yaml @@ -42,7 +42,7 @@ ELECTRA_FORK_EPOCH: 2048 # Fulu FULU_FORK_VERSION: 0x70000910 -FULU_FORK_EPOCH: 18446744073709551615 +FULU_FORK_EPOCH: 50688 # Gloas GLOAS_FORK_VERSION: 0x80000910 @@ -53,6 +53,8 @@ GLOAS_FORK_EPOCH: 18446744073709551615 # --------------------------------------------------------------- # 12 seconds SECONDS_PER_SLOT: 12 +# 12000 milliseconds +SLOT_DURATION_MS: 12000 # 14 (estimate from Eth1 mainnet) SECONDS_PER_ETH1_BLOCK: 12 # 2**8 (= 256) epochs ~27 hours @@ -61,6 +63,18 @@ MIN_VALIDATOR_WITHDRAWABILITY_DELAY: 256 SHARD_COMMITTEE_PERIOD: 256 # 2**11 (= 2,048) Eth1 blocks ~8 hours ETH1_FOLLOW_DISTANCE: 2048 +# 1667 basis points, ~17% of SLOT_DURATION_MS +PROPOSER_REORG_CUTOFF_BPS: 1667 +# 3333 basis points, ~33% of SLOT_DURATION_MS +ATTESTATION_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +AGGREGATE_DUE_BPS: 6667 + +# Altair +# 3333 basis points, ~33% of SLOT_DURATION_MS +SYNC_MESSAGE_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +CONTRIBUTION_DUE_BPS: 6667 # Validator cycle # --------------------------------------------------------------- @@ -154,15 +168,33 @@ WHISK_EPOCHS_PER_SHUFFLING_PHASE: 256 WHISK_PROPOSER_SELECTION_GAP: 2 # Fulu +# 2**7 (= 128) groups NUMBER_OF_CUSTODY_GROUPS: 128 +# 2**7 (= 128) subnets 
DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 +# MAX_REQUEST_BLOCKS_DENEB * NUMBER_OF_COLUMNS (= 128 * 128) sidecars MAX_REQUEST_DATA_COLUMN_SIDECARS: 16384 +# 2**3 (= 8) samples SAMPLES_PER_SLOT: 8 +# 2**2 (= 4) sidecars CUSTODY_REQUIREMENT: 4 +# 2**3 (= 8) sidecars VALIDATOR_CUSTODY_REQUIREMENT: 8 +# 2**5 * 10**9 (= 32,000,000,000) Gwei BALANCE_PER_ADDITIONAL_CUSTODY_GROUP: 32000000000 +# 2**12 (= 4,096) epochs MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS: 4096 + +# Blob Scheduling +# --------------------------------------------------------------- + +BLOB_SCHEDULE: + - EPOCH: 52480 + MAX_BLOBS_PER_BLOCK: 15 + - EPOCH: 54016 + MAX_BLOBS_PER_BLOCK: 21 + # Gloas # EIP7732 diff --git a/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml b/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml index 9802e409fbf..b1a01933d70 100644 --- a/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml @@ -42,7 +42,7 @@ ELECTRA_FORK_EPOCH: 222464 # Fulu FULU_FORK_VERSION: 0x90000075 -FULU_FORK_EPOCH: 18446744073709551615 +FULU_FORK_EPOCH: 272640 # Gloas GLOAS_FORK_VERSION: 0x90000076 @@ -52,6 +52,8 @@ GLOAS_FORK_EPOCH: 18446744073709551615 # --------------------------------------------------------------- # 12 seconds SECONDS_PER_SLOT: 12 +# 12000 milliseconds +SLOT_DURATION_MS: 12000 # 14 (estimate from Eth1 mainnet) SECONDS_PER_ETH1_BLOCK: 14 # 2**8 (= 256) epochs ~27 hours @@ -60,6 +62,18 @@ MIN_VALIDATOR_WITHDRAWABILITY_DELAY: 256 SHARD_COMMITTEE_PERIOD: 256 # 2**11 (= 2,048) Eth1 blocks ~8 hours ETH1_FOLLOW_DISTANCE: 2048 +# 1667 basis points, ~17% of SLOT_DURATION_MS +PROPOSER_REORG_CUTOFF_BPS: 1667 +# 3333 basis points, ~33% of SLOT_DURATION_MS +ATTESTATION_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +AGGREGATE_DUE_BPS: 6667 + +# Altair +# 3333 basis points, ~33% of SLOT_DURATION_MS +SYNC_MESSAGE_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +CONTRIBUTION_DUE_BPS: 6667 # Validator cycle @@ -147,13 +161,31 @@ MAX_BLOBS_PER_BLOCK_ELECTRA: 9 MAX_REQUEST_BLOB_SIDECARS_ELECTRA: 1152 # Fulu +# 2**7 (= 128) groups NUMBER_OF_CUSTODY_GROUPS: 128 +# 2**7 (= 128) subnets DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 +# MAX_REQUEST_BLOCKS_DENEB * NUMBER_OF_COLUMNS (= 128 * 128) sidecars MAX_REQUEST_DATA_COLUMN_SIDECARS: 16384 +# 2**3 (= 8) samples SAMPLES_PER_SLOT: 8 +# 2**2 (= 4) sidecars CUSTODY_REQUIREMENT: 4 +# 2**3 (= 8) sidecars VALIDATOR_CUSTODY_REQUIREMENT: 8 +# 2**5 * 10**9 (= 32,000,000,000) Gwei BALANCE_PER_ADDITIONAL_CUSTODY_GROUP: 32000000000 +# 2**12 (= 4,096) epochs MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS: 4096 + +# Blob Scheduling +# --------------------------------------------------------------- + +BLOB_SCHEDULE: + - EPOCH: 274176 + MAX_BLOBS_PER_BLOCK: 15 + - EPOCH: 275712 + MAX_BLOBS_PER_BLOCK: 21 + # Gloas \ No newline at end of file From c754234b2c94d90ed658788b5fb69ee405ed6cb7 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Sat, 27 Sep 2025 00:44:50 +1000 Subject: [PATCH 29/45] Fix bugs in proposer calculation post-Fulu (#8101) As identified by a researcher during the Fusaka security competition, we were computing the proposer index incorrectly in some places by computing without lookahead. - [x] Add "low level" checks to computation functions in `consensus/types` to ensure they error cleanly - [x] Re-work the determination of proposer shuffling decision roots, which are now fork aware. 
- [x] Re-work and simplify the beacon proposer cache to be fork-aware. - [x] Optimise `with_proposer_cache` to use `OnceCell`. - [x] All tests passing. - [x] Resolve all remaining `FIXME(sproul)`s. - [x] Unit tests for `ProtoBlock::proposer_shuffling_root_for_child_block`. - [x] End-to-end regression test. - [x] Test on pre-Fulu network. - [x] Test on post-Fulu network. Co-Authored-By: Michael Sproul --- beacon_node/beacon_chain/src/beacon_chain.rs | 193 ++++++++----- .../beacon_chain/src/beacon_proposer_cache.rs | 101 ++++--- .../beacon_chain/src/blob_verification.rs | 81 ++---- .../beacon_chain/src/block_verification.rs | 80 ++---- .../beacon_chain/src/canonical_head.rs | 2 +- .../src/data_column_verification.rs | 91 ++---- beacon_node/beacon_chain/src/errors.rs | 17 ++ .../beacon_chain/src/validator_monitor.rs | 8 +- beacon_node/beacon_chain/tests/store_tests.rs | 265 ++++++++++++++++++ .../beacon_chain/tests/validator_monitor.rs | 27 +- beacon_node/http_api/src/proposer_duties.rs | 57 ++-- .../src/proto_array_fork_choice.rs | 44 +++ consensus/state_processing/src/all_caches.rs | 7 +- consensus/state_processing/src/epoch_cache.rs | 4 +- .../state_processing/src/upgrade/fulu.rs | 4 +- consensus/types/src/beacon_state.rs | 103 +++++-- consensus/types/src/chain_spec.rs | 22 ++ consensus/types/src/epoch_cache.rs | 10 +- testing/ef_tests/src/cases/fork.rs | 2 +- 19 files changed, 766 insertions(+), 352 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 08e0d1c6745..afbf3278fe0 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -5,8 +5,9 @@ use crate::attestation_verification::{ }; use crate::attester_cache::{AttesterCache, AttesterCacheKey}; use crate::beacon_block_streamer::{BeaconBlockStreamer, CheckCaches}; -use crate::beacon_proposer_cache::BeaconProposerCache; -use crate::beacon_proposer_cache::compute_proposer_duties_from_head; +use crate::beacon_proposer_cache::{ + BeaconProposerCache, EpochBlockProposers, ensure_state_can_determine_proposers_for_epoch, +}; use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob}; use crate::block_times_cache::BlockTimesCache; use crate::block_verification::POS_PANDA_BANNER; @@ -4698,65 +4699,54 @@ impl BeaconChain { // Compute the proposer index. let head_epoch = cached_head.head_slot().epoch(T::EthSpec::slots_per_epoch()); - let shuffling_decision_root = if head_epoch == proposal_epoch { - cached_head - .snapshot - .beacon_state - .proposer_shuffling_decision_root(proposer_head)? - } else { - proposer_head - }; - let cached_proposer = self - .beacon_proposer_cache - .lock() - .get_slot::(shuffling_decision_root, proposal_slot); - let proposer_index = if let Some(proposer) = cached_proposer { - proposer.index as u64 - } else { - if head_epoch + self.config.sync_tolerance_epochs < proposal_epoch { - warn!( - msg = "this is a non-critical issue that can happen on unhealthy nodes or \ - networks.", - %proposal_epoch, - %head_epoch, - "Skipping proposer preparation" - ); - - // Don't skip the head forward more than two epochs. This avoids burdening an - // unhealthy node. - // - // Although this node might miss out on preparing for a proposal, they should still - // be able to propose. This will prioritise beacon chain health over efficient - // packing of execution blocks. 
- return Ok(None); - } - - let (proposers, decision_root, _, fork) = - compute_proposer_duties_from_head(proposal_epoch, self)?; - - let proposer_offset = (proposal_slot % T::EthSpec::slots_per_epoch()).as_usize(); - let proposer = *proposers - .get(proposer_offset) - .ok_or(BeaconChainError::NoProposerForSlot(proposal_slot))?; - - self.beacon_proposer_cache.lock().insert( - proposal_epoch, - decision_root, - proposers, - fork, - )?; + let shuffling_decision_root = cached_head + .snapshot + .beacon_state + .proposer_shuffling_decision_root_at_epoch(proposal_epoch, proposer_head, &self.spec)?; + + let Some(proposer_index) = self.with_proposer_cache( + shuffling_decision_root, + proposal_epoch, + |proposers| proposers.get_slot::(proposal_slot).map(|p| p.index as u64), + || { + if head_epoch + self.config.sync_tolerance_epochs < proposal_epoch { + warn!( + msg = "this is a non-critical issue that can happen on unhealthy nodes or \ + networks", + %proposal_epoch, + %head_epoch, + "Skipping proposer preparation" + ); - // It's possible that the head changes whilst computing these duties. If so, abandon - // this routine since the change of head would have also spawned another instance of - // this routine. - // - // Exit now, after updating the cache. - if decision_root != shuffling_decision_root { - warn!("Head changed during proposer preparation"); - return Ok(None); + // Don't skip the head forward too many epochs. This avoids burdening an + // unhealthy node. + // + // Although this node might miss out on preparing for a proposal, they should + // still be able to propose. This will prioritise beacon chain health over + // efficient packing of execution blocks. + Err(Error::SkipProposerPreparation) + } else { + let head = self.canonical_head.cached_head(); + Ok(( + head.head_state_root(), + head.snapshot.beacon_state.clone(), + )) + } + }, + ).map_or_else(|e| { + match e { + Error::ProposerCacheIncorrectState { .. } => { + warn!("Head changed during proposer preparation"); + Ok(None) + } + Error::SkipProposerPreparation => { + // Warning logged for this above. + Ok(None) + } + e => Err(e) } - - proposer as u64 + }, |value| Ok(Some(value)))? else { + return Ok(None); }; // Get the `prev_randao` and parent block number. @@ -4916,14 +4906,19 @@ impl BeaconChain { // Only attempt a re-org if we have a proposer registered for the re-org slot. let proposing_at_re_org_slot = { - // The proposer shuffling has the same decision root as the next epoch attestation - // shuffling. We know our re-org block is not on the epoch boundary, so it has the - // same proposer shuffling as the head (but not necessarily the parent which may lie - // in the previous epoch). - let shuffling_decision_root = info - .head_node - .next_epoch_shuffling_id - .shuffling_decision_block; + // We know our re-org block is not on the epoch boundary, so it has the same proposer + // shuffling as the head (but not necessarily the parent which may lie in the previous + // epoch). 
+ let shuffling_decision_root = if self + .spec + .fork_name_at_slot::(re_org_block_slot) + .fulu_enabled() + { + info.head_node.current_epoch_shuffling_id + } else { + info.head_node.next_epoch_shuffling_id + } + .shuffling_decision_block; let proposer_index = self .beacon_proposer_cache .lock() @@ -6558,6 +6553,70 @@ impl BeaconChain { } } + pub fn with_proposer_cache + From>( + &self, + shuffling_decision_block: Hash256, + proposal_epoch: Epoch, + accessor: impl Fn(&EpochBlockProposers) -> Result, + state_provider: impl FnOnce() -> Result<(Hash256, BeaconState), E>, + ) -> Result { + let cache_entry = self + .beacon_proposer_cache + .lock() + .get_or_insert_key(proposal_epoch, shuffling_decision_block); + + // If the cache entry is not initialised, run the code to initialise it inside a OnceCell. + // This prevents duplication of work across multiple threads. + // + // If it is already initialised, then `get_or_try_init` will return immediately without + // executing the initialisation code at all. + let epoch_block_proposers = cache_entry.get_or_try_init(|| { + debug!( + ?shuffling_decision_block, + %proposal_epoch, + "Proposer shuffling cache miss" + ); + + // Fetch the state on-demand if the required epoch was missing from the cache. + // If the caller wants to not compute the state they must return an error here and then + // catch it at the call site. + let (state_root, mut state) = state_provider()?; + + // Ensure the state can compute proposer duties for `epoch`. + ensure_state_can_determine_proposers_for_epoch( + &mut state, + state_root, + proposal_epoch, + &self.spec, + )?; + + // Sanity check the state. + let latest_block_root = state.get_latest_block_root(state_root); + let state_decision_block_root = state.proposer_shuffling_decision_root_at_epoch( + proposal_epoch, + latest_block_root, + &self.spec, + )?; + if state_decision_block_root != shuffling_decision_block { + return Err(Error::ProposerCacheIncorrectState { + state_decision_block_root, + requested_decision_block_root: shuffling_decision_block, + } + .into()); + } + + let proposers = state.get_beacon_proposer_indices(proposal_epoch, &self.spec)?; + Ok::<_, E>(EpochBlockProposers::new( + proposal_epoch, + state.fork(), + proposers, + )) + })?; + + // Run the accessor function on the computed epoch proposers. + accessor(epoch_block_proposers).map_err(Into::into) + } + /// Runs the `map_fn` with the committee cache for `shuffling_epoch` from the chain with head /// `head_block_root`. 
The `map_fn` will be supplied two values: /// diff --git a/beacon_node/beacon_chain/src/beacon_proposer_cache.rs b/beacon_node/beacon_chain/src/beacon_proposer_cache.rs index 12970214c6a..47c44542c0f 100644 --- a/beacon_node/beacon_chain/src/beacon_proposer_cache.rs +++ b/beacon_node/beacon_chain/src/beacon_proposer_cache.rs @@ -12,9 +12,9 @@ use crate::{BeaconChain, BeaconChainError, BeaconChainTypes}; use fork_choice::ExecutionStatus; use lru::LruCache; use once_cell::sync::OnceCell; +use safe_arith::SafeArith; use smallvec::SmallVec; use state_processing::state_advance::partial_state_advance; -use std::cmp::Ordering; use std::num::NonZeroUsize; use std::sync::Arc; use types::non_zero_usize::new_non_zero_usize; @@ -51,6 +51,34 @@ pub struct EpochBlockProposers { pub(crate) proposers: SmallVec<[usize; TYPICAL_SLOTS_PER_EPOCH]>, } +impl EpochBlockProposers { + pub fn new(epoch: Epoch, fork: Fork, proposers: Vec) -> Self { + Self { + epoch, + fork, + proposers: proposers.into(), + } + } + + pub fn get_slot(&self, slot: Slot) -> Result { + let epoch = slot.epoch(E::slots_per_epoch()); + if epoch == self.epoch { + self.proposers + .get(slot.as_usize() % E::SlotsPerEpoch::to_usize()) + .map(|&index| Proposer { + index, + fork: self.fork, + }) + .ok_or(BeaconChainError::ProposerCacheOutOfBounds { slot, epoch }) + } else { + Err(BeaconChainError::ProposerCacheWrongEpoch { + request_epoch: epoch, + cache_epoch: self.epoch, + }) + } + } +} + /// A cache to store the proposers for some epoch. /// /// See the module-level documentation for more information. @@ -76,23 +104,8 @@ impl BeaconProposerCache { ) -> Option { let epoch = slot.epoch(E::slots_per_epoch()); let key = (epoch, shuffling_decision_block); - let cache_opt = self.cache.get(&key).and_then(|cell| cell.get()); - if let Some(cache) = cache_opt { - // This `if` statement is likely unnecessary, but it feels like good practice. - if epoch == cache.epoch { - cache - .proposers - .get(slot.as_usize() % E::SlotsPerEpoch::to_usize()) - .map(|&index| Proposer { - index, - fork: cache.fork, - }) - } else { - None - } - } else { - None - } + let cache = self.cache.get(&key)?.get()?; + cache.get_slot::(slot).ok() } /// As per `Self::get_slot`, but returns all proposers in all slots for the given `epoch`. @@ -142,11 +155,7 @@ impl BeaconProposerCache { ) -> Result<(), BeaconStateError> { let key = (epoch, shuffling_decision_block); if !self.cache.contains(&key) { - let epoch_proposers = EpochBlockProposers { - epoch, - fork, - proposers: proposers.into(), - }; + let epoch_proposers = EpochBlockProposers::new(epoch, fork, proposers); self.cache .put(key, Arc::new(OnceCell::with_value(epoch_proposers))); } @@ -178,7 +187,12 @@ pub fn compute_proposer_duties_from_head( .ok_or(BeaconChainError::HeadMissingFromForkChoice(head_block_root))?; // Advance the state into the requested epoch. - ensure_state_is_in_epoch(&mut state, head_state_root, request_epoch, &chain.spec)?; + ensure_state_can_determine_proposers_for_epoch( + &mut state, + head_state_root, + request_epoch, + &chain.spec, + )?; let indices = state .get_beacon_proposer_indices(request_epoch, &chain.spec) @@ -186,13 +200,13 @@ pub fn compute_proposer_duties_from_head( let dependent_root = state // The only block which decides its own shuffling is the genesis block. 
- .proposer_shuffling_decision_root(chain.genesis_block_root) + .proposer_shuffling_decision_root(chain.genesis_block_root, &chain.spec) .map_err(BeaconChainError::from)?; Ok((indices, dependent_root, execution_status, state.fork())) } -/// If required, advance `state` to `target_epoch`. +/// If required, advance `state` to the epoch required to determine proposer indices in `target_epoch`. /// /// ## Details /// @@ -200,22 +214,33 @@ pub fn compute_proposer_duties_from_head( /// - No-op if `state.current_epoch() == target_epoch`. /// - It must be the case that `state.canonical_root() == state_root`, but this function will not /// check that. -pub fn ensure_state_is_in_epoch( +pub fn ensure_state_can_determine_proposers_for_epoch( state: &mut BeaconState, state_root: Hash256, target_epoch: Epoch, spec: &ChainSpec, ) -> Result<(), BeaconChainError> { - match state.current_epoch().cmp(&target_epoch) { - // Protects against an inconsistent slot clock. - Ordering::Greater => Err(BeaconStateError::SlotOutOfBounds.into()), - // The state needs to be advanced. - Ordering::Less => { - let target_slot = target_epoch.start_slot(E::slots_per_epoch()); - partial_state_advance(state, Some(state_root), target_slot, spec) - .map_err(BeaconChainError::from) - } - // The state is suitable, nothing to do. - Ordering::Equal => Ok(()), + // The decision slot is the end of an epoch, so we add 1 to reach the first slot of the epoch + // at which the shuffling is determined. + let minimum_slot = spec + .proposer_shuffling_decision_slot::(target_epoch) + .safe_add(1)?; + let minimum_epoch = minimum_slot.epoch(E::slots_per_epoch()); + + // Before and after Fulu, the oldest epoch reachable from a state at epoch N is epoch N itself, + // i.e. we can never "look back". + let maximum_epoch = target_epoch; + + if state.current_epoch() > maximum_epoch { + Err(BeaconStateError::SlotOutOfBounds.into()) + } else if state.current_epoch() >= minimum_epoch { + // Fulu allows us to access shufflings in multiple epochs (thanks to lookahead). + // Pre-Fulu we expect `minimum_epoch == maximum_epoch`, and this branch covers that case. + Ok(()) + } else { + // State's current epoch is less than the minimum epoch. + // Advance the state up to the minimum epoch. 
+ partial_state_advance(state, Some(state_root), minimum_slot, spec) + .map_err(BeaconChainError::from) } } diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 53676c0b248..53f2eff0ca3 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -5,8 +5,7 @@ use std::sync::Arc; use crate::beacon_chain::{BeaconChain, BeaconChainTypes}; use crate::block_verification::{ - BlockSlashInfo, cheap_state_advance_to_obtain_committees, get_validator_pubkey_cache, - process_block_slash_info, + BlockSlashInfo, get_validator_pubkey_cache, process_block_slash_info, }; use crate::kzg_utils::{validate_blob, validate_blobs}; use crate::observed_data_sidecars::{ObservationStrategy, Observe}; @@ -494,59 +493,31 @@ pub fn validate_blob_sidecar_for_gossip(proposer_shuffling_root, blob_slot); - - let (proposer_index, fork) = if let Some(proposer) = proposer_opt { - (proposer.index, proposer.fork) - } else { - debug!( - %block_root, - %blob_index, - "Proposer shuffling cache miss for blob verification" - ); - let (parent_state_root, mut parent_state) = chain - .store - .get_advanced_hot_state(block_parent_root, blob_slot, parent_block.state_root) - .map_err(|e| GossipBlobError::BeaconChainError(Box::new(e.into())))? - .ok_or_else(|| { - BeaconChainError::DBInconsistent(format!( - "Missing state for parent block {block_parent_root:?}", - )) - })?; - - let state = cheap_state_advance_to_obtain_committees::<_, GossipBlobError>( - &mut parent_state, - Some(parent_state_root), - blob_slot, - &chain.spec, - )?; - - let epoch = state.current_epoch(); - let proposers = state.get_beacon_proposer_indices(epoch, &chain.spec)?; - let proposer_index = *proposers - .get(blob_slot.as_usize() % T::EthSpec::slots_per_epoch() as usize) - .ok_or_else(|| BeaconChainError::NoProposerForSlot(blob_slot))?; - - // Prime the proposer shuffling cache with the newly-learned value. - chain.beacon_proposer_cache.lock().insert( - blob_epoch, - proposer_shuffling_root, - proposers, - state.fork(), - )?; - (proposer_index, state.fork()) - }; + parent_block.proposer_shuffling_root_for_child_block(blob_epoch, &chain.spec); + + let proposer = chain.with_proposer_cache( + proposer_shuffling_root, + blob_epoch, + |proposers| proposers.get_slot::(blob_slot), + || { + debug!( + %block_root, + index = %blob_index, + "Proposer shuffling cache miss for blob verification" + ); + chain + .store + .get_advanced_hot_state(block_parent_root, blob_slot, parent_block.state_root) + .map_err(|e| GossipBlobError::BeaconChainError(Box::new(e.into())))? + .ok_or_else(|| { + GossipBlobError::BeaconChainError(Box::new(BeaconChainError::DBInconsistent( + format!("Missing state for parent block {block_parent_root:?}",), + ))) + }) + }, + )?; + let proposer_index = proposer.index; + let fork = proposer.fork; // Signature verify the signed block header. 
let signature_is_valid = { diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 1d10fae0a49..d0ed8258e55 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -948,61 +948,35 @@ impl GossipVerifiedBlock { } let proposer_shuffling_decision_block = - if parent_block.slot.epoch(T::EthSpec::slots_per_epoch()) == block_epoch { - parent_block - .next_epoch_shuffling_id - .shuffling_decision_block - } else { - parent_block.root - }; + parent_block.proposer_shuffling_root_for_child_block(block_epoch, &chain.spec); // We assign to a variable instead of using `if let Some` directly to ensure we drop the // write lock before trying to acquire it again in the `else` clause. - let proposer_opt = chain - .beacon_proposer_cache - .lock() - .get_slot::(proposer_shuffling_decision_block, block.slot()); - let (expected_proposer, fork, parent, block) = if let Some(proposer) = proposer_opt { - // The proposer index was cached and we can return it without needing to load the - // parent. - (proposer.index, proposer.fork, None, block) - } else { - // The proposer index was *not* cached and we must load the parent in order to determine - // the proposer index. - let (mut parent, block) = load_parent(block, chain)?; - - debug!( - parent_root = ?parent.beacon_block_root, - parent_slot = %parent.beacon_block.slot(), - ?block_root, - block_slot = %block.slot(), - "Proposer shuffling cache miss" - ); - - // The state produced is only valid for determining proposer/attester shuffling indices. - let state = cheap_state_advance_to_obtain_committees::<_, BlockError>( - &mut parent.pre_state, - parent.beacon_state_root, - block.slot(), - &chain.spec, - )?; - - let epoch = state.current_epoch(); - let proposers = state.get_beacon_proposer_indices(epoch, &chain.spec)?; - let proposer_index = *proposers - .get(block.slot().as_usize() % T::EthSpec::slots_per_epoch() as usize) - .ok_or_else(|| BeaconChainError::NoProposerForSlot(block.slot()))?; - - // Prime the proposer shuffling cache with the newly-learned value. - chain.beacon_proposer_cache.lock().insert( - block_epoch, - proposer_shuffling_decision_block, - proposers, - state.fork(), - )?; - - (proposer_index, state.fork(), Some(parent), block) - }; + let block_slot = block.slot(); + let mut opt_parent = None; + let proposer = chain.with_proposer_cache::<_, BlockError>( + proposer_shuffling_decision_block, + block_epoch, + |proposers| proposers.get_slot::(block_slot), + || { + // The proposer index was *not* cached and we must load the parent in order to + // determine the proposer index. + let (mut parent, _) = load_parent(block.clone(), chain)?; + let parent_state_root = if let Some(state_root) = parent.beacon_state_root { + state_root + } else { + // This is potentially a little inefficient, although we are likely to need + // the state's hash eventually (if the block is valid), and we are also likely + // to already have the hash cached (if fetched from the state cache). + parent.pre_state.canonical_root()? 
+ }; + let parent_state = parent.pre_state.clone(); + opt_parent = Some(parent); + Ok((parent_state_root, parent_state)) + }, + )?; + let expected_proposer = proposer.index; + let fork = proposer.fork; let signature_is_valid = { let pubkey_cache = get_validator_pubkey_cache(chain)?; @@ -1077,7 +1051,7 @@ impl GossipVerifiedBlock { Ok(Self { block, block_root, - parent, + parent: opt_parent, consensus_context, }) } diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index 78005bf7995..cfc7a9637b2 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -829,7 +829,7 @@ impl BeaconChain { let head_slot = new_snapshot.beacon_state.slot(); let dependent_root = new_snapshot .beacon_state - .proposer_shuffling_decision_root(self.genesis_block_root); + .attester_shuffling_decision_root(self.genesis_block_root, RelativeEpoch::Next); let prev_dependent_root = new_snapshot .beacon_state .attester_shuffling_decision_root(self.genesis_block_root, RelativeEpoch::Current); diff --git a/beacon_node/beacon_chain/src/data_column_verification.rs b/beacon_node/beacon_chain/src/data_column_verification.rs index 608e003a228..600b107c1d1 100644 --- a/beacon_node/beacon_chain/src/data_column_verification.rs +++ b/beacon_node/beacon_chain/src/data_column_verification.rs @@ -1,7 +1,5 @@ -use crate::beacon_proposer_cache::EpochBlockProposers; use crate::block_verification::{ - BlockSlashInfo, cheap_state_advance_to_obtain_committees, get_validator_pubkey_cache, - process_block_slash_info, + BlockSlashInfo, get_validator_pubkey_cache, process_block_slash_info, }; use crate::kzg_utils::{reconstruct_data_columns, validate_data_columns}; use crate::observed_data_sidecars::{ObservationStrategy, Observe}; @@ -641,65 +639,34 @@ fn verify_proposer_and_signature( let block_root = data_column.block_root(); let block_parent_root = data_column.block_parent_root(); - let proposer_shuffling_root = if parent_block.slot.epoch(slots_per_epoch) == column_epoch { - parent_block - .next_epoch_shuffling_id - .shuffling_decision_block - } else { - parent_block.root - }; - - // We lock the cache briefly to get or insert a OnceCell, then drop the lock - // before doing proposer shuffling calculation via `OnceCell::get_or_try_init`. This avoids - // holding the lock during the computation, while still ensuring the result is cached and - // initialised only once. - // - // This approach exposes the cache internals (`OnceCell` & `EpochBlockProposers`) - // as a trade-off for avoiding lock contention. - let epoch_proposers_cell = chain - .beacon_proposer_cache - .lock() - .get_or_insert_key(column_epoch, proposer_shuffling_root); - - let epoch_proposers = epoch_proposers_cell.get_or_try_init(move || { - debug!( - %block_root, - index = %column_index, - "Proposer shuffling cache miss for column verification" - ); - let (parent_state_root, mut parent_state) = chain - .store - .get_advanced_hot_state(block_parent_root, column_slot, parent_block.state_root) - .map_err(|e| GossipDataColumnError::BeaconChainError(Box::new(e.into())))? 
- .ok_or_else(|| { - BeaconChainError::DBInconsistent(format!( - "Missing state for parent block {block_parent_root:?}", - )) - })?; - - let state = cheap_state_advance_to_obtain_committees::<_, GossipDataColumnError>( - &mut parent_state, - Some(parent_state_root), - column_slot, - &chain.spec, - )?; - - let epoch = state.current_epoch(); - let proposers = state.get_beacon_proposer_indices(epoch, &chain.spec)?; - // Prime the proposer shuffling cache with the newly-learned value. - Ok::<_, GossipDataColumnError>(EpochBlockProposers { - epoch: column_epoch, - fork: state.fork(), - proposers: proposers.into(), - }) - })?; - - let proposer_index = *epoch_proposers - .proposers - .get(column_slot.as_usize() % slots_per_epoch as usize) - .ok_or_else(|| BeaconChainError::NoProposerForSlot(column_slot))?; - - let fork = epoch_proposers.fork; + let proposer_shuffling_root = + parent_block.proposer_shuffling_root_for_child_block(column_epoch, &chain.spec); + + let proposer = chain.with_proposer_cache( + proposer_shuffling_root, + column_epoch, + |proposers| proposers.get_slot::(column_slot), + || { + debug!( + %block_root, + index = %column_index, + "Proposer shuffling cache miss for column verification" + ); + chain + .store + .get_advanced_hot_state(block_parent_root, column_slot, parent_block.state_root) + .map_err(|e| GossipDataColumnError::BeaconChainError(Box::new(e.into())))? + .ok_or_else(|| { + GossipDataColumnError::BeaconChainError(Box::new( + BeaconChainError::DBInconsistent(format!( + "Missing state for parent block {block_parent_root:?}", + )), + )) + }) + }, + )?; + let proposer_index = proposer.index; + let fork = proposer.fork; // Signature verify the signed block header. let signature_is_valid = { diff --git a/beacon_node/beacon_chain/src/errors.rs b/beacon_node/beacon_chain/src/errors.rs index a1a0ec74f66..7b04a36faec 100644 --- a/beacon_node/beacon_chain/src/errors.rs +++ b/beacon_node/beacon_chain/src/errors.rs @@ -230,6 +230,23 @@ pub enum BeaconChainError { columns_found: usize, }, FailedToReconstructBlobs(String), + ProposerCacheIncorrectState { + state_decision_block_root: Hash256, + requested_decision_block_root: Hash256, + }, + ProposerCacheAccessorFailure { + decision_block_root: Hash256, + proposal_epoch: Epoch, + }, + ProposerCacheOutOfBounds { + slot: Slot, + epoch: Epoch, + }, + ProposerCacheWrongEpoch { + request_epoch: Epoch, + cache_epoch: Epoch, + }, + SkipProposerPreparation, } easy_from_to!(SlotProcessingError, BeaconChainError); diff --git a/beacon_node/beacon_chain/src/validator_monitor.rs b/beacon_node/beacon_chain/src/validator_monitor.rs index 23f1a7d4308..00c30e5ab1d 100644 --- a/beacon_node/beacon_chain/src/validator_monitor.rs +++ b/beacon_node/beacon_chain/src/validator_monitor.rs @@ -497,7 +497,7 @@ impl ValidatorMonitor { }); // Add missed non-finalized blocks for the monitored validators - self.add_validators_missed_blocks(state); + self.add_validators_missed_blocks(state, spec); self.process_unaggregated_attestations(state, spec); // Update metrics for individual validators. 
@@ -588,7 +588,7 @@ impl ValidatorMonitor { } /// Add missed non-finalized blocks for the monitored validators - fn add_validators_missed_blocks(&mut self, state: &BeaconState) { + fn add_validators_missed_blocks(&mut self, state: &BeaconState, spec: &ChainSpec) { // Define range variables let current_slot = state.slot(); let current_epoch = current_slot.epoch(E::slots_per_epoch()); @@ -616,8 +616,8 @@ impl ValidatorMonitor { if block_root == prev_block_root { let slot_epoch = slot.epoch(E::slots_per_epoch()); - if let Ok(shuffling_decision_block) = - state.proposer_shuffling_decision_root_at_epoch(slot_epoch, *block_root) + if let Ok(shuffling_decision_block) = state + .proposer_shuffling_decision_root_at_epoch(slot_epoch, *block_root, spec) { // Update the cache if it has not yet been initialised, or if it is // initialised for a prior epoch. This is an optimisation to avoid bouncing diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index fbb592b510f..efa16978e02 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -1191,6 +1191,271 @@ fn check_shuffling_compatible( } } +/// These tests check the consistency of: +/// +/// - ProtoBlock::proposer_shuffling_root_for_child_block, and +/// - BeaconState::proposer_shuffling_decision_root{_at_epoch} +async fn proposer_shuffling_root_consistency_test(parent_slot: u64, child_slot: u64) { + let child_slot = Slot::new(child_slot); + let db_path = tempdir().unwrap(); + let store = get_store(&db_path); + let validators_keypairs = + types::test_utils::generate_deterministic_keypairs(LOW_VALIDATOR_COUNT); + let harness = TestHarness::builder(MinimalEthSpec) + .default_spec() + .keypairs(validators_keypairs) + .fresh_disk_store(store) + .mock_execution_layer() + .build(); + let spec = &harness.chain.spec; + + // Build chain out to parent block. + let initial_slots: Vec = (1..=parent_slot).map(Into::into).collect(); + let (state, state_root) = harness.get_current_state_and_root(); + let all_validators = harness.get_all_validators(); + let (_, _, parent_root, _) = harness + .add_attested_blocks_at_slots(state, state_root, &initial_slots, &all_validators) + .await; + + // Add the child block. + let (state, state_root) = harness.get_current_state_and_root(); + let all_validators = harness.get_all_validators(); + let (_, _, child_root, child_block_state) = harness + .add_attested_blocks_at_slots(state, state_root, &[child_slot], &all_validators) + .await; + + let child_block_epoch = child_slot.epoch(E::slots_per_epoch()); + + // Load parent block from fork choice. + let fc_parent = harness + .chain + .canonical_head + .fork_choice_read_lock() + .get_block(&parent_root.into()) + .unwrap(); + + // The proposer shuffling decision root computed using fork choice should equal the root + // computed from the child state. + let decision_root = fc_parent.proposer_shuffling_root_for_child_block(child_block_epoch, spec); + + assert_eq!( + decision_root, + child_block_state + .proposer_shuffling_decision_root(child_root.into(), spec) + .unwrap() + ); + assert_eq!( + decision_root, + child_block_state + .proposer_shuffling_decision_root_at_epoch(child_block_epoch, child_root.into(), spec) + .unwrap() + ); + + // The passed block root argument should be irrelevant for all blocks except the genesis block. 
+ assert_eq!( + decision_root, + child_block_state + .proposer_shuffling_decision_root(Hash256::ZERO, spec) + .unwrap() + ); + assert_eq!( + decision_root, + child_block_state + .proposer_shuffling_decision_root_at_epoch(child_block_epoch, Hash256::ZERO, spec) + .unwrap() + ); +} + +#[tokio::test] +async fn proposer_shuffling_root_consistency_same_epoch() { + proposer_shuffling_root_consistency_test(32, 39).await; +} + +#[tokio::test] +async fn proposer_shuffling_root_consistency_next_epoch() { + proposer_shuffling_root_consistency_test(32, 47).await; +} + +#[tokio::test] +async fn proposer_shuffling_root_consistency_two_epochs() { + proposer_shuffling_root_consistency_test(32, 55).await; +} + +#[tokio::test] +async fn proposer_shuffling_changing_with_lookahead() { + let initial_blocks = E::slots_per_epoch() * 4 - 1; + + let spec = ForkName::Fulu.make_genesis_spec(E::default_spec()); + let db_path = tempdir().unwrap(); + let store = get_store_generic(&db_path, Default::default(), spec.clone()); + let validators_keypairs = + types::test_utils::generate_deterministic_keypairs(LOW_VALIDATOR_COUNT); + let harness = TestHarness::builder(MinimalEthSpec) + .spec(spec.into()) + .keypairs(validators_keypairs) + .fresh_disk_store(store) + .mock_execution_layer() + .build(); + let spec = &harness.chain.spec; + + // Start with some blocks, finishing with one slot before a new epoch. + harness.advance_slot(); + harness + .extend_chain( + initial_blocks as usize, + BlockStrategy::OnCanonicalHead, + AttestationStrategy::AllValidators, + ) + .await; + + let pre_deposit_state = harness.get_current_state(); + assert_eq!(pre_deposit_state.slot(), initial_blocks); + let topup_block_slot = Slot::new(initial_blocks + 1); + let validator_to_topup_index = 1; + let validator_to_topup = pre_deposit_state + .get_validator(validator_to_topup_index) + .unwrap() + .clone(); + + // Craft a block with a deposit request and consolidation. + // XXX: This is a really nasty way to do this, but we need better test facilities in + // MockExecutionLayer to address this. + let deposit_request: DepositRequest = DepositRequest { + index: pre_deposit_state.eth1_deposit_index(), + pubkey: validator_to_topup.pubkey, + withdrawal_credentials: validator_to_topup.withdrawal_credentials, + amount: 63_000_000_000, + signature: SignatureBytes::empty(), + }; + + let consolidation_request: ConsolidationRequest = ConsolidationRequest { + source_address: validator_to_topup + .get_execution_withdrawal_address(spec) + .unwrap(), + source_pubkey: validator_to_topup.pubkey, + target_pubkey: validator_to_topup.pubkey, + }; + + let execution_requests = ExecutionRequests:: { + deposits: VariableList::new(vec![deposit_request]).unwrap(), + withdrawals: vec![].into(), + consolidations: VariableList::new(vec![consolidation_request]).unwrap(), + }; + + let mut block = Box::pin(harness.make_block_with_modifier( + pre_deposit_state.clone(), + topup_block_slot, + |block| *block.body_mut().execution_requests_mut().unwrap() = execution_requests, + )) + .await + .0; + + let Err(BlockError::StateRootMismatch { + local: true_state_root, + .. 
+ }) = harness + .process_block(topup_block_slot, block.0.canonical_root(), block.clone()) + .await + else { + panic!("state root should not match due to pending deposits changes/etc"); + }; + let mut new_block = block.0.message_fulu().unwrap().clone(); + new_block.state_root = true_state_root; + block.0 = Arc::new(harness.sign_beacon_block(new_block.into(), &pre_deposit_state)); + + harness + .process_block(topup_block_slot, block.0.canonical_root(), block.clone()) + .await + .unwrap(); + + // Advance two epochs to finalize the deposit and process it. + // Start with just a single epoch advance so we can grab the state one epoch prior to where + // we end up. + harness.advance_slot(); + harness + .extend_chain( + E::slots_per_epoch() as usize, + BlockStrategy::OnCanonicalHead, + AttestationStrategy::AllValidators, + ) + .await; + + // Grab the epoch start state. This is the state from which the proposers at the next epoch were + // computed. + let prev_epoch_state = harness.get_current_state(); + assert_eq!(prev_epoch_state.slot() % E::slots_per_epoch(), 0); + + // The deposit should be pending. + let pending_deposits = prev_epoch_state.pending_deposits().unwrap(); + assert_eq!(pending_deposits.len(), 1, "{pending_deposits:?}"); + + // Advance the 2nd epoch to finalize the deposit and process it. + harness.advance_slot(); + harness + .extend_chain( + E::slots_per_epoch() as usize, + BlockStrategy::OnCanonicalHead, + AttestationStrategy::AllValidators, + ) + .await; + + let current_epoch_state = harness.get_current_state(); + assert_eq!(current_epoch_state.slot() % E::slots_per_epoch(), 0); + + // Deposit is processed! + let pending_deposits = current_epoch_state.pending_deposits().unwrap(); + assert_eq!(pending_deposits.len(), 0, "{pending_deposits:?}"); + + let validator = current_epoch_state + .get_validator(validator_to_topup_index) + .unwrap(); + assert!(validator.has_compounding_withdrawal_credential(spec)); + assert_eq!(validator.effective_balance, 95_000_000_000); + + // The shuffling for the current epoch from `prev_epoch_state` should match the shuffling + // for the current epoch from `current_epoch_state` because we should be correctly using the + // stored lookahead. + let current_epoch = current_epoch_state.current_epoch(); + let proposer_shuffling = prev_epoch_state + .get_beacon_proposer_indices(current_epoch, spec) + .unwrap(); + + assert_eq!( + proposer_shuffling, + current_epoch_state + .get_beacon_proposer_indices(current_epoch, spec) + .unwrap() + ); + + // If we bypass the safety checks in `get_proposer_indices`, we should see that the shuffling + // differs due to the effective balance change. + let unsafe_get_proposer_indices = |state: &BeaconState, epoch| -> Vec { + let indices = state.get_active_validator_indices(epoch, spec).unwrap(); + let preimage = state.get_seed(epoch, Domain::BeaconProposer, spec).unwrap(); + epoch + .slot_iter(E::slots_per_epoch()) + .map(|slot| { + let mut preimage = preimage.to_vec(); + preimage.append(&mut int_to_bytes::int_to_bytes8(slot.as_u64())); + let seed = ethereum_hashing::hash(&preimage); + state.compute_proposer_index(&indices, &seed, spec).unwrap() + }) + .collect() + }; + + // The unsafe function is correct when used with lookahead. + assert_eq!( + unsafe_get_proposer_indices(&prev_epoch_state, current_epoch), + proposer_shuffling + ); + + // Computing the shuffling for current epoch without lookahead is WRONG. 
+ assert_ne!( + unsafe_get_proposer_indices(¤t_epoch_state, current_epoch), + proposer_shuffling, + ); +} + // Ensure blocks from abandoned forks are pruned from the Hot DB #[tokio::test] async fn prunes_abandoned_fork_between_two_finalized_checkpoints() { diff --git a/beacon_node/beacon_chain/tests/validator_monitor.rs b/beacon_node/beacon_chain/tests/validator_monitor.rs index 4e2554d3d8d..95732abeb5d 100644 --- a/beacon_node/beacon_chain/tests/validator_monitor.rs +++ b/beacon_node/beacon_chain/tests/validator_monitor.rs @@ -3,7 +3,7 @@ use beacon_chain::test_utils::{ }; use beacon_chain::validator_monitor::{MISSED_BLOCK_LAG_SLOTS, ValidatorMonitorConfig}; use std::sync::LazyLock; -use types::{Epoch, EthSpec, Keypair, MainnetEthSpec, PublicKeyBytes, Slot}; +use types::{Epoch, EthSpec, Hash256, Keypair, MainnetEthSpec, PublicKeyBytes, Slot}; // Should ideally be divisible by 3. pub const VALIDATOR_COUNT: usize = 48; @@ -74,7 +74,7 @@ async fn missed_blocks_across_epochs() { .get_hot_state(state_roots_by_slot[&start_slot]) .unwrap(); let decision_root = state - .proposer_shuffling_decision_root(genesis_block_root) + .proposer_shuffling_decision_root(genesis_block_root, &harness.chain.spec) .unwrap(); proposer_shuffling_cache .insert( @@ -152,7 +152,7 @@ async fn missed_blocks_basic() { .unwrap(); let mut missed_block_proposer = validator_indexes[slot_in_epoch.as_usize()]; let mut proposer_shuffling_decision_root = _state - .proposer_shuffling_decision_root(duplicate_block_root) + .proposer_shuffling_decision_root(duplicate_block_root, &harness1.chain.spec) .unwrap(); let beacon_proposer_cache = harness1 @@ -234,21 +234,24 @@ async fn missed_blocks_basic() { // Let's fill the cache with the proposers for the current epoch // and push the duplicate_block_root to the block_roots vector + assert_eq!( + _state2.set_block_root(prev_slot, duplicate_block_root), + Ok(()) + ); + + let decision_block_root = _state2 + .proposer_shuffling_decision_root_at_epoch(epoch, Hash256::ZERO, &harness2.chain.spec) + .unwrap(); assert_eq!( beacon_proposer_cache.lock().insert( epoch, - duplicate_block_root, + decision_block_root, validator_indexes.clone(), _state2.fork() ), Ok(()) ); - assert_eq!( - _state2.set_block_root(prev_slot, duplicate_block_root), - Ok(()) - ); - { // Let's validate the state which will call the function responsible for // adding the missed blocks to the validator monitor @@ -326,7 +329,11 @@ async fn missed_blocks_basic() { .unwrap(); missed_block_proposer = validator_indexes[slot_in_epoch.as_usize()]; proposer_shuffling_decision_root = _state3 - .proposer_shuffling_decision_root_at_epoch(epoch, duplicate_block_root) + .proposer_shuffling_decision_root_at_epoch( + epoch, + duplicate_block_root, + &harness1.chain.spec, + ) .unwrap(); let beacon_proposer_cache = harness3 diff --git a/beacon_node/http_api/src/proposer_duties.rs b/beacon_node/http_api/src/proposer_duties.rs index 3705c399bd7..ceac60cbad1 100644 --- a/beacon_node/http_api/src/proposer_duties.rs +++ b/beacon_node/http_api/src/proposer_duties.rs @@ -3,12 +3,13 @@ use crate::state_id::StateId; use beacon_chain::{ BeaconChain, BeaconChainError, BeaconChainTypes, - beacon_proposer_cache::{compute_proposer_duties_from_head, ensure_state_is_in_epoch}, + beacon_proposer_cache::{ + compute_proposer_duties_from_head, ensure_state_can_determine_proposers_for_epoch, + }, }; use eth2::types::{self as api_types}; use safe_arith::SafeArith; use slot_clock::SlotClock; -use std::cmp::Ordering; use tracing::debug; use types::{Epoch, EthSpec, 
Hash256, Slot}; @@ -105,36 +106,29 @@ fn try_proposer_duties_from_cache( let head_decision_root = head .snapshot .beacon_state - .proposer_shuffling_decision_root(head_block_root) + .proposer_shuffling_decision_root(head_block_root, &chain.spec) .map_err(warp_utils::reject::beacon_state_error)?; let execution_optimistic = chain .is_optimistic_or_invalid_head_block(head_block) .map_err(warp_utils::reject::unhandled_error)?; - let dependent_root = match head_epoch.cmp(&request_epoch) { - // head_epoch == request_epoch - Ordering::Equal => head_decision_root, - // head_epoch < request_epoch - Ordering::Less => head_block_root, - // head_epoch > request_epoch - Ordering::Greater => { - return Err(warp_utils::reject::custom_server_error(format!( - "head epoch {} is later than request epoch {}", - head_epoch, request_epoch - ))); - } - }; + // This code path can't handle requests for past epochs. + if head_epoch > request_epoch { + return Err(warp_utils::reject::custom_server_error(format!( + "head epoch {head_epoch} is later than request epoch {request_epoch}", + ))); + } chain .beacon_proposer_cache .lock() - .get_epoch::(dependent_root, request_epoch) + .get_epoch::(head_decision_root, request_epoch) .cloned() .map(|indices| { convert_to_api_response( chain, request_epoch, - dependent_root, + head_decision_root, execution_optimistic, indices.to_vec(), ) @@ -204,18 +198,19 @@ fn compute_historic_proposer_duties( } }; - let (state, execution_optimistic) = - if let Some((state_root, mut state, execution_optimistic)) = state_opt { - // If we've loaded the head state it might be from a previous epoch, ensure it's in a - // suitable epoch. - ensure_state_is_in_epoch(&mut state, state_root, epoch, &chain.spec) - .map_err(warp_utils::reject::unhandled_error)?; - (state, execution_optimistic) - } else { - let (state, execution_optimistic, _finalized) = - StateId::from_slot(epoch.start_slot(T::EthSpec::slots_per_epoch())).state(chain)?; - (state, execution_optimistic) - }; + let (state, execution_optimistic) = if let Some((state_root, mut state, execution_optimistic)) = + state_opt + { + // If we've loaded the head state it might be from a previous epoch, ensure it's in a + // suitable epoch. + ensure_state_can_determine_proposers_for_epoch(&mut state, state_root, epoch, &chain.spec) + .map_err(warp_utils::reject::unhandled_error)?; + (state, execution_optimistic) + } else { + let (state, execution_optimistic, _finalized) = + StateId::from_slot(epoch.start_slot(T::EthSpec::slots_per_epoch())).state(chain)?; + (state, execution_optimistic) + }; // Ensure the state lookup was correct. if state.current_epoch() != epoch { @@ -234,7 +229,7 @@ fn compute_historic_proposer_duties( // We can supply the genesis block root as the block root since we know that the only block that // decides its own root is the genesis block. 
let dependent_root = state - .proposer_shuffling_decision_root(chain.genesis_block_root) + .proposer_shuffling_decision_root(chain.genesis_block_root, &chain.spec) .map_err(BeaconChainError::from) .map_err(warp_utils::reject::unhandled_error)?; diff --git a/consensus/proto_array/src/proto_array_fork_choice.rs b/consensus/proto_array/src/proto_array_fork_choice.rs index 4b31dc60bd3..8c7b58c4d41 100644 --- a/consensus/proto_array/src/proto_array_fork_choice.rs +++ b/consensus/proto_array/src/proto_array_fork_choice.rs @@ -160,6 +160,50 @@ pub struct Block { pub unrealized_finalized_checkpoint: Option, } +impl Block { + /// Compute the proposer shuffling decision root of a child block in `child_block_epoch`. + /// + /// This function assumes that `child_block_epoch >= self.epoch`. It is the responsibility of + /// the caller to check this condition, or else incorrect results will be produced. + pub fn proposer_shuffling_root_for_child_block( + &self, + child_block_epoch: Epoch, + spec: &ChainSpec, + ) -> Hash256 { + let block_epoch = self.current_epoch_shuffling_id.shuffling_epoch; + + if !spec.fork_name_at_epoch(child_block_epoch).fulu_enabled() { + // Prior to Fulu the proposer shuffling decision root for the current epoch is the same + // as the attestation shuffling for the *next* epoch, i.e. it is determined at the start + // of the current epoch. + if block_epoch == child_block_epoch { + self.next_epoch_shuffling_id.shuffling_decision_block + } else { + // Otherwise, the child block epoch is greater, so its decision root is its parent + // root itself (this block's root). + self.root + } + } else { + // After Fulu the proposer shuffling is determined with lookahead, so if the block + // lies in the same epoch as its parent, its decision root is the same as the + // parent's current epoch attester shuffling + // + // i.e. the block from the end of epoch N - 2. + if child_block_epoch == block_epoch { + self.current_epoch_shuffling_id.shuffling_decision_block + } else if child_block_epoch == block_epoch + 1 { + // If the block is the next epoch, then it instead shares its decision root with + // the parent's *next epoch* attester shuffling. + self.next_epoch_shuffling_id.shuffling_decision_block + } else { + // The child block lies in the future beyond the lookahead, at the point where this + // block (its parent) will be the decision block. + self.root + } + } + } +} + /// A Vec-wrapper which will grow to match any request. /// /// E.g., a `get` or `insert` to an out-of-bounds element will cause the Vec to grow (using diff --git a/consensus/state_processing/src/all_caches.rs b/consensus/state_processing/src/all_caches.rs index d6c4fd3f880..0381bb820f2 100644 --- a/consensus/state_processing/src/all_caches.rs +++ b/consensus/state_processing/src/all_caches.rs @@ -1,9 +1,7 @@ use crate::common::update_progressive_balances_cache::initialize_progressive_balances_cache; use crate::epoch_cache::initialize_epoch_cache; use tracing::instrument; -use types::{ - BeaconState, ChainSpec, EpochCacheError, EthSpec, FixedBytesExtended, Hash256, RelativeEpoch, -}; +use types::{BeaconState, ChainSpec, EpochCacheError, EthSpec, Hash256, RelativeEpoch}; /// Mixin trait for the beacon state that provides operations on *all* caches. 
/// @@ -34,8 +32,7 @@ impl AllCaches for BeaconState { fn all_caches_built(&self) -> bool { let current_epoch = self.current_epoch(); - let Ok(epoch_cache_decision_block_root) = - self.proposer_shuffling_decision_root(Hash256::zero()) + let Ok(epoch_cache_decision_block_root) = self.epoch_cache_decision_root(Hash256::ZERO) else { return false; }; diff --git a/consensus/state_processing/src/epoch_cache.rs b/consensus/state_processing/src/epoch_cache.rs index 6654c6a7ef8..86db037446b 100644 --- a/consensus/state_processing/src/epoch_cache.rs +++ b/consensus/state_processing/src/epoch_cache.rs @@ -123,7 +123,7 @@ pub fn is_epoch_cache_initialized( let current_epoch = state.current_epoch(); let epoch_cache: &EpochCache = state.epoch_cache(); let decision_block_root = state - .proposer_shuffling_decision_root(Hash256::zero()) + .epoch_cache_decision_root(Hash256::zero()) .map_err(EpochCacheError::BeaconState)?; Ok(epoch_cache @@ -146,7 +146,7 @@ pub fn initialize_epoch_cache( let current_epoch = state.current_epoch(); let next_epoch = state.next_epoch().map_err(EpochCacheError::BeaconState)?; let decision_block_root = state - .proposer_shuffling_decision_root(Hash256::zero()) + .epoch_cache_decision_root(Hash256::zero()) .map_err(EpochCacheError::BeaconState)?; state.build_total_active_balance_cache(spec)?; diff --git a/consensus/state_processing/src/upgrade/fulu.rs b/consensus/state_processing/src/upgrade/fulu.rs index 6b038ad73a1..c2aced7047a 100644 --- a/consensus/state_processing/src/upgrade/fulu.rs +++ b/consensus/state_processing/src/upgrade/fulu.rs @@ -33,9 +33,7 @@ fn initialize_proposer_lookahead( ); } - Vector::new(lookahead).map_err(|e| { - Error::PleaseNotifyTheDevs(format!("Failed to initialize proposer lookahead: {:?}", e)) - }) + Vector::new(lookahead).map_err(|e| e.into()) } pub fn upgrade_state_to_fulu( diff --git a/consensus/types/src/beacon_state.rs b/consensus/types/src/beacon_state.rs index d2efbfe9095..0a3d768c593 100644 --- a/consensus/types/src/beacon_state.rs +++ b/consensus/types/src/beacon_state.rs @@ -173,7 +173,21 @@ pub enum Error { AggregatorNotInCommittee { aggregator_index: u64, }, - PleaseNotifyTheDevs(String), + ComputeProposerIndicesPastEpoch { + current_epoch: Epoch, + request_epoch: Epoch, + }, + ComputeProposerIndicesInsufficientLookahead { + current_epoch: Epoch, + request_epoch: Epoch, + }, + ComputeProposerIndicesExcessiveLookahead { + current_epoch: Epoch, + request_epoch: Epoch, + }, + ProposerLookaheadOutOfBounds { + i: usize, + }, } /// Control whether an epoch-indexed field can be indexed at the next epoch or not. @@ -886,8 +900,9 @@ impl BeaconState { &self, epoch: Epoch, block_root: Hash256, + spec: &ChainSpec, ) -> Result { - let decision_slot = self.proposer_shuffling_decision_slot(epoch); + let decision_slot = spec.proposer_shuffling_decision_slot::(epoch); if self.slot() <= decision_slot { Ok(block_root) } else { @@ -902,19 +917,18 @@ impl BeaconState { /// /// The `block_root` covers the one-off scenario where the genesis block decides its own /// shuffling. It should be set to the latest block applied to `self` or the genesis block root. 
- pub fn proposer_shuffling_decision_root(&self, block_root: Hash256) -> Result { - let decision_slot = self.proposer_shuffling_decision_slot(self.current_epoch()); - if self.slot() == decision_slot { - Ok(block_root) - } else { - self.get_block_root(decision_slot).copied() - } + pub fn proposer_shuffling_decision_root( + &self, + block_root: Hash256, + spec: &ChainSpec, + ) -> Result { + self.proposer_shuffling_decision_root_at_epoch(self.current_epoch(), block_root, spec) } - /// Returns the slot at which the proposer shuffling was decided. The block root at this slot - /// can be used to key the proposer shuffling for the given epoch. - fn proposer_shuffling_decision_slot(&self, epoch: Epoch) -> Slot { - epoch.start_slot(E::slots_per_epoch()).saturating_sub(1_u64) + pub fn epoch_cache_decision_root(&self, block_root: Hash256) -> Result { + // Epoch cache decision root for the current epoch (N) is the block root at the end of epoch + // N - 1. This is the same as the root that determines the next epoch attester shuffling. + self.attester_shuffling_decision_root(block_root, RelativeEpoch::Next) } /// Returns the block root which decided the attester shuffling for the given `relative_epoch`. @@ -998,6 +1012,45 @@ impl BeaconState { indices: &[usize], spec: &ChainSpec, ) -> Result, Error> { + // Regardless of fork, we never support computing proposer indices for past epochs. + let current_epoch = self.current_epoch(); + if epoch < current_epoch { + return Err(Error::ComputeProposerIndicesPastEpoch { + current_epoch, + request_epoch: epoch, + }); + } + + if spec.fork_name_at_epoch(epoch).fulu_enabled() { + // Post-Fulu we must never compute proposer indices using insufficient lookahead. This + // would be very dangerous as it would lead to conflicts between the *true* proposer as + // defined by `self.proposer_lookahead` and the output of this function. + // With MIN_SEED_LOOKAHEAD=1 (common config), this is equivalent to checking that the + // requested epoch is not the current epoch. + // + // We do not run this check if this function is called from `upgrade_to_fulu`, + // which runs *after* the slot is incremented, and needs to compute the proposer + // shuffling for the epoch that was just transitioned into. + if self.fork_name_unchecked().fulu_enabled() + && epoch < current_epoch.safe_add(spec.min_seed_lookahead)? + { + return Err(Error::ComputeProposerIndicesInsufficientLookahead { + current_epoch, + request_epoch: epoch, + }); + } + } else { + // Pre-Fulu the situation is reversed, we *should not* compute proposer indices using + // too much lookahead. To do so would make us vulnerable to changes in the proposer + // indices caused by effective balance changes. + if epoch >= current_epoch.safe_add(spec.min_seed_lookahead)? { + return Err(Error::ComputeProposerIndicesExcessiveLookahead { + current_epoch, + request_epoch: epoch, + }); + } + } + epoch .slot_iter(E::slots_per_epoch()) .map(|slot| { @@ -1146,10 +1199,7 @@ impl BeaconState { let index = slot.as_usize().safe_rem(E::slots_per_epoch() as usize)?; proposer_lookahead .get(index) - .ok_or(Error::PleaseNotifyTheDevs(format!( - "Proposer lookahead out of bounds: {} for slot: {}", - index, slot - ))) + .ok_or(Error::ProposerLookaheadOutOfBounds { i: index }) .map(|index| *index as usize) } else { // Pre-Fulu @@ -1168,6 +1218,25 @@ impl BeaconState { epoch: Epoch, spec: &ChainSpec, ) -> Result, Error> { + // This isn't in the spec, but we remove the footgun that is requesting the current epoch + // for a Fulu state. 
+ if let Ok(proposer_lookahead) = self.proposer_lookahead() + && epoch >= self.current_epoch() + && epoch <= self.next_epoch()? + { + let slots_per_epoch = E::slots_per_epoch() as usize; + let start_offset = if epoch == self.current_epoch() { + 0 + } else { + slots_per_epoch + }; + return Ok(proposer_lookahead + .iter_from(start_offset)? + .take(slots_per_epoch) + .map(|x| *x as usize) + .collect()); + } + // Not using the cached validator indices since they are shuffled. let indices = self.get_active_validator_indices(epoch, spec)?; diff --git a/consensus/types/src/chain_spec.rs b/consensus/types/src/chain_spec.rs index a1005d904ae..6670fff6298 100644 --- a/consensus/types/src/chain_spec.rs +++ b/consensus/types/src/chain_spec.rs @@ -865,6 +865,28 @@ impl ChainSpec { ) } + /// Returns the slot at which the proposer shuffling was decided. + /// + /// The block root at this slot can be used to key the proposer shuffling for the given epoch. + pub fn proposer_shuffling_decision_slot(&self, epoch: Epoch) -> Slot { + if self.fork_name_at_epoch(epoch).fulu_enabled() { + // Post-Fulu the proposer shuffling decision slot for epoch N is the slot at the end + // of epoch N - 2 (note: min_seed_lookahead=1 in all current configs). + epoch + .saturating_sub(self.min_seed_lookahead) + .start_slot(E::slots_per_epoch()) + .saturating_sub(1_u64) + } else { + // Pre-Fulu the proposer shuffling decision slot for epoch N is the slot at the end of + // epoch N - 1 (note: +1 -1 for min_seed_lookahead=1 in all current configs). + epoch + .saturating_add(Epoch::new(1)) + .saturating_sub(self.min_seed_lookahead) + .start_slot(E::slots_per_epoch()) + .saturating_sub(1_u64) + } + } + /// Returns a `ChainSpec` compatible with the Ethereum Foundation specification. pub fn mainnet() -> Self { Self { diff --git a/consensus/types/src/epoch_cache.rs b/consensus/types/src/epoch_cache.rs index ef91c20d753..9956cb400a7 100644 --- a/consensus/types/src/epoch_cache.rs +++ b/consensus/types/src/epoch_cache.rs @@ -5,9 +5,13 @@ use std::sync::Arc; /// Cache of values which are uniquely determined at the start of an epoch. /// /// The values are fixed with respect to the last block of the _prior_ epoch, which we refer -/// to as the "decision block". This cache is very similar to the `BeaconProposerCache` in that -/// beacon proposers are determined at exactly the same time as the values in this cache, so -/// the keys for the two caches are identical. +/// to as the "decision block". +/// +/// Prior to Fulu this cache was similar to the `BeaconProposerCache` in that beacon proposers were +/// determined at exactly the same time as the values in this cache, so the keys for the two caches +/// were identical. +/// +/// Post-Fulu, we use a different key (the proposers have more lookahead). 
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] #[derive(Debug, PartialEq, Eq, Clone, Default)] pub struct EpochCache { diff --git a/testing/ef_tests/src/cases/fork.rs b/testing/ef_tests/src/cases/fork.rs index 78d802c2283..54efb9f9cec 100644 --- a/testing/ef_tests/src/cases/fork.rs +++ b/testing/ef_tests/src/cases/fork.rs @@ -60,7 +60,7 @@ impl Case for ForkTest { fn result(&self, _case_index: usize, fork_name: ForkName) -> Result<(), Error> { let mut result_state = self.pre.clone(); let mut expected = Some(self.post.clone()); - let spec = &E::default_spec(); + let spec = &fork_name.make_genesis_spec(E::default_spec()); let mut result = match fork_name { ForkName::Base => panic!("phase0 not supported"), From edcfee636cd7c32ee63e981ec0487a2798ec6518 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Sat, 27 Sep 2025 21:03:25 -0700 Subject: [PATCH 30/45] Fix bug in fork calculation at fork boundaries (#8121) N/A In #8101 , when we modified the logic to get the proposer index post fulu, we seem to have missed advancing the state at the fork boundaries to get the right `Fork` for signature verification. This led to lighthouse failing all gossip verification right after transitioning to fulu that was observed on the holesky shadow fork ``` Sep 26 14:24:00.088 DEBUG Rejected gossip block error: "InvalidSignature(ProposerSignature)", graffiti: "grandine-geth-super-1", slot: 640 Sep 26 14:24:00.099 WARN Could not verify block for gossip. Rejecting the block error: InvalidSignature(ProposerSignature) ``` I'm not completely sure this is the correct fix, but this fixes the issue with `InvalidProposerSignature` on the holesky shadow fork. Thanks to @eserilev for helping debug this Co-Authored-By: Pawan Dhananjay --- beacon_node/beacon_chain/src/beacon_proposer_cache.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_proposer_cache.rs b/beacon_node/beacon_chain/src/beacon_proposer_cache.rs index 47c44542c0f..a64b4981cc4 100644 --- a/beacon_node/beacon_chain/src/beacon_proposer_cache.rs +++ b/beacon_node/beacon_chain/src/beacon_proposer_cache.rs @@ -234,8 +234,14 @@ pub fn ensure_state_can_determine_proposers_for_epoch( if state.current_epoch() > maximum_epoch { Err(BeaconStateError::SlotOutOfBounds.into()) } else if state.current_epoch() >= minimum_epoch { - // Fulu allows us to access shufflings in multiple epochs (thanks to lookahead). - // Pre-Fulu we expect `minimum_epoch == maximum_epoch`, and this branch covers that case. + if target_epoch > state.current_epoch() { + let target_slot = target_epoch.start_slot(E::slots_per_epoch()); + + // Advance the state into the same epoch as the block. Use the "partial" method since state + // roots are not important for proposer/attester shuffling. + partial_state_advance(state, Some(state_root), target_slot, spec) + .map_err(BeaconChainError::from)?; + } Ok(()) } else { // State's current epoch is less than the minimum epoch. 
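For illustration, here is a minimal sketch (not part of the patch) of the idea behind this fix, reusing the helper names that appear in the diff above; the exact module paths and error handling are assumptions. When the requested epoch is ahead of the cached head state, the state is advanced without computing state roots, so that `state.fork()` matches the fork active in the block's epoch before proposer signatures are verified:

```
// Sketch only: advance `state` into `target_epoch` (if needed) so that `state.fork()`
// reflects the fork active at that epoch (e.g. Fulu rather than Electra right after the
// transition). Module paths are assumptions.
use state_processing::state_advance::partial_state_advance;
use types::{BeaconState, ChainSpec, Epoch, EthSpec, Hash256, Slot};

fn advance_for_epoch<E: EthSpec>(
    state: &mut BeaconState<E>,
    state_root: Hash256,
    target_epoch: Epoch,
    spec: &ChainSpec,
) -> Result<(), String> {
    if target_epoch > state.current_epoch() {
        let target_slot: Slot = target_epoch.start_slot(E::slots_per_epoch());
        // "Partial" advance: state roots are not needed for reading the fork or for
        // proposer/attester shuffling, so skip computing them.
        partial_state_advance(state, Some(state_root), target_slot, spec)
            .map_err(|e| format!("state advance failed: {e:?}"))?;
    }
    // `state.fork()` now corresponds to `spec.fork_name_at_epoch(target_epoch)`, so blocks
    // from `target_epoch` are checked against the correct signature domain.
    Ok(())
}
```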
From 38fdaf791ce7a41590dbf5a4e6694eb1c4621721 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Mon, 29 Sep 2025 11:13:33 +1000 Subject: [PATCH 31/45] Fix proposer shuffling decision slot at boundary (#8128) Follow-up to the bug fixed in: - https://github.com/sigp/lighthouse/pull/8121 This fixes the root cause of that bug, which was introduced by me in: - https://github.com/sigp/lighthouse/pull/8101 Lion identified the issue here: - https://github.com/sigp/lighthouse/pull/8101#discussion_r2382710356 In the methods that compute the proposer shuffling decision root, ensure we don't use lookahead for the Fulu fork epoch itself. This is accomplished by checking if Fulu is enabled at `epoch - 1`, i.e. if `epoch > fulu_fork_epoch`. I haven't updated the methods that _compute_ shufflings to use these new corrected bounds (e.g. `BeaconState::compute_proposer_indices`), although we could make this change in future. The `get_beacon_proposer_indices` method already gracefully handles the Fulu boundary case by using the `proposer_lookahead` field (if initialised). Co-Authored-By: Michael Sproul --- beacon_node/beacon_chain/tests/store_tests.rs | 57 +++++++++++++++++-- .../src/proto_array_fork_choice.rs | 8 ++- consensus/types/src/chain_spec.rs | 36 +++++++++++- 3 files changed, 93 insertions(+), 8 deletions(-) diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index efa16978e02..cd4032f55d9 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -1195,14 +1195,18 @@ fn check_shuffling_compatible( /// /// - ProtoBlock::proposer_shuffling_root_for_child_block, and /// - BeaconState::proposer_shuffling_decision_root{_at_epoch} -async fn proposer_shuffling_root_consistency_test(parent_slot: u64, child_slot: u64) { +async fn proposer_shuffling_root_consistency_test( + spec: ChainSpec, + parent_slot: u64, + child_slot: u64, +) { let child_slot = Slot::new(child_slot); let db_path = tempdir().unwrap(); - let store = get_store(&db_path); + let store = get_store_generic(&db_path, Default::default(), spec.clone()); let validators_keypairs = types::test_utils::generate_deterministic_keypairs(LOW_VALIDATOR_COUNT); let harness = TestHarness::builder(MinimalEthSpec) - .default_spec() + .spec(spec.into()) .keypairs(validators_keypairs) .fresh_disk_store(store) .mock_execution_layer() @@ -1268,17 +1272,58 @@ async fn proposer_shuffling_root_consistency_test(parent_slot: u64, child_slot: #[tokio::test] async fn proposer_shuffling_root_consistency_same_epoch() { - proposer_shuffling_root_consistency_test(32, 39).await; + let spec = test_spec::(); + proposer_shuffling_root_consistency_test(spec, 32, 39).await; } #[tokio::test] async fn proposer_shuffling_root_consistency_next_epoch() { - proposer_shuffling_root_consistency_test(32, 47).await; + let spec = test_spec::(); + proposer_shuffling_root_consistency_test(spec, 32, 47).await; } #[tokio::test] async fn proposer_shuffling_root_consistency_two_epochs() { - proposer_shuffling_root_consistency_test(32, 55).await; + let spec = test_spec::(); + proposer_shuffling_root_consistency_test(spec, 32, 55).await; +} + +#[tokio::test] +async fn proposer_shuffling_root_consistency_at_fork_boundary() { + let mut spec = ForkName::Electra.make_genesis_spec(E::default_spec()); + spec.fulu_fork_epoch = Some(Epoch::new(4)); + + // Parent block in epoch prior to Fulu fork epoch, child block in Fulu fork epoch. 
+ proposer_shuffling_root_consistency_test( + spec.clone(), + 3 * E::slots_per_epoch(), + 4 * E::slots_per_epoch(), + ) + .await; + + // Parent block and child block in Fulu fork epoch. + proposer_shuffling_root_consistency_test( + spec.clone(), + 4 * E::slots_per_epoch(), + 4 * E::slots_per_epoch() + 1, + ) + .await; + + // Parent block in Fulu fork epoch and child block in epoch after. + proposer_shuffling_root_consistency_test( + spec.clone(), + 4 * E::slots_per_epoch(), + 5 * E::slots_per_epoch(), + ) + .await; + + // Parent block in epoch prior and child block in epoch after. + proposer_shuffling_root_consistency_test( + spec, + 3 * E::slots_per_epoch(), + 5 * E::slots_per_epoch(), + ) + .await; } #[tokio::test] diff --git a/consensus/proto_array/src/proto_array_fork_choice.rs b/consensus/proto_array/src/proto_array_fork_choice.rs index 8c7b58c4d41..dea853d245d 100644 --- a/consensus/proto_array/src/proto_array_fork_choice.rs +++ b/consensus/proto_array/src/proto_array_fork_choice.rs @@ -172,7 +172,13 @@ impl Block { ) -> Hash256 { let block_epoch = self.current_epoch_shuffling_id.shuffling_epoch; - if !spec.fork_name_at_epoch(child_block_epoch).fulu_enabled() { + // For child blocks in the Fulu fork epoch itself, we want to use the old logic. There is no + // lookahead in the first Fulu epoch. So we check whether Fulu is enabled at + // `child_block_epoch - 1`, i.e. whether `child_block_epoch > fulu_fork_epoch`. + if !spec + .fork_name_at_epoch(child_block_epoch.saturating_sub(1_u64)) + .fulu_enabled() + { // Prior to Fulu the proposer shuffling decision root for the current epoch is the same // as the attestation shuffling for the *next* epoch, i.e. it is determined at the start // of the current epoch. diff --git a/consensus/types/src/chain_spec.rs b/consensus/types/src/chain_spec.rs index 6670fff6298..7916e9fcdb1 100644 --- a/consensus/types/src/chain_spec.rs +++ b/consensus/types/src/chain_spec.rs @@ -869,7 +869,13 @@ impl ChainSpec { /// /// The block root at this slot can be used to key the proposer shuffling for the given epoch. pub fn proposer_shuffling_decision_slot(&self, epoch: Epoch) -> Slot { - if self.fork_name_at_epoch(epoch).fulu_enabled() { + // At the Fulu fork epoch itself, the shuffling is computed "the old way" with no lookahead. + // Therefore for `epoch == fulu_fork_epoch` we must take the `else` branch. Checking if Fulu + // is enabled at `epoch - 1` accomplishes this neatly. + if self + .fork_name_at_epoch(epoch.saturating_sub(1_u64)) + .fulu_enabled() + { // Post-Fulu the proposer shuffling decision slot for epoch N is the slot at the end // of epoch N - 2 (note: min_seed_lookahead=1 in all current configs). epoch @@ -2999,4 +3005,32 @@ mod yaml_tests { spec.min_epoch_data_availability_boundary(current_epoch) ); } + + #[test] + fn proposer_shuffling_decision_root_around_epoch_boundary() { + type E = MainnetEthSpec; + let fulu_fork_epoch = 5; + let spec = { + let mut spec = ForkName::Electra.make_genesis_spec(E::default_spec()); + spec.fulu_fork_epoch = Some(Epoch::new(fulu_fork_epoch)); + Arc::new(spec) + }; + + // For epochs prior to AND including the Fulu fork epoch, the decision slot is the end + // of the previous epoch (i.e. only 1 slot lookahead). + for epoch in (0..=fulu_fork_epoch).map(Epoch::new) { + assert_eq!( + spec.proposer_shuffling_decision_slot::(epoch), + epoch.start_slot(E::slots_per_epoch()) - 1 + ); + } + + // For epochs after Fulu, the decision slot is the end of the epoch two epochs prior. 
+ for epoch in ((fulu_fork_epoch + 1)..(fulu_fork_epoch + 10)).map(Epoch::new) { + assert_eq!( + spec.proposer_shuffling_decision_slot::(epoch), + (epoch - 1).start_slot(E::slots_per_epoch()) - 1 + ); + } + } } From e5b4983d6baf85770fe4539a565d8a2dd462bc53 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Mon, 29 Sep 2025 12:17:30 +1000 Subject: [PATCH 32/45] Release v8.0.0 rc.0 (#8127) Testnet release for the upcoming Fusaka fork. Co-Authored-By: Jimmy Chen Co-Authored-By: Jimmy Chen --- Cargo.lock | 8 ++++---- beacon_node/Cargo.toml | 2 +- boot_node/Cargo.toml | 2 +- common/lighthouse_version/src/lib.rs | 6 +++--- lcli/Cargo.toml | 2 +- lighthouse/Cargo.toml | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ee651080973..352ff779752 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -918,7 +918,7 @@ dependencies = [ [[package]] name = "beacon_node" -version = "7.1.0" +version = "8.0.0-rc.0" dependencies = [ "account_utils", "beacon_chain", @@ -1193,7 +1193,7 @@ dependencies = [ [[package]] name = "boot_node" -version = "7.1.0" +version = "8.0.0-rc.0" dependencies = [ "beacon_node", "bytes", @@ -5051,7 +5051,7 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "lcli" -version = "7.1.0" +version = "8.0.0-rc.0" dependencies = [ "account_utils", "beacon_chain", @@ -5561,7 +5561,7 @@ dependencies = [ [[package]] name = "lighthouse" -version = "7.1.0" +version = "8.0.0-rc.0" dependencies = [ "account_manager", "account_utils", diff --git a/beacon_node/Cargo.toml b/beacon_node/Cargo.toml index dd7416af540..bb904a7619c 100644 --- a/beacon_node/Cargo.toml +++ b/beacon_node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "beacon_node" -version = "7.1.0" +version = "8.0.0-rc.0" authors = [ "Paul Hauner ", "Age Manning "] edition = { workspace = true } diff --git a/common/lighthouse_version/src/lib.rs b/common/lighthouse_version/src/lib.rs index c45dbac4d3a..574fdfea35f 100644 --- a/common/lighthouse_version/src/lib.rs +++ b/common/lighthouse_version/src/lib.rs @@ -17,8 +17,8 @@ pub const VERSION: &str = git_version!( // NOTE: using --match instead of --exclude for compatibility with old Git "--match=thiswillnevermatchlol" ], - prefix = "Lighthouse/v7.1.0-", - fallback = "Lighthouse/v7.1.0" + prefix = "Lighthouse/v8.0.0-rc.0-", + fallback = "Lighthouse/v8.0.0-rc.0" ); /// Returns the first eight characters of the latest commit hash for this build. @@ -54,7 +54,7 @@ pub fn version_with_platform() -> String { /// /// `1.5.1` pub fn version() -> &'static str { - "7.1.0" + "8.0.0-rc.0" } /// Returns the name of the current client running. 
diff --git a/lcli/Cargo.toml b/lcli/Cargo.toml index 2eed9da4c05..8f020e03876 100644 --- a/lcli/Cargo.toml +++ b/lcli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "lcli" description = "Lighthouse CLI (modeled after zcli)" -version = "7.1.0" +version = "8.0.0-rc.0" authors = ["Paul Hauner "] edition = { workspace = true } diff --git a/lighthouse/Cargo.toml b/lighthouse/Cargo.toml index bf8241f8a2d..4139286b532 100644 --- a/lighthouse/Cargo.toml +++ b/lighthouse/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lighthouse" -version = "7.1.0" +version = "8.0.0-rc.0" authors = ["Sigma Prime "] edition = { workspace = true } autotests = false From 9c6d33110b910572c460b89005b6f47afe81ae84 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Tue, 30 Sep 2025 15:10:42 +1000 Subject: [PATCH 33/45] Update book for DB schema v28 (#8132) Co-Authored-By: Michael Sproul --- book/src/advanced_database_migrations.md | 2 ++ wordlist.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/book/src/advanced_database_migrations.md b/book/src/advanced_database_migrations.md index e29397619cf..3552a90b0e8 100644 --- a/book/src/advanced_database_migrations.md +++ b/book/src/advanced_database_migrations.md @@ -17,6 +17,7 @@ validator client or the slasher**. | Lighthouse version | Release date | Schema version | Downgrade available? | |--------------------|--------------|----------------|----------------------| +| v8.0.0-rc.0 | Sep 2025 | v28 | yes before Fulu | | v7.1.0 | Jul 2025 | v26 | yes | | v7.0.0 | Apr 2025 | v22 | no | | v6.0.0 | Nov 2024 | v22 | no | @@ -207,6 +208,7 @@ Here are the steps to prune historic states: | Lighthouse version | Release date | Schema version | Downgrade available? | |--------------------|--------------|----------------|-------------------------------------| +| v8.0.0-rc.0 | Sep 2025 | v28 | yes before Fulu | | v7.1.0 | Jul 2025 | v26 | yes | | v7.0.0 | Apr 2025 | v22 | no | | v6.0.0 | Nov 2024 | v22 | no | diff --git a/wordlist.txt b/wordlist.txt index 57674cf9749..58c4cf6db1e 100644 --- a/wordlist.txt +++ b/wordlist.txt @@ -39,6 +39,7 @@ EthStaker Exercism Extractable FFG +Fulu Geth GiB Gitcoin From af5cbfbd4483a6f95f3b257748922e1d60e9951d Mon Sep 17 00:00:00 2001 From: Mac L Date: Tue, 30 Sep 2025 17:42:27 +1000 Subject: [PATCH 34/45] Bump superstruct to `0.10.0` (#8133) Bump `superstruct` to the latest release `0.10.0`. 
This version uses a later version of `darling` which is helpful for https://github.com/sigp/lighthouse/pull/8125 Co-Authored-By: Mac L --- Cargo.lock | 14 +++++++------- Cargo.toml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 352ff779752..94d0033d4bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2163,7 +2163,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18e4fdb82bd54a12e42fb58a800dcae6b9e13982238ce2296dc3570b92148e1f" dependencies = [ "data-encoding", - "syn 1.0.109", + "syn 2.0.100", ] [[package]] @@ -5122,7 +5122,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -9065,16 +9065,16 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "superstruct" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0f31f730ad9e579364950e10d6172b4a9bd04b447edf5988b066a860cc340e" +checksum = "3b986e4a629907f20a2c2a639a75bc22a8b5d99b444e0d83c395f4cb309022bf" dependencies = [ - "darling 0.13.4", - "itertools 0.10.5", + "darling 0.20.10", + "itertools 0.13.0", "proc-macro2", "quote", "smallvec", - "syn 1.0.109", + "syn 2.0.100", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 66378a16c46..e471c4e2388 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -242,7 +242,7 @@ ssz_types = "0.11.0" state_processing = { path = "consensus/state_processing" } store = { path = "beacon_node/store" } strum = { version = "0.24", features = ["derive"] } -superstruct = "0.8" +superstruct = "0.10" swap_or_not_shuffle = { path = "consensus/swap_or_not_shuffle" } syn = "1" sysinfo = "0.26" From 26575c594c77942a2014182b8c9a5c6832b7daa0 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Wed, 1 Oct 2025 19:29:15 +1000 Subject: [PATCH 35/45] Improve spec compliance for `/eth/v1/config/spec` API (#8144) - [x] Remove the unnecessary `_MILLIS` suffix from `MAXIMUM_GOSSIP_CLOCK_DISPARITY` - [x] Add missing Deneb preset `KZG_COMMITMENT_INCLUSION_PROOF_DEPTH`, not to be confused with `KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH` (plural) from Fulu... 
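As a quick reference (not part of the patch), the affected keys in the `/eth/v1/config/spec` response should end up looking roughly like the following, using the mainnet defaults and the usual quoted-integer serialization; the exact surrounding JSON is an assumption:

```
"MAXIMUM_GOSSIP_CLOCK_DISPARITY": "500",
"KZG_COMMITMENT_INCLUSION_PROOF_DEPTH": "17",
```

The first value is still expressed in milliseconds, it just loses the `_MILLIS` suffix from its key; the second is the Deneb preset depth (17 on mainnet).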
Co-Authored-By: Michael Sproul --- consensus/types/src/chain_spec.rs | 20 ++++++++++---------- consensus/types/src/preset.rs | 3 +++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/consensus/types/src/chain_spec.rs b/consensus/types/src/chain_spec.rs index 7916e9fcdb1..50a2f268e00 100644 --- a/consensus/types/src/chain_spec.rs +++ b/consensus/types/src/chain_spec.rs @@ -227,7 +227,7 @@ pub struct ChainSpec { pub ttfb_timeout: u64, pub resp_timeout: u64, pub attestation_propagation_slot_range: u64, - pub maximum_gossip_clock_disparity_millis: u64, + pub maximum_gossip_clock_disparity: u64, pub message_domain_invalid_snappy: [u8; 4], pub message_domain_valid_snappy: [u8; 4], pub subnets_per_node: u8, @@ -670,7 +670,7 @@ impl ChainSpec { } pub fn maximum_gossip_clock_disparity(&self) -> Duration { - Duration::from_millis(self.maximum_gossip_clock_disparity_millis) + Duration::from_millis(self.maximum_gossip_clock_disparity) } pub fn ttfb_timeout(&self) -> Duration { @@ -1112,7 +1112,7 @@ impl ChainSpec { attestation_propagation_slot_range: default_attestation_propagation_slot_range(), attestation_subnet_count: 64, subnets_per_node: 2, - maximum_gossip_clock_disparity_millis: default_maximum_gossip_clock_disparity_millis(), + maximum_gossip_clock_disparity: default_maximum_gossip_clock_disparity(), target_aggregators_per_committee: 16, max_payload_size: default_max_payload_size(), min_epochs_for_block_requests: default_min_epochs_for_block_requests(), @@ -1458,7 +1458,7 @@ impl ChainSpec { attestation_propagation_slot_range: default_attestation_propagation_slot_range(), attestation_subnet_count: 64, subnets_per_node: 4, // Make this larger than usual to avoid network damage - maximum_gossip_clock_disparity_millis: default_maximum_gossip_clock_disparity_millis(), + maximum_gossip_clock_disparity: default_maximum_gossip_clock_disparity(), target_aggregators_per_committee: 16, max_payload_size: default_max_payload_size(), min_epochs_for_block_requests: 33024, @@ -1779,9 +1779,9 @@ pub struct Config { #[serde(default = "default_attestation_propagation_slot_range")] #[serde(with = "serde_utils::quoted_u64")] attestation_propagation_slot_range: u64, - #[serde(default = "default_maximum_gossip_clock_disparity_millis")] + #[serde(default = "default_maximum_gossip_clock_disparity")] #[serde(with = "serde_utils::quoted_u64")] - maximum_gossip_clock_disparity_millis: u64, + maximum_gossip_clock_disparity: u64, #[serde(default = "default_message_domain_invalid_snappy")] #[serde(with = "serde_utils::bytes_4_hex")] message_domain_invalid_snappy: [u8; 4], @@ -1995,7 +1995,7 @@ const fn default_attestation_propagation_slot_range() -> u64 { 32 } -const fn default_maximum_gossip_clock_disparity_millis() -> u64 { +const fn default_maximum_gossip_clock_disparity() -> u64 { 500 } @@ -2214,7 +2214,7 @@ impl Config { ttfb_timeout: spec.ttfb_timeout, resp_timeout: spec.resp_timeout, attestation_propagation_slot_range: spec.attestation_propagation_slot_range, - maximum_gossip_clock_disparity_millis: spec.maximum_gossip_clock_disparity_millis, + maximum_gossip_clock_disparity: spec.maximum_gossip_clock_disparity, message_domain_invalid_snappy: spec.message_domain_invalid_snappy, message_domain_valid_snappy: spec.message_domain_valid_snappy, max_request_blocks_deneb: spec.max_request_blocks_deneb, @@ -2302,7 +2302,7 @@ impl Config { message_domain_valid_snappy, max_request_blocks, attestation_propagation_slot_range, - maximum_gossip_clock_disparity_millis, + maximum_gossip_clock_disparity, 
max_request_blocks_deneb, max_request_blob_sidecars, max_request_data_column_sidecars, @@ -2378,7 +2378,7 @@ impl Config { attestation_subnet_prefix_bits, max_request_blocks, attestation_propagation_slot_range, - maximum_gossip_clock_disparity_millis, + maximum_gossip_clock_disparity, max_request_blocks_deneb, max_request_blob_sidecars, max_request_data_column_sidecars, diff --git a/consensus/types/src/preset.rs b/consensus/types/src/preset.rs index c31183192f2..ab54c0345f7 100644 --- a/consensus/types/src/preset.rs +++ b/consensus/types/src/preset.rs @@ -208,6 +208,8 @@ pub struct DenebPreset { #[serde(with = "serde_utils::quoted_u64")] pub max_blob_commitments_per_block: u64, #[serde(with = "serde_utils::quoted_u64")] + pub kzg_commitment_inclusion_proof_depth: u64, + #[serde(with = "serde_utils::quoted_u64")] pub field_elements_per_blob: u64, } @@ -215,6 +217,7 @@ impl DenebPreset { pub fn from_chain_spec(_spec: &ChainSpec) -> Self { Self { max_blob_commitments_per_block: E::max_blob_commitments_per_block() as u64, + kzg_commitment_inclusion_proof_depth: E::KzgCommitmentInclusionProofDepth::to_u64(), field_elements_per_blob: E::field_elements_per_blob() as u64, } } From ff8b514b3f012537de8b99cd526d15bdb4610698 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Tue, 7 Oct 2025 06:26:37 +1100 Subject: [PATCH 36/45] Remove unnecessary warning logs and update logging levels (#8145) @michaelsproul noticed this warning on a devnet-3 node ``` Oct 01 16:37:29.896 WARN Error when importing rpc custody columns error: ParentUnknown { parent_root: 0xe4cc85a2137b76eb083d7076255094a90f10caaec0afc8fd36807db742f6ff13 }, block_hash: 0x43ce63b2344990f5f4d8911b8f14e3d3b6b006edc35bbc833360e667df0edef7 ``` We're also seeing similar `WARN` logs for blobs on our live nodes. It's normal to get parent unknown in lookups and it's handled here https://github.com/sigp/lighthouse/blob/a134d43446f776fe2a84f420854afbff76ca93d8/beacon_node/network/src/sync/block_lookups/mod.rs#L611-L619 These shouldn't be a `WARN`, and we also log the same error in block lookups at `DEBUG` level here: https://github.com/sigp/lighthouse/blob/a134d43446f776fe2a84f420854afbff76ca93d8/beacon_node/network/src/sync/block_lookups/mod.rs#L643-L648 So i've removed these extra WARN logs. I've also lower the level of an `ERROR` log when unable to serve data column root requests - it's unexpected, but is unlikely to impact the nodes performance, so I think we can downgrade this. Co-Authored-By: Jimmy Chen --- .../network_beacon_processor/rpc_methods.rs | 6 +++--- .../network_beacon_processor/sync_methods.rs | 19 ++++--------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs index 9ddba86b81d..58e02ffe007 100644 --- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs @@ -437,12 +437,12 @@ impl NetworkBeaconProcessor { } } Err(e) => { - // TODO(das): lower log level when feature is stabilized - error!( + // The node is expected to be able to serve these columns, but it fails to retrieve them. 
+ warn!( block_root = ?data_column_ids_by_root.block_root, %peer_id, error = ?e, - "Error getting data column" + "Error getting data column for by root request " ); return Err((RpcErrorResponse::ServerError, "Error getting data column")); } diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index f139724702f..1d99540c299 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -333,14 +333,8 @@ impl NetworkBeaconProcessor { "Blobs have already been imported" ); } - Err(e) => { - warn!( - error = ?e, - block_hash = %block_root, - %slot, - "Error when importing rpc blobs" - ); - } + // Errors are handled and logged in `block_lookups` + Err(_) => {} } // Sync handles these results @@ -414,13 +408,8 @@ impl NetworkBeaconProcessor { "Custody columns have already been imported" ); } - Err(e) => { - warn!( - error = ?e, - block_hash = %block_root, - "Error when importing rpc custody columns" - ); - } + // Errors are handled and logged in `block_lookups` + Err(_) => {} } self.send_sync_message(SyncMessage::BlockComponentProcessed { From 4eb89604f8b560876cadb410fdf9b7af08457f48 Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Tue, 7 Oct 2025 07:32:35 -0700 Subject: [PATCH 37/45] Fulu ASCII art (#8151) Co-Authored-By: Eitan Seri- Levi --- beacon_node/network/src/service.rs | 1 + consensus/types/src/fork_name.rs | 40 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/beacon_node/network/src/service.rs b/beacon_node/network/src/service.rs index c97206ea873..4bd649ba824 100644 --- a/beacon_node/network/src/service.rs +++ b/beacon_node/network/src/service.rs @@ -840,6 +840,7 @@ impl NetworkService { new_fork = ?new_fork_name, "Transitioned to new fork" ); + new_fork_name.fork_ascii(); } fork_context.update_current_fork(*new_fork_name, new_fork_digest, current_epoch); diff --git a/consensus/types/src/fork_name.rs b/consensus/types/src/fork_name.rs index f12b14ff6ed..363d9e77a22 100644 --- a/consensus/types/src/fork_name.rs +++ b/consensus/types/src/fork_name.rs @@ -201,6 +201,46 @@ impl ForkName { pub fn gloas_enabled(self) -> bool { self >= ForkName::Gloas } + + pub fn fork_ascii(self) { + if self == ForkName::Fulu { + println!( + r#" + ╔═══════════════════════════════════════╗ + ║ ║ + ║ TO FULU, MOAR BLOBS TO ETHEREUM ║ + ║ ║ + ║ III DECEMBER MMXXV ║ + ║ ║ + ╚═══════════════════════════════════════╝ + + ============================================================================= + |||| |||| + |---------------------------------------------------------------------------| + |___-----___-----___-----___-----___-----___-----___-----___-----___-----___| + / _ \===/ _ \ / _ \===/ _ \ / _ \===/ _ \ / _ \===/ _ \ + ( (.\ oOo /.) ) ( (.\ oOo /.) ) ( (.\ oOo /.) ) ( (.\ oOo /.) ) + \__/=====\__/ \__/=====\__/ \__/=====\__/ \__/=====\__/ + ||||||| ||||||| ||||||| ||||||| + ||||||| ||||||| \\/), ||||||| ||||||| + ||||||| ||||||| ,'.' /, ||||||| ||||||| + ||||||| ||||||| (_)- / /, ||||||| ||||||| + ||||||| ||||||| /\_/ |__..--, * ||||||| ||||||| + ||||||| ||||||| (\___/\ \ \ / ).' ||||||| ||||||| + ||||||| ||||||| \____/ / (_ // ||||||| ||||||| + ||||||| ||||||| \\_ ,'--'\_( ||||||| ||||||| + (oOoOo) (oOoOo) )_)_/ )_/ )_) (oOoOo) (oOoOo) + J%%%%%L J%%%%%L (_(_.'(_.'(_.' 
J%%%%%L J%%%%%L + ZZZZZZZZZ ZZZZZZZZZ ZZZZZZZZZ ZZZZZZZZZ + =========================================================================== + |_________________________________________________________________________| + |___________________________________________________________________________| + |_____________________________________________________________________________| + |_______________________________________________________________________________| + "# + ); + } + } } /// Map a fork name into a fork-versioned superstruct type like `BeaconBlock`. From a4ad3e492f420f484ae36871f8bc9217a0518232 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Tue, 7 Oct 2025 07:32:41 -0700 Subject: [PATCH 38/45] Fallback to getPayload v1 if v2 fails (#8163) N/A Post fulu, we should be calling the v2 api on the relays that doesn't return the blobs/data columns. However, we decided to start hitting the v2 api as soon as fulu is scheduled to avoid unexpected surprises at the fork. In the ACDT call, it seems like most clients are calling v2 only after the fulu fork. This PR aims to be the best of both worlds where we fallback to hitting v1 api if v2 fails. This way, we know beforehand if relays don't support it and can potentially alert them. Co-Authored-By: Pawan Dhananjay --- beacon_node/execution_layer/src/lib.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/beacon_node/execution_layer/src/lib.rs b/beacon_node/execution_layer/src/lib.rs index 401646f3670..a5fa0f34158 100644 --- a/beacon_node/execution_layer/src/lib.rs +++ b/beacon_node/execution_layer/src/lib.rs @@ -1914,9 +1914,19 @@ impl ExecutionLayer { ) -> Result, Error> { debug!(?block_root, "Sending block to builder"); if spec.is_fulu_scheduled() { - self.post_builder_blinded_blocks_v2(block_root, block) + let resp = self + .post_builder_blinded_blocks_v2(block_root, block) .await - .map(|()| SubmitBlindedBlockResponse::V2) + .map(|()| SubmitBlindedBlockResponse::V2); + // Fallback to v1 if v2 fails because the relay doesn't support it. + // Note: we should remove the fallback post fulu when all relays have support for v2. + if resp.is_err() { + self.post_builder_blinded_blocks_v1(block_root, block) + .await + .map(|full_payload| SubmitBlindedBlockResponse::V1(Box::new(full_payload))) + } else { + resp + } } else { self.post_builder_blinded_blocks_v1(block_root, block) .await From b5c2a9668edb6be72a39d136d333449934b75ac7 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Wed, 8 Oct 2025 11:05:41 +1100 Subject: [PATCH 39/45] Quote `BeaconState::proposer_lookahead` in JSON repr (#8167) Use quoted integers for `state.proposer_lookahead` when serializing JSON. This is standard for all integer fields, but was missed for the newly added proposer lookahead. I noticed this issue while inspecting the head state on a local devnet. 
I'm glad we found this before someone reported it :P Co-Authored-By: Michael Sproul --- consensus/types/src/beacon_state.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/consensus/types/src/beacon_state.rs b/consensus/types/src/beacon_state.rs index 0a3d768c593..1bd4927fe87 100644 --- a/consensus/types/src/beacon_state.rs +++ b/consensus/types/src/beacon_state.rs @@ -592,6 +592,7 @@ where #[compare_fields(as_iter)] #[test_random(default)] #[superstruct(only(Fulu, Gloas))] + #[serde(with = "ssz_types::serde_utils::quoted_u64_fixed_vec")] pub proposer_lookahead: Vector, // Gloas From 2a433bc4066b949d8c61661d467bb645cc4b6b1e Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 8 Oct 2025 12:52:41 +1100 Subject: [PATCH 40/45] Remove deprecated CLI flags and references for v8.0.0 (#8142) Closes #8131 - [x] Remove deprecated flags from beacon_node/src/cli.rs: - [x] eth1-purge-cache - [x] eth1-blocks-per-log-query - [x] eth1-cache-follow-distance - [x] disable-deposit-contract-sync - [x] light-client-server - [x] Remove deprecated flags from lighthouse/src/main.rs: - [x] logfile - [x] terminal-total-difficulty-override - [x] terminal-block-hash-override - [x] terminal-block-hash-epoch-override - [x] safe-slots-to-import-optimistically - [x] Remove references to deprecated flags in config.rs files - [x] Remove warning messages for deprecated flags in main.rs - [x] Update/remove related tests in beacon_node.rs Co-Authored-By: Jimmy Chen Co-Authored-By: Jimmy Chen --- beacon_node/src/cli.rs | 51 -------------------- beacon_node/src/config.rs | 29 ----------- book/src/help_bn.md | 2 - lighthouse/src/main.rs | 73 +--------------------------- lighthouse/tests/beacon_node.rs | 85 --------------------------------- 5 files changed, 1 insertion(+), 239 deletions(-) diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 569d1e4ad81..2e3b3fde4b0 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -699,38 +699,6 @@ pub fn cli_app() -> Command { .help_heading(FLAG_HEADER) .display_order(0) ) - - /* - * Eth1 Integration - */ - .arg( - Arg::new("eth1-purge-cache") - .long("eth1-purge-cache") - .value_name("PURGE-CACHE") - .help("DEPRECATED") - .action(ArgAction::SetTrue) - .help_heading(FLAG_HEADER) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("eth1-blocks-per-log-query") - .long("eth1-blocks-per-log-query") - .value_name("BLOCKS") - .help("DEPRECATED") - .action(ArgAction::Set) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("eth1-cache-follow-distance") - .long("eth1-cache-follow-distance") - .value_name("BLOCKS") - .help("DEPRECATED") - .action(ArgAction::Set) - .display_order(0) - .hide(true) - ) .arg( Arg::new("slots-per-restore-point") .long("slots-per-restore-point") @@ -1498,16 +1466,6 @@ pub fn cli_app() -> Command { .help_heading(FLAG_HEADER) .display_order(0) ) - .arg( - Arg::new("disable-deposit-contract-sync") - .long("disable-deposit-contract-sync") - .help("DEPRECATED") - .action(ArgAction::SetTrue) - .help_heading(FLAG_HEADER) - .conflicts_with("staking") - .display_order(0) - .hide(true) - ) .arg( Arg::new("disable-optimistic-finalized-sync") .long("disable-optimistic-finalized-sync") @@ -1518,15 +1476,6 @@ pub fn cli_app() -> Command { Lighthouse and only passed to the EL if initial verification fails.") .display_order(0) ) - .arg( - Arg::new("light-client-server") - .long("light-client-server") - .help("DEPRECATED") - .action(ArgAction::SetTrue) - - .help_heading(FLAG_HEADER) - .display_order(0) - ) .arg( 
Arg::new("disable-light-client-server") .long("disable-light-client-server") diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 230350fade4..c2599ec0cd9 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -170,13 +170,6 @@ pub fn get_config( parse_required(cli_args, "http-duplicate-block-status")?; } - if cli_args.get_flag("light-client-server") { - warn!( - "The --light-client-server flag is deprecated. The light client server is enabled \ - by default" - ); - } - if cli_args.get_flag("disable-light-client-server") { client_config.chain.enable_light_client_server = false; } @@ -262,24 +255,6 @@ pub fn get_config( client_config.http_metrics.allocator_metrics_enabled = false; } - /* - * Deprecated Eth1 flags (can be removed in the next minor release after v7.1.0) - */ - if cli_args - .get_one::("eth1-blocks-per-log-query") - .is_some() - { - warn!("The eth1-blocks-per-log-query flag is deprecated"); - } - - if cli_args.get_flag("eth1-purge-cache") { - warn!("The eth1-purge-cache flag is deprecated"); - } - - if clap_utils::parse_optional::(cli_args, "eth1-cache-follow-distance")?.is_some() { - warn!("The eth1-cache-follow-distance flag is deprecated"); - } - // `--execution-endpoint` is required now. let endpoints: String = clap_utils::parse_required(cli_args, "execution-endpoint")?; let mut el_config = execution_layer::Config::default(); @@ -773,10 +748,6 @@ pub fn get_config( } } - if cli_args.get_flag("disable-deposit-contract-sync") { - warn!("The disable-deposit-contract-sync flag is deprecated"); - } - client_config.chain.prepare_payload_lookahead = clap_utils::parse_optional(cli_args, "prepare-payload-lookahead")? .map(Duration::from_millis) diff --git a/book/src/help_bn.md b/book/src/help_bn.md index d5396321f2c..6680202a277 100644 --- a/book/src/help_bn.md +++ b/book/src/help_bn.md @@ -513,8 +513,6 @@ Flags: subscriptions. This will only import attestations from already-subscribed subnets, use with --subscribe-all-subnets to ensure all attestations are received for import. - --light-client-server - DEPRECATED --log-color [] Enables/Disables colors for logs in terminal. Set it to false to disable colors. 
[default: true] [possible values: true, false] diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index 8660074e91d..c93016a0f54 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -28,7 +28,7 @@ use std::path::PathBuf; use std::process::exit; use std::sync::LazyLock; use task_executor::ShutdownReason; -use tracing::{Level, info, warn}; +use tracing::{Level, info}; use tracing_subscriber::{Layer, filter::EnvFilter, layer::SubscriberExt, util::SubscriberInitExt}; use types::{EthSpec, EthSpecId}; use validator_client::ProductionValidatorClient; @@ -126,16 +126,6 @@ fn main() { .global(true) .display_order(0), ) - .arg( - Arg::new("logfile") - .long("logfile") - .value_name("PATH") - .help("DEPRECATED") - .action(ArgAction::Set) - .global(true) - .hide(true) - .display_order(0) - ) .arg( Arg::new("logfile-dir") .long("logfile-dir") @@ -385,48 +375,6 @@ fn main() { .global(true) .display_order(0) ) - .arg( - Arg::new("terminal-total-difficulty-override") - .long("terminal-total-difficulty-override") - .value_name("INTEGER") - .help("DEPRECATED") - .action(ArgAction::Set) - .global(true) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("terminal-block-hash-override") - .long("terminal-block-hash-override") - .value_name("TERMINAL_BLOCK_HASH") - .help("DEPRECATED") - .requires("terminal-block-hash-epoch-override") - .action(ArgAction::Set) - .global(true) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("terminal-block-hash-epoch-override") - .long("terminal-block-hash-epoch-override") - .value_name("EPOCH") - .help("DEPRECATED") - .requires("terminal-block-hash-override") - .action(ArgAction::Set) - .global(true) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("safe-slots-to-import-optimistically") - .long("safe-slots-to-import-optimistically") - .value_name("INTEGER") - .help("DEPRECATED") - .action(ArgAction::Set) - .global(true) - .display_order(0) - .hide(true) - ) .arg( Arg::new("genesis-state-url") .long("genesis-state-url") @@ -780,11 +728,6 @@ fn run( // Allow Prometheus access to the version and commit of the Lighthouse build. metrics::expose_lighthouse_version(); - // DEPRECATED: can be removed in v7.2.0/v8.0.0. - if clap_utils::parse_optional::(matches, "logfile")?.is_some() { - warn!("The --logfile flag is deprecated and replaced by --logfile-dir"); - } - #[cfg(all(feature = "modern", target_arch = "x86_64"))] if !std::is_x86_feature_detected!("adx") { tracing::warn!( @@ -793,20 +736,6 @@ fn run( ); } - // Warn for DEPRECATED global flags. This code should be removed when we finish deleting these - // flags. - let deprecated_flags = [ - "terminal-total-difficulty-override", - "terminal-block-hash-override", - "terminal-block-hash-epoch-override", - "safe-slots-to-import-optimistically", - ]; - for flag in deprecated_flags { - if matches.get_one::(flag).is_some() { - warn!("The {} flag is deprecated and does nothing", flag); - } - } - // Note: the current code technically allows for starting a beacon node _and_ a validator // client at the same time. // diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 8f6d040b62a..5a057d7d7f8 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -423,29 +423,6 @@ fn complete_blob_backfill_and_prune_blobs_true() { }); } -// Tests for Eth1 flags. 
-// DEPRECATED but should not crash -#[test] -fn eth1_blocks_per_log_query_flag() { - CommandLineTest::new() - .flag("eth1-blocks-per-log-query", Some("500")) - .run_with_zero_port(); -} -// DEPRECATED but should not crash -#[test] -fn eth1_purge_cache_flag() { - CommandLineTest::new() - .flag("eth1-purge-cache", None) - .run_with_zero_port(); -} -// DEPRECATED but should not crash -#[test] -fn eth1_cache_follow_distance_manual() { - CommandLineTest::new() - .flag("eth1-cache-follow-distance", Some("128")) - .run_with_zero_port(); -} - // Tests for Bellatrix flags. fn run_bellatrix_execution_endpoints_flag_test(flag: &str) { use sensitive_url::SensitiveUrl; @@ -781,31 +758,6 @@ fn jwt_optional_flags() { fn jwt_optional_alias_flags() { run_jwt_optional_flags_test("jwt-secrets", "jwt-id", "jwt-version"); } -// DEPRECATED. This flag is deprecated but should not cause a crash. -#[test] -fn terminal_total_difficulty_override_flag() { - CommandLineTest::new() - .flag("terminal-total-difficulty-override", Some("1337424242")) - .run_with_zero_port(); -} -// DEPRECATED. This flag is deprecated but should not cause a crash. -#[test] -fn terminal_block_hash_and_activation_epoch_override_flags() { - CommandLineTest::new() - .flag("terminal-block-hash-epoch-override", Some("1337")) - .flag( - "terminal-block-hash-override", - Some("0x4242424242424242424242424242424242424242424242424242424242424242"), - ) - .run_with_zero_port(); -} -// DEPRECATED. This flag is deprecated but should not cause a crash. -#[test] -fn safe_slots_to_import_optimistically_flag() { - CommandLineTest::new() - .flag("safe-slots-to-import-optimistically", Some("421337")) - .run_with_zero_port(); -} // Tests for Network flags. #[test] @@ -2523,42 +2475,6 @@ fn logfile_format_flag() { ) }); } -// DEPRECATED but should not crash. -#[test] -fn deprecated_logfile() { - CommandLineTest::new() - .flag("logfile", Some("test.txt")) - .run_with_zero_port(); -} - -// DEPRECATED but should not crash. -#[test] -fn sync_eth1_chain_disable_deposit_contract_sync_flag() { - let dir = TempDir::new().expect("Unable to create temporary directory"); - CommandLineTest::new_with_no_execution_endpoint() - .flag("disable-deposit-contract-sync", None) - .flag("execution-endpoints", Some("http://localhost:8551/")) - .flag( - "execution-jwt", - dir.path().join("jwt-file").as_os_str().to_str(), - ) - .run_with_zero_port(); -} - -#[test] -#[should_panic] -fn disable_deposit_contract_sync_conflicts_with_staking() { - let dir = TempDir::new().expect("Unable to create temporary directory"); - CommandLineTest::new_with_no_execution_endpoint() - .flag("disable-deposit-contract-sync", None) - .flag("staking", None) - .flag("execution-endpoints", Some("http://localhost:8551/")) - .flag( - "execution-jwt", - dir.path().join("jwt-file").as_os_str().to_str(), - ) - .run_with_zero_port(); -} #[test] fn light_client_server_default() { @@ -2573,7 +2489,6 @@ fn light_client_server_default() { #[test] fn light_client_server_enabled() { CommandLineTest::new() - .flag("light-client-server", None) .run_with_zero_port() .with_config(|config| { assert!(config.network.enable_light_client_server); From 13dfa9200f822c41ccd81b95a3f052df54c888e9 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Wed, 8 Oct 2025 17:09:12 +1100 Subject: [PATCH 41/45] Block proposal optimisations (#8156) Closes: - https://github.com/sigp/lighthouse/issues/4412 This should reduce Lighthouse's block proposal times on Holesky and prevent us getting reorged. 
- [x] Allow the head state to be advanced further than 1 slot. This lets us avoid epoch processing on hot paths including block production, by having new epoch boundaries pre-computed and available in the state cache. - [x] Use the finalized state to prune the op pool. We were previously using the head state and trying to infer slashing/exit relevance based on `exit_epoch`. However some exit epochs are far in the future, despite occurring recently. Co-Authored-By: Michael Sproul --- beacon_node/beacon_chain/src/beacon_chain.rs | 4 ++ .../beacon_chain/src/canonical_head.rs | 30 ++++++---- .../beacon_chain/src/state_advance_timer.rs | 36 +----------- beacon_node/operation_pool/src/lib.rs | 55 +++++++++---------- 4 files changed, 53 insertions(+), 72 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index afbf3278fe0..f085684442b 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -5233,16 +5233,20 @@ impl BeaconChain { None }; + let slashings_and_exits_span = debug_span!("get_slashings_and_exits").entered(); let (mut proposer_slashings, mut attester_slashings, mut voluntary_exits) = self.op_pool.get_slashings_and_exits(&state, &self.spec); + drop(slashings_and_exits_span); let eth1_data = state.eth1_data().clone(); let deposits = vec![]; + let bls_changes_span = debug_span!("get_bls_to_execution_changes").entered(); let bls_to_execution_changes = self .op_pool .get_bls_to_execution_changes(&state, &self.spec); + drop(bls_changes_span); // Iterate through the naive aggregation pool and ensure all the attestations from there // are included in the operation pool. diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index cfc7a9637b2..7dd4c88c513 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -937,13 +937,6 @@ impl BeaconChain { .execution_status .is_optimistic_or_invalid(); - self.op_pool.prune_all( - &new_snapshot.beacon_block, - &new_snapshot.beacon_state, - self.epoch()?, - &self.spec, - ); - self.observed_block_producers.write().prune( new_view .finalized_checkpoint @@ -982,9 +975,9 @@ impl BeaconChain { })); } - // The store migration task requires the *state at the slot of the finalized epoch*, - // rather than the state of the latest finalized block. These two values will only - // differ when the first slot of the finalized epoch is a skip slot. + // The store migration task and op pool pruning require the *state at the first slot of the + // finalized epoch*, rather than the state of the latest finalized block. These two values + // will only differ when the first slot of the finalized epoch is a skip slot. // // Use the `StateRootsIterator` directly rather than `BeaconChain::state_root_at_slot` // to ensure we use the same state that we just set as the head. @@ -1006,6 +999,23 @@ impl BeaconChain { )? .ok_or(Error::MissingFinalizedStateRoot(new_finalized_slot))?; + let update_cache = true; + let new_finalized_state = self + .store + .get_hot_state(&new_finalized_state_root, update_cache)? + .ok_or(Error::MissingBeaconState(new_finalized_state_root))?; + + self.op_pool.prune_all( + &new_snapshot.beacon_block, + &new_snapshot.beacon_state, + &new_finalized_state, + self.epoch()?, + &self.spec, + ); + + // We just pass the state root to the finalization thread. 
It should be able to reload the + // state from the state_cache near instantly anyway. We could experiment with sending the + // state over a channel in future, but it's probably no quicker. self.store_migrator.process_finalization( new_finalized_state_root.into(), new_view.finalized_checkpoint, diff --git a/beacon_node/beacon_chain/src/state_advance_timer.rs b/beacon_node/beacon_chain/src/state_advance_timer.rs index 27c2c7c0a11..87348cb01be 100644 --- a/beacon_node/beacon_chain/src/state_advance_timer.rs +++ b/beacon_node/beacon_chain/src/state_advance_timer.rs @@ -33,7 +33,7 @@ use types::{AttestationShufflingId, BeaconStateError, EthSpec, Hash256, Relative /// /// This avoids doing unnecessary work whilst the node is syncing or has perhaps been put to sleep /// for some period of time. -const MAX_ADVANCE_DISTANCE: u64 = 4; +const MAX_ADVANCE_DISTANCE: u64 = 256; /// Similarly for fork choice: avoid the fork choice lookahead during sync. /// @@ -49,17 +49,7 @@ enum Error { HeadMissingFromSnapshotCache(#[allow(dead_code)] Hash256), BeaconState(#[allow(dead_code)] BeaconStateError), Store(#[allow(dead_code)] store::Error), - MaxDistanceExceeded { - current_slot: Slot, - head_slot: Slot, - }, - StateAlreadyAdvanced { - block_root: Hash256, - }, - BadStateSlot { - _state_slot: Slot, - _block_slot: Slot, - }, + MaxDistanceExceeded { current_slot: Slot, head_slot: Slot }, } impl From for Error { @@ -180,9 +170,6 @@ async fn state_advance_timer( error = ?e, "Failed to advance head state" ), - Err(Error::StateAlreadyAdvanced { block_root }) => { - debug!(?block_root, "State already advanced on slot") - } Err(Error::MaxDistanceExceeded { current_slot, head_slot, @@ -295,25 +282,6 @@ fn advance_head(beacon_chain: &Arc>) -> Resu .get_advanced_hot_state(head_block_root, current_slot, head_block_state_root)? .ok_or(Error::HeadMissingFromSnapshotCache(head_block_root))?; - // Protect against advancing a state more than a single slot. - // - // Advancing more than one slot without storing the intermediate state would corrupt the - // database. Future works might store intermediate states inside this function. - match state.slot().cmp(&state.latest_block_header().slot) { - std::cmp::Ordering::Equal => (), - std::cmp::Ordering::Greater => { - return Err(Error::StateAlreadyAdvanced { - block_root: head_block_root, - }); - } - std::cmp::Ordering::Less => { - return Err(Error::BadStateSlot { - _block_slot: state.latest_block_header().slot, - _state_slot: state.slot(), - }); - } - } - let initial_slot = state.slot(); let initial_epoch = state.current_epoch(); diff --git a/beacon_node/operation_pool/src/lib.rs b/beacon_node/operation_pool/src/lib.rs index dd01f568fa3..24e2cfbbb5d 100644 --- a/beacon_node/operation_pool/src/lib.rs +++ b/beacon_node/operation_pool/src/lib.rs @@ -457,32 +457,35 @@ impl OperationPool { .collect() } - /// Prune proposer slashings for validators which are exited in the finalized epoch. - pub fn prune_proposer_slashings(&self, head_state: &BeaconState) { + /// Prune proposer slashings for validators which are already slashed or exited in the finalized + /// epoch. 
+ pub fn prune_proposer_slashings(&self, finalized_state: &BeaconState) { prune_validator_hash_map( &mut self.proposer_slashings.write(), - |_, validator| validator.exit_epoch <= head_state.finalized_checkpoint().epoch, - head_state, + |_, validator| { + validator.slashed || validator.exit_epoch <= finalized_state.current_epoch() + }, + finalized_state, ); } /// Prune attester slashings for all slashed or withdrawn validators, or attestations on another /// fork. - pub fn prune_attester_slashings(&self, head_state: &BeaconState) { + pub fn prune_attester_slashings(&self, finalized_state: &BeaconState) { self.attester_slashings.write().retain(|slashing| { // Check that the attestation's signature is still valid wrt the fork version. - let signature_ok = slashing.signature_is_still_valid(&head_state.fork()); + // We might be a bit slower to detect signature staleness by using the finalized state + // here, but we filter when proposing anyway, so in the worst case we just keep some + // stuff around until we finalize. + let signature_ok = slashing.signature_is_still_valid(&finalized_state.fork()); // Slashings that don't slash any validators can also be dropped. let slashing_ok = get_slashable_indices_modular( - head_state, + finalized_state, slashing.as_inner().to_ref(), |_, validator| { - // Declare that a validator is still slashable if they have not exited prior - // to the finalized epoch. - // - // We cannot check the `slashed` field since the `head` is not finalized and - // a fork could un-slash someone. - validator.exit_epoch > head_state.finalized_checkpoint().epoch + // Declare that a validator is still slashable if they have not been slashed in + // the finalized state, and have not exited at the finalized epoch. + !validator.slashed && validator.exit_epoch > finalized_state.current_epoch() }, ) .is_ok_and(|indices| !indices.is_empty()); @@ -531,17 +534,12 @@ impl OperationPool { ) } - /// Prune if validator has already exited at or before the finalized checkpoint of the head. - pub fn prune_voluntary_exits(&self, head_state: &BeaconState) { + /// Prune if validator has already exited in the finalized state. + pub fn prune_voluntary_exits(&self, finalized_state: &BeaconState, spec: &ChainSpec) { prune_validator_hash_map( &mut self.voluntary_exits.write(), - // This condition is slightly too loose, since there will be some finalized exits that - // are missed here. - // - // We choose simplicity over the gain of pruning more exits since they are small and - // should not be seen frequently. 
- |_, validator| validator.exit_epoch <= head_state.finalized_checkpoint().epoch, - head_state, + |_, validator| validator.exit_epoch != spec.far_future_epoch, + finalized_state, ); } @@ -642,14 +640,15 @@ impl OperationPool { &self, head_block: &SignedBeaconBlock, head_state: &BeaconState, + finalized_state: &BeaconState, current_epoch: Epoch, spec: &ChainSpec, ) { self.prune_attestations(current_epoch); self.prune_sync_contributions(head_state.slot()); - self.prune_proposer_slashings(head_state); - self.prune_attester_slashings(head_state); - self.prune_voluntary_exits(head_state); + self.prune_proposer_slashings(finalized_state); + self.prune_attester_slashings(finalized_state); + self.prune_voluntary_exits(finalized_state, spec); self.prune_bls_to_execution_changes(head_block, head_state, spec); } @@ -758,14 +757,14 @@ where fn prune_validator_hash_map( map: &mut HashMap>, prune_if: F, - head_state: &BeaconState, + state: &BeaconState, ) where F: Fn(u64, &Validator) -> bool, T: VerifyOperation, { map.retain(|&validator_index, op| { - op.signature_is_still_valid(&head_state.fork()) - && head_state + op.signature_is_still_valid(&state.fork()) + && state .validators() .get(validator_index as usize) .is_none_or(|validator| !prune_if(validator_index, validator)) From 8e382ceed9ae17a22a8f4e0a1b518194d2783592 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 8 Oct 2025 18:47:05 -0700 Subject: [PATCH 42/45] Bump kzg library versions (#8174) N/A Update c-kzg and rust-eth-kzg to their latest versions. Also removes the patch version hardcoding in Cargo.toml. Co-Authored-By: Pawan Dhananjay --- Cargo.lock | 53 ++++++++++++++++++++++++++--------------------------- Cargo.toml | 4 ++-- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 94d0033d4bb..481d2048652 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1165,9 +1165,9 @@ dependencies = [ [[package]] name = "blst" -version = "0.3.15" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fd49896f12ac9b6dcd7a5998466b9b58263a695a3dd1ecc1aaca2e12a90b080" +checksum = "dcdb4c7013139a150f9fc55d123186dbfaba0d912817466282c73ac49e71fb45" dependencies = [ "cc", "glob", @@ -1296,11 +1296,10 @@ dependencies = [ [[package]] name = "c-kzg" -version = "2.1.0" +version = "2.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7e3c397401eb76228c89561cf22f85f41c95aa799ee9d860de3ea1cbc728fc" +checksum = "e00bf4b112b07b505472dbefd19e37e53307e2bfed5a79e0cc161d58ccd0e687" dependencies = [ - "arbitrary", "blst", "cc", "glob", @@ -2163,7 +2162,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18e4fdb82bd54a12e42fb58a800dcae6b9e13982238ce2296dc3570b92148e1f" dependencies = [ "data-encoding", - "syn 2.0.100", + "syn 1.0.109", ] [[package]] @@ -2565,9 +2564,9 @@ dependencies = [ [[package]] name = "eip4844" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa86cda6af15a9a5e4cf680850addaee8cd427be95be3ec9d022b9d7b98a66c0" +checksum = "82ab45fc63db6bbe5c3eb7c79303b2aff7ee529c991b2111c46879d1ea38407e" dependencies = [ "ekzg-bls12-381", "ekzg-maybe-rayon", @@ -2590,9 +2589,9 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "ekzg-bls12-381" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"08f0e00a7689af7f4f17e85ae07f5a92b568a47297a165f685b828edfd82e02b" +checksum = "05c599a59deba6188afd9f783507e4d89efc997f0fa340a758f0d0992b322416" dependencies = [ "blst", "blstrs", @@ -2604,9 +2603,9 @@ dependencies = [ [[package]] name = "ekzg-erasure-codes" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bfc7ab684a7bb0c5ee37fd6a73da7425858cdd28f4a285c70361f001d6d0efc" +checksum = "8474a41a30ddd2b651798b1aa9ce92011207c3667186fe9044184683250109e7" dependencies = [ "ekzg-bls12-381", "ekzg-polynomial", @@ -2614,15 +2613,15 @@ dependencies = [ [[package]] name = "ekzg-maybe-rayon" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e0a4876a612b9317be470768e134b671b8e645e412a82eb12fdd9b1958fa6f9" +checksum = "9cf94d1385185c1f7caef4973be49702c7d9ffdeaf832d126dbb9ed6efe09d40" [[package]] name = "ekzg-multi-open" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f7964754aa0921aaa89b1589100e4cae9b31f87f137eeb0af5403fdfca68bfc" +checksum = "e6d37456a32cf79bdbddd6685a2adec73210e2d60332370bc0e9a502b6d93beb" dependencies = [ "ekzg-bls12-381", "ekzg-maybe-rayon", @@ -2632,9 +2631,9 @@ dependencies = [ [[package]] name = "ekzg-polynomial" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed36d2ddf86661c9d18e9d5dfc47dce6c9b6e44db385e2da71952b10ba32df1" +checksum = "704751bac85af4754bb8a14457ef24d820738062d0b6f3763534d0980b1a1e81" dependencies = [ "ekzg-bls12-381", "ekzg-maybe-rayon", @@ -2642,9 +2641,9 @@ dependencies = [ [[package]] name = "ekzg-serialization" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c83402d591ac3534d1ae654feb8f56ee64cc2bacfe80bece7977c24ca5e72e2" +checksum = "3cb983d9f75b2804c00246def8d52c01cf05f70c22593b8d314fbcf0cf89042b" dependencies = [ "ekzg-bls12-381", "hex", @@ -2652,9 +2651,9 @@ dependencies = [ [[package]] name = "ekzg-single-open" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05e1dbb13023ccebbb24593e4753c87f77b7fb78254a20aef1a028e979145092" +checksum = "799d5806d51e1453fa0f528d6acf4127e2a89e98312c826151ebc24ee3448ec3" dependencies = [ "ekzg-bls12-381", "ekzg-polynomial", @@ -2663,9 +2662,9 @@ dependencies = [ [[package]] name = "ekzg-trusted-setup" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff1cb3e907b27fa51f35def95eeabe47e97765e2b6bac7e55967500937f94282" +checksum = "85314d56718dc2c6dd77c3b3630f1839defcb6f47d9c20195608a0f7976095ab" dependencies = [ "ekzg-bls12-381", "ekzg-serialization", @@ -7375,7 +7374,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.100", @@ -8013,9 +8012,9 @@ dependencies = [ [[package]] name = "rust_eth_kzg" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc46814bb8e72bff20fe117db43b7455112e6fafdae7466f8f24d451ad773c0" +checksum = "1522b7a740cd7f5bc52ea49863618511c8de138dcdf3f8a80b15b3f764942a5b" dependencies = [ "eip4844", "ekzg-bls12-381", diff --git a/Cargo.toml b/Cargo.toml index e471c4e2388..a5f01a498de 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -117,7 +117,7 @@ byteorder = "1" bytes = "1" # Turn off c-kzg's default features which include `blst/portable`. We can turn on blst's portable # feature ourselves when desired. -c-kzg = { version = "2.1.0", default-features = false } +c-kzg = { version = "2.1", default-features = false } cargo_metadata = "0.19" clap = { version = "4.5.4", features = ["derive", "cargo", "wrap_help"] } clap_utils = { path = "common/clap_utils" } @@ -224,7 +224,7 @@ reqwest = { version = "0.11", default-features = false, features = [ ring = "0.17" rpds = "0.11" rusqlite = { version = "0.28", features = ["bundled"] } -rust_eth_kzg = "0.9.0" +rust_eth_kzg = "0.9" safe_arith = { path = "consensus/safe_arith" } sensitive_url = { path = "common/sensitive_url" } serde = { version = "1", features = ["derive"] } From 3110ca325b6314003f52c0ee71f33877d7be371a Mon Sep 17 00:00:00 2001 From: chonghe <44791194+chong-he@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:01:30 +0800 Subject: [PATCH 43/45] Implement `/eth/v1/beacon/blobs` endpoint (#8103) * #8085 Co-Authored-By: Tan Chee Keong Co-Authored-By: chonghe <44791194+chong-he@users.noreply.github.com> --- beacon_node/beacon_chain/src/kzg_utils.rs | 32 +++-- beacon_node/client/src/builder.rs | 2 +- beacon_node/http_api/src/block_id.rs | 71 ++++++++++- beacon_node/http_api/src/lib.rs | 50 +++++++- beacon_node/http_api/tests/tests.rs | 145 +++++++++++++++++++--- common/eth2/src/lib.rs | 42 ++++++- common/eth2/src/types.rs | 15 +++ consensus/types/src/beacon_response.rs | 24 +++- lcli/src/http_sync.rs | 2 +- testing/simulator/src/checks.rs | 2 +- 10 files changed, 345 insertions(+), 40 deletions(-) diff --git a/beacon_node/beacon_chain/src/kzg_utils.rs b/beacon_node/beacon_chain/src/kzg_utils.rs index ad669e17291..382775ab50f 100644 --- a/beacon_node/beacon_chain/src/kzg_utils.rs +++ b/beacon_node/beacon_chain/src/kzg_utils.rs @@ -299,6 +299,8 @@ pub(crate) fn build_data_column_sidecars( /// /// If `blob_indices_opt` is `None`, this function attempts to reconstruct all blobs associated /// with the block. 
+/// This function does NOT use rayon, as it is primarily used by a non-critical path in the HTTP API +/// and it will be slow if the node needs to reconstruct the blobs. pub fn reconstruct_blobs( kzg: &Kzg, data_columns: &[Arc>], @@ -320,7 +322,7 @@ pub fn reconstruct_blobs( }; let blob_sidecars = blob_indices - .into_par_iter() + .into_iter() .map(|row_index| { let mut cells: Vec = vec![]; let mut cell_ids: Vec = vec![]; @@ -337,16 +339,26 @@ pub fn reconstruct_blobs( cell_ids.push(data_column.index); } - let (cells, _kzg_proofs) = kzg - .recover_cells_and_compute_kzg_proofs(&cell_ids, &cells) - .map_err(|e| format!("Failed to recover cells and compute KZG proofs: {e:?}"))?; + let num_cells_original_blob = E::number_of_columns() / 2; + let blob_bytes = if data_columns.len() < E::number_of_columns() { + let (recovered_cells, _kzg_proofs) = kzg + .recover_cells_and_compute_kzg_proofs(&cell_ids, &cells) + .map_err(|e| { + format!("Failed to recover cells and compute KZG proofs: {e:?}") + })?; - let num_cells_original_blob = cells.len() / 2; - let blob_bytes = cells - .into_iter() - .take(num_cells_original_blob) - .flat_map(|cell| cell.into_iter()) - .collect(); + recovered_cells + .into_iter() + .take(num_cells_original_blob) + .flat_map(|cell| cell.into_iter()) + .collect() + } else { + cells + .into_iter() + .take(num_cells_original_blob) + .flat_map(|cell| (*cell).into_iter()) + .collect() + }; let blob = Blob::::new(blob_bytes).map_err(|e| format!("{e:?}"))?; let kzg_proof = KzgProof::empty(); diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs index d984d5fedce..02c042bf282 100644 --- a/beacon_node/client/src/builder.rs +++ b/beacon_node/client/src/builder.rs @@ -412,7 +412,7 @@ where let blobs = if block.message().body().has_blobs() { debug!("Downloading finalized blobs"); if let Some(response) = remote - .get_blobs::(BlockId::Root(block_root), None, &spec) + .get_blob_sidecars::(BlockId::Root(block_root), None, &spec) .await .map_err(|e| format!("Error fetching finalized blobs from remote: {e:?}"))?
{ diff --git a/beacon_node/http_api/src/block_id.rs b/beacon_node/http_api/src/block_id.rs index e527e466f67..778067c32bb 100644 --- a/beacon_node/http_api/src/block_id.rs +++ b/beacon_node/http_api/src/block_id.rs @@ -2,15 +2,16 @@ use crate::version::inconsistent_fork_rejection; use crate::{ExecutionOptimistic, state_id::checkpoint_slot_and_execution_optimistic}; use beacon_chain::kzg_utils::reconstruct_blobs; use beacon_chain::{BeaconChain, BeaconChainError, BeaconChainTypes, WhenSlotSkipped}; -use eth2::types::BlobIndicesQuery; use eth2::types::BlockId as CoreBlockId; use eth2::types::DataColumnIndicesQuery; +use eth2::types::{BlobIndicesQuery, BlobWrapper, BlobsVersionedHashesQuery}; use std::fmt; use std::str::FromStr; use std::sync::Arc; use types::{ BlobSidecarList, DataColumnSidecarList, EthSpec, FixedBytesExtended, ForkName, Hash256, - SignedBeaconBlock, SignedBlindedBeaconBlock, Slot, + SignedBeaconBlock, SignedBlindedBeaconBlock, Slot, UnversionedResponse, + beacon_response::ExecutionOptimisticFinalizedMetadata, }; use warp::Rejection; @@ -352,6 +353,68 @@ impl BlockId { Ok((block, blob_sidecar_list, execution_optimistic, finalized)) } + #[allow(clippy::type_complexity)] + pub fn get_blobs_by_versioned_hashes( + &self, + query: BlobsVersionedHashesQuery, + chain: &BeaconChain, + ) -> Result< + UnversionedResponse>, ExecutionOptimisticFinalizedMetadata>, + warp::Rejection, + > { + let (root, execution_optimistic, finalized) = self.root(chain)?; + let block = BlockId::blinded_block_by_root(&root, chain)?.ok_or_else(|| { + warp_utils::reject::custom_not_found(format!("beacon block with root {}", root)) + })?; + + // Error if the block is pre-Deneb and lacks blobs. + let blob_kzg_commitments = block.message().body().blob_kzg_commitments().map_err(|_| { + warp_utils::reject::custom_bad_request( + "block is pre-Deneb and has no blobs".to_string(), + ) + })?; + + let blob_indices_opt = query.versioned_hashes.map(|versioned_hashes| { + versioned_hashes + .iter() + .flat_map(|versioned_hash| { + blob_kzg_commitments.iter().position(|commitment| { + let computed_hash = commitment.calculate_versioned_hash(); + computed_hash == *versioned_hash + }) + }) + .map(|index| index as u64) + .collect::>() + }); + + let max_blobs_per_block = chain.spec.max_blobs_per_block(block.epoch()) as usize; + let blob_sidecar_list = if !blob_kzg_commitments.is_empty() { + if chain.spec.is_peer_das_enabled_for_epoch(block.epoch()) { + Self::get_blobs_from_data_columns(chain, root, blob_indices_opt, &block)? + } else { + Self::get_blobs(chain, root, blob_indices_opt, max_blobs_per_block)? + } + } else { + BlobSidecarList::new(vec![], max_blobs_per_block) + .map_err(|e| warp_utils::reject::custom_server_error(format!("{:?}", e)))? 
+ }; + + let blobs = blob_sidecar_list + .into_iter() + .map(|sidecar| BlobWrapper:: { + blob: sidecar.blob.clone(), + }) + .collect(); + + Ok(UnversionedResponse { + metadata: ExecutionOptimisticFinalizedMetadata { + execution_optimistic: Some(execution_optimistic), + finalized: Some(finalized), + }, + data: blobs, + }) + } + fn get_blobs( chain: &BeaconChain, root: Hash256, @@ -369,9 +432,9 @@ impl BlockId { let blob_sidecar_list_filtered = match indices { Some(vec) => { - let list: Vec<_> = blob_sidecar_list + let list: Vec<_> = vec .into_iter() - .filter(|blob_sidecar| vec.contains(&blob_sidecar.index)) + .flat_map(|index| blob_sidecar_list.get(index as usize).cloned()) .collect(); BlobSidecarList::new(list, max_blobs_per_block) diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index 1b18ed50a3f..7f6c97a0f85 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -214,6 +214,7 @@ pub fn prometheus_metrics() -> warp::filters::log::Log( */ // GET beacon/blob_sidecars/{block_id} - let get_blobs = eth_v1 + let get_blob_sidecars = eth_v1 .and(warp::path("beacon")) .and(warp::path("blob_sidecars")) .and(block_id_or_err) @@ -1947,6 +1948,52 @@ pub fn serve( }, ); + // GET beacon/blobs/{block_id} + let get_blobs = eth_v1 + .and(warp::path("beacon")) + .and(warp::path("blobs")) + .and(block_id_or_err) + .and(warp::path::end()) + .and(multi_key_query::()) + .and(task_spawner_filter.clone()) + .and(chain_filter.clone()) + .and(warp::header::optional::("accept")) + .then( + |block_id: BlockId, + version_hashes_res: Result, + task_spawner: TaskSpawner, + chain: Arc>, + accept_header: Option| { + task_spawner.blocking_response_task(Priority::P1, move || { + let versioned_hashes = version_hashes_res?; + let response = + block_id.get_blobs_by_versioned_hashes(versioned_hashes, &chain)?; + + match accept_header { + Some(api_types::Accept::Ssz) => Response::builder() + .status(200) + .body(response.data.as_ssz_bytes().into()) + .map(|res: Response| add_ssz_content_type_header(res)) + .map_err(|e| { + warp_utils::reject::custom_server_error(format!( + "failed to create response: {}", + e + )) + }), + _ => { + let res = execution_optimistic_finalized_beacon_response( + ResponseIncludesVersion::No, + response.metadata.execution_optimistic.unwrap_or(false), + response.metadata.finalized.unwrap_or(false), + response.data, + )?; + Ok(warp::reply::json(&res).into_response()) + } + } + }) + }, + ); + /* * beacon/pool */ @@ -4794,6 +4841,7 @@ pub fn serve( .uor(get_beacon_block_attestations) .uor(get_beacon_blinded_block) .uor(get_beacon_block_root) + .uor(get_blob_sidecars) .uor(get_blobs) .uor(get_beacon_pool_attestations) .uor(get_beacon_pool_attester_slashings) diff --git a/beacon_node/http_api/tests/tests.rs b/beacon_node/http_api/tests/tests.rs index 2072fb9932b..9c18a7c1e87 100644 --- a/beacon_node/http_api/tests/tests.rs +++ b/beacon_node/http_api/tests/tests.rs @@ -90,6 +90,7 @@ struct ApiTester { struct ApiTesterConfig { spec: ChainSpec, retain_historic_states: bool, + import_all_data_columns: bool, } impl Default for ApiTesterConfig { @@ -99,6 +100,7 @@ impl Default for ApiTesterConfig { Self { spec, retain_historic_states: false, + import_all_data_columns: false, } } } @@ -137,6 +139,7 @@ impl ApiTester { .deterministic_withdrawal_keypairs(VALIDATOR_COUNT) .fresh_ephemeral_store() .mock_execution_layer() + .import_all_data_columns(config.import_all_data_columns) .build(); harness @@ -441,10 +444,7 @@ impl ApiTester { } pub async fn 
new_mev_tester_default_payload_value() -> Self { - let mut config = ApiTesterConfig { - retain_historic_states: false, - spec: E::default_spec(), - }; + let mut config = ApiTesterConfig::default(); config.spec.altair_fork_epoch = Some(Epoch::new(0)); config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); let tester = Self::new_from_config(config) @@ -1858,7 +1858,7 @@ impl ApiTester { }; let result = match self .client - .get_blobs::( + .get_blob_sidecars::( CoreBlockId::Root(block_root), blob_indices.as_deref(), &self.chain.spec, @@ -1879,6 +1879,77 @@ impl ApiTester { self } + pub async fn test_get_blobs(self, versioned_hashes: bool) -> Self { + let block_id = BlockId(CoreBlockId::Finalized); + let (block_root, _, _) = block_id.root(&self.chain).unwrap(); + let (block, _, _) = block_id.full_block(&self.chain).await.unwrap(); + let num_blobs = block.num_expected_blobs(); + + let versioned_hashes: Option> = if versioned_hashes { + Some( + block + .message() + .body() + .blob_kzg_commitments() + .unwrap() + .iter() + .map(|commitment| commitment.calculate_versioned_hash()) + .collect(), + ) + } else { + None + }; + + let result = match self + .client + .get_blobs::(CoreBlockId::Root(block_root), versioned_hashes.as_deref()) + .await + { + Ok(response) => response.unwrap().into_data(), + Err(e) => panic!("query failed incorrectly: {e:?}"), + }; + + assert_eq!( + result.len(), + versioned_hashes.map_or(num_blobs, |versioned_hashes| versioned_hashes.len()) + ); + + self + } + + pub async fn test_get_blobs_post_fulu_full_node(self, versioned_hashes: bool) -> Self { + let block_id = BlockId(CoreBlockId::Finalized); + let (block_root, _, _) = block_id.root(&self.chain).unwrap(); + let (block, _, _) = block_id.full_block(&self.chain).await.unwrap(); + + let versioned_hashes: Option> = if versioned_hashes { + Some( + block + .message() + .body() + .blob_kzg_commitments() + .unwrap() + .iter() + .map(|commitment| commitment.calculate_versioned_hash()) + .collect(), + ) + } else { + None + }; + + match self + .client + .get_blobs::(CoreBlockId::Root(block_root), versioned_hashes.as_deref()) + .await + { + Ok(result) => panic!("Full node are unable to return blobs post-Fulu: {result:?}"), + // Post-Fulu, full nodes don't store blobs and return error 500 + Err(e) => assert_eq!(e.status().unwrap(), 500), + }; + + self + } + /// Test fetching of blob sidecars that are not available in the database due to pruning. /// /// If `zero_blobs` is false, test a block with >0 blobs, which should be unavailable. @@ -1918,7 +1989,7 @@ impl ApiTester { match self .client - .get_blobs::(CoreBlockId::Slot(test_slot), None, &self.chain.spec) + .get_blob_sidecars::(CoreBlockId::Slot(test_slot), None, &self.chain.spec) .await { Ok(result) => { @@ -1956,7 +2027,7 @@ impl ApiTester { match self .client - .get_blobs::(CoreBlockId::Slot(test_slot), None, &self.chain.spec) + .get_blob_sidecars::(CoreBlockId::Slot(test_slot), None, &self.chain.spec) .await { Ok(result) => panic!("queries for pre-Deneb slots should fail. 
got: {result:?}"), @@ -7704,10 +7775,7 @@ async fn builder_payload_chosen_by_profit_v3() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn builder_works_post_capella() { - let mut config = ApiTesterConfig { - retain_historic_states: false, - spec: E::default_spec(), - }; + let mut config = ApiTesterConfig::default(); config.spec.altair_fork_epoch = Some(Epoch::new(0)); config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); config.spec.capella_fork_epoch = Some(Epoch::new(0)); @@ -7724,10 +7792,7 @@ async fn builder_works_post_capella() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn builder_works_post_deneb() { - let mut config = ApiTesterConfig { - retain_historic_states: false, - spec: E::default_spec(), - }; + let mut config = ApiTesterConfig::default(); config.spec.altair_fork_epoch = Some(Epoch::new(0)); config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); config.spec.capella_fork_epoch = Some(Epoch::new(0)); @@ -7745,22 +7810,66 @@ async fn builder_works_post_deneb() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn get_blob_sidecars() { + let mut config = ApiTesterConfig::default(); + config.spec.altair_fork_epoch = Some(Epoch::new(0)); + config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); + config.spec.capella_fork_epoch = Some(Epoch::new(0)); + config.spec.deneb_fork_epoch = Some(Epoch::new(0)); + + ApiTester::new_from_config(config) + .await + .test_post_beacon_blocks_valid() + .await + .test_get_blob_sidecars(false) + .await + .test_get_blob_sidecars(true) + .await + .test_get_blobs(false) + .await + .test_get_blobs(true) + .await; +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn get_blobs_post_fulu_supernode() { let mut config = ApiTesterConfig { retain_historic_states: false, spec: E::default_spec(), + // For supernode, we import all data columns + import_all_data_columns: true, }; config.spec.altair_fork_epoch = Some(Epoch::new(0)); config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); config.spec.capella_fork_epoch = Some(Epoch::new(0)); config.spec.deneb_fork_epoch = Some(Epoch::new(0)); + config.spec.electra_fork_epoch = Some(Epoch::new(0)); + config.spec.fulu_fork_epoch = Some(Epoch::new(0)); ApiTester::new_from_config(config) .await - .test_post_beacon_blocks_valid() + // We can call the same get_blobs function in this test + // because the function will call get_blobs_by_versioned_hashes which handles peerDAS post-Fulu + .test_get_blobs(false) .await - .test_get_blob_sidecars(false) + .test_get_blobs(true) + .await; +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn get_blobs_post_fulu_full_node() { + let mut config = ApiTesterConfig::default(); + config.spec.altair_fork_epoch = Some(Epoch::new(0)); + config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); + config.spec.capella_fork_epoch = Some(Epoch::new(0)); + config.spec.deneb_fork_epoch = Some(Epoch::new(0)); + config.spec.electra_fork_epoch = Some(Epoch::new(0)); + config.spec.fulu_fork_epoch = Some(Epoch::new(0)); + + ApiTester::new_from_config(config) .await - .test_get_blob_sidecars(true) + .test_get_blobs_post_fulu_full_node(false) + .await + .test_get_blobs_post_fulu_full_node(true) .await; } diff --git a/common/eth2/src/lib.rs b/common/eth2/src/lib.rs index 3368569d59f..0423794d0d5 100644 --- a/common/eth2/src/lib.rs +++ b/common/eth2/src/lib.rs @@ -1336,7 +1336,7 @@ impl BeaconNodeHttpClient { } /// Path for `v1/beacon/blob_sidecars/{block_id}` - pub fn get_blobs_path(&self, 
block_id: BlockId) -> Result { + pub fn get_blob_sidecars_path(&self, block_id: BlockId) -> Result { let mut path = self.eth_path(V1)?; path.path_segments_mut() .map_err(|()| Error::InvalidUrl(self.server.clone()))? @@ -1346,6 +1346,17 @@ impl BeaconNodeHttpClient { Ok(path) } + /// Path for `v1/beacon/blobs/{blob_id}` + pub fn get_blobs_path(&self, block_id: BlockId) -> Result { + let mut path = self.eth_path(V1)?; + path.path_segments_mut() + .map_err(|()| Error::InvalidUrl(self.server.clone()))? + .push("beacon") + .push("blobs") + .push(&block_id.to_string()); + Ok(path) + } + /// Path for `v1/beacon/blinded_blocks/{block_id}` pub fn get_beacon_blinded_blocks_path(&self, block_id: BlockId) -> Result { let mut path = self.eth_path(V1)?; @@ -1374,13 +1385,13 @@ impl BeaconNodeHttpClient { /// `GET v1/beacon/blob_sidecars/{block_id}` /// /// Returns `Ok(None)` on a 404 error. - pub async fn get_blobs( + pub async fn get_blob_sidecars( &self, block_id: BlockId, indices: Option<&[u64]>, spec: &ChainSpec, ) -> Result>>, Error> { - let mut path = self.get_blobs_path(block_id)?; + let mut path = self.get_blob_sidecars_path(block_id)?; if let Some(indices) = indices { let indices_string = indices .iter() @@ -1400,6 +1411,31 @@ impl BeaconNodeHttpClient { .map(|opt| opt.map(BeaconResponse::ForkVersioned)) } + /// `GET v1/beacon/blobs/{block_id}` + /// + /// Returns `Ok(None)` on a 404 error. + pub async fn get_blobs( + &self, + block_id: BlockId, + versioned_hashes: Option<&[Hash256]>, + ) -> Result>>>, Error> + { + let mut path = self.get_blobs_path(block_id)?; + if let Some(hashes) = versioned_hashes { + let hashes_string = hashes + .iter() + .map(|hash| hash.to_string()) + .collect::>() + .join(","); + path.query_pairs_mut() + .append_pair("versioned_hashes", &hashes_string); + } + + self.get_opt(path) + .await + .map(|opt| opt.map(BeaconResponse::Unversioned)) + } + /// `GET v1/beacon/blinded_blocks/{block_id}` /// /// Returns `Ok(None)` on a 404 error. diff --git a/common/eth2/src/types.rs b/common/eth2/src/types.rs index b72ab293801..8f553b57d9c 100644 --- a/common/eth2/src/types.rs +++ b/common/eth2/src/types.rs @@ -716,6 +716,13 @@ pub struct BlobIndicesQuery { pub indices: Option>, } +#[derive(Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct BlobsVersionedHashesQuery { + #[serde(default, deserialize_with = "option_query_vec")] + pub versioned_hashes: Option>, +} + #[derive(Clone, Deserialize)] #[serde(deny_unknown_fields)] pub struct DataColumnIndicesQuery { @@ -2317,6 +2324,14 @@ pub struct StandardAttestationRewards { pub total_rewards: Vec, } +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] +#[serde(bound = "E: EthSpec")] +#[serde(transparent)] +pub struct BlobWrapper { + #[serde(with = "ssz_types::serde_utils::hex_fixed_vec")] + pub blob: Blob, +} + #[cfg(test)] mod test { use std::fmt::Debug; diff --git a/consensus/types/src/beacon_response.rs b/consensus/types/src/beacon_response.rs index 2e458543649..fc59fc94329 100644 --- a/consensus/types/src/beacon_response.rs +++ b/consensus/types/src/beacon_response.rs @@ -25,6 +25,7 @@ pub struct ForkVersionedResponse { /// `Deserialize`. 
#[derive(Debug, PartialEq, Clone, Serialize)] pub struct UnversionedResponse { + #[serde(flatten)] pub metadata: M, pub data: T, } @@ -195,9 +196,10 @@ impl From> for BeaconResponse { #[cfg(test)] mod fork_version_response_tests { + use crate::beacon_response::ExecutionOptimisticFinalizedMetadata; use crate::{ ExecutionPayload, ExecutionPayloadBellatrix, ForkName, ForkVersionedResponse, - MainnetEthSpec, + MainnetEthSpec, UnversionedResponse, }; use serde_json::json; @@ -236,4 +238,24 @@ mod fork_version_response_tests { assert!(result.is_err()); } + + // The following test should only pass by having the attribute #[serde(flatten)] on the metadata + #[test] + fn unversioned_response_serialize_dezerialize_round_trip_test() { + // Create an UnversionedResponse with some data + let data = UnversionedResponse { + metadata: ExecutionOptimisticFinalizedMetadata { + execution_optimistic: Some(false), + finalized: Some(false), + }, + data: "some_test_data".to_string(), + }; + + let serialized = serde_json::to_string(&data); + + let deserialized = + serde_json::from_str(&serialized.unwrap()).expect("Failed to deserialize"); + + assert_eq!(data, deserialized); + } } diff --git a/lcli/src/http_sync.rs b/lcli/src/http_sync.rs index 2e36eadf235..6f7dcdb5956 100644 --- a/lcli/src/http_sync.rs +++ b/lcli/src/http_sync.rs @@ -124,7 +124,7 @@ async fn get_block_from_source( .unwrap() .unwrap(); let blobs_from_source = source - .get_blobs::(block_id, None, spec) + .get_blob_sidecars::(block_id, None, spec) .await .unwrap() .unwrap() diff --git a/testing/simulator/src/checks.rs b/testing/simulator/src/checks.rs index 1368c495cd8..1240785121a 100644 --- a/testing/simulator/src/checks.rs +++ b/testing/simulator/src/checks.rs @@ -424,7 +424,7 @@ pub async fn verify_full_blob_production_up_to( // the `verify_full_block_production_up_to` function. if block.is_some() { remote_node - .get_blobs::(BlockId::Slot(Slot::new(slot)), None, &E::default_spec()) + .get_blobs::(BlockId::Slot(Slot::new(slot)), None) .await .map_err(|e| format!("Failed to get blobs at slot {slot:?}: {e:?}"))? .ok_or_else(|| format!("No blobs available at slot {slot:?}"))?; From 538b70495ccc2cbdcf38b7d73ea1989ba94f1784 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 9 Oct 2025 18:32:43 +1100 Subject: [PATCH 44/45] Reject data columns that do not descend from the finalized root instead of ignoring them (#8179) This issue was identified during the Fusaka audit competition. The [`verify_parent_block_and_finalized_descendant`](https://github.com/sigp/lighthouse/blob/62d9302e0f9dd9f94d0325411a3029b36ad90685/beacon_node/beacon_chain/src/data_column_verification.rs#L606-L627) function in data column gossip verification currently loads the parent first, before checking whether the column descends from the finalized root. However, the `fork_choice.get_block(&block_parent_root)` function also makes the same check internally: https://github.com/sigp/lighthouse/blob/8a4f6cf0d5b6b261b2c3439ce7c05383a53d30c5/consensus/fork_choice/src/fork_choice.rs#L1242-L1249 Therefore, if the column does not descend from the finalized root, we return an `UnknownParent` error before hitting the `is_finalized_checkpoint_or_descendant` check just below. This means we `IGNORE` the gossip message instead of `REJECT`ing it, and the gossip peer is not _immediately_ penalised. This deviates from the spec.
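A minimal, self-contained sketch of the ordering problem (hypothetical, simplified types for illustration only; not the actual Lighthouse fork choice API):

```rust
use std::collections::HashMap;

/// Hypothetical, simplified stand-in for fork choice.
struct ForkChoice {
    /// Blocks known to fork choice, keyed by a fake root.
    blocks: HashMap<u64, &'static str>,
    /// Roots that are the finalized checkpoint or descend from it.
    finalized_descendants: Vec<u64>,
}

impl ForkChoice {
    /// Mirrors the behaviour described above: blocks that do not descend from
    /// finalization are filtered out internally, so a non-descendant parent is
    /// indistinguishable from an unknown parent.
    fn get_block(&self, root: &u64) -> Option<&'static str> {
        if !self.is_finalized_checkpoint_or_descendant(*root) {
            return None;
        }
        self.blocks.get(root).copied()
    }

    fn is_finalized_checkpoint_or_descendant(&self, root: u64) -> bool {
        self.finalized_descendants.contains(&root)
    }
}

#[derive(Debug, PartialEq)]
enum Verdict {
    /// Unknown parent: message is ignored, peer is not immediately penalised.
    Ignore,
    /// Provably invalid: message is rejected, peer is penalised immediately.
    Reject,
    Accept,
}

/// Old ordering: `get_block` runs first, so a known-but-not-finalized-descendant
/// parent surfaces as "parent unknown" and the column is only IGNOREd.
fn verify_old(fc: &ForkChoice, parent_root: u64) -> Verdict {
    if fc.get_block(&parent_root).is_none() {
        return Verdict::Ignore;
    }
    if !fc.is_finalized_checkpoint_or_descendant(parent_root) {
        return Verdict::Reject;
    }
    Verdict::Accept
}

/// New ordering: the finalized-descendant check runs first, so the peer is
/// REJECTed immediately, matching the spec.
fn verify_new(fc: &ForkChoice, parent_root: u64) -> Verdict {
    if !fc.is_finalized_checkpoint_or_descendant(parent_root) {
        return Verdict::Reject;
    }
    if fc.get_block(&parent_root).is_none() {
        return Verdict::Ignore;
    }
    Verdict::Accept
}

fn main() {
    // A parent block that fork choice knows about but that does NOT descend
    // from the finalized root.
    let fc = ForkChoice {
        blocks: HashMap::from([(1, "pre-finalization block")]),
        finalized_descendants: vec![2, 3],
    };
    assert_eq!(verify_old(&fc, 1), Verdict::Ignore); // peer escapes immediate penalty
    assert_eq!(verify_new(&fc, 1), Verdict::Reject); // peer penalised immediately
}
```

With the check reordered (as in the diff below), the bad peer is penalised on first receipt rather than only after repeated failed parent lookups.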
However, it is worth noting that Lighthouse will currently attempt to request the parent from this peer, and if the peer is not able to serve the parent, it gets penalised with a `LowToleranceError`, and will get banned after ~5 occurrences. https://github.com/sigp/lighthouse/blob/ffa7b2b2b9e3b4e70678e2c749b8bc45234febd7/beacon_node/network/src/sync/network_context.rs#L1530-L1532 This PR will penalise the bad peer immediately instead of performing block lookups before penalising it. Co-Authored-By: Jimmy Chen --- .../beacon_chain/src/data_column_verification.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/beacon_node/beacon_chain/src/data_column_verification.rs b/beacon_node/beacon_chain/src/data_column_verification.rs index 600b107c1d1..fad7771f018 100644 --- a/beacon_node/beacon_chain/src/data_column_verification.rs +++ b/beacon_node/beacon_chain/src/data_column_verification.rs @@ -608,22 +608,21 @@ fn verify_parent_block_and_finalized_descendant( chain: &BeaconChain, ) -> Result { let fork_choice = chain.canonical_head.fork_choice_read_lock(); + let block_parent_root = data_column.block_parent_root(); + + // Do not process a column that does not descend from the finalized root. + if !fork_choice.is_finalized_checkpoint_or_descendant(block_parent_root) { + return Err(GossipDataColumnError::NotFinalizedDescendant { block_parent_root }); + } // We have already verified that the column is past finalization, so we can // just check fork choice for the block's parent. - let block_parent_root = data_column.block_parent_root(); let Some(parent_block) = fork_choice.get_block(&block_parent_root) else { return Err(GossipDataColumnError::ParentUnknown { parent_root: block_parent_root, }); }; - // Do not process a column that does not descend from the finalized root. - // We just loaded the parent_block, so we can be sure that it exists in fork choice. - if !fork_choice.is_finalized_checkpoint_or_descendant(block_parent_root) { - return Err(GossipDataColumnError::NotFinalizedDescendant { block_parent_root }); - } - Ok(parent_block) } From 0c9fdea28db07eb2395d168c1b8369d785856adc Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Fri, 10 Oct 2025 00:53:51 +1100 Subject: [PATCH 45/45] Update `ForkName::latest_stable` to Fulu for tests (#8181) Update `ForkName::latest_stable` to Fulu, reflecting our plan to stabilise Fulu in the immediate future! This will lead to some more tests running with Fulu rather than Electra. Co-Authored-By: Michael Sproul --- consensus/types/src/fork_name.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/types/src/fork_name.rs b/consensus/types/src/fork_name.rs index 363d9e77a22..338e2b1e759 100644 --- a/consensus/types/src/fork_name.rs +++ b/consensus/types/src/fork_name.rs @@ -51,7 +51,7 @@ impl ForkName { /// This fork serves as the baseline for many tests, and the goal /// is to ensure features are passing on this fork. pub fn latest_stable() -> ForkName { - ForkName::Electra + ForkName::Fulu } /// Set the activation slots in the given `ChainSpec` so that the fork named by `self`