diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE/default-issue-template.md similarity index 79% rename from .github/ISSUE_TEMPLATE.md rename to .github/ISSUE_TEMPLATE/default-issue-template.md index d73b9ff6f04..784add20f35 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE/default-issue-template.md @@ -1,3 +1,12 @@ +--- +name: Default issue template +about: Use this template for all issues +title: '' +labels: '' +assignees: '' + +--- + ## Description Please provide a brief description of the issue. diff --git a/.github/mergify.yml b/.github/mergify.yml index 4ab73bcf079..0b917b25467 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -105,6 +105,10 @@ queue_rules: {{ body | get_section("## Proposed Changes", "") }} + + {% for commit in commits | unique(attribute='email_author') %} + Co-Authored-By: {{ commit.author }} <{{ commit.email_author }}> + {% endfor %} queue_conditions: - "#approved-reviews-by >= 1" - "check-success=license/cla" diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 59a045c7d3f..0201bf9ae30 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -225,6 +225,7 @@ jobs: TEST_FEATURES: portable CI_LOGGER_DIR: ${{ runner.temp }}/network_test_logs - name: Upload logs + if: always() uses: actions/upload-artifact@v4 with: name: network_test_logs @@ -328,6 +329,7 @@ jobs: - name: Run a basic beacon chain sim that starts from Deneb run: cargo run --release --bin simulator basic-sim --disable-stdout-logging --log-dir ${{ runner.temp }}/basic_simulator_logs - name: Upload logs + if: always() uses: actions/upload-artifact@v4 with: name: basic_simulator_logs @@ -349,6 +351,7 @@ jobs: - name: Run a beacon chain sim which tests VC fallback behaviour run: cargo run --release --bin simulator fallback-sim --disable-stdout-logging --log-dir ${{ runner.temp }}/fallback_simulator_logs - name: Upload logs + if: always() uses: actions/upload-artifact@v4 
with: name: fallback_simulator_logs diff --git a/Cargo.lock b/Cargo.lock index 88b5b7b57d2..481d2048652 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -918,7 +918,7 @@ dependencies = [ [[package]] name = "beacon_node" -version = "7.1.0" +version = "8.0.0-rc.0" dependencies = [ "account_utils", "beacon_chain", @@ -1165,9 +1165,9 @@ dependencies = [ [[package]] name = "blst" -version = "0.3.15" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fd49896f12ac9b6dcd7a5998466b9b58263a695a3dd1ecc1aaca2e12a90b080" +checksum = "dcdb4c7013139a150f9fc55d123186dbfaba0d912817466282c73ac49e71fb45" dependencies = [ "cc", "glob", @@ -1193,7 +1193,7 @@ dependencies = [ [[package]] name = "boot_node" -version = "7.1.0" +version = "8.0.0-rc.0" dependencies = [ "beacon_node", "bytes", @@ -1296,11 +1296,10 @@ dependencies = [ [[package]] name = "c-kzg" -version = "2.1.0" +version = "2.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7e3c397401eb76228c89561cf22f85f41c95aa799ee9d860de3ea1cbc728fc" +checksum = "e00bf4b112b07b505472dbefd19e37e53307e2bfed5a79e0cc161d58ccd0e687" dependencies = [ - "arbitrary", "blst", "cc", "glob", @@ -2163,7 +2162,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18e4fdb82bd54a12e42fb58a800dcae6b9e13982238ce2296dc3570b92148e1f" dependencies = [ "data-encoding", - "syn 2.0.100", + "syn 1.0.109", ] [[package]] @@ -2395,9 +2394,9 @@ dependencies = [ [[package]] name = "discv5" -version = "0.9.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4b4e7798d2ff74e29cee344dc490af947ae657d6ab5273dde35d58ce06a4d71" +checksum = "a20b702c8491b3325866a4935d0b5101e49144d74540384243b6293794aad6fa" dependencies = [ "aes 0.8.4", "aes-gcm", @@ -2565,9 +2564,9 @@ dependencies = [ [[package]] name = "eip4844" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "aa86cda6af15a9a5e4cf680850addaee8cd427be95be3ec9d022b9d7b98a66c0" +checksum = "82ab45fc63db6bbe5c3eb7c79303b2aff7ee529c991b2111c46879d1ea38407e" dependencies = [ "ekzg-bls12-381", "ekzg-maybe-rayon", @@ -2590,9 +2589,9 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "ekzg-bls12-381" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f0e00a7689af7f4f17e85ae07f5a92b568a47297a165f685b828edfd82e02b" +checksum = "05c599a59deba6188afd9f783507e4d89efc997f0fa340a758f0d0992b322416" dependencies = [ "blst", "blstrs", @@ -2604,9 +2603,9 @@ dependencies = [ [[package]] name = "ekzg-erasure-codes" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bfc7ab684a7bb0c5ee37fd6a73da7425858cdd28f4a285c70361f001d6d0efc" +checksum = "8474a41a30ddd2b651798b1aa9ce92011207c3667186fe9044184683250109e7" dependencies = [ "ekzg-bls12-381", "ekzg-polynomial", @@ -2614,15 +2613,15 @@ dependencies = [ [[package]] name = "ekzg-maybe-rayon" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e0a4876a612b9317be470768e134b671b8e645e412a82eb12fdd9b1958fa6f9" +checksum = "9cf94d1385185c1f7caef4973be49702c7d9ffdeaf832d126dbb9ed6efe09d40" [[package]] name = "ekzg-multi-open" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f7964754aa0921aaa89b1589100e4cae9b31f87f137eeb0af5403fdfca68bfc" +checksum = "e6d37456a32cf79bdbddd6685a2adec73210e2d60332370bc0e9a502b6d93beb" dependencies = [ "ekzg-bls12-381", "ekzg-maybe-rayon", @@ -2632,9 +2631,9 @@ dependencies = [ [[package]] name = "ekzg-polynomial" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed36d2ddf86661c9d18e9d5dfc47dce6c9b6e44db385e2da71952b10ba32df1" +checksum 
= "704751bac85af4754bb8a14457ef24d820738062d0b6f3763534d0980b1a1e81" dependencies = [ "ekzg-bls12-381", "ekzg-maybe-rayon", @@ -2642,9 +2641,9 @@ dependencies = [ [[package]] name = "ekzg-serialization" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c83402d591ac3534d1ae654feb8f56ee64cc2bacfe80bece7977c24ca5e72e2" +checksum = "3cb983d9f75b2804c00246def8d52c01cf05f70c22593b8d314fbcf0cf89042b" dependencies = [ "ekzg-bls12-381", "hex", @@ -2652,9 +2651,9 @@ dependencies = [ [[package]] name = "ekzg-single-open" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05e1dbb13023ccebbb24593e4753c87f77b7fb78254a20aef1a028e979145092" +checksum = "799d5806d51e1453fa0f528d6acf4127e2a89e98312c826151ebc24ee3448ec3" dependencies = [ "ekzg-bls12-381", "ekzg-polynomial", @@ -2663,9 +2662,9 @@ dependencies = [ [[package]] name = "ekzg-trusted-setup" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff1cb3e907b27fa51f35def95eeabe47e97765e2b6bac7e55967500937f94282" +checksum = "85314d56718dc2c6dd77c3b3630f1839defcb6f47d9c20195608a0f7976095ab" dependencies = [ "ekzg-bls12-381", "ekzg-serialization", @@ -5051,7 +5050,7 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "lcli" -version = "7.1.0" +version = "8.0.0-rc.0" dependencies = [ "account_utils", "beacon_chain", @@ -5561,7 +5560,7 @@ dependencies = [ [[package]] name = "lighthouse" -version = "7.1.0" +version = "8.0.0-rc.0" dependencies = [ "account_manager", "account_utils", @@ -7375,7 +7374,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.100", @@ -8013,9 +8012,9 @@ dependencies = [ 
[[package]] name = "rust_eth_kzg" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc46814bb8e72bff20fe117db43b7455112e6fafdae7466f8f24d451ad773c0" +checksum = "1522b7a740cd7f5bc52ea49863618511c8de138dcdf3f8a80b15b3f764942a5b" dependencies = [ "eip4844", "ekzg-bls12-381", @@ -9065,16 +9064,16 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "superstruct" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0f31f730ad9e579364950e10d6172b4a9bd04b447edf5988b066a860cc340e" +checksum = "3b986e4a629907f20a2c2a639a75bc22a8b5d99b444e0d83c395f4cb309022bf" dependencies = [ - "darling 0.13.4", - "itertools 0.10.5", + "darling 0.20.10", + "itertools 0.13.0", "proc-macro2", "quote", "smallvec", - "syn 1.0.109", + "syn 2.0.100", ] [[package]] @@ -9231,6 +9230,8 @@ dependencies = [ "async-channel 1.9.0", "futures", "metrics", + "num_cpus", + "rayon", "tokio", "tracing", ] @@ -11164,7 +11165,7 @@ dependencies = [ [[package]] name = "xdelta3" version = "0.1.5" -source = "git+http://github.com/sigp/xdelta3-rs?rev=4db64086bb02e9febb584ba93b9d16bb2ae3825a#4db64086bb02e9febb584ba93b9d16bb2ae3825a" +source = "git+https://github.com/sigp/xdelta3-rs?rev=4db64086bb02e9febb584ba93b9d16bb2ae3825a#4db64086bb02e9febb584ba93b9d16bb2ae3825a" dependencies = [ "bindgen", "cc", diff --git a/Cargo.toml b/Cargo.toml index 0b930b605d6..a5f01a498de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -117,7 +117,7 @@ byteorder = "1" bytes = "1" # Turn off c-kzg's default features which include `blst/portable`. We can turn on blst's portable # feature ourselves when desired. 
-c-kzg = { version = "2.1.0", default-features = false } +c-kzg = { version = "2.1", default-features = false } cargo_metadata = "0.19" clap = { version = "4.5.4", features = ["derive", "cargo", "wrap_help"] } clap_utils = { path = "common/clap_utils" } @@ -134,7 +134,7 @@ deposit_contract = { path = "common/deposit_contract" } derivative = "2" directory = { path = "common/directory" } dirs = "3" -discv5 = { version = "0.9", features = ["libp2p"] } +discv5 = { version = "0.10", features = ["libp2p"] } doppelganger_service = { path = "validator_client/doppelganger_service" } either = "1.9" environment = { path = "lighthouse/environment" } @@ -224,7 +224,7 @@ reqwest = { version = "0.11", default-features = false, features = [ ring = "0.17" rpds = "0.11" rusqlite = { version = "0.28", features = ["bundled"] } -rust_eth_kzg = "0.9.0" +rust_eth_kzg = "0.9" safe_arith = { path = "consensus/safe_arith" } sensitive_url = { path = "common/sensitive_url" } serde = { version = "1", features = ["derive"] } @@ -242,7 +242,7 @@ ssz_types = "0.11.0" state_processing = { path = "consensus/state_processing" } store = { path = "beacon_node/store" } strum = { version = "0.24", features = ["derive"] } -superstruct = "0.8" +superstruct = "0.10" swap_or_not_shuffle = { path = "consensus/swap_or_not_shuffle" } syn = "1" sysinfo = "0.26" @@ -279,7 +279,7 @@ validator_test_rig = { path = "testing/validator_test_rig" } warp = { version = "0.3.7", default-features = false, features = ["tls"] } warp_utils = { path = "common/warp_utils" } workspace_members = { path = "common/workspace_members" } -xdelta3 = { git = "http://github.com/sigp/xdelta3-rs", rev = "4db64086bb02e9febb584ba93b9d16bb2ae3825a" } +xdelta3 = { git = "https://github.com/sigp/xdelta3-rs", rev = "4db64086bb02e9febb584ba93b9d16bb2ae3825a" } zeroize = { version = "1", features = ["zeroize_derive", "serde"] } zip = "0.6" zstd = "0.13" diff --git a/beacon_node/Cargo.toml b/beacon_node/Cargo.toml index dd7416af540..bb904a7619c 
100644 --- a/beacon_node/Cargo.toml +++ b/beacon_node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "beacon_node" -version = "7.1.0" +version = "8.0.0-rc.0" authors = [ "Paul Hauner ", "Age Manning BeaconBlockStreamer { if self.check_caches == CheckCaches::Yes { match self.beacon_chain.get_block_process_status(&root) { BlockProcessStatus::Unknown => None, - BlockProcessStatus::NotValidated(block) + BlockProcessStatus::NotValidated(block, _) | BlockProcessStatus::ExecutionValidated(block) => { metrics::inc_counter(&metrics::BEACON_REQRESP_PRE_IMPORT_CACHE_HITS); Some(block) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 6e11b666102..f085684442b 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -5,8 +5,9 @@ use crate::attestation_verification::{ }; use crate::attester_cache::{AttesterCache, AttesterCacheKey}; use crate::beacon_block_streamer::{BeaconBlockStreamer, CheckCaches}; -use crate::beacon_proposer_cache::BeaconProposerCache; -use crate::beacon_proposer_cache::compute_proposer_duties_from_head; +use crate::beacon_proposer_cache::{ + BeaconProposerCache, EpochBlockProposers, ensure_state_can_determine_proposers_for_epoch, +}; use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob}; use crate::block_times_cache::BlockTimesCache; use crate::block_verification::POS_PANDA_BANNER; @@ -124,7 +125,7 @@ use store::{ BlobSidecarListFromRoot, DBColumn, DatabaseBlock, Error as DBError, HotColdDB, HotStateSummary, KeyValueStore, KeyValueStoreOp, StoreItem, StoreOp, }; -use task_executor::{ShutdownReason, TaskExecutor}; +use task_executor::{RayonPoolType, ShutdownReason, TaskExecutor}; use tokio_stream::Stream; use tracing::{Span, debug, debug_span, error, info, info_span, instrument, trace, warn}; use tree_hash::TreeHash; @@ -334,16 +335,12 @@ pub enum BlockProcessStatus { /// Block is not in any pre-import cache. 
Block may be in the data-base or in the fork-choice. Unknown, /// Block is currently processing but not yet validated. - NotValidated(Arc>), + NotValidated(Arc>, BlockImportSource), /// Block is fully valid, but not yet imported. It's cached in the da_checker while awaiting /// missing block components. ExecutionValidated(Arc>), } -pub struct BeaconChainMetrics { - pub reqresp_pre_import_cache_len: usize, -} - pub type LightClientProducerEvent = (Hash256, Slot, SyncAggregate); pub type BeaconForkChoice = ForkChoice< @@ -363,9 +360,6 @@ pub type BeaconStore = Arc< >, >; -/// Cache gossip verified blocks to serve over ReqResp before they are imported -type ReqRespPreImportCache = HashMap>>; - /// Represents the "Beacon Chain" component of Ethereum 2.0. Allows import of blocks and block /// operations and chooses a canonical head. pub struct BeaconChain { @@ -462,8 +456,6 @@ pub struct BeaconChain { pub(crate) attester_cache: Arc, /// A cache used when producing attestations whilst the head block is still being imported. pub early_attester_cache: EarlyAttesterCache, - /// Cache gossip verified blocks to serve over ReqResp before they are imported - pub reqresp_pre_import_cache: Arc>>, /// A cache used to keep track of various block timings. pub block_times_cache: Arc>, /// A cache used to track pre-finalization block roots for quick rejection. @@ -1289,18 +1281,8 @@ impl BeaconChain { /// chain. Used by sync to learn the status of a block and prevent repeated downloads / /// processing attempts. pub fn get_block_process_status(&self, block_root: &Hash256) -> BlockProcessStatus { - if let Some(block) = self - .data_availability_checker - .get_execution_valid_block(block_root) - { - return BlockProcessStatus::ExecutionValidated(block); - } - - if let Some(block) = self.reqresp_pre_import_cache.read().get(block_root) { - // A block is on the `reqresp_pre_import_cache` but NOT in the - // `data_availability_checker` only if it is actively processing. 
We can expect a future - // event with the result of processing - return BlockProcessStatus::NotValidated(block.clone()); + if let Some(cached_block) = self.data_availability_checker.get_cached_block(block_root) { + return cached_block; } BlockProcessStatus::Unknown @@ -3054,8 +3036,7 @@ impl BeaconChain { self.emit_sse_blob_sidecar_events(&block_root, std::iter::once(blob.as_blob())); - let r = self.check_gossip_blob_availability_and_import(blob).await; - self.remove_notified(&block_root, r) + self.check_gossip_blob_availability_and_import(blob).await } /// Cache the data columns in the processing cache, process it, then evict it from the cache if it was @@ -3092,15 +3073,13 @@ impl BeaconChain { data_columns.iter().map(|column| column.as_data_column()), ); - let r = self - .check_gossip_data_columns_availability_and_import( - slot, - block_root, - data_columns, - publish_fn, - ) - .await; - self.remove_notified(&block_root, r) + self.check_gossip_data_columns_availability_and_import( + slot, + block_root, + data_columns, + publish_fn, + ) + .await } /// Cache the blobs in the processing cache, process it, then evict it from the cache if it was @@ -3139,10 +3118,8 @@ impl BeaconChain { self.emit_sse_blob_sidecar_events(&block_root, blobs.iter().flatten().map(Arc::as_ref)); - let r = self - .check_rpc_blob_availability_and_import(slot, block_root, blobs) - .await; - self.remove_notified(&block_root, r) + self.check_rpc_blob_availability_and_import(slot, block_root, blobs) + .await } /// Process blobs retrieved from the EL and returns the `AvailabilityProcessingStatus`. 
@@ -3174,10 +3151,8 @@ impl BeaconChain { } } - let r = self - .check_engine_blobs_availability_and_import(slot, block_root, engine_get_blobs_output) - .await; - self.remove_notified(&block_root, r) + self.check_engine_blobs_availability_and_import(slot, block_root, engine_get_blobs_output) + .await } fn emit_sse_blob_sidecar_events<'a, I>(self: &Arc, block_root: &Hash256, blobs_iter: I) @@ -3270,10 +3245,8 @@ impl BeaconChain { custody_columns.iter().map(|column| column.as_ref()), ); - let r = self - .check_rpc_custody_columns_availability_and_import(slot, block_root, custody_columns) - .await; - self.remove_notified(&block_root, r) + self.check_rpc_custody_columns_availability_and_import(slot, block_root, custody_columns) + .await } pub async fn reconstruct_data_columns( @@ -3299,15 +3272,15 @@ impl BeaconChain { let data_availability_checker = self.data_availability_checker.clone(); + let current_span = Span::current(); let result = self .task_executor - .spawn_blocking_handle( - move || data_availability_checker.reconstruct_data_columns(&block_root), - "reconstruct_data_columns", - ) - .ok_or(BeaconChainError::RuntimeShutdown)? 
+ .spawn_blocking_with_rayon_async(RayonPoolType::HighPriority, move || { + let _guard = current_span.enter(); + data_availability_checker.reconstruct_data_columns(&block_root) + }) .await - .map_err(BeaconChainError::TokioJoin)??; + .map_err(|_| BeaconChainError::RuntimeShutdown)??; match result { DataColumnReconstructionResult::Success((availability, data_columns_to_publish)) => { @@ -3316,10 +3289,8 @@ impl BeaconChain { return Ok(None); }; - let r = self - .process_availability(slot, availability, || Ok(())) - .await; - self.remove_notified(&block_root, r) + self.process_availability(slot, availability, || Ok(())) + .await .map(|availability_processing_status| { Some((availability_processing_status, data_columns_to_publish)) }) @@ -3336,46 +3307,6 @@ impl BeaconChain { } } - /// Remove any block components from the *processing cache* if we no longer require them. If the - /// block was imported full or erred, we no longer require them. - fn remove_notified( - &self, - block_root: &Hash256, - r: Result, - ) -> Result { - let has_missing_components = - matches!(r, Ok(AvailabilityProcessingStatus::MissingComponents(_, _))); - if !has_missing_components { - self.reqresp_pre_import_cache.write().remove(block_root); - } - r - } - - /// Wraps `process_block` in logic to cache the block's commitments in the processing cache - /// and evict if the block was imported or errored. - pub async fn process_block_with_early_caching>( - self: &Arc, - block_root: Hash256, - unverified_block: B, - block_source: BlockImportSource, - notify_execution_layer: NotifyExecutionLayer, - ) -> Result { - self.reqresp_pre_import_cache - .write() - .insert(block_root, unverified_block.block_cloned()); - - let r = self - .process_block( - block_root, - unverified_block, - notify_execution_layer, - block_source, - || Ok(()), - ) - .await; - self.remove_notified(&block_root, r) - } - /// Check for known and configured invalid block roots before processing. 
pub fn check_invalid_block_roots(&self, block_root: Hash256) -> Result<(), BlockError> { if self.config.invalid_block_roots.contains(&block_root) { @@ -3407,12 +3338,6 @@ impl BeaconChain { block_source: BlockImportSource, publish_fn: impl FnOnce() -> Result<(), BlockError>, ) -> Result { - // Start the Prometheus timer. - let _full_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); - - // Increment the Prometheus counter for block processing requests. - metrics::inc_counter(&metrics::BLOCK_PROCESSING_REQUESTS); - let block_slot = unverified_block.block().slot(); // Set observed time if not already set. Usually this should be set by gossip or RPC, @@ -3427,6 +3352,18 @@ impl BeaconChain { ); } + self.data_availability_checker.put_pre_execution_block( + block_root, + unverified_block.block_cloned(), + block_source, + )?; + + // Start the Prometheus timer. + let _full_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); + + // Increment the Prometheus counter for block processing requests. + metrics::inc_counter(&metrics::BLOCK_PROCESSING_REQUESTS); + // A small closure to group the verification and import errors. let chain = self.clone(); let import_block = async move { @@ -3444,7 +3381,18 @@ impl BeaconChain { .set_time_consensus_verified(block_root, block_slot, timestamp) } - let executed_block = chain.into_executed_block(execution_pending).await?; + let executed_block = chain + .into_executed_block(execution_pending) + .await + .inspect_err(|_| { + // If the block fails execution for whatever reason (e.g. engine offline), + // and we keep it in the cache, then the node will NOT perform lookup and + // reprocess this block until the block is evicted from DA checker, causing the + // chain to get stuck temporarily if the block is canonical. Therefore we remove + // it from the cache if execution fails. 
+ self.data_availability_checker + .remove_block_on_execution_error(&block_root); + })?; // Record the *additional* time it took to wait for execution layer verification. if let Some(timestamp) = self.slot_clock.now_duration() { @@ -3570,9 +3518,7 @@ impl BeaconChain { block: AvailabilityPendingExecutedBlock, ) -> Result { let slot = block.block.slot(); - let availability = self - .data_availability_checker - .put_pending_executed_block(block)?; + let availability = self.data_availability_checker.put_executed_block(block)?; self.process_availability(slot, availability, || Ok(())) .await } @@ -3889,9 +3835,16 @@ impl BeaconChain { .map_err(BeaconChainError::from)?; } + // Take an upgradable read lock on fork choice so we can check if this block has already + // been imported. We don't want to repeat work importing a block that is already imported. + let fork_choice_reader = self.canonical_head.fork_choice_upgradable_read_lock(); + if fork_choice_reader.contains_block(&block_root) { + return Err(BlockError::DuplicateFullyImported(block_root)); + } + // Take an exclusive write-lock on fork choice. It's very important to prevent deadlocks by // avoiding taking other locks whilst holding this lock. - let mut fork_choice = self.canonical_head.fork_choice_write_lock(); + let mut fork_choice = parking_lot::RwLockUpgradableReadGuard::upgrade(fork_choice_reader); // Do not import a block that doesn't descend from the finalized root. let signed_block = @@ -4746,65 +4699,54 @@ impl BeaconChain { // Compute the proposer index. let head_epoch = cached_head.head_slot().epoch(T::EthSpec::slots_per_epoch()); - let shuffling_decision_root = if head_epoch == proposal_epoch { - cached_head - .snapshot - .beacon_state - .proposer_shuffling_decision_root(proposer_head)? 
- } else { - proposer_head - }; - let cached_proposer = self - .beacon_proposer_cache - .lock() - .get_slot::(shuffling_decision_root, proposal_slot); - let proposer_index = if let Some(proposer) = cached_proposer { - proposer.index as u64 - } else { - if head_epoch + self.config.sync_tolerance_epochs < proposal_epoch { - warn!( - msg = "this is a non-critical issue that can happen on unhealthy nodes or \ - networks.", - %proposal_epoch, - %head_epoch, - "Skipping proposer preparation" - ); - - // Don't skip the head forward more than two epochs. This avoids burdening an - // unhealthy node. - // - // Although this node might miss out on preparing for a proposal, they should still - // be able to propose. This will prioritise beacon chain health over efficient - // packing of execution blocks. - return Ok(None); - } - - let (proposers, decision_root, _, fork) = - compute_proposer_duties_from_head(proposal_epoch, self)?; - - let proposer_offset = (proposal_slot % T::EthSpec::slots_per_epoch()).as_usize(); - let proposer = *proposers - .get(proposer_offset) - .ok_or(BeaconChainError::NoProposerForSlot(proposal_slot))?; - - self.beacon_proposer_cache.lock().insert( - proposal_epoch, - decision_root, - proposers, - fork, - )?; + let shuffling_decision_root = cached_head + .snapshot + .beacon_state + .proposer_shuffling_decision_root_at_epoch(proposal_epoch, proposer_head, &self.spec)?; + + let Some(proposer_index) = self.with_proposer_cache( + shuffling_decision_root, + proposal_epoch, + |proposers| proposers.get_slot::(proposal_slot).map(|p| p.index as u64), + || { + if head_epoch + self.config.sync_tolerance_epochs < proposal_epoch { + warn!( + msg = "this is a non-critical issue that can happen on unhealthy nodes or \ + networks", + %proposal_epoch, + %head_epoch, + "Skipping proposer preparation" + ); - // It's possible that the head changes whilst computing these duties. 
If so, abandon - // this routine since the change of head would have also spawned another instance of - // this routine. - // - // Exit now, after updating the cache. - if decision_root != shuffling_decision_root { - warn!("Head changed during proposer preparation"); - return Ok(None); + // Don't skip the head forward too many epochs. This avoids burdening an + // unhealthy node. + // + // Although this node might miss out on preparing for a proposal, they should + // still be able to propose. This will prioritise beacon chain health over + // efficient packing of execution blocks. + Err(Error::SkipProposerPreparation) + } else { + let head = self.canonical_head.cached_head(); + Ok(( + head.head_state_root(), + head.snapshot.beacon_state.clone(), + )) + } + }, + ).map_or_else(|e| { + match e { + Error::ProposerCacheIncorrectState { .. } => { + warn!("Head changed during proposer preparation"); + Ok(None) + } + Error::SkipProposerPreparation => { + // Warning logged for this above. + Ok(None) + } + e => Err(e) } - - proposer as u64 + }, |value| Ok(Some(value)))? else { + return Ok(None); }; // Get the `prev_randao` and parent block number. @@ -4964,14 +4906,19 @@ impl BeaconChain { // Only attempt a re-org if we have a proposer registered for the re-org slot. let proposing_at_re_org_slot = { - // The proposer shuffling has the same decision root as the next epoch attestation - // shuffling. We know our re-org block is not on the epoch boundary, so it has the - // same proposer shuffling as the head (but not necessarily the parent which may lie - // in the previous epoch). - let shuffling_decision_root = info - .head_node - .next_epoch_shuffling_id - .shuffling_decision_block; + // We know our re-org block is not on the epoch boundary, so it has the same proposer + // shuffling as the head (but not necessarily the parent which may lie in the previous + // epoch). 
+ let shuffling_decision_root = if self + .spec + .fork_name_at_slot::(re_org_block_slot) + .fulu_enabled() + { + info.head_node.current_epoch_shuffling_id + } else { + info.head_node.next_epoch_shuffling_id + } + .shuffling_decision_block; let proposer_index = self .beacon_proposer_cache .lock() @@ -5286,16 +5233,20 @@ impl BeaconChain { None }; + let slashings_and_exits_span = debug_span!("get_slashings_and_exits").entered(); let (mut proposer_slashings, mut attester_slashings, mut voluntary_exits) = self.op_pool.get_slashings_and_exits(&state, &self.spec); + drop(slashings_and_exits_span); let eth1_data = state.eth1_data().clone(); let deposits = vec![]; + let bls_changes_span = debug_span!("get_bls_to_execution_changes").entered(); let bls_to_execution_changes = self .op_pool .get_bls_to_execution_changes(&state, &self.spec); + drop(bls_changes_span); // Iterate through the naive aggregation pool and ensure all the attestations from there // are included in the operation pool. @@ -6606,6 +6557,70 @@ impl BeaconChain { } } + pub fn with_proposer_cache + From>( + &self, + shuffling_decision_block: Hash256, + proposal_epoch: Epoch, + accessor: impl Fn(&EpochBlockProposers) -> Result, + state_provider: impl FnOnce() -> Result<(Hash256, BeaconState), E>, + ) -> Result { + let cache_entry = self + .beacon_proposer_cache + .lock() + .get_or_insert_key(proposal_epoch, shuffling_decision_block); + + // If the cache entry is not initialised, run the code to initialise it inside a OnceCell. + // This prevents duplication of work across multiple threads. + // + // If it is already initialised, then `get_or_try_init` will return immediately without + // executing the initialisation code at all. + let epoch_block_proposers = cache_entry.get_or_try_init(|| { + debug!( + ?shuffling_decision_block, + %proposal_epoch, + "Proposer shuffling cache miss" + ); + + // Fetch the state on-demand if the required epoch was missing from the cache. 
+ // If the caller wants to not compute the state they must return an error here and then + // catch it at the call site. + let (state_root, mut state) = state_provider()?; + + // Ensure the state can compute proposer duties for `epoch`. + ensure_state_can_determine_proposers_for_epoch( + &mut state, + state_root, + proposal_epoch, + &self.spec, + )?; + + // Sanity check the state. + let latest_block_root = state.get_latest_block_root(state_root); + let state_decision_block_root = state.proposer_shuffling_decision_root_at_epoch( + proposal_epoch, + latest_block_root, + &self.spec, + )?; + if state_decision_block_root != shuffling_decision_block { + return Err(Error::ProposerCacheIncorrectState { + state_decision_block_root, + requested_decision_block_root: shuffling_decision_block, + } + .into()); + } + + let proposers = state.get_beacon_proposer_indices(proposal_epoch, &self.spec)?; + Ok::<_, E>(EpochBlockProposers::new( + proposal_epoch, + state.fork(), + proposers, + )) + })?; + + // Run the accessor function on the computed epoch proposers. + accessor(epoch_block_proposers).map_err(Into::into) + } + /// Runs the `map_fn` with the committee cache for `shuffling_epoch` from the chain with head /// `head_block_root`. 
The `map_fn` will be supplied two values: /// @@ -7145,12 +7160,6 @@ impl BeaconChain { ) } - pub fn metrics(&self) -> BeaconChainMetrics { - BeaconChainMetrics { - reqresp_pre_import_cache_len: self.reqresp_pre_import_cache.read().len(), - } - } - pub(crate) fn get_blobs_or_columns_store_op( &self, block_root: Hash256, diff --git a/beacon_node/beacon_chain/src/beacon_proposer_cache.rs b/beacon_node/beacon_chain/src/beacon_proposer_cache.rs index 12970214c6a..a64b4981cc4 100644 --- a/beacon_node/beacon_chain/src/beacon_proposer_cache.rs +++ b/beacon_node/beacon_chain/src/beacon_proposer_cache.rs @@ -12,9 +12,9 @@ use crate::{BeaconChain, BeaconChainError, BeaconChainTypes}; use fork_choice::ExecutionStatus; use lru::LruCache; use once_cell::sync::OnceCell; +use safe_arith::SafeArith; use smallvec::SmallVec; use state_processing::state_advance::partial_state_advance; -use std::cmp::Ordering; use std::num::NonZeroUsize; use std::sync::Arc; use types::non_zero_usize::new_non_zero_usize; @@ -51,6 +51,34 @@ pub struct EpochBlockProposers { pub(crate) proposers: SmallVec<[usize; TYPICAL_SLOTS_PER_EPOCH]>, } +impl EpochBlockProposers { + pub fn new(epoch: Epoch, fork: Fork, proposers: Vec) -> Self { + Self { + epoch, + fork, + proposers: proposers.into(), + } + } + + pub fn get_slot(&self, slot: Slot) -> Result { + let epoch = slot.epoch(E::slots_per_epoch()); + if epoch == self.epoch { + self.proposers + .get(slot.as_usize() % E::SlotsPerEpoch::to_usize()) + .map(|&index| Proposer { + index, + fork: self.fork, + }) + .ok_or(BeaconChainError::ProposerCacheOutOfBounds { slot, epoch }) + } else { + Err(BeaconChainError::ProposerCacheWrongEpoch { + request_epoch: epoch, + cache_epoch: self.epoch, + }) + } + } +} + /// A cache to store the proposers for some epoch. /// /// See the module-level documentation for more information. 
@@ -76,23 +104,8 @@ impl BeaconProposerCache { ) -> Option { let epoch = slot.epoch(E::slots_per_epoch()); let key = (epoch, shuffling_decision_block); - let cache_opt = self.cache.get(&key).and_then(|cell| cell.get()); - if let Some(cache) = cache_opt { - // This `if` statement is likely unnecessary, but it feels like good practice. - if epoch == cache.epoch { - cache - .proposers - .get(slot.as_usize() % E::SlotsPerEpoch::to_usize()) - .map(|&index| Proposer { - index, - fork: cache.fork, - }) - } else { - None - } - } else { - None - } + let cache = self.cache.get(&key)?.get()?; + cache.get_slot::(slot).ok() } /// As per `Self::get_slot`, but returns all proposers in all slots for the given `epoch`. @@ -142,11 +155,7 @@ impl BeaconProposerCache { ) -> Result<(), BeaconStateError> { let key = (epoch, shuffling_decision_block); if !self.cache.contains(&key) { - let epoch_proposers = EpochBlockProposers { - epoch, - fork, - proposers: proposers.into(), - }; + let epoch_proposers = EpochBlockProposers::new(epoch, fork, proposers); self.cache .put(key, Arc::new(OnceCell::with_value(epoch_proposers))); } @@ -178,7 +187,12 @@ pub fn compute_proposer_duties_from_head( .ok_or(BeaconChainError::HeadMissingFromForkChoice(head_block_root))?; // Advance the state into the requested epoch. - ensure_state_is_in_epoch(&mut state, head_state_root, request_epoch, &chain.spec)?; + ensure_state_can_determine_proposers_for_epoch( + &mut state, + head_state_root, + request_epoch, + &chain.spec, + )?; let indices = state .get_beacon_proposer_indices(request_epoch, &chain.spec) @@ -186,13 +200,13 @@ pub fn compute_proposer_duties_from_head( let dependent_root = state // The only block which decides its own shuffling is the genesis block. 
- .proposer_shuffling_decision_root(chain.genesis_block_root) + .proposer_shuffling_decision_root(chain.genesis_block_root, &chain.spec) .map_err(BeaconChainError::from)?; Ok((indices, dependent_root, execution_status, state.fork())) } -/// If required, advance `state` to `target_epoch`. +/// If required, advance `state` to the epoch required to determine proposer indices in `target_epoch`. /// /// ## Details /// @@ -200,22 +214,39 @@ pub fn compute_proposer_duties_from_head( /// - No-op if `state.current_epoch() == target_epoch`. /// - It must be the case that `state.canonical_root() == state_root`, but this function will not /// check that. -pub fn ensure_state_is_in_epoch( +pub fn ensure_state_can_determine_proposers_for_epoch( state: &mut BeaconState, state_root: Hash256, target_epoch: Epoch, spec: &ChainSpec, ) -> Result<(), BeaconChainError> { - match state.current_epoch().cmp(&target_epoch) { - // Protects against an inconsistent slot clock. - Ordering::Greater => Err(BeaconStateError::SlotOutOfBounds.into()), - // The state needs to be advanced. - Ordering::Less => { + // The decision slot is the end of an epoch, so we add 1 to reach the first slot of the epoch + // at which the shuffling is determined. + let minimum_slot = spec + .proposer_shuffling_decision_slot::(target_epoch) + .safe_add(1)?; + let minimum_epoch = minimum_slot.epoch(E::slots_per_epoch()); + + // Before and after Fulu, the oldest epoch reachable from a state at epoch N is epoch N itself, + // i.e. we can never "look back". + let maximum_epoch = target_epoch; + + if state.current_epoch() > maximum_epoch { + Err(BeaconStateError::SlotOutOfBounds.into()) + } else if state.current_epoch() >= minimum_epoch { + if target_epoch > state.current_epoch() { let target_slot = target_epoch.start_slot(E::slots_per_epoch()); + + // Advance the state into the same epoch as the block. Use the "partial" method since state + // roots are not important for proposer/attester shuffling. 
partial_state_advance(state, Some(state_root), target_slot, spec) - .map_err(BeaconChainError::from) + .map_err(BeaconChainError::from)?; } - // The state is suitable, nothing to do. - Ordering::Equal => Ok(()), + Ok(()) + } else { + // State's current epoch is less than the minimum epoch. + // Advance the state up to the minimum epoch. + partial_state_advance(state, Some(state_root), minimum_slot, spec) + .map_err(BeaconChainError::from) } } diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 53676c0b248..53f2eff0ca3 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -5,8 +5,7 @@ use std::sync::Arc; use crate::beacon_chain::{BeaconChain, BeaconChainTypes}; use crate::block_verification::{ - BlockSlashInfo, cheap_state_advance_to_obtain_committees, get_validator_pubkey_cache, - process_block_slash_info, + BlockSlashInfo, get_validator_pubkey_cache, process_block_slash_info, }; use crate::kzg_utils::{validate_blob, validate_blobs}; use crate::observed_data_sidecars::{ObservationStrategy, Observe}; @@ -494,59 +493,31 @@ pub fn validate_blob_sidecar_for_gossip(proposer_shuffling_root, blob_slot); - - let (proposer_index, fork) = if let Some(proposer) = proposer_opt { - (proposer.index, proposer.fork) - } else { - debug!( - %block_root, - %blob_index, - "Proposer shuffling cache miss for blob verification" - ); - let (parent_state_root, mut parent_state) = chain - .store - .get_advanced_hot_state(block_parent_root, blob_slot, parent_block.state_root) - .map_err(|e| GossipBlobError::BeaconChainError(Box::new(e.into())))? 
- .ok_or_else(|| { - BeaconChainError::DBInconsistent(format!( - "Missing state for parent block {block_parent_root:?}", - )) - })?; - - let state = cheap_state_advance_to_obtain_committees::<_, GossipBlobError>( - &mut parent_state, - Some(parent_state_root), - blob_slot, - &chain.spec, - )?; - - let epoch = state.current_epoch(); - let proposers = state.get_beacon_proposer_indices(epoch, &chain.spec)?; - let proposer_index = *proposers - .get(blob_slot.as_usize() % T::EthSpec::slots_per_epoch() as usize) - .ok_or_else(|| BeaconChainError::NoProposerForSlot(blob_slot))?; - - // Prime the proposer shuffling cache with the newly-learned value. - chain.beacon_proposer_cache.lock().insert( - blob_epoch, - proposer_shuffling_root, - proposers, - state.fork(), - )?; - (proposer_index, state.fork()) - }; + parent_block.proposer_shuffling_root_for_child_block(blob_epoch, &chain.spec); + + let proposer = chain.with_proposer_cache( + proposer_shuffling_root, + blob_epoch, + |proposers| proposers.get_slot::(blob_slot), + || { + debug!( + %block_root, + index = %blob_index, + "Proposer shuffling cache miss for blob verification" + ); + chain + .store + .get_advanced_hot_state(block_parent_root, blob_slot, parent_block.state_root) + .map_err(|e| GossipBlobError::BeaconChainError(Box::new(e.into())))? + .ok_or_else(|| { + GossipBlobError::BeaconChainError(Box::new(BeaconChainError::DBInconsistent( + format!("Missing state for parent block {block_parent_root:?}",), + ))) + }) + }, + )?; + let proposer_index = proposer.index; + let fork = proposer.fork; // Signature verify the signed block header. 
let signature_is_valid = { diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 1d10fae0a49..d0ed8258e55 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -948,61 +948,35 @@ impl GossipVerifiedBlock { } let proposer_shuffling_decision_block = - if parent_block.slot.epoch(T::EthSpec::slots_per_epoch()) == block_epoch { - parent_block - .next_epoch_shuffling_id - .shuffling_decision_block - } else { - parent_block.root - }; + parent_block.proposer_shuffling_root_for_child_block(block_epoch, &chain.spec); // We assign to a variable instead of using `if let Some` directly to ensure we drop the // write lock before trying to acquire it again in the `else` clause. - let proposer_opt = chain - .beacon_proposer_cache - .lock() - .get_slot::(proposer_shuffling_decision_block, block.slot()); - let (expected_proposer, fork, parent, block) = if let Some(proposer) = proposer_opt { - // The proposer index was cached and we can return it without needing to load the - // parent. - (proposer.index, proposer.fork, None, block) - } else { - // The proposer index was *not* cached and we must load the parent in order to determine - // the proposer index. - let (mut parent, block) = load_parent(block, chain)?; - - debug!( - parent_root = ?parent.beacon_block_root, - parent_slot = %parent.beacon_block.slot(), - ?block_root, - block_slot = %block.slot(), - "Proposer shuffling cache miss" - ); - - // The state produced is only valid for determining proposer/attester shuffling indices. 
- let state = cheap_state_advance_to_obtain_committees::<_, BlockError>( - &mut parent.pre_state, - parent.beacon_state_root, - block.slot(), - &chain.spec, - )?; - - let epoch = state.current_epoch(); - let proposers = state.get_beacon_proposer_indices(epoch, &chain.spec)?; - let proposer_index = *proposers - .get(block.slot().as_usize() % T::EthSpec::slots_per_epoch() as usize) - .ok_or_else(|| BeaconChainError::NoProposerForSlot(block.slot()))?; - - // Prime the proposer shuffling cache with the newly-learned value. - chain.beacon_proposer_cache.lock().insert( - block_epoch, - proposer_shuffling_decision_block, - proposers, - state.fork(), - )?; - - (proposer_index, state.fork(), Some(parent), block) - }; + let block_slot = block.slot(); + let mut opt_parent = None; + let proposer = chain.with_proposer_cache::<_, BlockError>( + proposer_shuffling_decision_block, + block_epoch, + |proposers| proposers.get_slot::(block_slot), + || { + // The proposer index was *not* cached and we must load the parent in order to + // determine the proposer index. + let (mut parent, _) = load_parent(block.clone(), chain)?; + let parent_state_root = if let Some(state_root) = parent.beacon_state_root { + state_root + } else { + // This is potentially a little inefficient, although we are likely to need + // the state's hash eventually (if the block is valid), and we are also likely + // to already have the hash cached (if fetched from the state cache). + parent.pre_state.canonical_root()? 
+ }; + let parent_state = parent.pre_state.clone(); + opt_parent = Some(parent); + Ok((parent_state_root, parent_state)) + }, + )?; + let expected_proposer = proposer.index; + let fork = proposer.fork; let signature_is_valid = { let pubkey_cache = get_validator_pubkey_cache(chain)?; @@ -1077,7 +1051,7 @@ impl GossipVerifiedBlock { Ok(Self { block, block_root, - parent, + parent: opt_parent, consensus_context, }) } diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 5e7aa7d4f87..5564c7916fa 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -899,6 +899,7 @@ where let genesis_time = head_snapshot.beacon_state.genesis_time(); let canonical_head = CanonicalHead::new(fork_choice, Arc::new(head_snapshot)); let shuffling_cache_size = self.chain_config.shuffling_cache_size; + let complete_blob_backfill = self.chain_config.complete_blob_backfill; // Calculate the weak subjectivity point in which to backfill blocks to. 
let genesis_backfill_slot = if self.chain_config.genesis_backfill { @@ -997,7 +998,6 @@ where validator_pubkey_cache: RwLock::new(validator_pubkey_cache), attester_cache: <_>::default(), early_attester_cache: <_>::default(), - reqresp_pre_import_cache: <_>::default(), light_client_server_cache: LightClientServerCache::new(), light_client_server_tx: self.light_client_server_tx, shutdown_sender: self @@ -1013,6 +1013,7 @@ where genesis_backfill_slot, data_availability_checker: Arc::new( DataAvailabilityChecker::new( + complete_blob_backfill, slot_clock, self.kzg.clone(), store, diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index 56d19759725..7dd4c88c513 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -47,8 +47,9 @@ use fork_choice::{ ResetPayloadStatuses, }; use itertools::process_results; +use lighthouse_tracing::SPAN_RECOMPUTE_HEAD; use logging::crit; -use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockUpgradableReadGuard, RwLockWriteGuard}; use slot_clock::SlotClock; use state_processing::AllCaches; use std::sync::Arc; @@ -57,6 +58,7 @@ use store::{ Error as StoreError, KeyValueStore, KeyValueStoreOp, StoreConfig, iter::StateRootsIterator, }; use task_executor::{JoinHandle, ShutdownReason}; +use tracing::info_span; use tracing::{debug, error, info, instrument, warn}; use types::*; @@ -79,6 +81,10 @@ impl CanonicalHeadRwLock { self.0.read() } + fn upgradable_read(&self) -> RwLockUpgradableReadGuard<'_, T> { + self.0.upgradable_read() + } + fn write(&self) -> RwLockWriteGuard<'_, T> { self.0.write() } @@ -379,6 +385,7 @@ impl CanonicalHead { /// /// This function is **not safe** to be public. See the module-level documentation for more /// information about protecting from deadlocks. 
+ #[instrument(skip_all)] fn cached_head_write_lock(&self) -> RwLockWriteGuard<'_, CachedHead> { self.cached_head.write() } @@ -389,7 +396,16 @@ impl CanonicalHead { self.fork_choice.read() } + /// Access an upgradable read-lock for fork choice. + pub fn fork_choice_upgradable_read_lock( + &self, + ) -> RwLockUpgradableReadGuard<'_, BeaconForkChoice> { + let _timer = metrics::start_timer(&metrics::FORK_CHOICE_UPGRADABLE_READ_LOCK_AQUIRE_TIMES); + self.fork_choice.upgradable_read() + } + /// Access a write-lock for fork choice. + #[instrument(skip_all)] pub fn fork_choice_write_lock(&self) -> RwLockWriteGuard<'_, BeaconForkChoice> { let _timer = metrics::start_timer(&metrics::FORK_CHOICE_WRITE_LOCK_AQUIRE_TIMES); self.fork_choice.write() @@ -497,13 +513,21 @@ impl BeaconChain { /// situation can be rectified. We avoid returning an error here so that calling functions /// can't abort block import because an error is returned here. pub async fn recompute_head_at_slot(self: &Arc, current_slot: Slot) { + let span = info_span!( + SPAN_RECOMPUTE_HEAD, + slot = %current_slot + ); + metrics::inc_counter(&metrics::FORK_CHOICE_REQUESTS); let _timer = metrics::start_timer(&metrics::FORK_CHOICE_TIMES); let chain = self.clone(); match self .spawn_blocking_handle( - move || chain.recompute_head_at_slot_internal(current_slot), + move || { + let _guard = span.enter(); + chain.recompute_head_at_slot_internal(current_slot) + }, "recompute_head_internal", ) .await @@ -761,6 +785,7 @@ impl BeaconChain { } /// Perform updates to caches and other components after the canonical head has been changed. 
+ #[instrument(skip_all)] fn after_new_head( self: &Arc, old_cached_head: &CachedHead, @@ -804,7 +829,7 @@ impl BeaconChain { let head_slot = new_snapshot.beacon_state.slot(); let dependent_root = new_snapshot .beacon_state - .proposer_shuffling_decision_root(self.genesis_block_root); + .attester_shuffling_decision_root(self.genesis_block_root, RelativeEpoch::Next); let prev_dependent_root = new_snapshot .beacon_state .attester_shuffling_decision_root(self.genesis_block_root, RelativeEpoch::Current); @@ -899,6 +924,7 @@ impl BeaconChain { /// /// This function will take a write-lock on `canonical_head.fork_choice`, therefore it would be /// unwise to hold any lock on fork choice while calling this function. + #[instrument(skip_all)] fn after_finalization( self: &Arc, new_cached_head: &CachedHead, @@ -911,13 +937,6 @@ impl BeaconChain { .execution_status .is_optimistic_or_invalid(); - self.op_pool.prune_all( - &new_snapshot.beacon_block, - &new_snapshot.beacon_state, - self.epoch()?, - &self.spec, - ); - self.observed_block_producers.write().prune( new_view .finalized_checkpoint @@ -956,9 +975,9 @@ impl BeaconChain { })); } - // The store migration task requires the *state at the slot of the finalized epoch*, - // rather than the state of the latest finalized block. These two values will only - // differ when the first slot of the finalized epoch is a skip slot. + // The store migration task and op pool pruning require the *state at the first slot of the + // finalized epoch*, rather than the state of the latest finalized block. These two values + // will only differ when the first slot of the finalized epoch is a skip slot. // // Use the `StateRootsIterator` directly rather than `BeaconChain::state_root_at_slot` // to ensure we use the same state that we just set as the head. @@ -980,6 +999,23 @@ impl BeaconChain { )? 
.ok_or(Error::MissingFinalizedStateRoot(new_finalized_slot))?; + let update_cache = true; + let new_finalized_state = self + .store + .get_hot_state(&new_finalized_state_root, update_cache)? + .ok_or(Error::MissingBeaconState(new_finalized_state_root))?; + + self.op_pool.prune_all( + &new_snapshot.beacon_block, + &new_snapshot.beacon_state, + &new_finalized_state, + self.epoch()?, + &self.spec, + ); + + // We just pass the state root to the finalization thread. It should be able to reload the + // state from the state_cache near instantly anyway. We could experiment with sending the + // state over a channel in future, but it's probably no quicker. self.store_migrator.process_finalization( new_finalized_state_root.into(), new_view.finalized_checkpoint, @@ -1034,6 +1070,7 @@ impl BeaconChain { /// /// This function is called whilst holding a write-lock on the `canonical_head`. To ensure dead-lock /// safety, **do not take any other locks inside this function**. +#[instrument(skip_all)] fn check_finalized_payload_validity( chain: &BeaconChain, finalized_proto_block: &ProtoBlock, @@ -1117,6 +1154,7 @@ fn perform_debug_logging( } } +#[instrument(skip_all)] fn spawn_execution_layer_updates( chain: Arc>, forkchoice_update_params: ForkchoiceUpdateParameters, diff --git a/beacon_node/beacon_chain/src/chain_config.rs b/beacon_node/beacon_chain/src/chain_config.rs index d6be96afe94..a7defa9fa2a 100644 --- a/beacon_node/beacon_chain/src/chain_config.rs +++ b/beacon_node/beacon_chain/src/chain_config.rs @@ -86,6 +86,8 @@ pub struct ChainConfig { /// If using a weak-subjectivity sync, whether we should download blocks all the way back to /// genesis. pub genesis_backfill: bool, + /// EXPERIMENTAL: backfill blobs and data columns beyond the data availability window. + pub complete_blob_backfill: bool, /// Whether to send payload attributes every slot, regardless of connected proposers. /// /// This is useful for block builders and testing. 
@@ -144,6 +146,7 @@ impl Default for ChainConfig { optimistic_finalized_sync: true, shuffling_cache_size: crate::shuffling_cache::DEFAULT_CACHE_SIZE, genesis_backfill: false, + complete_blob_backfill: false, always_prepare_payload: false, epochs_per_migration: crate::migrate::DEFAULT_EPOCHS_PER_MIGRATION, enable_light_client_server: true, diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 9225ed6b47b..43b7d8f7ea3 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -7,7 +7,9 @@ use crate::block_verification_types::{ use crate::data_availability_checker::overflow_lru_cache::{ DataAvailabilityCheckerInner, ReconstructColumnsDecision, }; -use crate::{BeaconChain, BeaconChainTypes, BeaconStore, CustodyContext, metrics}; +use crate::{ + BeaconChain, BeaconChainTypes, BeaconStore, BlockProcessStatus, CustodyContext, metrics, +}; use kzg::Kzg; use slot_clock::SlotClock; use std::fmt; @@ -19,14 +21,15 @@ use task_executor::TaskExecutor; use tracing::{debug, error, instrument}; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ - BlobSidecarList, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256, - SignedBeaconBlock, Slot, + BlobSidecarList, BlockImportSource, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, + EthSpec, Hash256, SignedBeaconBlock, Slot, }; mod error; mod overflow_lru_cache; mod state_lru_cache; +use crate::data_availability_checker::error::Error; use crate::data_column_verification::{ CustodyDataColumn, GossipVerifiedDataColumn, KzgVerifiedCustodyDataColumn, KzgVerifiedDataColumn, verify_kzg_for_data_column_list, @@ -78,6 +81,7 @@ pub const STATE_LRU_CAPACITY: usize = STATE_LRU_CAPACITY_NON_ZERO.get(); /// proposer. 
Having a capacity > 1 is an optimization to prevent sync lookup from having re-fetch /// data during moments of unstable network conditions. pub struct DataAvailabilityChecker { + complete_blob_backfill: bool, availability_cache: Arc>, slot_clock: T::SlotClock, kzg: Arc, @@ -116,6 +120,7 @@ impl Debug for Availability { impl DataAvailabilityChecker { pub fn new( + complete_blob_backfill: bool, slot_clock: T::SlotClock, kzg: Arc, store: BeaconStore, @@ -129,6 +134,7 @@ impl DataAvailabilityChecker { spec.clone(), )?; Ok(Self { + complete_blob_backfill, availability_cache: Arc::new(inner), slot_clock, kzg, @@ -141,14 +147,12 @@ impl DataAvailabilityChecker { &self.custody_context } - /// Checks if the block root is currenlty in the availability cache awaiting import because + /// Checks if the block root is currently in the availability cache awaiting import because /// of missing components. - pub fn get_execution_valid_block( - &self, - block_root: &Hash256, - ) -> Option>> { - self.availability_cache - .get_execution_valid_block(block_root) + /// + /// Returns the cache block wrapped in a `BlockProcessStatus` enum if it exists. + pub fn get_cached_block(&self, block_root: &Hash256) -> Option> { + self.availability_cache.get_cached_block(block_root) } /// Return the set of cached blob indexes for `block_root`. Returns None if there is no block @@ -337,12 +341,30 @@ impl DataAvailabilityChecker { /// Check if we have all the blobs for a block. Returns `Availability` which has information /// about whether all components have been received or more are required. - pub fn put_pending_executed_block( + pub fn put_executed_block( &self, executed_block: AvailabilityPendingExecutedBlock, ) -> Result, AvailabilityCheckError> { + self.availability_cache.put_executed_block(executed_block) + } + + /// Inserts a pre-execution block into the cache. + /// This does NOT override an existing executed block. 
+ pub fn put_pre_execution_block( + &self, + block_root: Hash256, + block: Arc>, + source: BlockImportSource, + ) -> Result<(), Error> { self.availability_cache - .put_pending_executed_block(executed_block) + .put_pre_execution_block(block_root, block, source) + } + + /// Removes a pre-execution block from the cache. + /// This does NOT remove an existing executed block. + pub fn remove_block_on_execution_error(&self, block_root: &Hash256) { + self.availability_cache + .remove_pre_execution_block(block_root); } /// Verifies kzg commitments for an RpcBlock, returns a `MaybeAvailableBlock` that may @@ -518,9 +540,15 @@ impl DataAvailabilityChecker { /// The epoch at which we require a data availability check in block processing. /// `None` if the `Deneb` fork is disabled. pub fn data_availability_boundary(&self) -> Option { - let current_epoch = self.slot_clock.now()?.epoch(T::EthSpec::slots_per_epoch()); - self.spec - .min_epoch_data_availability_boundary(current_epoch) + let fork_epoch = self.spec.deneb_fork_epoch?; + + if self.complete_blob_backfill { + Some(fork_epoch) + } else { + let current_epoch = self.slot_clock.now()?.epoch(T::EthSpec::slots_per_epoch()); + self.spec + .min_epoch_data_availability_boundary(current_epoch) + } } /// Returns true if the given epoch lies within the da boundary and false otherwise. 
@@ -547,6 +575,7 @@ impl DataAvailabilityChecker { } } + #[instrument(skip_all, level = "debug")] pub fn reconstruct_data_columns( &self, block_root: &Hash256, @@ -1075,7 +1104,15 @@ mod test { let kzg = get_kzg(&spec); let store = Arc::new(HotColdDB::open_ephemeral(<_>::default(), spec.clone()).unwrap()); let custody_context = Arc::new(CustodyContext::new(false)); - DataAvailabilityChecker::new(slot_clock, kzg, store, custody_context, spec) - .expect("should initialise data availability checker") + let complete_blob_backfill = false; + DataAvailabilityChecker::new( + complete_blob_backfill, + slot_clock, + kzg, + store, + custody_context, + spec, + ) + .expect("should initialise data availability checker") } } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 9de63f61261..42f6dbd8567 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -1,6 +1,5 @@ use super::AvailableBlockData; use super::state_lru_cache::{DietAvailabilityPendingExecutedBlock, StateLRUCache}; -use crate::BeaconChainTypes; use crate::CustodyContext; use crate::beacon_chain::BeaconStore; use crate::blob_verification::KzgVerifiedBlob; @@ -9,6 +8,7 @@ use crate::block_verification_types::{ }; use crate::data_availability_checker::{Availability, AvailabilityCheckError}; use crate::data_column_verification::KzgVerifiedCustodyDataColumn; +use crate::{BeaconChainTypes, BlockProcessStatus}; use lighthouse_tracing::SPAN_PENDING_COMPONENTS; use lru::LruCache; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -16,12 +16,47 @@ use std::cmp::Ordering; use std::num::NonZeroUsize; use std::sync::Arc; use tracing::{Span, debug, debug_span}; +use types::beacon_block_body::KzgCommitments; use types::blob_sidecar::BlobIdentifier; use 
types::{ - BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, - Hash256, RuntimeFixedVector, RuntimeVariableList, SignedBeaconBlock, + BlobSidecar, BlockImportSource, ChainSpec, ColumnIndex, DataColumnSidecar, + DataColumnSidecarList, Epoch, EthSpec, Hash256, RuntimeFixedVector, RuntimeVariableList, + SignedBeaconBlock, }; +#[derive(Clone)] +pub enum CachedBlock { + PreExecution(Arc>, BlockImportSource), + Executed(Box>), +} + +impl CachedBlock { + pub fn get_commitments(&self) -> KzgCommitments { + let block = self.as_block(); + block + .message() + .body() + .blob_kzg_commitments() + .cloned() + .unwrap_or_default() + } + + fn as_block(&self) -> &SignedBeaconBlock { + match self { + CachedBlock::PreExecution(b, _) => b, + CachedBlock::Executed(b) => b.as_block(), + } + } + + pub fn num_blobs_expected(&self) -> usize { + self.as_block() + .message() + .body() + .blob_kzg_commitments() + .map_or(0, |commitments| commitments.len()) + } +} + /// This represents the components of a partially available block /// /// The blobs are all gossip and kzg verified. @@ -39,22 +74,25 @@ pub struct PendingComponents { pub block_root: Hash256, pub verified_blobs: RuntimeFixedVector>>, pub verified_data_columns: Vec>, - pub executed_block: Option>, + pub block: Option>, pub reconstruction_started: bool, span: Span, } impl PendingComponents { - /// Returns an immutable reference to the cached block. - pub fn get_cached_block(&self) -> &Option> { - &self.executed_block - } - /// Returns an immutable reference to the fixed vector of cached blobs. pub fn get_cached_blobs(&self) -> &RuntimeFixedVector>> { &self.verified_blobs } + #[cfg(test)] + fn get_diet_block(&self) -> Option<&DietAvailabilityPendingExecutedBlock> { + self.block.as_ref().and_then(|block| match block { + CachedBlock::Executed(block) => Some(block.as_ref()), + _ => None, + }) + } + /// Returns an immutable reference to the cached data column. 
pub fn get_cached_data_column( &self, @@ -66,11 +104,6 @@ impl PendingComponents { .map(|d| d.clone_arc()) } - /// Returns a mutable reference to the cached block. - pub fn get_cached_block_mut(&mut self) -> &mut Option> { - &mut self.executed_block - } - /// Returns a mutable reference to the fixed vector of cached blobs. pub fn get_cached_blobs_mut(&mut self) -> &mut RuntimeFixedVector>> { &mut self.verified_blobs @@ -96,9 +129,21 @@ impl PendingComponents { .collect() } - /// Inserts a block into the cache. - pub fn insert_block(&mut self, block: DietAvailabilityPendingExecutedBlock) { - *self.get_cached_block_mut() = Some(block) + /// Inserts an executed block into the cache. + pub fn insert_executed_block(&mut self, block: DietAvailabilityPendingExecutedBlock) { + self.block = Some(CachedBlock::Executed(Box::new(block))) + } + + /// Inserts a pre-execution block into the cache. + /// This does NOT override an existing executed block. + pub fn insert_pre_execution_block( + &mut self, + block: Arc>, + source: BlockImportSource, + ) { + if self.block.is_none() { + self.block = Some(CachedBlock::PreExecution(block, source)) + } } /// Inserts a blob at a specific index in the cache. @@ -128,7 +173,7 @@ impl PendingComponents { /// 1. The blob entry at the index is empty and no block exists, or /// 2. The block exists and its commitment matches the blob's commitment. pub fn merge_single_blob(&mut self, index: usize, blob: KzgVerifiedBlob) { - if let Some(cached_block) = self.get_cached_block() { + if let Some(cached_block) = &self.block { let block_commitment_opt = cached_block.get_commitments().get(index).copied(); if let Some(block_commitment) = block_commitment_opt && block_commitment == *blob.get_commitment() @@ -158,7 +203,7 @@ impl PendingComponents { /// /// Blobs that don't match the new block's commitments are evicted. 
pub fn merge_block(&mut self, block: DietAvailabilityPendingExecutedBlock) { - self.insert_block(block); + self.insert_executed_block(block); let reinsert = self.get_cached_blobs_mut().take(); self.merge_blobs(reinsert); } @@ -180,7 +225,7 @@ impl PendingComponents { &Span, ) -> Result, AvailabilityCheckError>, { - let Some(block) = &self.executed_block else { + let Some(CachedBlock::Executed(block)) = &self.block else { // Block not available yet return Ok(None); }; @@ -267,7 +312,7 @@ impl PendingComponents { block, import_data, payload_verification_outcome, - } = recover(block.clone(), &self.span)?; + } = recover(*block.clone(), &self.span)?; let available_block = AvailableBlock { block_root: self.block_root, @@ -295,7 +340,7 @@ impl PendingComponents { block_root, verified_blobs: RuntimeFixedVector::new(vec![None; max_len]), verified_data_columns: vec![], - executed_block: None, + block: None, reconstruction_started: false, span, } @@ -307,9 +352,9 @@ impl PendingComponents { /// - The first data column /// Otherwise, returns None pub fn epoch(&self) -> Option { - // Get epoch from cached executed block - if let Some(executed_block) = &self.executed_block { - return Some(executed_block.as_block().epoch()); + // Get epoch from cached block + if let Some(block) = &self.block { + return Some(block.as_block().epoch()); } // Or, get epoch from first available blob @@ -326,7 +371,7 @@ impl PendingComponents { } pub fn status_str(&self, num_expected_columns_opt: Option) -> String { - let block_count = if self.executed_block.is_some() { 1 } else { 0 }; + let block_count = if self.block.is_some() { 1 } else { 0 }; if let Some(num_expected_columns) = num_expected_columns_opt { format!( "block {} data_columns {}/{}", @@ -335,7 +380,7 @@ impl PendingComponents { num_expected_columns ) } else { - let num_expected_blobs = if let Some(block) = self.get_cached_block() { + let num_expected_blobs = if let Some(block) = &self.block { &block.num_blobs_expected().to_string() } else 
{ "?" @@ -387,18 +432,19 @@ impl DataAvailabilityCheckerInner { } /// Returns true if the block root is known, without altering the LRU ordering - pub fn get_execution_valid_block( - &self, - block_root: &Hash256, - ) -> Option>> { + pub fn get_cached_block(&self, block_root: &Hash256) -> Option> { self.critical .read() .peek(block_root) .and_then(|pending_components| { - pending_components - .executed_block - .as_ref() - .map(|block| block.block_cloned()) + pending_components.block.as_ref().map(|block| match block { + CachedBlock::PreExecution(b, source) => { + BlockProcessStatus::NotValidated(b.clone(), *source) + } + CachedBlock::Executed(b) => { + BlockProcessStatus::ExecutionValidated(b.block_cloned()) + } + }) }) } @@ -592,9 +638,9 @@ impl DataAvailabilityCheckerInner { /// Check whether data column reconstruction should be attempted. /// - /// Potentially trigger reconstruction if: - /// - Our custody requirement is all columns (supernode), and we haven't got all columns - /// - We have >= 50% of columns, but not all columns + /// Potentially trigger reconstruction if all the following satisfy: + /// - Our custody requirement is more than 50% of total columns, + /// - We haven't received all required columns /// - Reconstruction hasn't been started for the block /// /// If reconstruction is required, returns `PendingComponents` which contains the @@ -609,15 +655,25 @@ impl DataAvailabilityCheckerInner { return ReconstructColumnsDecision::No("block already imported"); }; - // If we're sampling all columns, it means we must be custodying all columns. 
+ let Some(epoch) = pending_components + .verified_data_columns + .first() + .map(|c| c.as_data_column().epoch()) + else { + return ReconstructColumnsDecision::No("not enough columns"); + }; + let total_column_count = T::EthSpec::number_of_columns(); + let sampling_column_count = self + .custody_context + .num_of_data_columns_to_sample(epoch, &self.spec); let received_column_count = pending_components.verified_data_columns.len(); if pending_components.reconstruction_started { return ReconstructColumnsDecision::No("already started"); } - if received_column_count >= total_column_count { - return ReconstructColumnsDecision::No("all columns received"); + if received_column_count >= sampling_column_count { + return ReconstructColumnsDecision::No("all sampling columns received"); } if received_column_count < total_column_count / 2 { return ReconstructColumnsDecision::No("not enough columns"); @@ -637,9 +693,47 @@ impl DataAvailabilityCheckerInner { } } + /// Inserts a pre executed block into the cache. + /// - This does NOT trigger the availability check as the block still needs to be executed. + /// - This does NOT override an existing cached block to avoid overwriting an executed block. + pub fn put_pre_execution_block( + &self, + block_root: Hash256, + block: Arc>, + source: BlockImportSource, + ) -> Result<(), AvailabilityCheckError> { + let epoch = block.epoch(); + let pending_components = + self.update_or_insert_pending_components(block_root, epoch, |pending_components| { + pending_components.insert_pre_execution_block(block, source); + Ok(()) + })?; + + let num_expected_columns_opt = self.get_num_expected_columns(epoch); + + pending_components.span.in_scope(|| { + debug!( + component = "pre execution block", + status = pending_components.status_str(num_expected_columns_opt), + "Component added to data availability checker" + ); + }); + + Ok(()) + } + + /// Removes a pre-execution block from the cache. + /// This does NOT remove an existing executed block. 
+ pub fn remove_pre_execution_block(&self, block_root: &Hash256) { + // The read lock is immediately dropped so we can safely remove the block from the cache. + if let Some(BlockProcessStatus::NotValidated(_, _)) = self.get_cached_block(block_root) { + self.critical.write().pop(block_root); + } + } + /// Check if we have all the blobs for a block. If we do, return the Availability variant that /// triggers import of the block. - pub fn put_pending_executed_block( + pub fn put_executed_block( &self, executed_block: AvailabilityPendingExecutedBlock, ) -> Result, AvailabilityCheckError> { @@ -657,14 +751,7 @@ impl DataAvailabilityCheckerInner { Ok(()) })?; - let num_expected_columns_opt = if self.spec.is_peer_das_enabled_for_epoch(epoch) { - let num_of_column_samples = self - .custody_context - .num_of_data_columns_to_sample(epoch, &self.spec); - Some(num_of_column_samples) - } else { - None - }; + let num_expected_columns_opt = self.get_num_expected_columns(epoch); pending_components.span.in_scope(|| { debug!( @@ -681,6 +768,17 @@ impl DataAvailabilityCheckerInner { ) } + fn get_num_expected_columns(&self, epoch: Epoch) -> Option { + if self.spec.is_peer_das_enabled_for_epoch(epoch) { + let num_of_column_samples = self + .custody_context + .num_of_data_columns_to_sample(epoch, &self.spec); + Some(num_of_column_samples) + } else { + None + } + } + /// maintain the cache pub fn do_maintenance(&self, cutoff_epoch: Epoch) -> Result<(), AvailabilityCheckError> { // clean up any lingering states in the state cache @@ -954,7 +1052,7 @@ mod test { ); assert!(cache.critical.read().is_empty(), "cache should be empty"); let availability = cache - .put_pending_executed_block(pending_block) + .put_executed_block(pending_block) .expect("should put block"); if blobs_expected == 0 { assert!( @@ -1021,7 +1119,7 @@ mod test { ); } let availability = cache - .put_pending_executed_block(pending_block) + .put_executed_block(pending_block) .expect("should put block"); assert!( 
matches!(availability, Availability::Available(_)), @@ -1083,7 +1181,7 @@ mod test { // put the block in the cache let availability = cache - .put_pending_executed_block(pending_block) + .put_executed_block(pending_block) .expect("should put block"); // grab the diet block from the cache for later testing @@ -1091,12 +1189,7 @@ mod test { .critical .read() .peek(&block_root) - .map(|pending_components| { - pending_components - .executed_block - .clone() - .expect("should exist") - }) + .and_then(|pending_components| pending_components.get_diet_block().cloned()) .expect("should exist"); pushed_diet_blocks.push_back(diet_block); @@ -1257,7 +1350,7 @@ mod pending_components_tests { } pub fn assert_cache_consistent(cache: PendingComponents, max_len: usize) { - if let Some(cached_block) = cache.get_cached_block() { + if let Some(cached_block) = &cache.block { let cached_block_commitments = cached_block.get_commitments(); for index in 0..max_len { let block_commitment = cached_block_commitments.get(index).copied(); @@ -1363,4 +1456,38 @@ mod pending_components_tests { assert_cache_consistent(cache, max_len); } + + #[test] + fn should_not_insert_pre_execution_block_if_executed_block_exists() { + let (pre_execution_block, blobs, random_blobs, max_len) = pre_setup(); + let (executed_block, _blobs, _random_blobs) = + setup_pending_components(pre_execution_block.clone(), blobs, random_blobs); + + let block_root = pre_execution_block.canonical_root(); + let mut pending_component = >::empty(block_root, max_len); + + let pre_execution_block = Arc::new(pre_execution_block); + pending_component + .insert_pre_execution_block(pre_execution_block.clone(), BlockImportSource::Gossip); + assert!( + matches!( + pending_component.block, + Some(CachedBlock::PreExecution(_, _)) + ), + "pre execution block inserted" + ); + + pending_component.insert_executed_block(executed_block); + assert!( + matches!(pending_component.block, Some(CachedBlock::Executed(_))), + "executed block inserted" + ); 
+ + pending_component + .insert_pre_execution_block(pre_execution_block, BlockImportSource::Gossip); + assert!( + matches!(pending_component.block, Some(CachedBlock::Executed(_))), + "executed block should remain" + ); + } } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs index 57c236efcf6..24f9237e3c9 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs @@ -10,7 +10,6 @@ use state_processing::BlockReplayer; use std::sync::Arc; use store::OnDiskConsensusContext; use tracing::{Span, debug_span, instrument}; -use types::beacon_block_body::KzgCommitments; use types::{BeaconState, BlindedPayload, ChainSpec, Epoch, EthSpec, Hash256, SignedBeaconBlock}; /// This mirrors everything in the `AvailabilityPendingExecutedBlock`, except @@ -43,15 +42,6 @@ impl DietAvailabilityPendingExecutedBlock { .map_or(0, |commitments| commitments.len()) } - pub fn get_commitments(&self) -> KzgCommitments { - self.as_block() - .message() - .body() - .blob_kzg_commitments() - .cloned() - .unwrap_or_default() - } - /// Returns the epoch corresponding to `self.slot()`. 
pub fn epoch(&self) -> Epoch { self.block.slot().epoch(E::slots_per_epoch()) diff --git a/beacon_node/beacon_chain/src/data_column_verification.rs b/beacon_node/beacon_chain/src/data_column_verification.rs index 608e003a228..fad7771f018 100644 --- a/beacon_node/beacon_chain/src/data_column_verification.rs +++ b/beacon_node/beacon_chain/src/data_column_verification.rs @@ -1,7 +1,5 @@ -use crate::beacon_proposer_cache::EpochBlockProposers; use crate::block_verification::{ - BlockSlashInfo, cheap_state_advance_to_obtain_committees, get_validator_pubkey_cache, - process_block_slash_info, + BlockSlashInfo, get_validator_pubkey_cache, process_block_slash_info, }; use crate::kzg_utils::{reconstruct_data_columns, validate_data_columns}; use crate::observed_data_sidecars::{ObservationStrategy, Observe}; @@ -610,22 +608,21 @@ fn verify_parent_block_and_finalized_descendant( chain: &BeaconChain, ) -> Result { let fork_choice = chain.canonical_head.fork_choice_read_lock(); + let block_parent_root = data_column.block_parent_root(); + + // Do not process a column that does not descend from the finalized root. + if !fork_choice.is_finalized_checkpoint_or_descendant(block_parent_root) { + return Err(GossipDataColumnError::NotFinalizedDescendant { block_parent_root }); + } // We have already verified that the column is past finalization, so we can // just check fork choice for the block's parent. - let block_parent_root = data_column.block_parent_root(); let Some(parent_block) = fork_choice.get_block(&block_parent_root) else { return Err(GossipDataColumnError::ParentUnknown { parent_root: block_parent_root, }); }; - // Do not process a column that does not descend from the finalized root. - // We just loaded the parent_block, so we can be sure that it exists in fork choice. 
- if !fork_choice.is_finalized_checkpoint_or_descendant(block_parent_root) { - return Err(GossipDataColumnError::NotFinalizedDescendant { block_parent_root }); - } - Ok(parent_block) } @@ -641,65 +638,34 @@ fn verify_proposer_and_signature( let block_root = data_column.block_root(); let block_parent_root = data_column.block_parent_root(); - let proposer_shuffling_root = if parent_block.slot.epoch(slots_per_epoch) == column_epoch { - parent_block - .next_epoch_shuffling_id - .shuffling_decision_block - } else { - parent_block.root - }; - - // We lock the cache briefly to get or insert a OnceCell, then drop the lock - // before doing proposer shuffling calculation via `OnceCell::get_or_try_init`. This avoids - // holding the lock during the computation, while still ensuring the result is cached and - // initialised only once. - // - // This approach exposes the cache internals (`OnceCell` & `EpochBlockProposers`) - // as a trade-off for avoiding lock contention. - let epoch_proposers_cell = chain - .beacon_proposer_cache - .lock() - .get_or_insert_key(column_epoch, proposer_shuffling_root); - - let epoch_proposers = epoch_proposers_cell.get_or_try_init(move || { - debug!( - %block_root, - index = %column_index, - "Proposer shuffling cache miss for column verification" - ); - let (parent_state_root, mut parent_state) = chain - .store - .get_advanced_hot_state(block_parent_root, column_slot, parent_block.state_root) - .map_err(|e| GossipDataColumnError::BeaconChainError(Box::new(e.into())))? - .ok_or_else(|| { - BeaconChainError::DBInconsistent(format!( - "Missing state for parent block {block_parent_root:?}", - )) - })?; - - let state = cheap_state_advance_to_obtain_committees::<_, GossipDataColumnError>( - &mut parent_state, - Some(parent_state_root), - column_slot, - &chain.spec, - )?; - - let epoch = state.current_epoch(); - let proposers = state.get_beacon_proposer_indices(epoch, &chain.spec)?; - // Prime the proposer shuffling cache with the newly-learned value. 
- Ok::<_, GossipDataColumnError>(EpochBlockProposers { - epoch: column_epoch, - fork: state.fork(), - proposers: proposers.into(), - }) - })?; - - let proposer_index = *epoch_proposers - .proposers - .get(column_slot.as_usize() % slots_per_epoch as usize) - .ok_or_else(|| BeaconChainError::NoProposerForSlot(column_slot))?; - - let fork = epoch_proposers.fork; + let proposer_shuffling_root = + parent_block.proposer_shuffling_root_for_child_block(column_epoch, &chain.spec); + + let proposer = chain.with_proposer_cache( + proposer_shuffling_root, + column_epoch, + |proposers| proposers.get_slot::(column_slot), + || { + debug!( + %block_root, + index = %column_index, + "Proposer shuffling cache miss for column verification" + ); + chain + .store + .get_advanced_hot_state(block_parent_root, column_slot, parent_block.state_root) + .map_err(|e| GossipDataColumnError::BeaconChainError(Box::new(e.into())))? + .ok_or_else(|| { + GossipDataColumnError::BeaconChainError(Box::new( + BeaconChainError::DBInconsistent(format!( + "Missing state for parent block {block_parent_root:?}", + )), + )) + }) + }, + )?; + let proposer_index = proposer.index; + let fork = proposer.fork; // Signature verify the signed block header. 
let signature_is_valid = { diff --git a/beacon_node/beacon_chain/src/errors.rs b/beacon_node/beacon_chain/src/errors.rs index a1a0ec74f66..7b04a36faec 100644 --- a/beacon_node/beacon_chain/src/errors.rs +++ b/beacon_node/beacon_chain/src/errors.rs @@ -230,6 +230,23 @@ pub enum BeaconChainError { columns_found: usize, }, FailedToReconstructBlobs(String), + ProposerCacheIncorrectState { + state_decision_block_root: Hash256, + requested_decision_block_root: Hash256, + }, + ProposerCacheAccessorFailure { + decision_block_root: Hash256, + proposal_epoch: Epoch, + }, + ProposerCacheOutOfBounds { + slot: Slot, + epoch: Epoch, + }, + ProposerCacheWrongEpoch { + request_epoch: Epoch, + cache_epoch: Epoch, + }, + SkipProposerPreparation, } easy_from_to!(SlotProcessingError, BeaconChainError); diff --git a/beacon_node/beacon_chain/src/kzg_utils.rs b/beacon_node/beacon_chain/src/kzg_utils.rs index 2147ed59663..382775ab50f 100644 --- a/beacon_node/beacon_chain/src/kzg_utils.rs +++ b/beacon_node/beacon_chain/src/kzg_utils.rs @@ -174,6 +174,13 @@ pub fn blobs_to_data_column_sidecars( let kzg_commitments_inclusion_proof = block.message().body().kzg_commitments_merkle_proof()?; let signed_block_header = block.signed_block_header(); + if cell_proofs.len() != blobs.len() * E::number_of_columns() { + return Err(DataColumnSidecarError::InvalidCellProofLength { + expected: blobs.len() * E::number_of_columns(), + actual: cell_proofs.len(), + }); + } + let proof_chunks = cell_proofs .chunks_exact(E::number_of_columns()) .collect::>(); @@ -292,6 +299,8 @@ pub(crate) fn build_data_column_sidecars( /// /// If `blob_indices_opt` is `None`, this function attempts to reconstruct all blobs associated /// with the block. 
+/// This function does NOT use rayon as this is primarily used by a non critical path in HTTP API +/// and it will be slow if the node needs to reconstruct the blobs pub fn reconstruct_blobs( kzg: &Kzg, data_columns: &[Arc>], @@ -313,7 +322,7 @@ pub fn reconstruct_blobs( }; let blob_sidecars = blob_indices - .into_par_iter() + .into_iter() .map(|row_index| { let mut cells: Vec = vec![]; let mut cell_ids: Vec = vec![]; @@ -330,16 +339,26 @@ pub fn reconstruct_blobs( cell_ids.push(data_column.index); } - let (cells, _kzg_proofs) = kzg - .recover_cells_and_compute_kzg_proofs(&cell_ids, &cells) - .map_err(|e| format!("Failed to recover cells and compute KZG proofs: {e:?}"))?; + let num_cells_original_blob = E::number_of_columns() / 2; + let blob_bytes = if data_columns.len() < E::number_of_columns() { + let (recovered_cells, _kzg_proofs) = kzg + .recover_cells_and_compute_kzg_proofs(&cell_ids, &cells) + .map_err(|e| { + format!("Failed to recover cells and compute KZG proofs: {e:?}") + })?; - let num_cells_original_blob = cells.len() / 2; - let blob_bytes = cells - .into_iter() - .take(num_cells_original_blob) - .flat_map(|cell| cell.into_iter()) - .collect(); + recovered_cells + .into_iter() + .take(num_cells_original_blob) + .flat_map(|cell| cell.into_iter()) + .collect() + } else { + cells + .into_iter() + .take(num_cells_original_blob) + .flat_map(|cell| (*cell).into_iter()) + .collect() + }; let blob = Blob::::new(blob_bytes).map_err(|e| format!("{e:?}"))?; let kzg_proof = KzgProof::empty(); diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 1b57bad1049..0d34ffdcd15 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -458,12 +458,6 @@ pub static BEACON_EARLY_ATTESTER_CACHE_HITS: LazyLock> = Lazy ) }); -pub static BEACON_REQRESP_PRE_IMPORT_CACHE_SIZE: LazyLock> = LazyLock::new(|| { - try_create_int_gauge( - "beacon_reqresp_pre_import_cache_size", - "Current count of 
items of the reqresp pre import cache", - ) -}); pub static BEACON_REQRESP_PRE_IMPORT_CACHE_HITS: LazyLock> = LazyLock::new(|| { try_create_int_counter( @@ -578,6 +572,14 @@ pub static FORK_CHOICE_READ_LOCK_AQUIRE_TIMES: LazyLock> = Laz exponential_buckets(1e-4, 4.0, 7), ) }); +pub static FORK_CHOICE_UPGRADABLE_READ_LOCK_AQUIRE_TIMES: LazyLock> = + LazyLock::new(|| { + try_create_histogram_with_buckets( + "beacon_fork_choice_upgradable_read_lock_aquire_seconds", + "Time taken to aquire the fork-choice upgradable read lock", + exponential_buckets(1e-4, 4.0, 7), + ) + }); pub static FORK_CHOICE_WRITE_LOCK_AQUIRE_TIMES: LazyLock> = LazyLock::new(|| { try_create_histogram_with_buckets( "beacon_fork_choice_write_lock_aquire_seconds", @@ -1957,7 +1959,6 @@ pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { } let attestation_stats = beacon_chain.op_pool.attestation_stats(); - let chain_metrics = beacon_chain.metrics(); // Kept duplicated for backwards compatibility set_gauge_by_usize( @@ -1965,11 +1966,6 @@ pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { beacon_chain.store.state_cache_len(), ); - set_gauge_by_usize( - &BEACON_REQRESP_PRE_IMPORT_CACHE_SIZE, - chain_metrics.reqresp_pre_import_cache_len, - ); - let da_checker_metrics = beacon_chain.data_availability_checker.metrics(); set_gauge_by_usize( &DATA_AVAILABILITY_OVERFLOW_MEMORY_BLOCK_CACHE_SIZE, diff --git a/beacon_node/beacon_chain/src/state_advance_timer.rs b/beacon_node/beacon_chain/src/state_advance_timer.rs index 27c2c7c0a11..87348cb01be 100644 --- a/beacon_node/beacon_chain/src/state_advance_timer.rs +++ b/beacon_node/beacon_chain/src/state_advance_timer.rs @@ -33,7 +33,7 @@ use types::{AttestationShufflingId, BeaconStateError, EthSpec, Hash256, Relative /// /// This avoids doing unnecessary work whilst the node is syncing or has perhaps been put to sleep /// for some period of time. 
-const MAX_ADVANCE_DISTANCE: u64 = 4; +const MAX_ADVANCE_DISTANCE: u64 = 256; /// Similarly for fork choice: avoid the fork choice lookahead during sync. /// @@ -49,17 +49,7 @@ enum Error { HeadMissingFromSnapshotCache(#[allow(dead_code)] Hash256), BeaconState(#[allow(dead_code)] BeaconStateError), Store(#[allow(dead_code)] store::Error), - MaxDistanceExceeded { - current_slot: Slot, - head_slot: Slot, - }, - StateAlreadyAdvanced { - block_root: Hash256, - }, - BadStateSlot { - _state_slot: Slot, - _block_slot: Slot, - }, + MaxDistanceExceeded { current_slot: Slot, head_slot: Slot }, } impl From for Error { @@ -180,9 +170,6 @@ async fn state_advance_timer( error = ?e, "Failed to advance head state" ), - Err(Error::StateAlreadyAdvanced { block_root }) => { - debug!(?block_root, "State already advanced on slot") - } Err(Error::MaxDistanceExceeded { current_slot, head_slot, @@ -295,25 +282,6 @@ fn advance_head(beacon_chain: &Arc>) -> Resu .get_advanced_hot_state(head_block_root, current_slot, head_block_state_root)? .ok_or(Error::HeadMissingFromSnapshotCache(head_block_root))?; - // Protect against advancing a state more than a single slot. - // - // Advancing more than one slot without storing the intermediate state would corrupt the - // database. Future works might store intermediate states inside this function. 
- match state.slot().cmp(&state.latest_block_header().slot) { - std::cmp::Ordering::Equal => (), - std::cmp::Ordering::Greater => { - return Err(Error::StateAlreadyAdvanced { - block_root: head_block_root, - }); - } - std::cmp::Ordering::Less => { - return Err(Error::BadStateSlot { - _block_slot: state.latest_block_header().slot, - _state_slot: state.slot(), - }); - } - } - let initial_slot = state.slot(); let initial_epoch = state.current_epoch(); diff --git a/beacon_node/beacon_chain/src/validator_custody.rs b/beacon_node/beacon_chain/src/validator_custody.rs index 1c89624f9d7..3ab76828c9c 100644 --- a/beacon_node/beacon_chain/src/validator_custody.rs +++ b/beacon_node/beacon_chain/src/validator_custody.rs @@ -130,7 +130,7 @@ pub struct CustodyContext { /// and enr values. validator_custody_count: AtomicU64, /// Is the node run as a supernode based on current cli parameters. - pub current_is_supernode: bool, + current_is_supernode: bool, /// The persisted value for `is_supernode` based on the previous run of this node. /// /// Note: We require this value because if a user restarts the node with a higher cli custody @@ -307,6 +307,14 @@ impl CustodyContext { .expect("should compute node sampling size from valid chain spec") } + /// Returns whether the node should attempt reconstruction at a given epoch. + pub fn should_attempt_reconstruction(&self, epoch: Epoch, spec: &ChainSpec) -> bool { + let min_columns_for_reconstruction = E::number_of_columns() / 2; + // performing reconstruction is not necessary if sampling column count is exactly 50%, + // because the node doesn't need the remaining columns. + self.num_of_data_columns_to_sample(epoch, spec) > min_columns_for_reconstruction + } + /// Returns the ordered list of column indices that should be sampled for data availability checking at the given epoch. 
/// /// # Parameters diff --git a/beacon_node/beacon_chain/src/validator_monitor.rs b/beacon_node/beacon_chain/src/validator_monitor.rs index 23f1a7d4308..00c30e5ab1d 100644 --- a/beacon_node/beacon_chain/src/validator_monitor.rs +++ b/beacon_node/beacon_chain/src/validator_monitor.rs @@ -497,7 +497,7 @@ impl ValidatorMonitor { }); // Add missed non-finalized blocks for the monitored validators - self.add_validators_missed_blocks(state); + self.add_validators_missed_blocks(state, spec); self.process_unaggregated_attestations(state, spec); // Update metrics for individual validators. @@ -588,7 +588,7 @@ impl ValidatorMonitor { } /// Add missed non-finalized blocks for the monitored validators - fn add_validators_missed_blocks(&mut self, state: &BeaconState) { + fn add_validators_missed_blocks(&mut self, state: &BeaconState, spec: &ChainSpec) { // Define range variables let current_slot = state.slot(); let current_epoch = current_slot.epoch(E::slots_per_epoch()); @@ -616,8 +616,8 @@ impl ValidatorMonitor { if block_root == prev_block_root { let slot_epoch = slot.epoch(E::slots_per_epoch()); - if let Ok(shuffling_decision_block) = - state.proposer_shuffling_decision_root_at_epoch(slot_epoch, *block_root) + if let Ok(shuffling_decision_block) = state + .proposer_shuffling_decision_root_at_epoch(slot_epoch, *block_root, spec) { // Update the cache if it has not yet been initialised, or if it is // initialised for a prior epoch. 
This is an optimisation to avoid bouncing diff --git a/beacon_node/beacon_chain/tests/block_verification.rs b/beacon_node/beacon_chain/tests/block_verification.rs index 58ca4a032ed..b27295751ec 100644 --- a/beacon_node/beacon_chain/tests/block_verification.rs +++ b/beacon_node/beacon_chain/tests/block_verification.rs @@ -1730,6 +1730,8 @@ async fn add_altair_block_to_base_chain() { )); } +// This is a regression test for this bug: +// https://github.com/sigp/lighthouse/issues/4332#issuecomment-1565092279 #[tokio::test] async fn import_duplicate_block_unrealized_justification() { let spec = MainnetEthSpec::default_spec(); @@ -1791,7 +1793,7 @@ async fn import_duplicate_block_unrealized_justification() { .await .unwrap(); - // Unrealized justification should NOT have updated. + // The store's global unrealized justification should update immediately and match the block. let unrealized_justification = { let fc = chain.canonical_head.fork_choice_read_lock(); assert_eq!(fc.justified_checkpoint().epoch, 0); @@ -1808,9 +1810,12 @@ async fn import_duplicate_block_unrealized_justification() { }; // Import the second verified block, simulating a block processed via RPC. - import_execution_pending_block(chain.clone(), verified_block2) - .await - .unwrap(); + assert_eq!( + import_execution_pending_block(chain.clone(), verified_block2) + .await + .unwrap_err(), + format!("DuplicateFullyImported({block_root})") + ); // Unrealized justification should still be updated. 
let fc3 = chain.canonical_head.fork_choice_read_lock(); diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index fbb592b510f..cd4032f55d9 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -1191,6 +1191,316 @@ fn check_shuffling_compatible( } } +/// These tests check the consistency of: +/// +/// - ProtoBlock::proposer_shuffling_root_for_child_block, and +/// - BeaconState::proposer_shuffling_decision_root{_at_epoch} +async fn proposer_shuffling_root_consistency_test( + spec: ChainSpec, + parent_slot: u64, + child_slot: u64, +) { + let child_slot = Slot::new(child_slot); + let db_path = tempdir().unwrap(); + let store = get_store_generic(&db_path, Default::default(), spec.clone()); + let validators_keypairs = + types::test_utils::generate_deterministic_keypairs(LOW_VALIDATOR_COUNT); + let harness = TestHarness::builder(MinimalEthSpec) + .spec(spec.into()) + .keypairs(validators_keypairs) + .fresh_disk_store(store) + .mock_execution_layer() + .build(); + let spec = &harness.chain.spec; + + // Build chain out to parent block. + let initial_slots: Vec = (1..=parent_slot).map(Into::into).collect(); + let (state, state_root) = harness.get_current_state_and_root(); + let all_validators = harness.get_all_validators(); + let (_, _, parent_root, _) = harness + .add_attested_blocks_at_slots(state, state_root, &initial_slots, &all_validators) + .await; + + // Add the child block. + let (state, state_root) = harness.get_current_state_and_root(); + let all_validators = harness.get_all_validators(); + let (_, _, child_root, child_block_state) = harness + .add_attested_blocks_at_slots(state, state_root, &[child_slot], &all_validators) + .await; + + let child_block_epoch = child_slot.epoch(E::slots_per_epoch()); + + // Load parent block from fork choice. 
+ let fc_parent = harness + .chain + .canonical_head + .fork_choice_read_lock() + .get_block(&parent_root.into()) + .unwrap(); + + // The proposer shuffling decision root computed using fork choice should equal the root + // computed from the child state. + let decision_root = fc_parent.proposer_shuffling_root_for_child_block(child_block_epoch, spec); + + assert_eq!( + decision_root, + child_block_state + .proposer_shuffling_decision_root(child_root.into(), spec) + .unwrap() + ); + assert_eq!( + decision_root, + child_block_state + .proposer_shuffling_decision_root_at_epoch(child_block_epoch, child_root.into(), spec) + .unwrap() + ); + + // The passed block root argument should be irrelevant for all blocks except the genesis block. + assert_eq!( + decision_root, + child_block_state + .proposer_shuffling_decision_root(Hash256::ZERO, spec) + .unwrap() + ); + assert_eq!( + decision_root, + child_block_state + .proposer_shuffling_decision_root_at_epoch(child_block_epoch, Hash256::ZERO, spec) + .unwrap() + ); +} + +#[tokio::test] +async fn proposer_shuffling_root_consistency_same_epoch() { + let spec = test_spec::(); + proposer_shuffling_root_consistency_test(spec, 32, 39).await; +} + +#[tokio::test] +async fn proposer_shuffling_root_consistency_next_epoch() { + let spec = test_spec::(); + proposer_shuffling_root_consistency_test(spec, 32, 47).await; +} + +#[tokio::test] +async fn proposer_shuffling_root_consistency_two_epochs() { + let spec = test_spec::(); + proposer_shuffling_root_consistency_test(spec, 32, 55).await; +} + +#[tokio::test] +async fn proposer_shuffling_root_consistency_at_fork_boundary() { + let mut spec = ForkName::Electra.make_genesis_spec(E::default_spec()); + spec.fulu_fork_epoch = Some(Epoch::new(4)); + + // Parent block in epoch prior to Fulu fork epoch, child block in Fulu fork epoch. 
+ proposer_shuffling_root_consistency_test( + spec.clone(), + 3 * E::slots_per_epoch(), + 4 * E::slots_per_epoch(), + ) + .await; + + // Parent block and child block in Fulu fork epoch. + proposer_shuffling_root_consistency_test( + spec.clone(), + 4 * E::slots_per_epoch(), + 4 * E::slots_per_epoch() + 1, + ) + .await; + + // Parent block in Fulu fork epoch and child block in epoch after. + proposer_shuffling_root_consistency_test( + spec.clone(), + 4 * E::slots_per_epoch(), + 5 * E::slots_per_epoch(), + ) + .await; + + // Parent block in epoch prior and child block in epoch after. + proposer_shuffling_root_consistency_test( + spec, + 3 * E::slots_per_epoch(), + 5 * E::slots_per_epoch(), + ) + .await; +} + +#[tokio::test] +async fn proposer_shuffling_changing_with_lookahead() { + let initial_blocks = E::slots_per_epoch() * 4 - 1; + + let spec = ForkName::Fulu.make_genesis_spec(E::default_spec()); + let db_path = tempdir().unwrap(); + let store = get_store_generic(&db_path, Default::default(), spec.clone()); + let validators_keypairs = + types::test_utils::generate_deterministic_keypairs(LOW_VALIDATOR_COUNT); + let harness = TestHarness::builder(MinimalEthSpec) + .spec(spec.into()) + .keypairs(validators_keypairs) + .fresh_disk_store(store) + .mock_execution_layer() + .build(); + let spec = &harness.chain.spec; + + // Start with some blocks, finishing with one slot before a new epoch. + harness.advance_slot(); + harness + .extend_chain( + initial_blocks as usize, + BlockStrategy::OnCanonicalHead, + AttestationStrategy::AllValidators, + ) + .await; + + let pre_deposit_state = harness.get_current_state(); + assert_eq!(pre_deposit_state.slot(), initial_blocks); + let topup_block_slot = Slot::new(initial_blocks + 1); + let validator_to_topup_index = 1; + let validator_to_topup = pre_deposit_state + .get_validator(validator_to_topup_index) + .unwrap() + .clone(); + + // Craft a block with a deposit request and consolidation. 
+ // XXX: This is a really nasty way to do this, but we need better test facilities in + // MockExecutionLayer to address this. + let deposit_request: DepositRequest = DepositRequest { + index: pre_deposit_state.eth1_deposit_index(), + pubkey: validator_to_topup.pubkey, + withdrawal_credentials: validator_to_topup.withdrawal_credentials, + amount: 63_000_000_000, + signature: SignatureBytes::empty(), + }; + + let consolidation_request: ConsolidationRequest = ConsolidationRequest { + source_address: validator_to_topup + .get_execution_withdrawal_address(spec) + .unwrap(), + source_pubkey: validator_to_topup.pubkey, + target_pubkey: validator_to_topup.pubkey, + }; + + let execution_requests = ExecutionRequests:: { + deposits: VariableList::new(vec![deposit_request]).unwrap(), + withdrawals: vec![].into(), + consolidations: VariableList::new(vec![consolidation_request]).unwrap(), + }; + + let mut block = Box::pin(harness.make_block_with_modifier( + pre_deposit_state.clone(), + topup_block_slot, + |block| *block.body_mut().execution_requests_mut().unwrap() = execution_requests, + )) + .await + .0; + + let Err(BlockError::StateRootMismatch { + local: true_state_root, + .. + }) = harness + .process_block(topup_block_slot, block.0.canonical_root(), block.clone()) + .await + else { + panic!("state root should not match due to pending deposits changes/etc"); + }; + let mut new_block = block.0.message_fulu().unwrap().clone(); + new_block.state_root = true_state_root; + block.0 = Arc::new(harness.sign_beacon_block(new_block.into(), &pre_deposit_state)); + + harness + .process_block(topup_block_slot, block.0.canonical_root(), block.clone()) + .await + .unwrap(); + + // Advance two epochs to finalize the deposit and process it. + // Start with just a single epoch advance so we can grab the state one epoch prior to where + // we end up. 
+ harness.advance_slot(); + harness + .extend_chain( + E::slots_per_epoch() as usize, + BlockStrategy::OnCanonicalHead, + AttestationStrategy::AllValidators, + ) + .await; + + // Grab the epoch start state. This is the state from which the proposers at the next epoch were + // computed. + let prev_epoch_state = harness.get_current_state(); + assert_eq!(prev_epoch_state.slot() % E::slots_per_epoch(), 0); + + // The deposit should be pending. + let pending_deposits = prev_epoch_state.pending_deposits().unwrap(); + assert_eq!(pending_deposits.len(), 1, "{pending_deposits:?}"); + + // Advance the 2nd epoch to finalize the deposit and process it. + harness.advance_slot(); + harness + .extend_chain( + E::slots_per_epoch() as usize, + BlockStrategy::OnCanonicalHead, + AttestationStrategy::AllValidators, + ) + .await; + + let current_epoch_state = harness.get_current_state(); + assert_eq!(current_epoch_state.slot() % E::slots_per_epoch(), 0); + + // Deposit is processed! + let pending_deposits = current_epoch_state.pending_deposits().unwrap(); + assert_eq!(pending_deposits.len(), 0, "{pending_deposits:?}"); + + let validator = current_epoch_state + .get_validator(validator_to_topup_index) + .unwrap(); + assert!(validator.has_compounding_withdrawal_credential(spec)); + assert_eq!(validator.effective_balance, 95_000_000_000); + + // The shuffling for the current epoch from `prev_epoch_state` should match the shuffling + // for the current epoch from `current_epoch_state` because we should be correctly using the + // stored lookahead. + let current_epoch = current_epoch_state.current_epoch(); + let proposer_shuffling = prev_epoch_state + .get_beacon_proposer_indices(current_epoch, spec) + .unwrap(); + + assert_eq!( + proposer_shuffling, + current_epoch_state + .get_beacon_proposer_indices(current_epoch, spec) + .unwrap() + ); + + // If we bypass the safety checks in `get_proposer_indices`, we should see that the shuffling + // differs due to the effective balance change. 
+ let unsafe_get_proposer_indices = |state: &BeaconState<E>, epoch| -> Vec<usize> { + let indices = state.get_active_validator_indices(epoch, spec).unwrap(); + let preimage = state.get_seed(epoch, Domain::BeaconProposer, spec).unwrap(); + epoch + .slot_iter(E::slots_per_epoch()) + .map(|slot| { + let mut preimage = preimage.to_vec(); + preimage.append(&mut int_to_bytes::int_to_bytes8(slot.as_u64())); + let seed = ethereum_hashing::hash(&preimage); + state.compute_proposer_index(&indices, &seed, spec).unwrap() + }) + .collect() + }; + + // The unsafe function is correct when used with lookahead. + assert_eq!( + unsafe_get_proposer_indices(&prev_epoch_state, current_epoch), + proposer_shuffling + ); + + // Computing the shuffling for current epoch without lookahead is WRONG. + assert_ne!( + unsafe_get_proposer_indices(&current_epoch_state, current_epoch), + proposer_shuffling, + ); +} + // Ensure blocks from abandoned forks are pruned from the Hot DB #[tokio::test] async fn prunes_abandoned_fork_between_two_finalized_checkpoints() { diff --git a/beacon_node/beacon_chain/tests/validator_monitor.rs b/beacon_node/beacon_chain/tests/validator_monitor.rs index 4e2554d3d8d..95732abeb5d 100644 --- a/beacon_node/beacon_chain/tests/validator_monitor.rs +++ b/beacon_node/beacon_chain/tests/validator_monitor.rs @@ -3,7 +3,7 @@ use beacon_chain::test_utils::{ }; use beacon_chain::validator_monitor::{MISSED_BLOCK_LAG_SLOTS, ValidatorMonitorConfig}; use std::sync::LazyLock; -use types::{Epoch, EthSpec, Keypair, MainnetEthSpec, PublicKeyBytes, Slot}; +use types::{Epoch, EthSpec, Hash256, Keypair, MainnetEthSpec, PublicKeyBytes, Slot}; // Should ideally be divisible by 3. 
pub const VALIDATOR_COUNT: usize = 48; @@ -74,7 +74,7 @@ async fn missed_blocks_across_epochs() { .get_hot_state(state_roots_by_slot[&start_slot]) .unwrap(); let decision_root = state - .proposer_shuffling_decision_root(genesis_block_root) + .proposer_shuffling_decision_root(genesis_block_root, &harness.chain.spec) .unwrap(); proposer_shuffling_cache .insert( @@ -152,7 +152,7 @@ async fn missed_blocks_basic() { .unwrap(); let mut missed_block_proposer = validator_indexes[slot_in_epoch.as_usize()]; let mut proposer_shuffling_decision_root = _state - .proposer_shuffling_decision_root(duplicate_block_root) + .proposer_shuffling_decision_root(duplicate_block_root, &harness1.chain.spec) .unwrap(); let beacon_proposer_cache = harness1 @@ -234,21 +234,24 @@ async fn missed_blocks_basic() { // Let's fill the cache with the proposers for the current epoch // and push the duplicate_block_root to the block_roots vector + assert_eq!( + _state2.set_block_root(prev_slot, duplicate_block_root), + Ok(()) + ); + + let decision_block_root = _state2 + .proposer_shuffling_decision_root_at_epoch(epoch, Hash256::ZERO, &harness2.chain.spec) + .unwrap(); assert_eq!( beacon_proposer_cache.lock().insert( epoch, - duplicate_block_root, + decision_block_root, validator_indexes.clone(), _state2.fork() ), Ok(()) ); - assert_eq!( - _state2.set_block_root(prev_slot, duplicate_block_root), - Ok(()) - ); - { // Let's validate the state which will call the function responsible for // adding the missed blocks to the validator monitor @@ -326,7 +329,11 @@ async fn missed_blocks_basic() { .unwrap(); missed_block_proposer = validator_indexes[slot_in_epoch.as_usize()]; proposer_shuffling_decision_root = _state3 - .proposer_shuffling_decision_root_at_epoch(epoch, duplicate_block_root) + .proposer_shuffling_decision_root_at_epoch( + epoch, + duplicate_block_root, + &harness1.chain.spec, + ) .unwrap(); let beacon_proposer_cache = harness3 diff --git a/beacon_node/beacon_processor/src/lib.rs 
b/beacon_node/beacon_processor/src/lib.rs index ab9ab045f4e..28ed0cca913 100644 --- a/beacon_node/beacon_processor/src/lib.rs +++ b/beacon_node/beacon_processor/src/lib.rs @@ -59,7 +59,7 @@ use std::sync::Arc; use std::task::Context; use std::time::{Duration, Instant}; use strum::IntoStaticStr; -use task_executor::TaskExecutor; +use task_executor::{RayonPoolType, TaskExecutor}; use tokio::sync::mpsc; use tokio::sync::mpsc::error::TrySendError; use tracing::{debug, error, trace, warn}; @@ -181,7 +181,7 @@ impl BeaconProcessorQueueLengths { // We don't request more than `PARENT_DEPTH_TOLERANCE` (32) lookups, so we can limit // this queue size. With 48 max blobs per block, each column sidecar list could be up to 12MB. rpc_custody_column_queue: 64, - column_reconstruction_queue: 64, + column_reconstruction_queue: 1, chain_segment_queue: 64, backfill_chain_segment: 64, gossip_block_queue: 1024, @@ -603,7 +603,7 @@ pub enum Work { process_fn: BlockingFn, }, ChainSegment(AsyncFn), - ChainSegmentBackfill(AsyncFn), + ChainSegmentBackfill(BlockingFn), Status(BlockingFn), BlocksByRangeRequest(AsyncFn), BlocksByRootsRequest(AsyncFn), @@ -867,7 +867,7 @@ impl BeaconProcessor { let mut rpc_blob_queue = FifoQueue::new(queue_lengths.rpc_blob_queue); let mut rpc_custody_column_queue = FifoQueue::new(queue_lengths.rpc_custody_column_queue); let mut column_reconstruction_queue = - FifoQueue::new(queue_lengths.column_reconstruction_queue); + LifoQueue::new(queue_lengths.column_reconstruction_queue); let mut chain_segment_queue = FifoQueue::new(queue_lengths.chain_segment_queue); let mut backfill_chain_segment = FifoQueue::new(queue_lengths.backfill_chain_segment); let mut gossip_block_queue = FifoQueue::new(queue_lengths.gossip_block_queue); @@ -1354,9 +1354,7 @@ impl BeaconProcessor { Work::RpcCustodyColumn { .. 
} => { rpc_custody_column_queue.push(work, work_id) } - Work::ColumnReconstruction(_) => { - column_reconstruction_queue.push(work, work_id) - } + Work::ColumnReconstruction(_) => column_reconstruction_queue.push(work), Work::ChainSegment { .. } => chain_segment_queue.push(work, work_id), Work::ChainSegmentBackfill { .. } => { backfill_chain_segment.push(work, work_id) @@ -1605,7 +1603,14 @@ impl BeaconProcessor { Work::BlocksByRangeRequest(work) | Work::BlocksByRootsRequest(work) => { task_spawner.spawn_async(work) } - Work::ChainSegmentBackfill(process_fn) => task_spawner.spawn_async(process_fn), + Work::ChainSegmentBackfill(process_fn) => { + if self.config.enable_backfill_rate_limiting { + task_spawner.spawn_blocking_with_rayon(RayonPoolType::LowPriority, process_fn) + } else { + // use the global rayon thread pool if backfill rate limiting is disabled. + task_spawner.spawn_blocking(process_fn) + } + } Work::ApiRequestP0(process_fn) | Work::ApiRequestP1(process_fn) => match process_fn { BlockingOrAsync::Blocking(process_fn) => task_spawner.spawn_blocking(process_fn), BlockingOrAsync::Async(process_fn) => task_spawner.spawn_async(process_fn), @@ -1667,6 +1672,21 @@ impl TaskSpawner { WORKER_TASK_NAME, ) } + + /// Spawns a blocking task on a rayon thread pool, dropping the `SendOnDrop` after task completion. + fn spawn_blocking_with_rayon(self, rayon_pool_type: RayonPoolType, task: F) + where + F: FnOnce() + Send + 'static, + { + self.executor.spawn_blocking_with_rayon( + move || { + task(); + drop(self.send_idle_on_drop) + }, + rayon_pool_type, + WORKER_TASK_NAME, + ) + } } /// This struct will send a message on `self.tx` when it is dropped. 
An error will be logged diff --git a/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs b/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs index 9565e57589d..8c33cf58693 100644 --- a/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs +++ b/beacon_node/beacon_processor/src/scheduler/work_reprocessing_queue.rs @@ -37,7 +37,9 @@ const TASK_NAME: &str = "beacon_processor_reprocess_queue"; const GOSSIP_BLOCKS: &str = "gossip_blocks"; const RPC_BLOCKS: &str = "rpc_blocks"; const ATTESTATIONS: &str = "attestations"; +const ATTESTATIONS_PER_ROOT: &str = "attestations_per_root"; const LIGHT_CLIENT_UPDATES: &str = "lc_updates"; +const LIGHT_CLIENT_UPDATES_PER_PARENT_ROOT: &str = "lc_updates_per_parent_root"; /// Queue blocks for re-processing with an `ADDITIONAL_QUEUED_BLOCK_DELAY` after the slot starts. /// This is to account for any slight drift in the system clock. @@ -171,7 +173,7 @@ pub struct IgnoredRpcBlock { } /// A backfill batch work that has been queued for processing later. -pub struct QueuedBackfillBatch(pub AsyncFn); +pub struct QueuedBackfillBatch(pub BlockingFn); pub struct QueuedColumnReconstruction { pub block_root: Hash256, @@ -829,10 +831,19 @@ impl ReprocessQueue { ); } - if let Some(queued_atts) = self.awaiting_attestations_per_root.get_mut(&root) - && let Some(index) = queued_atts.iter().position(|&id| id == queued_id) + if let Entry::Occupied(mut queued_atts) = + self.awaiting_attestations_per_root.entry(root) + && let Some(index) = + queued_atts.get().iter().position(|&id| id == queued_id) { - queued_atts.swap_remove(index); + let queued_atts_mut = queued_atts.get_mut(); + queued_atts_mut.swap_remove(index); + + // If the vec is empty after this attestation's removal, we need to delete + // the entry to prevent bloating the hashmap indefinitely. 
+ if queued_atts_mut.is_empty() { + queued_atts.remove_entry(); + } } } } @@ -853,13 +864,19 @@ impl ReprocessQueue { error!("Failed to send scheduled light client optimistic update"); } - if let Some(queued_lc_updates) = self - .awaiting_lc_updates_per_parent_root - .get_mut(&parent_root) - && let Some(index) = - queued_lc_updates.iter().position(|&id| id == queued_id) + if let Entry::Occupied(mut queued_lc_updates) = + self.awaiting_lc_updates_per_parent_root.entry(parent_root) + && let Some(index) = queued_lc_updates + .get() + .iter() + .position(|&id| id == queued_id) { - queued_lc_updates.swap_remove(index); + let queued_lc_updates_mut = queued_lc_updates.get_mut(); + queued_lc_updates_mut.swap_remove(index); + + if queued_lc_updates_mut.is_empty() { + queued_lc_updates.remove_entry(); + } } } } @@ -929,11 +946,21 @@ impl ReprocessQueue { &[ATTESTATIONS], self.attestations_delay_queue.len() as i64, ); + metrics::set_gauge_vec( + &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_TOTAL, + &[ATTESTATIONS_PER_ROOT], + self.awaiting_attestations_per_root.len() as i64, + ); metrics::set_gauge_vec( &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_TOTAL, &[LIGHT_CLIENT_UPDATES], self.lc_updates_delay_queue.len() as i64, ); + metrics::set_gauge_vec( + &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_TOTAL, + &[LIGHT_CLIENT_UPDATES_PER_PARENT_ROOT], + self.awaiting_lc_updates_per_parent_root.len() as i64, + ); } fn recompute_next_backfill_batch_event(&mut self) { @@ -979,6 +1006,7 @@ impl ReprocessQueue { #[cfg(test)] mod tests { use super::*; + use crate::BeaconProcessorConfig; use logging::create_test_tracing_subscriber; use slot_clock::{ManualSlotClock, TestingSlotClock}; use std::ops::Add; @@ -1056,7 +1084,7 @@ mod tests { // Now queue a backfill sync batch. 
work_reprocessing_tx .try_send(ReprocessQueueMessage::BackfillSync(QueuedBackfillBatch( - Box::pin(async {}), + Box::new(|| {}), ))) .unwrap(); tokio::task::yield_now().await; @@ -1101,4 +1129,97 @@ mod tests { Duration::from_secs(slot_duration), ) } + + fn test_queue() -> ReprocessQueue { + create_test_tracing_subscriber(); + + let config = BeaconProcessorConfig::default(); + let (ready_work_tx, _) = mpsc::channel::(config.max_scheduled_work_queue_len); + let (_, reprocess_work_rx) = + mpsc::channel::(config.max_scheduled_work_queue_len); + let slot_clock = Arc::new(testing_slot_clock(12)); + + ReprocessQueue::new(ready_work_tx, reprocess_work_rx, slot_clock) + } + + // This is a regression test for a memory leak in `awaiting_attestations_per_root`. + // See: https://github.com/sigp/lighthouse/pull/8065 + #[tokio::test] + async fn prune_awaiting_attestations_per_root() { + create_test_tracing_subscriber(); + + let mut queue = test_queue(); + + // Pause time so it only advances manually + tokio::time::pause(); + + let beacon_block_root = Hash256::repeat_byte(0xaf); + + // Insert an attestation. + let att = ReprocessQueueMessage::UnknownBlockUnaggregate(QueuedUnaggregate { + beacon_block_root, + process_fn: Box::new(|| {}), + }); + + // Process the event to enter it into the delay queue. + queue.handle_message(InboundEvent::Msg(att)); + + // Check that it is queued. + assert_eq!(queue.awaiting_attestations_per_root.len(), 1); + assert!( + queue + .awaiting_attestations_per_root + .contains_key(&beacon_block_root) + ); + + // Advance time to expire the attestation. + advance_time(&queue.slot_clock, 2 * QUEUED_ATTESTATION_DELAY).await; + let ready_msg = queue.next().await.unwrap(); + assert!(matches!(ready_msg, InboundEvent::ReadyAttestation(_))); + queue.handle_message(ready_msg); + + // The entry for the block root should be gone. 
+ assert!(queue.awaiting_attestations_per_root.is_empty()); + } + + // This is a regression test for a memory leak in `awaiting_lc_updates_per_parent_root`. + // See: https://github.com/sigp/lighthouse/pull/8065 + #[tokio::test] + async fn prune_awaiting_lc_updates_per_parent_root() { + create_test_tracing_subscriber(); + + let mut queue = test_queue(); + + // Pause time so it only advances manually + tokio::time::pause(); + + let parent_root = Hash256::repeat_byte(0xaf); + + // Insert an attestation. + let msg = + ReprocessQueueMessage::UnknownLightClientOptimisticUpdate(QueuedLightClientUpdate { + parent_root, + process_fn: Box::new(|| {}), + }); + + // Process the event to enter it into the delay queue. + queue.handle_message(InboundEvent::Msg(msg)); + + // Check that it is queued. + assert_eq!(queue.awaiting_lc_updates_per_parent_root.len(), 1); + assert!( + queue + .awaiting_lc_updates_per_parent_root + .contains_key(&parent_root) + ); + + // Advance time to expire the update. + advance_time(&queue.slot_clock, 2 * QUEUED_LIGHT_CLIENT_UPDATE_DELAY).await; + let ready_msg = queue.next().await.unwrap(); + assert!(matches!(ready_msg, InboundEvent::ReadyLightClientUpdate(_))); + queue.handle_message(ready_msg); + + // The entry for the block root should be gone. + assert!(queue.awaiting_lc_updates_per_parent_root.is_empty()); + } } diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs index d984d5fedce..02c042bf282 100644 --- a/beacon_node/client/src/builder.rs +++ b/beacon_node/client/src/builder.rs @@ -412,7 +412,7 @@ where let blobs = if block.message().body().has_blobs() { debug!("Downloading finalized blobs"); if let Some(response) = remote - .get_blobs::(BlockId::Root(block_root), None, &spec) + .get_blob_sidecars::(BlockId::Root(block_root), None, &spec) .await .map_err(|e| format!("Error fetching finalized blobs from remote: {e:?}"))? 
{ diff --git a/beacon_node/client/src/notifier.rs b/beacon_node/client/src/notifier.rs index 1e58c210daa..c83cdad7e01 100644 --- a/beacon_node/client/src/notifier.rs +++ b/beacon_node/client/src/notifier.rs @@ -9,8 +9,8 @@ use execution_layer::{ EngineCapabilities, http::{ ENGINE_FORKCHOICE_UPDATED_V2, ENGINE_FORKCHOICE_UPDATED_V3, ENGINE_GET_PAYLOAD_V2, - ENGINE_GET_PAYLOAD_V3, ENGINE_GET_PAYLOAD_V4, ENGINE_NEW_PAYLOAD_V2, ENGINE_NEW_PAYLOAD_V3, - ENGINE_NEW_PAYLOAD_V4, + ENGINE_GET_PAYLOAD_V3, ENGINE_GET_PAYLOAD_V4, ENGINE_GET_PAYLOAD_V5, ENGINE_NEW_PAYLOAD_V2, + ENGINE_NEW_PAYLOAD_V3, ENGINE_NEW_PAYLOAD_V4, }, }; use lighthouse_network::{NetworkGlobals, types::SyncState}; @@ -524,18 +524,16 @@ fn methods_required_for_fork( } } ForkName::Fulu => { - // TODO(fulu) switch to v5 when the EL is ready - if !capabilities.get_payload_v4 { - missing_methods.push(ENGINE_GET_PAYLOAD_V4); + if !capabilities.get_payload_v5 { + missing_methods.push(ENGINE_GET_PAYLOAD_V5); } if !capabilities.new_payload_v4 { missing_methods.push(ENGINE_NEW_PAYLOAD_V4); } } ForkName::Gloas => { - // TODO(gloas) switch to v5/v6 when the EL is ready - if !capabilities.get_payload_v4 { - missing_methods.push(ENGINE_GET_PAYLOAD_V4); + if !capabilities.get_payload_v5 { + missing_methods.push(ENGINE_GET_PAYLOAD_V5); } if !capabilities.new_payload_v4 { missing_methods.push(ENGINE_NEW_PAYLOAD_V4); diff --git a/beacon_node/execution_layer/src/lib.rs b/beacon_node/execution_layer/src/lib.rs index 401646f3670..a5fa0f34158 100644 --- a/beacon_node/execution_layer/src/lib.rs +++ b/beacon_node/execution_layer/src/lib.rs @@ -1914,9 +1914,19 @@ impl ExecutionLayer { ) -> Result, Error> { debug!(?block_root, "Sending block to builder"); if spec.is_fulu_scheduled() { - self.post_builder_blinded_blocks_v2(block_root, block) + let resp = self + .post_builder_blinded_blocks_v2(block_root, block) .await - .map(|()| SubmitBlindedBlockResponse::V2) + .map(|()| SubmitBlindedBlockResponse::V2); + // Fallback to v1 if v2 
fails because the relay doesn't support it. + // Note: we should remove the fallback post fulu when all relays have support for v2. + if resp.is_err() { + self.post_builder_blinded_blocks_v1(block_root, block) + .await + .map(|full_payload| SubmitBlindedBlockResponse::V1(Box::new(full_payload))) + } else { + resp + } } else { self.post_builder_blinded_blocks_v1(block_root, block) .await diff --git a/beacon_node/http_api/src/block_id.rs b/beacon_node/http_api/src/block_id.rs index e527e466f67..778067c32bb 100644 --- a/beacon_node/http_api/src/block_id.rs +++ b/beacon_node/http_api/src/block_id.rs @@ -2,15 +2,16 @@ use crate::version::inconsistent_fork_rejection; use crate::{ExecutionOptimistic, state_id::checkpoint_slot_and_execution_optimistic}; use beacon_chain::kzg_utils::reconstruct_blobs; use beacon_chain::{BeaconChain, BeaconChainError, BeaconChainTypes, WhenSlotSkipped}; -use eth2::types::BlobIndicesQuery; use eth2::types::BlockId as CoreBlockId; use eth2::types::DataColumnIndicesQuery; +use eth2::types::{BlobIndicesQuery, BlobWrapper, BlobsVersionedHashesQuery}; use std::fmt; use std::str::FromStr; use std::sync::Arc; use types::{ BlobSidecarList, DataColumnSidecarList, EthSpec, FixedBytesExtended, ForkName, Hash256, - SignedBeaconBlock, SignedBlindedBeaconBlock, Slot, + SignedBeaconBlock, SignedBlindedBeaconBlock, Slot, UnversionedResponse, + beacon_response::ExecutionOptimisticFinalizedMetadata, }; use warp::Rejection; @@ -352,6 +353,68 @@ impl BlockId { Ok((block, blob_sidecar_list, execution_optimistic, finalized)) } + #[allow(clippy::type_complexity)] + pub fn get_blobs_by_versioned_hashes( + &self, + query: BlobsVersionedHashesQuery, + chain: &BeaconChain, + ) -> Result< + UnversionedResponse>, ExecutionOptimisticFinalizedMetadata>, + warp::Rejection, + > { + let (root, execution_optimistic, finalized) = self.root(chain)?; + let block = BlockId::blinded_block_by_root(&root, chain)?.ok_or_else(|| { + warp_utils::reject::custom_not_found(format!("beacon 
block with root {}", root)) + })?; + + // Error if the block is pre-Deneb and lacks blobs. + let blob_kzg_commitments = block.message().body().blob_kzg_commitments().map_err(|_| { + warp_utils::reject::custom_bad_request( + "block is pre-Deneb and has no blobs".to_string(), + ) + })?; + + let blob_indices_opt = query.versioned_hashes.map(|versioned_hashes| { + versioned_hashes + .iter() + .flat_map(|versioned_hash| { + blob_kzg_commitments.iter().position(|commitment| { + let computed_hash = commitment.calculate_versioned_hash(); + computed_hash == *versioned_hash + }) + }) + .map(|index| index as u64) + .collect::>() + }); + + let max_blobs_per_block = chain.spec.max_blobs_per_block(block.epoch()) as usize; + let blob_sidecar_list = if !blob_kzg_commitments.is_empty() { + if chain.spec.is_peer_das_enabled_for_epoch(block.epoch()) { + Self::get_blobs_from_data_columns(chain, root, blob_indices_opt, &block)? + } else { + Self::get_blobs(chain, root, blob_indices_opt, max_blobs_per_block)? + } + } else { + BlobSidecarList::new(vec![], max_blobs_per_block) + .map_err(|e| warp_utils::reject::custom_server_error(format!("{:?}", e)))? 
+ }; + + let blobs = blob_sidecar_list + .into_iter() + .map(|sidecar| BlobWrapper:: { + blob: sidecar.blob.clone(), + }) + .collect(); + + Ok(UnversionedResponse { + metadata: ExecutionOptimisticFinalizedMetadata { + execution_optimistic: Some(execution_optimistic), + finalized: Some(finalized), + }, + data: blobs, + }) + } + fn get_blobs( chain: &BeaconChain, root: Hash256, @@ -369,9 +432,9 @@ impl BlockId { let blob_sidecar_list_filtered = match indices { Some(vec) => { - let list: Vec<_> = blob_sidecar_list + let list: Vec<_> = vec .into_iter() - .filter(|blob_sidecar| vec.contains(&blob_sidecar.index)) + .flat_map(|index| blob_sidecar_list.get(index as usize).cloned()) .collect(); BlobSidecarList::new(list, max_blobs_per_block) diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index 5c6a9df7391..7f6c97a0f85 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -214,6 +214,7 @@ pub fn prometheus_metrics() -> warp::filters::log::Log warp::filters::log::Log( */ // GET beacon/blob_sidecars/{block_id} - let get_blobs = eth_v1 + let get_blob_sidecars = eth_v1 .and(warp::path("beacon")) .and(warp::path("blob_sidecars")) .and(block_id_or_err) @@ -1950,6 +1948,52 @@ pub fn serve( }, ); + // GET beacon/blobs/{block_id} + let get_blobs = eth_v1 + .and(warp::path("beacon")) + .and(warp::path("blobs")) + .and(block_id_or_err) + .and(warp::path::end()) + .and(multi_key_query::()) + .and(task_spawner_filter.clone()) + .and(chain_filter.clone()) + .and(warp::header::optional::("accept")) + .then( + |block_id: BlockId, + version_hashes_res: Result, + task_spawner: TaskSpawner, + chain: Arc>, + accept_header: Option| { + task_spawner.blocking_response_task(Priority::P1, move || { + let versioned_hashes = version_hashes_res?; + let response = + block_id.get_blobs_by_versioned_hashes(versioned_hashes, &chain)?; + + match accept_header { + Some(api_types::Accept::Ssz) => Response::builder() + .status(200) + 
.body(response.data.as_ssz_bytes().into()) + .map(|res: Response| add_ssz_content_type_header(res)) + .map_err(|e| { + warp_utils::reject::custom_server_error(format!( + "failed to create response: {}", + e + )) + }), + _ => { + let res = execution_optimistic_finalized_beacon_response( + ResponseIncludesVersion::No, + response.metadata.execution_optimistic.unwrap_or(false), + response.metadata.finalized.unwrap_or(false), + response.data, + )?; + Ok(warp::reply::json(&res).into_response()) + } + } + }) + }, + ); + /* * beacon/pool */ @@ -4797,6 +4841,7 @@ pub fn serve( .uor(get_beacon_block_attestations) .uor(get_beacon_blinded_block) .uor(get_beacon_block_root) + .uor(get_blob_sidecars) .uor(get_blobs) .uor(get_beacon_pool_attestations) .uor(get_beacon_pool_attester_slashings) diff --git a/beacon_node/http_api/src/proposer_duties.rs b/beacon_node/http_api/src/proposer_duties.rs index 3705c399bd7..ceac60cbad1 100644 --- a/beacon_node/http_api/src/proposer_duties.rs +++ b/beacon_node/http_api/src/proposer_duties.rs @@ -3,12 +3,13 @@ use crate::state_id::StateId; use beacon_chain::{ BeaconChain, BeaconChainError, BeaconChainTypes, - beacon_proposer_cache::{compute_proposer_duties_from_head, ensure_state_is_in_epoch}, + beacon_proposer_cache::{ + compute_proposer_duties_from_head, ensure_state_can_determine_proposers_for_epoch, + }, }; use eth2::types::{self as api_types}; use safe_arith::SafeArith; use slot_clock::SlotClock; -use std::cmp::Ordering; use tracing::debug; use types::{Epoch, EthSpec, Hash256, Slot}; @@ -105,36 +106,29 @@ fn try_proposer_duties_from_cache( let head_decision_root = head .snapshot .beacon_state - .proposer_shuffling_decision_root(head_block_root) + .proposer_shuffling_decision_root(head_block_root, &chain.spec) .map_err(warp_utils::reject::beacon_state_error)?; let execution_optimistic = chain .is_optimistic_or_invalid_head_block(head_block) .map_err(warp_utils::reject::unhandled_error)?; - let dependent_root = match 
head_epoch.cmp(&request_epoch) { - // head_epoch == request_epoch - Ordering::Equal => head_decision_root, - // head_epoch < request_epoch - Ordering::Less => head_block_root, - // head_epoch > request_epoch - Ordering::Greater => { - return Err(warp_utils::reject::custom_server_error(format!( - "head epoch {} is later than request epoch {}", - head_epoch, request_epoch - ))); - } - }; + // This code path can't handle requests for past epochs. + if head_epoch > request_epoch { + return Err(warp_utils::reject::custom_server_error(format!( + "head epoch {head_epoch} is later than request epoch {request_epoch}", + ))); + } chain .beacon_proposer_cache .lock() - .get_epoch::(dependent_root, request_epoch) + .get_epoch::(head_decision_root, request_epoch) .cloned() .map(|indices| { convert_to_api_response( chain, request_epoch, - dependent_root, + head_decision_root, execution_optimistic, indices.to_vec(), ) @@ -204,18 +198,19 @@ fn compute_historic_proposer_duties( } }; - let (state, execution_optimistic) = - if let Some((state_root, mut state, execution_optimistic)) = state_opt { - // If we've loaded the head state it might be from a previous epoch, ensure it's in a - // suitable epoch. - ensure_state_is_in_epoch(&mut state, state_root, epoch, &chain.spec) - .map_err(warp_utils::reject::unhandled_error)?; - (state, execution_optimistic) - } else { - let (state, execution_optimistic, _finalized) = - StateId::from_slot(epoch.start_slot(T::EthSpec::slots_per_epoch())).state(chain)?; - (state, execution_optimistic) - }; + let (state, execution_optimistic) = if let Some((state_root, mut state, execution_optimistic)) = + state_opt + { + // If we've loaded the head state it might be from a previous epoch, ensure it's in a + // suitable epoch. 
+ ensure_state_can_determine_proposers_for_epoch(&mut state, state_root, epoch, &chain.spec) + .map_err(warp_utils::reject::unhandled_error)?; + (state, execution_optimistic) + } else { + let (state, execution_optimistic, _finalized) = + StateId::from_slot(epoch.start_slot(T::EthSpec::slots_per_epoch())).state(chain)?; + (state, execution_optimistic) + }; // Ensure the state lookup was correct. if state.current_epoch() != epoch { @@ -234,7 +229,7 @@ fn compute_historic_proposer_duties( // We can supply the genesis block root as the block root since we know that the only block that // decides its own root is the genesis block. let dependent_root = state - .proposer_shuffling_decision_root(chain.genesis_block_root) + .proposer_shuffling_decision_root(chain.genesis_block_root, &chain.spec) .map_err(BeaconChainError::from) .map_err(warp_utils::reject::unhandled_error)?; diff --git a/beacon_node/http_api/src/publish_blocks.rs b/beacon_node/http_api/src/publish_blocks.rs index b6411167d92..05a4a4b7a4a 100644 --- a/beacon_node/http_api/src/publish_blocks.rs +++ b/beacon_node/http_api/src/publish_blocks.rs @@ -412,7 +412,7 @@ fn build_data_columns( error!( error = ?e, %slot, - "Invalid data column - not publishing block" + "Invalid data column - not publishing data columns" ); warp_utils::reject::custom_bad_request(format!("{e:?}")) })?; diff --git a/beacon_node/http_api/tests/tests.rs b/beacon_node/http_api/tests/tests.rs index 2072fb9932b..9c18a7c1e87 100644 --- a/beacon_node/http_api/tests/tests.rs +++ b/beacon_node/http_api/tests/tests.rs @@ -90,6 +90,7 @@ struct ApiTester { struct ApiTesterConfig { spec: ChainSpec, retain_historic_states: bool, + import_all_data_columns: bool, } impl Default for ApiTesterConfig { @@ -99,6 +100,7 @@ impl Default for ApiTesterConfig { Self { spec, retain_historic_states: false, + import_all_data_columns: false, } } } @@ -137,6 +139,7 @@ impl ApiTester { .deterministic_withdrawal_keypairs(VALIDATOR_COUNT) .fresh_ephemeral_store() 
.mock_execution_layer() + .import_all_data_columns(config.import_all_data_columns) .build(); harness @@ -441,10 +444,7 @@ impl ApiTester { } pub async fn new_mev_tester_default_payload_value() -> Self { - let mut config = ApiTesterConfig { - retain_historic_states: false, - spec: E::default_spec(), - }; + let mut config = ApiTesterConfig::default(); config.spec.altair_fork_epoch = Some(Epoch::new(0)); config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); let tester = Self::new_from_config(config) @@ -1858,7 +1858,7 @@ impl ApiTester { }; let result = match self .client - .get_blobs::( + .get_blob_sidecars::( CoreBlockId::Root(block_root), blob_indices.as_deref(), &self.chain.spec, @@ -1879,6 +1879,77 @@ impl ApiTester { self } + pub async fn test_get_blobs(self, versioned_hashes: bool) -> Self { + let block_id = BlockId(CoreBlockId::Finalized); + let (block_root, _, _) = block_id.root(&self.chain).unwrap(); + let (block, _, _) = block_id.full_block(&self.chain).await.unwrap(); + let num_blobs = block.num_expected_blobs(); + + let versioned_hashes: Option> = if versioned_hashes { + Some( + block + .message() + .body() + .blob_kzg_commitments() + .unwrap() + .iter() + .map(|commitment| commitment.calculate_versioned_hash()) + .collect(), + ) + } else { + None + }; + + let result = match self + .client + .get_blobs::(CoreBlockId::Root(block_root), versioned_hashes.as_deref()) + .await + { + Ok(response) => response.unwrap().into_data(), + Err(e) => panic!("query failed incorrectly: {e:?}"), + }; + + assert_eq!( + result.len(), + versioned_hashes.map_or(num_blobs, |versioned_hashes| versioned_hashes.len()) + ); + + self + } + + pub async fn test_get_blobs_post_fulu_full_node(self, versioned_hashes: bool) -> Self { + let block_id = BlockId(CoreBlockId::Finalized); + let (block_root, _, _) = block_id.root(&self.chain).unwrap(); + let (block, _, _) = block_id.full_block(&self.chain).await.unwrap(); + + let versioned_hashes: Option> = if versioned_hashes { + Some( + 
block + .message() + .body() + .blob_kzg_commitments() + .unwrap() + .iter() + .map(|commitment| commitment.calculate_versioned_hash()) + .collect(), + ) + } else { + None + }; + + match self + .client + .get_blobs::(CoreBlockId::Root(block_root), versioned_hashes.as_deref()) + .await + { + Ok(result) => panic!("Full node are unable to return blobs post-Fulu: {result:?}"), + // Post-Fulu, full nodes don't store blobs and return error 500 + Err(e) => assert_eq!(e.status().unwrap(), 500), + }; + + self + } + /// Test fetching of blob sidecars that are not available in the database due to pruning. /// /// If `zero_blobs` is false, test a block with >0 blobs, which should be unavailable. @@ -1918,7 +1989,7 @@ impl ApiTester { match self .client - .get_blobs::(CoreBlockId::Slot(test_slot), None, &self.chain.spec) + .get_blob_sidecars::(CoreBlockId::Slot(test_slot), None, &self.chain.spec) .await { Ok(result) => { @@ -1956,7 +2027,7 @@ impl ApiTester { match self .client - .get_blobs::(CoreBlockId::Slot(test_slot), None, &self.chain.spec) + .get_blob_sidecars::(CoreBlockId::Slot(test_slot), None, &self.chain.spec) .await { Ok(result) => panic!("queries for pre-Deneb slots should fail. 
got: {result:?}"), @@ -7704,10 +7775,7 @@ async fn builder_payload_chosen_by_profit_v3() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn builder_works_post_capella() { - let mut config = ApiTesterConfig { - retain_historic_states: false, - spec: E::default_spec(), - }; + let mut config = ApiTesterConfig::default(); config.spec.altair_fork_epoch = Some(Epoch::new(0)); config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); config.spec.capella_fork_epoch = Some(Epoch::new(0)); @@ -7724,10 +7792,7 @@ async fn builder_works_post_capella() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn builder_works_post_deneb() { - let mut config = ApiTesterConfig { - retain_historic_states: false, - spec: E::default_spec(), - }; + let mut config = ApiTesterConfig::default(); config.spec.altair_fork_epoch = Some(Epoch::new(0)); config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); config.spec.capella_fork_epoch = Some(Epoch::new(0)); @@ -7745,22 +7810,66 @@ async fn builder_works_post_deneb() { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn get_blob_sidecars() { + let mut config = ApiTesterConfig::default(); + config.spec.altair_fork_epoch = Some(Epoch::new(0)); + config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); + config.spec.capella_fork_epoch = Some(Epoch::new(0)); + config.spec.deneb_fork_epoch = Some(Epoch::new(0)); + + ApiTester::new_from_config(config) + .await + .test_post_beacon_blocks_valid() + .await + .test_get_blob_sidecars(false) + .await + .test_get_blob_sidecars(true) + .await + .test_get_blobs(false) + .await + .test_get_blobs(true) + .await; +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn get_blobs_post_fulu_supernode() { let mut config = ApiTesterConfig { retain_historic_states: false, spec: E::default_spec(), + // For supernode, we import all data columns + import_all_data_columns: true, }; config.spec.altair_fork_epoch = Some(Epoch::new(0)); 
config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); config.spec.capella_fork_epoch = Some(Epoch::new(0)); config.spec.deneb_fork_epoch = Some(Epoch::new(0)); + config.spec.electra_fork_epoch = Some(Epoch::new(0)); + config.spec.fulu_fork_epoch = Some(Epoch::new(0)); ApiTester::new_from_config(config) .await - .test_post_beacon_blocks_valid() + // We can call the same get_blobs function in this test + // because the function will call get_blobs_by_versioned_hashes which handles peerDAS post-Fulu + .test_get_blobs(false) .await - .test_get_blob_sidecars(false) + .test_get_blobs(true) + .await; +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn get_blobs_post_fulu_full_node() { + let mut config = ApiTesterConfig::default(); + config.spec.altair_fork_epoch = Some(Epoch::new(0)); + config.spec.bellatrix_fork_epoch = Some(Epoch::new(0)); + config.spec.capella_fork_epoch = Some(Epoch::new(0)); + config.spec.deneb_fork_epoch = Some(Epoch::new(0)); + config.spec.electra_fork_epoch = Some(Epoch::new(0)); + config.spec.fulu_fork_epoch = Some(Epoch::new(0)); + + ApiTester::new_from_config(config) .await - .test_get_blob_sidecars(true) + .test_get_blobs_post_fulu_full_node(false) + .await + .test_get_blobs_post_fulu_full_node(true) .await; } diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 592fccdc741..ad16bb0421c 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -23,6 +23,7 @@ pub use libp2p::identity::Keypair; pub mod peerdb; use crate::peer_manager::peerdb::client::ClientKind; +use crate::types::GossipKind; use libp2p::multiaddr; use network_utils::discovery_metrics; use network_utils::enr_ext::{EnrExt, peer_id_to_node_id}; @@ -1434,8 +1435,16 @@ impl PeerManager { // Update peer score metrics; self.update_peer_score_metrics(); - // Maintain minimum count for custody peers. 
- self.maintain_custody_peers(); + // Maintain minimum count for custody peers if we are subscribed to any data column topics (i.e. PeerDAS activated) + let peerdas_enabled = self + .network_globals + .gossipsub_subscriptions + .read() + .iter() + .any(|topic| matches!(topic.kind(), &GossipKind::DataColumnSidecar(_))); + if peerdas_enabled { + self.maintain_custody_peers(); + } // Maintain minimum count for sync committee peers. self.maintain_sync_committee_peers(); @@ -3140,4 +3149,60 @@ mod tests { }) } } + + #[tokio::test] + async fn test_custody_peer_logic_only_runs_when_peerdas_enabled() { + use crate::types::{GossipEncoding, GossipTopic}; + + let mut peer_manager = build_peer_manager(5).await; + + // Set up sampling subnets so maintain_custody_peers would have work to do + *peer_manager.network_globals.sampling_subnets.write() = std::collections::HashSet::from([ + DataColumnSubnetId::new(0), + DataColumnSubnetId::new(1), + ]); + + // Test 1: No data column subscriptions - custody peer logic should NOT run + peer_manager.heartbeat(); + + // Should be no new DiscoverSubnetPeers events since PeerDAS is not enabled + let discovery_events: Vec<_> = peer_manager + .events + .iter() + .filter(|event| matches!(event, PeerManagerEvent::DiscoverSubnetPeers(_))) + .collect(); + assert!( + discovery_events.is_empty(), + "Should not generate discovery events when PeerDAS is disabled, but found: {:?}", + discovery_events + ); + + // Test 2: Add data column subscription - custody peer logic should run + let data_column_topic = GossipTopic::new( + GossipKind::DataColumnSidecar(DataColumnSubnetId::new(0)), + GossipEncoding::SSZSnappy, + [0, 0, 0, 0], // fork_digest + ); + peer_manager + .network_globals + .gossipsub_subscriptions + .write() + .insert(data_column_topic); + + // Clear any existing events to isolate the test + peer_manager.events.clear(); + + peer_manager.heartbeat(); + + // Should now have DiscoverSubnetPeers events since PeerDAS is enabled + let 
discovery_events: Vec<_> = peer_manager + .events + .iter() + .filter(|event| matches!(event, PeerManagerEvent::DiscoverSubnetPeers(_))) + .collect(); + assert!( + !discovery_events.is_empty(), + "Should generate discovery events when PeerDAS is enabled, but found no discovery events" + ); + } } diff --git a/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs b/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs index 65cd1c2e61e..8b364f506cc 100644 --- a/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs +++ b/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs @@ -382,16 +382,41 @@ impl RPCRateLimiter { pub fn prune(&mut self) { let time_since_start = self.init_time.elapsed(); - self.ping_rl.prune(time_since_start); - self.status_rl.prune(time_since_start); - self.metadata_rl.prune(time_since_start); - self.goodbye_rl.prune(time_since_start); - self.bbrange_rl.prune(time_since_start); - self.bbroots_rl.prune(time_since_start); - self.blbrange_rl.prune(time_since_start); - self.blbroot_rl.prune(time_since_start); - self.dcbrange_rl.prune(time_since_start); - self.dcbroot_rl.prune(time_since_start); + + let Self { + prune_interval: _, + init_time: _, + goodbye_rl, + ping_rl, + metadata_rl, + status_rl, + bbrange_rl, + bbroots_rl, + blbrange_rl, + blbroot_rl, + dcbroot_rl, + dcbrange_rl, + lc_bootstrap_rl, + lc_optimistic_update_rl, + lc_finality_update_rl, + lc_updates_by_range_rl, + fork_context: _, + } = self; + + goodbye_rl.prune(time_since_start); + ping_rl.prune(time_since_start); + metadata_rl.prune(time_since_start); + status_rl.prune(time_since_start); + bbrange_rl.prune(time_since_start); + bbroots_rl.prune(time_since_start); + blbrange_rl.prune(time_since_start); + blbroot_rl.prune(time_since_start); + dcbrange_rl.prune(time_since_start); + dcbroot_rl.prune(time_since_start); + lc_bootstrap_rl.prune(time_since_start); + lc_optimistic_update_rl.prune(time_since_start); + lc_finality_update_rl.prune(time_since_start); + 
lc_updates_by_range_rl.prune(time_since_start); } } diff --git a/beacon_node/lighthouse_tracing/src/lib.rs b/beacon_node/lighthouse_tracing/src/lib.rs index 1787399761b..18a9874252a 100644 --- a/beacon_node/lighthouse_tracing/src/lib.rs +++ b/beacon_node/lighthouse_tracing/src/lib.rs @@ -26,6 +26,10 @@ pub const SPAN_PROCESS_RPC_BLOCK: &str = "process_rpc_block"; pub const SPAN_PROCESS_RPC_BLOBS: &str = "process_rpc_blobs"; pub const SPAN_PROCESS_RPC_CUSTODY_COLUMNS: &str = "process_rpc_custody_columns"; pub const SPAN_PROCESS_CHAIN_SEGMENT: &str = "process_chain_segment"; +pub const SPAN_PROCESS_CHAIN_SEGMENT_BACKFILL: &str = "process_chain_segment_backfill"; + +/// Fork choice root spans +pub const SPAN_RECOMPUTE_HEAD: &str = "recompute_head_at_slot"; /// RPC methods root spans pub const SPAN_HANDLE_BLOCKS_BY_RANGE_REQUEST: &str = "handle_blocks_by_range_request"; @@ -58,6 +62,7 @@ pub const LH_BN_ROOT_SPAN_NAMES: &[&str] = &[ SPAN_PROCESS_RPC_BLOBS, SPAN_PROCESS_RPC_CUSTODY_COLUMNS, SPAN_PROCESS_CHAIN_SEGMENT, + SPAN_PROCESS_CHAIN_SEGMENT_BACKFILL, SPAN_HANDLE_BLOCKS_BY_RANGE_REQUEST, SPAN_HANDLE_BLOBS_BY_RANGE_REQUEST, SPAN_HANDLE_DATA_COLUMNS_BY_RANGE_REQUEST, diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index 1f1a3427e78..5fc94c29587 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -34,7 +34,6 @@ use std::path::PathBuf; use std::sync::Arc; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use store::hot_cold_store::HotColdDBError; -use tokio::sync::mpsc::error::TrySendError; use tracing::{Instrument, Span, debug, error, info, instrument, trace, warn}; use types::{ Attestation, AttestationData, AttestationRef, AttesterSlashing, BlobSidecar, DataColumnSidecar, @@ -1054,36 +1053,43 @@ impl NetworkBeaconProcessor { "Processed data column, waiting 
for other components" ); - // Instead of triggering reconstruction immediately, schedule it to be run. If - // another column arrives it either completes availability or pushes - // reconstruction back a bit. - let cloned_self = Arc::clone(self); - let block_root = *block_root; - let send_result = self.beacon_processor_send.try_send(WorkEvent { - drop_during_sync: false, - work: Work::Reprocess(ReprocessQueueMessage::DelayColumnReconstruction( - QueuedColumnReconstruction { - block_root, - slot: *slot, - process_fn: Box::pin(async move { - cloned_self - .attempt_data_column_reconstruction(block_root, true) - .await; - }), - }, - )), - }); - if let Err(TrySendError::Full(WorkEvent { - work: - Work::Reprocess(ReprocessQueueMessage::DelayColumnReconstruction( - reconstruction, - )), - .. - })) = send_result + if self + .chain + .data_availability_checker + .custody_context() + .should_attempt_reconstruction( + slot.epoch(T::EthSpec::slots_per_epoch()), + &self.chain.spec, + ) { - warn!("Unable to send reconstruction to reprocessing"); - // Execute it immediately instead. - reconstruction.process_fn.await; + // Instead of triggering reconstruction immediately, schedule it to be run. If + // another column arrives, it either completes availability or pushes + // reconstruction back a bit. 
+ let cloned_self = Arc::clone(self); + let block_root = *block_root; + + if self + .beacon_processor_send + .try_send(WorkEvent { + drop_during_sync: false, + work: Work::Reprocess( + ReprocessQueueMessage::DelayColumnReconstruction( + QueuedColumnReconstruction { + block_root, + slot: *slot, + process_fn: Box::pin(async move { + cloned_self + .attempt_data_column_reconstruction(block_root) + .await; + }), + }, + ), + ), + }) + .is_err() + { + warn!("Unable to send reconstruction to reprocessing"); + } } } }, @@ -1494,11 +1500,12 @@ impl NetworkBeaconProcessor { let result = self .chain - .process_block_with_early_caching( + .process_block( block_root, verified_block, - BlockImportSource::Gossip, NotifyExecutionLayer::Yes, + BlockImportSource::Gossip, + || Ok(()), ) .await; register_process_result_metrics(&result, metrics::BlockSource::Gossip, "block"); diff --git a/beacon_node/network/src/network_beacon_processor/mod.rs b/beacon_node/network/src/network_beacon_processor/mod.rs index 73349cd4314..85ccde1d591 100644 --- a/beacon_node/network/src/network_beacon_processor/mod.rs +++ b/beacon_node/network/src/network_beacon_processor/mod.rs @@ -6,9 +6,7 @@ use beacon_chain::data_column_verification::{GossipDataColumnError, observe_goss use beacon_chain::fetch_blobs::{ EngineGetBlobsOutput, FetchEngineBlobError, fetch_and_process_engine_blobs, }; -use beacon_chain::{ - AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError, NotifyExecutionLayer, -}; +use beacon_chain::{AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError}; use beacon_processor::{ BeaconProcessorSend, DuplicateCache, GossipAggregatePackage, GossipAttestationPackage, Work, WorkEvent as BeaconWorkEvent, @@ -28,7 +26,7 @@ use std::sync::Arc; use std::time::Duration; use task_executor::TaskExecutor; use tokio::sync::mpsc::{self, error::TrySendError}; -use tracing::{debug, error, trace, warn}; +use tracing::{debug, error, instrument, trace, warn}; use types::*; pub use 
sync_methods::ChainSegmentProcessId; @@ -500,33 +498,23 @@ impl NetworkBeaconProcessor { process_id: ChainSegmentProcessId, blocks: Vec>, ) -> Result<(), Error> { - let is_backfill = matches!(&process_id, ChainSegmentProcessId::BackSyncBatchId { .. }); debug!(blocks = blocks.len(), id = ?process_id, "Batch sending for process"); - let processor = self.clone(); - let process_fn = async move { - let notify_execution_layer = if processor - .network_globals - .sync_state - .read() - .is_syncing_finalized() - { - NotifyExecutionLayer::No - } else { - NotifyExecutionLayer::Yes - }; - processor - .process_chain_segment(process_id, blocks, notify_execution_layer) - .await; - }; - let process_fn = Box::pin(process_fn); // Back-sync batches are dispatched with a different `Work` variant so // they can be rate-limited. - let work = if is_backfill { - Work::ChainSegmentBackfill(process_fn) - } else { - Work::ChainSegment(process_fn) + let work = match process_id { + ChainSegmentProcessId::RangeBatchId(_, _) => { + let process_fn = async move { + processor.process_chain_segment(process_id, blocks).await; + }; + Work::ChainSegment(Box::pin(process_fn)) + } + ChainSegmentProcessId::BackSyncBatchId(_) => { + let process_fn = + move || processor.process_chain_segment_backfill(process_id, blocks); + Work::ChainSegmentBackfill(Box::new(process_fn)) + } }; self.try_send(BeaconWorkEvent { @@ -825,36 +813,15 @@ impl NetworkBeaconProcessor { } } - /// Attempt to reconstruct all data columns if the following conditions satisfies: - /// - Our custody requirement is all columns - /// - We have >= 50% of columns, but not all columns - /// - /// Returns `Some(AvailabilityProcessingStatus)` if reconstruction is successfully performed, - /// otherwise returns `None`. - /// - /// The `publish_columns` parameter controls whether reconstructed columns should be published - /// to the gossip network. 
- async fn attempt_data_column_reconstruction( - self: &Arc, - block_root: Hash256, - publish_columns: bool, - ) -> Option { - // Only supernodes attempt reconstruction - if !self - .chain - .data_availability_checker - .custody_context() - .current_is_supernode - { - return None; - } - + /// Attempts to reconstruct all data columns if the conditions checked in + /// [`DataAvailabilityCheckerInner::check_and_set_reconstruction_started`] are satisfied. + #[instrument(level = "debug", skip_all, fields(?block_root))] + async fn attempt_data_column_reconstruction(self: &Arc, block_root: Hash256) { let result = self.chain.reconstruct_data_columns(block_root).await; + match result { Ok(Some((availability_processing_status, data_columns_to_publish))) => { - if publish_columns { - self.publish_data_columns_gradually(data_columns_to_publish, block_root); - } + self.publish_data_columns_gradually(data_columns_to_publish, block_root); match &availability_processing_status { AvailabilityProcessingStatus::Imported(hash) => { debug!( @@ -867,21 +834,18 @@ impl NetworkBeaconProcessor { AvailabilityProcessingStatus::MissingComponents(_, _) => { debug!( result = "imported all custody columns", - block_hash = %block_root, + %block_root, "Block components still missing block after reconstruction" ); } } - - Some(availability_processing_status) } Ok(None) => { // reason is tracked via the `KZG_DATA_COLUMN_RECONSTRUCTION_INCOMPLETE_TOTAL` metric trace!( - block_hash = %block_root, + %block_root, "Reconstruction not required for block" ); - None } Err(e) => { error!( @@ -889,7 +853,6 @@ impl NetworkBeaconProcessor { error = ?e, "Error during data column reconstruction" ); - None } } } @@ -978,6 +941,7 @@ impl NetworkBeaconProcessor { /// by some nodes on the network as soon as possible. Our hope is that some columns arrive from /// other nodes in the meantime, obviating the need for us to publish them. If no other /// publisher exists for a column, it will eventually get published here. 
+ #[instrument(level="debug", skip_all, fields(?block_root, data_column_count=data_columns_to_publish.len()))] fn publish_data_columns_gradually( self: &Arc, mut data_columns_to_publish: DataColumnSidecarList, diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs index 9ddba86b81d..58e02ffe007 100644 --- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs @@ -437,12 +437,12 @@ impl NetworkBeaconProcessor { } } Err(e) => { - // TODO(das): lower log level when feature is stabilized - error!( + // The node is expected to be able to serve these columns, but it fails to retrieve them. + warn!( block_root = ?data_column_ids_by_root.block_root, %peer_id, error = ?e, - "Error getting data column" + "Error getting data column for by root request " ); return Err((RpcErrorResponse::ServerError, "Error getting data column")); } diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index f24495cc54c..1d99540c299 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -19,9 +19,10 @@ use beacon_processor::{ use beacon_processor::{Work, WorkEvent}; use lighthouse_network::PeerAction; use lighthouse_tracing::{ - SPAN_PROCESS_CHAIN_SEGMENT, SPAN_PROCESS_RPC_BLOBS, SPAN_PROCESS_RPC_BLOCK, - SPAN_PROCESS_RPC_CUSTODY_COLUMNS, + SPAN_PROCESS_CHAIN_SEGMENT, SPAN_PROCESS_CHAIN_SEGMENT_BACKFILL, SPAN_PROCESS_RPC_BLOBS, + SPAN_PROCESS_RPC_BLOCK, SPAN_PROCESS_RPC_CUSTODY_COLUMNS, }; +use logging::crit; use std::sync::Arc; use std::time::Duration; use store::KzgCommitment; @@ -167,11 +168,12 @@ impl NetworkBeaconProcessor { let signed_beacon_block = block.block_cloned(); let result = self .chain - .process_block_with_early_caching( + 
.process_block( block_root, block, - BlockImportSource::Lookup, NotifyExecutionLayer::Yes, + BlockImportSource::Lookup, + || Ok(()), ) .await; register_process_result_metrics(&result, metrics::BlockSource::Rpc, "block"); @@ -331,14 +333,8 @@ impl NetworkBeaconProcessor { "Blobs have already been imported" ); } - Err(e) => { - warn!( - error = ?e, - block_hash = %block_root, - %slot, - "Error when importing rpc blobs" - ); - } + // Errors are handled and logged in `block_lookups` + Err(_) => {} } // Sync handles these results @@ -383,7 +379,7 @@ impl NetworkBeaconProcessor { "RPC custody data columns received" ); - let mut result = self + let result = self .chain .process_rpc_custody_columns(custody_columns) .await; @@ -404,17 +400,6 @@ impl NetworkBeaconProcessor { block_hash = %block_root, "Missing components over rpc" ); - // Attempt reconstruction here before notifying sync, to avoid sending out more requests - // that we may no longer need. - // We don't publish columns reconstructed from rpc columns to the gossip network, - // as these are likely historic columns. 
- let publish_columns = false; - if let Some(availability) = self - .attempt_data_column_reconstruction(block_root, publish_columns) - .await - { - result = Ok(availability) - } } }, Err(BlockError::DuplicateFullyImported(_)) => { @@ -423,13 +408,8 @@ impl NetworkBeaconProcessor { "Custody columns have already been imported" ); } - Err(e) => { - warn!( - error = ?e, - block_hash = %block_root, - "Error when importing rpc custody columns" - ); - } + // Errors are handled and logged in `block_lookups` + Err(_) => {} } self.send_sync_message(SyncMessage::BlockComponentProcessed { @@ -445,27 +425,42 @@ impl NetworkBeaconProcessor { parent = None, level = "debug", skip_all, - fields(sync_type = ?sync_type, downloaded_blocks = downloaded_blocks.len()) + fields(process_id = ?process_id, downloaded_blocks = downloaded_blocks.len()) )] pub async fn process_chain_segment( &self, - sync_type: ChainSegmentProcessId, + process_id: ChainSegmentProcessId, downloaded_blocks: Vec>, - notify_execution_layer: NotifyExecutionLayer, ) { - let result = match sync_type { - // this a request from the range sync - ChainSegmentProcessId::RangeBatchId(chain_id, epoch) => { - let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64()); - let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64()); - let sent_blocks = downloaded_blocks.len(); - - match self - .process_blocks(downloaded_blocks.iter(), notify_execution_layer) - .await - { - (imported_blocks, Ok(_)) => { - debug!( + let ChainSegmentProcessId::RangeBatchId(chain_id, epoch) = process_id else { + // This is a request from range sync, this should _never_ happen + crit!( + error = "process_chain_segment called on a variant other than RangeBatchId", + "Please notify the devs" + ); + return; + }; + + let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64()); + let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64()); + let sent_blocks = downloaded_blocks.len(); + let notify_execution_layer = if 
self + .network_globals + .sync_state + .read() + .is_syncing_finalized() + { + NotifyExecutionLayer::No + } else { + NotifyExecutionLayer::Yes + }; + + let result = match self + .process_blocks(downloaded_blocks.iter(), notify_execution_layer) + .await + { + (imported_blocks, Ok(_)) => { + debug!( batch_epoch = %epoch, first_block_slot = start_slot, chain = chain_id, @@ -473,13 +468,13 @@ impl NetworkBeaconProcessor { processed_blocks = sent_blocks, service= "sync", "Batch processed"); - BatchProcessResult::Success { - sent_blocks, - imported_blocks, - } - } - (imported_blocks, Err(e)) => { - debug!( + BatchProcessResult::Success { + sent_blocks, + imported_blocks, + } + } + (imported_blocks, Err(e)) => { + debug!( batch_epoch = %epoch, first_block_slot = start_slot, chain = chain_id, @@ -488,33 +483,61 @@ impl NetworkBeaconProcessor { error = %e.message, service = "sync", "Batch processing failed"); - match e.peer_action { - Some(penalty) => BatchProcessResult::FaultyFailure { - imported_blocks, - penalty, - }, - None => BatchProcessResult::NonFaultyFailure, - } - } + match e.peer_action { + Some(penalty) => BatchProcessResult::FaultyFailure { + imported_blocks, + penalty, + }, + None => BatchProcessResult::NonFaultyFailure, } } - // this a request from the Backfill sync - ChainSegmentProcessId::BackSyncBatchId(epoch) => { - let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64()); - let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64()); - let sent_blocks = downloaded_blocks.len(); - let n_blobs = downloaded_blocks - .iter() - .map(|wrapped| wrapped.n_blobs()) - .sum::(); - let n_data_columns = downloaded_blocks - .iter() - .map(|wrapped| wrapped.n_data_columns()) - .sum::(); - - match self.process_backfill_blocks(downloaded_blocks) { - (imported_blocks, Ok(_)) => { - debug!( + }; + + self.send_sync_message(SyncMessage::BatchProcessed { + sync_type: process_id, + result, + }); + } + + /// Attempt to import the chain segment (`blocks`) to 
the beacon chain, informing the sync + /// thread if more blocks are needed to process it. + #[instrument( + name = SPAN_PROCESS_CHAIN_SEGMENT_BACKFILL, + parent = None, + level = "debug", + skip_all, + fields(downloaded_blocks = downloaded_blocks.len()) + )] + pub fn process_chain_segment_backfill( + &self, + process_id: ChainSegmentProcessId, + downloaded_blocks: Vec>, + ) { + let ChainSegmentProcessId::BackSyncBatchId(epoch) = process_id else { + // this a request from RangeSync, this should _never_ happen + crit!( + error = + "process_chain_segment_backfill called on a variant other than BackSyncBatchId", + "Please notify the devs" + ); + return; + }; + + let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64()); + let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64()); + let sent_blocks = downloaded_blocks.len(); + let n_blobs = downloaded_blocks + .iter() + .map(|wrapped| wrapped.n_blobs()) + .sum::(); + let n_data_columns = downloaded_blocks + .iter() + .map(|wrapped| wrapped.n_data_columns()) + .sum::(); + + let result = match self.process_backfill_blocks(downloaded_blocks) { + (imported_blocks, Ok(_)) => { + debug!( batch_epoch = %epoch, first_block_slot = start_slot, keep_execution_payload = !self.chain.store.get_config().prune_payloads, @@ -524,34 +547,35 @@ impl NetworkBeaconProcessor { processed_data_columns = n_data_columns, service= "sync", "Backfill batch processed"); - BatchProcessResult::Success { - sent_blocks, - imported_blocks, - } - } - (_, Err(e)) => { - debug!( - batch_epoch = %epoch, - first_block_slot = start_slot, - last_block_slot = end_slot, - processed_blobs = n_blobs, - error = %e.message, - service = "sync", - "Backfill batch processing failed" - ); - match e.peer_action { - Some(penalty) => BatchProcessResult::FaultyFailure { - imported_blocks: 0, - penalty, - }, - None => BatchProcessResult::NonFaultyFailure, - } - } + BatchProcessResult::Success { + sent_blocks, + imported_blocks, + } + } + (_, Err(e)) => { 
+ debug!( + batch_epoch = %epoch, + first_block_slot = start_slot, + last_block_slot = end_slot, + processed_blobs = n_blobs, + error = %e.message, + service = "sync", + "Backfill batch processing failed" + ); + match e.peer_action { + Some(penalty) => BatchProcessResult::FaultyFailure { + imported_blocks: 0, + penalty, + }, + None => BatchProcessResult::NonFaultyFailure, } } }; - self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result }); + self.send_sync_message(SyncMessage::BatchProcessed { + sync_type: process_id, + result, + }); } /// Helper function to process blocks batches which only consumes the chain and blocks to process. diff --git a/beacon_node/network/src/network_beacon_processor/tests.rs b/beacon_node/network/src/network_beacon_processor/tests.rs index 2935c2d2132..4137c974bf3 100644 --- a/beacon_node/network/src/network_beacon_processor/tests.rs +++ b/beacon_node/network/src/network_beacon_processor/tests.rs @@ -458,10 +458,10 @@ impl TestRig { .unwrap(); } - pub fn enqueue_backfill_batch(&self) { + pub fn enqueue_backfill_batch(&self, epoch: Epoch) { self.network_beacon_processor .send_chain_segment( - ChainSegmentProcessId::BackSyncBatchId(Epoch::default()), + ChainSegmentProcessId::BackSyncBatchId(epoch), Vec::default(), ) .unwrap(); @@ -606,7 +606,7 @@ impl TestRig { } pub async fn assert_event_journal(&mut self, expected: &[&str]) { - self.assert_event_journal_with_timeout(expected, STANDARD_TIMEOUT) + self.assert_event_journal_with_timeout(expected, STANDARD_TIMEOUT, false, false) .await } @@ -623,6 +623,8 @@ impl TestRig { .chain(std::iter::once(NOTHING_TO_DO)) .collect::>(), timeout, + false, + false, ) .await } @@ -666,11 +668,21 @@ impl TestRig { &mut self, expected: &[&str], timeout: Duration, + ignore_worker_freed: bool, + ignore_nothing_to_do: bool, ) { let mut events = Vec::with_capacity(expected.len()); let drain_future = async { while let Some(event) = self.work_journal_rx.recv().await { + if event == WORKER_FREED && 
ignore_worker_freed { + continue; + } + + if event == NOTHING_TO_DO && ignore_nothing_to_do { + continue; + } + events.push(event); // Break as soon as we collect the desired number of events. @@ -1009,10 +1021,6 @@ async fn import_gossip_block_acceptably_early() { rig.assert_event_journal_completes(&[WorkType::GossipDataColumnSidecar]) .await; } - if num_data_columns > 0 { - rig.assert_event_journal_completes(&[WorkType::ColumnReconstruction]) - .await; - } // Note: this section of the code is a bit race-y. We're assuming that we can set the slot clock // and check the head in the time between the block arrived early and when its due for @@ -1388,6 +1396,8 @@ async fn requeue_unknown_block_gossip_attestation_without_import() { NOTHING_TO_DO, ], Duration::from_secs(1) + QUEUED_ATTESTATION_DELAY, + false, + false, ) .await; @@ -1428,6 +1438,8 @@ async fn requeue_unknown_block_gossip_aggregated_attestation_without_import() { NOTHING_TO_DO, ], Duration::from_secs(1) + QUEUED_ATTESTATION_DELAY, + false, + false, ) .await; @@ -1562,8 +1574,8 @@ async fn test_backfill_sync_processing() { // (not straight forward to manipulate `TestingSlotClock` due to cloning of `SlotClock` in code) // and makes the test very slow, hence timing calculation is unit tested separately in // `work_reprocessing_queue`. - for _ in 0..1 { - rig.enqueue_backfill_batch(); + for i in 0..1 { + rig.enqueue_backfill_batch(Epoch::new(i)); // ensure queued batch is not processed until later rig.assert_no_events_for(Duration::from_millis(100)).await; // A new batch should be processed within a slot. 
@@ -1574,6 +1586,8 @@ async fn test_backfill_sync_processing() { NOTHING_TO_DO, ], rig.chain.slot_clock.slot_duration(), + false, + false, ) .await; } @@ -1594,8 +1608,8 @@ async fn test_backfill_sync_processing_rate_limiting_disabled() { ) .await; - for _ in 0..3 { - rig.enqueue_backfill_batch(); + for i in 0..3 { + rig.enqueue_backfill_batch(Epoch::new(i)); } // ensure all batches are processed @@ -1606,6 +1620,8 @@ async fn test_backfill_sync_processing_rate_limiting_disabled() { WorkType::ChainSegmentBackfill.into(), ], Duration::from_millis(100), + true, + true, ) .await; } diff --git a/beacon_node/network/src/service.rs b/beacon_node/network/src/service.rs index c97206ea873..4bd649ba824 100644 --- a/beacon_node/network/src/service.rs +++ b/beacon_node/network/src/service.rs @@ -840,6 +840,7 @@ impl NetworkService { new_fork = ?new_fork_name, "Transitioned to new fork" ); + new_fork_name.fork_ascii(); } fork_context.update_current_fork(*new_fork_name, new_fork_digest, current_epoch); diff --git a/beacon_node/network/src/subnet_service/attestation_subnets.rs b/beacon_node/network/src/subnet_service/attestation_subnets.rs deleted file mode 100644 index 0da27c6a21f..00000000000 --- a/beacon_node/network/src/subnet_service/attestation_subnets.rs +++ /dev/null @@ -1,681 +0,0 @@ -//! This service keeps track of which shard subnet the beacon node should be subscribed to at any -//! given time. It schedules subscriptions to shard subnets, requests peer discoveries and -//! determines whether attestations should be aggregated and/or passed to the beacon node. 
- -use super::SubnetServiceMessage; -use std::collections::HashSet; -use std::collections::{HashMap, VecDeque}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; -use std::time::Duration; - -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use delay_map::{HashMapDelay, HashSetDelay}; -use futures::prelude::*; -use lighthouse_network::{discv5::enr::NodeId, NetworkConfig, Subnet, SubnetDiscovery}; -use slot_clock::SlotClock; -use tracing::{debug, error, info, trace, warn}; -use types::{Attestation, EthSpec, Slot, SubnetId, ValidatorSubscription}; - -use crate::metrics; - -/// The minimum number of slots ahead that we attempt to discover peers for a subscription. If the -/// slot is less than this number, skip the peer discovery process. -/// Subnet discovery query takes at most 30 secs, 2 slots take 24s. -pub(crate) const MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD: u64 = 2; -/// The fraction of a slot that we subscribe to a subnet before the required slot. -/// -/// Currently a whole slot ahead. -const ADVANCE_SUBSCRIBE_SLOT_FRACTION: u32 = 1; - -/// The number of slots after an aggregator duty where we remove the entry from -/// `aggregate_validators_on_subnet` delay map. -const UNSUBSCRIBE_AFTER_AGGREGATOR_DUTY: u32 = 2; - -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub(crate) enum SubscriptionKind { - /// Long lived subscriptions. - /// - /// These have a longer duration and are advertised in our ENR. - LongLived, - /// Short lived subscriptions. - /// - /// Subscribing to these subnets has a short duration and we don't advertise it in our ENR. - ShortLived, -} - -/// A particular subnet at a given slot. -#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy)] -pub struct ExactSubnet { - /// The `SubnetId` associated with this subnet. - pub subnet_id: SubnetId, - /// The `Slot` associated with this subnet. - pub slot: Slot, -} - -pub struct AttestationService { - /// Queued events to return to the driving service. 
- events: VecDeque, - - /// A reference to the beacon chain to process received attestations. - pub(crate) beacon_chain: Arc>, - - /// Subnets we are currently subscribed to as short lived subscriptions. - /// - /// Once they expire, we unsubscribe from these. - /// We subscribe to subnets when we are an aggregator for an exact subnet. - short_lived_subscriptions: HashMapDelay, - - /// Subnets we are currently subscribed to as long lived subscriptions. - /// - /// We advertise these in our ENR. When these expire, the subnet is removed from our ENR. - /// These are required of all beacon nodes. The exact number is determined by the chain - /// specification. - long_lived_subscriptions: HashSet, - - /// Short lived subscriptions that need to be executed in the future. - scheduled_short_lived_subscriptions: HashSetDelay, - - /// A collection timeouts to track the existence of aggregate validator subscriptions at an - /// `ExactSubnet`. - aggregate_validators_on_subnet: Option>, - - /// The waker for the current thread. - waker: Option, - - /// The discovery mechanism of lighthouse is disabled. - discovery_disabled: bool, - - /// We are always subscribed to all subnets. - subscribe_all_subnets: bool, - - /// Our Discv5 node_id. - node_id: NodeId, - - /// Future used to manage subscribing and unsubscribing from long lived subnets. - next_long_lived_subscription_event: Pin>, - - /// Whether this node is a block proposer-only node. - proposer_only: bool, -} - -impl AttestationService { - /* Public functions */ - - /// Establish the service based on the passed configuration. 
- pub fn new(beacon_chain: Arc>, node_id: NodeId, config: &NetworkConfig) -> Self { - let slot_duration = beacon_chain.slot_clock.slot_duration(); - - if config.subscribe_all_subnets { - info!("Subscribing to all subnets"); - } else { - info!( - subnets_per_node = beacon_chain.spec.subnets_per_node, - subscription_duration_in_epochs = beacon_chain.spec.epochs_per_subnet_subscription, - "Deterministic long lived subnets enabled" - ); - } - - let track_validators = !config.import_all_attestations; - let aggregate_validators_on_subnet = - track_validators.then(|| HashSetDelay::new(slot_duration)); - let mut service = AttestationService { - events: VecDeque::with_capacity(10), - beacon_chain, - short_lived_subscriptions: HashMapDelay::new(slot_duration), - long_lived_subscriptions: HashSet::default(), - scheduled_short_lived_subscriptions: HashSetDelay::default(), - aggregate_validators_on_subnet, - waker: None, - discovery_disabled: config.disable_discovery, - subscribe_all_subnets: config.subscribe_all_subnets, - node_id, - next_long_lived_subscription_event: { - // Set a dummy sleep. Calculating the current subnet subscriptions will update this - // value with a smarter timing - Box::pin(tokio::time::sleep(Duration::from_secs(1))) - }, - proposer_only: config.proposer_only, - }; - - // If we are not subscribed to all subnets, handle the deterministic set of subnets - if !config.subscribe_all_subnets { - service.recompute_long_lived_subnets(); - } - - service - } - - /// Return count of all currently subscribed subnets (long-lived **and** short-lived). - #[cfg(test)] - pub fn subscription_count(&self) -> usize { - if self.subscribe_all_subnets { - self.beacon_chain.spec.attestation_subnet_count as usize - } else { - let count = self - .short_lived_subscriptions - .keys() - .chain(self.long_lived_subscriptions.iter()) - .collect::>() - .len(); - count - } - } - - /// Returns whether we are subscribed to a subnet for testing purposes. 
- #[cfg(test)] - pub(crate) fn is_subscribed( - &self, - subnet_id: &SubnetId, - subscription_kind: SubscriptionKind, - ) -> bool { - match subscription_kind { - SubscriptionKind::LongLived => self.long_lived_subscriptions.contains(subnet_id), - SubscriptionKind::ShortLived => self.short_lived_subscriptions.contains_key(subnet_id), - } - } - - #[cfg(test)] - pub(crate) fn long_lived_subscriptions(&self) -> &HashSet { - &self.long_lived_subscriptions - } - - /// Processes a list of validator subscriptions. - /// - /// This will: - /// - Register new validators as being known. - /// - Search for peers for required subnets. - /// - Request subscriptions for subnets on specific slots when required. - /// - Build the timeouts for each of these events. - /// - /// This returns a result simply for the ergonomics of using ?. The result can be - /// safely dropped. - pub fn validator_subscriptions( - &mut self, - subscriptions: impl Iterator, - ) -> Result<(), String> { - // If the node is in a proposer-only state, we ignore all subnet subscriptions. - if self.proposer_only { - return Ok(()); - } - - // Maps each subnet_id subscription to it's highest slot - let mut subnets_to_discover: HashMap = HashMap::new(); - - // Registers the validator with the attestation service. - for subscription in subscriptions { - metrics::inc_counter(&metrics::SUBNET_SUBSCRIPTION_REQUESTS); - - trace!(?subscription, "Validator subscription"); - - // Compute the subnet that is associated with this subscription - let subnet_id = match SubnetId::compute_subnet::( - subscription.slot, - subscription.attestation_committee_index, - subscription.committee_count_at_slot, - &self.beacon_chain.spec, - ) { - Ok(subnet_id) => subnet_id, - Err(e) => { - warn!( - error = ?e, - "Failed to compute subnet id for validator subscription" - ); - continue; - } - }; - // Ensure each subnet_id inserted into the map has the highest slot as it's value. 
- // Higher slot corresponds to higher min_ttl in the `SubnetDiscovery` entry. - if let Some(slot) = subnets_to_discover.get(&subnet_id) { - if subscription.slot > *slot { - subnets_to_discover.insert(subnet_id, subscription.slot); - } - } else if !self.discovery_disabled { - subnets_to_discover.insert(subnet_id, subscription.slot); - } - - let exact_subnet = ExactSubnet { - subnet_id, - slot: subscription.slot, - }; - - // Determine if the validator is an aggregator. If so, we subscribe to the subnet and - // if successful add the validator to a mapping of known aggregators for that exact - // subnet. - - if subscription.is_aggregator { - metrics::inc_counter(&metrics::SUBNET_SUBSCRIPTION_AGGREGATOR_REQUESTS); - if let Err(e) = self.subscribe_to_short_lived_subnet(exact_subnet) { - warn!(error = e, "Subscription to subnet error"); - } else { - trace!(?exact_subnet, "Subscribed to subnet for aggregator duties"); - } - } - } - - // If the discovery mechanism isn't disabled, attempt to set up a peer discovery for the - // required subnets. - if !self.discovery_disabled { - if let Err(e) = self.discover_peers_request( - subnets_to_discover - .into_iter() - .map(|(subnet_id, slot)| ExactSubnet { subnet_id, slot }), - ) { - warn!(error = e, "Discovery lookup request error"); - }; - } - - Ok(()) - } - - fn recompute_long_lived_subnets(&mut self) { - // Ensure the next computation is scheduled even if assigning subnets fails. - let next_subscription_event = self - .recompute_long_lived_subnets_inner() - .unwrap_or_else(|_| self.beacon_chain.slot_clock.slot_duration()); - - debug!("Recomputing deterministic long lived subnets"); - self.next_long_lived_subscription_event = - Box::pin(tokio::time::sleep(next_subscription_event)); - - if let Some(waker) = self.waker.as_ref() { - waker.wake_by_ref(); - } - } - - /// Gets the long lived subnets the node should be subscribed to during the current epoch and - /// the remaining duration for which they remain valid. 
- fn recompute_long_lived_subnets_inner(&mut self) -> Result { - let current_epoch = self.beacon_chain.epoch().map_err(|e| { - if !self - .beacon_chain - .slot_clock - .is_prior_to_genesis() - .unwrap_or(false) - { - error!(err = ?e,"Failed to get the current epoch from clock") - } - })?; - - let (subnets, next_subscription_epoch) = SubnetId::compute_subnets_for_epoch::( - self.node_id.raw(), - current_epoch, - &self.beacon_chain.spec, - ) - .map_err(|e| error!(err = e, "Could not compute subnets for current epoch"))?; - - let next_subscription_slot = - next_subscription_epoch.start_slot(T::EthSpec::slots_per_epoch()); - let next_subscription_event = self - .beacon_chain - .slot_clock - .duration_to_slot(next_subscription_slot) - .ok_or_else(|| { - error!("Failed to compute duration to next to long lived subscription event") - })?; - - self.update_long_lived_subnets(subnets.collect()); - - Ok(next_subscription_event) - } - - /// Updates the long lived subnets. - /// - /// New subnets are registered as subscribed, removed subnets as unsubscribed and the Enr - /// updated accordingly. - fn update_long_lived_subnets(&mut self, mut subnets: HashSet) { - info!(subnets = ?subnets.iter().collect::>(),"Subscribing to long-lived subnets"); - for subnet in &subnets { - // Add the events for those subnets that are new as long lived subscriptions. - if !self.long_lived_subscriptions.contains(subnet) { - // Check if this subnet is new and send the subscription event if needed. 
- if !self.short_lived_subscriptions.contains_key(subnet) { - debug!( - ?subnet, - subscription_kind = ?SubscriptionKind::LongLived, - "Subscribing to subnet" - ); - self.queue_event(SubnetServiceMessage::Subscribe(Subnet::Attestation( - *subnet, - ))); - } - self.queue_event(SubnetServiceMessage::EnrAdd(Subnet::Attestation(*subnet))); - if !self.discovery_disabled { - self.queue_event(SubnetServiceMessage::DiscoverPeers(vec![SubnetDiscovery { - subnet: Subnet::Attestation(*subnet), - min_ttl: None, - }])) - } - } - } - - // Update the long_lived_subnets set and check for subnets that are being removed - std::mem::swap(&mut self.long_lived_subscriptions, &mut subnets); - for subnet in subnets { - if !self.long_lived_subscriptions.contains(&subnet) { - self.handle_removed_subnet(subnet, SubscriptionKind::LongLived); - } - } - } - - /// Checks if we have subscribed aggregate validators for the subnet. If not, checks the gossip - /// verification, re-propagates and returns false. - pub fn should_process_attestation( - &self, - subnet: SubnetId, - attestation: &Attestation, - ) -> bool { - // Proposer-only mode does not need to process attestations - if self.proposer_only { - return false; - } - self.aggregate_validators_on_subnet - .as_ref() - .map(|tracked_vals| { - tracked_vals.contains_key(&ExactSubnet { - subnet_id: subnet, - slot: attestation.data().slot, - }) - }) - .unwrap_or(true) - } - - /* Internal private functions */ - - /// Adds an event to the event queue and notifies that this service is ready to be polled - /// again. - fn queue_event(&mut self, ev: SubnetServiceMessage) { - self.events.push_back(ev); - if let Some(waker) = &self.waker { - waker.wake_by_ref() - } - } - /// Checks if there are currently queued discovery requests and the time required to make the - /// request. - /// - /// If there is sufficient time, queues a peer discovery request for all the required subnets. 
- fn discover_peers_request( - &mut self, - exact_subnets: impl Iterator, - ) -> Result<(), &'static str> { - let current_slot = self - .beacon_chain - .slot_clock - .now() - .ok_or("Could not get the current slot")?; - - let discovery_subnets: Vec = exact_subnets - .filter_map(|exact_subnet| { - // Check if there is enough time to perform a discovery lookup. - if exact_subnet.slot - >= current_slot.saturating_add(MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD) - { - // Send out an event to start looking for peers. - // Require the peer for an additional slot to ensure we keep the peer for the - // duration of the subscription. - let min_ttl = self - .beacon_chain - .slot_clock - .duration_to_slot(exact_subnet.slot + 1) - .map(|duration| std::time::Instant::now() + duration); - Some(SubnetDiscovery { - subnet: Subnet::Attestation(exact_subnet.subnet_id), - min_ttl, - }) - } else { - // We may want to check the global PeerInfo to see estimated timeouts for each - // peer before they can be removed. - warn!( - subnet_id = ?exact_subnet, - "Not enough time for a discovery search" - ); - None - } - }) - .collect(); - - if !discovery_subnets.is_empty() { - self.queue_event(SubnetServiceMessage::DiscoverPeers(discovery_subnets)); - } - Ok(()) - } - - // Subscribes to the subnet if it should be done immediately, or schedules it if required. - fn subscribe_to_short_lived_subnet( - &mut self, - ExactSubnet { subnet_id, slot }: ExactSubnet, - ) -> Result<(), &'static str> { - let slot_duration = self.beacon_chain.slot_clock.slot_duration(); - - // The short time we schedule the subscription before it's actually required. This - // ensures we are subscribed on time, and allows consecutive subscriptions to the same - // subnet to overlap, reducing subnet churn. - let advance_subscription_duration = slot_duration / ADVANCE_SUBSCRIBE_SLOT_FRACTION; - // The time to the required slot. 
- let time_to_subscription_slot = self - .beacon_chain - .slot_clock - .duration_to_slot(slot) - .unwrap_or_default(); // If this is a past slot we will just get a 0 duration. - - // Calculate how long before we need to subscribe to the subnet. - let time_to_subscription_start = - time_to_subscription_slot.saturating_sub(advance_subscription_duration); - - // The time after a duty slot where we no longer need it in the `aggregate_validators_on_subnet` - // delay map. - let time_to_unsubscribe = - time_to_subscription_slot + UNSUBSCRIBE_AFTER_AGGREGATOR_DUTY * slot_duration; - if let Some(tracked_vals) = self.aggregate_validators_on_subnet.as_mut() { - tracked_vals.insert_at(ExactSubnet { subnet_id, slot }, time_to_unsubscribe); - } - - // If the subscription should be done in the future, schedule it. Otherwise subscribe - // immediately. - if time_to_subscription_start.is_zero() { - // This is a current or past slot, we subscribe immediately. - self.subscribe_to_short_lived_subnet_immediately(subnet_id, slot + 1)?; - } else { - // This is a future slot, schedule subscribing. - trace!(subnet = ?subnet_id, ?time_to_subscription_start,"Scheduling subnet subscription"); - self.scheduled_short_lived_subscriptions - .insert_at(ExactSubnet { subnet_id, slot }, time_to_subscription_start); - } - - Ok(()) - } - - /* A collection of functions that handle the various timeouts */ - - /// Registers a subnet as subscribed. - /// - /// Checks that the time in which the subscription would end is not in the past. If we are - /// already subscribed, extends the timeout if necessary. If this is a new subscription, we send - /// out the appropriate events. - /// - /// On determinist long lived subnets, this is only used for short lived subscriptions. - fn subscribe_to_short_lived_subnet_immediately( - &mut self, - subnet_id: SubnetId, - end_slot: Slot, - ) -> Result<(), &'static str> { - if self.subscribe_all_subnets { - // Case not handled by this service. 
- return Ok(()); - } - - let time_to_subscription_end = self - .beacon_chain - .slot_clock - .duration_to_slot(end_slot) - .unwrap_or_default(); - - // First check this is worth doing. - if time_to_subscription_end.is_zero() { - return Err("Time when subscription would end has already passed."); - } - - let subscription_kind = SubscriptionKind::ShortLived; - - // We need to check and add a subscription for the right kind, regardless of the presence - // of the subnet as a subscription of the other kind. This is mainly since long lived - // subscriptions can be removed at any time when a validator goes offline. - - let (subscriptions, already_subscribed_as_other_kind) = ( - &mut self.short_lived_subscriptions, - self.long_lived_subscriptions.contains(&subnet_id), - ); - - match subscriptions.get(&subnet_id) { - Some(current_end_slot) => { - // We are already subscribed. Check if we need to extend the subscription. - if &end_slot > current_end_slot { - trace!( - subnet = ?subnet_id, - prev_end_slot = %current_end_slot, - new_end_slot = %end_slot, - ?subscription_kind, - "Extending subscription to subnet" - ); - subscriptions.insert_at(subnet_id, end_slot, time_to_subscription_end); - } - } - None => { - // This is a new subscription. Add with the corresponding timeout and send the - // notification. - subscriptions.insert_at(subnet_id, end_slot, time_to_subscription_end); - - // Inform of the subscription. - if !already_subscribed_as_other_kind { - debug!( - subnet = ?subnet_id, - %end_slot, - ?subscription_kind, - "Subscribing to subnet" - ); - self.queue_event(SubnetServiceMessage::Subscribe(Subnet::Attestation( - subnet_id, - ))); - } - } - } - - Ok(()) - } - - // Unsubscribes from a subnet that was removed if it does not continue to exist as a - // subscription of the other kind. For long lived subscriptions, it also removes the - // advertisement from our ENR. 
- fn handle_removed_subnet(&mut self, subnet_id: SubnetId, subscription_kind: SubscriptionKind) { - let exists_in_other_subscriptions = match subscription_kind { - SubscriptionKind::LongLived => self.short_lived_subscriptions.contains_key(&subnet_id), - SubscriptionKind::ShortLived => self.long_lived_subscriptions.contains(&subnet_id), - }; - - if !exists_in_other_subscriptions { - // Subscription no longer exists as short lived or long lived. - debug!( - subnet = ?subnet_id, - ?subscription_kind, - "Unsubscribing from subnet" - ); - self.queue_event(SubnetServiceMessage::Unsubscribe(Subnet::Attestation( - subnet_id, - ))); - } - - if subscription_kind == SubscriptionKind::LongLived { - // Remove from our ENR even if we remain subscribed in other way. - self.queue_event(SubnetServiceMessage::EnrRemove(Subnet::Attestation( - subnet_id, - ))); - } - } -} - -impl Stream for AttestationService { - type Item = SubnetServiceMessage; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - // Update the waker if needed. - if let Some(waker) = &self.waker { - if !waker.will_wake(cx.waker()) { - self.waker = Some(cx.waker().clone()); - } - } else { - self.waker = Some(cx.waker().clone()); - } - - // Send out any generated events. - if let Some(event) = self.events.pop_front() { - return Poll::Ready(Some(event)); - } - - // If we aren't subscribed to all subnets, handle the deterministic long-lived subnets - if !self.subscribe_all_subnets { - match self.next_long_lived_subscription_event.as_mut().poll(cx) { - Poll::Ready(_) => { - self.recompute_long_lived_subnets(); - // We re-wake the task as there could be other subscriptions to process - self.waker - .as_ref() - .expect("Waker has been set") - .wake_by_ref(); - } - Poll::Pending => {} - } - } - - // Process scheduled subscriptions that might be ready, since those can extend a soon to - // expire subscription. 
- match self.scheduled_short_lived_subscriptions.poll_next_unpin(cx) { - Poll::Ready(Some(Ok(ExactSubnet { subnet_id, slot }))) => { - if let Err(e) = - self.subscribe_to_short_lived_subnet_immediately(subnet_id, slot + 1) - { - debug!(subnet = ?subnet_id, err = e,"Failed to subscribe to short lived subnet"); - } - self.waker - .as_ref() - .expect("Waker has been set") - .wake_by_ref(); - } - Poll::Ready(Some(Err(e))) => { - error!( - error = e, - "Failed to check for scheduled subnet subscriptions" - ); - } - Poll::Ready(None) | Poll::Pending => {} - } - - // Finally process any expired subscriptions. - match self.short_lived_subscriptions.poll_next_unpin(cx) { - Poll::Ready(Some(Ok((subnet_id, _end_slot)))) => { - self.handle_removed_subnet(subnet_id, SubscriptionKind::ShortLived); - // We re-wake the task as there could be other subscriptions to process - self.waker - .as_ref() - .expect("Waker has been set") - .wake_by_ref(); - } - Poll::Ready(Some(Err(e))) => { - error!(error = e, "Failed to check for subnet unsubscription times"); - } - Poll::Ready(None) | Poll::Pending => {} - } - - // Poll to remove entries on expiration, no need to act on expiration events. - if let Some(tracked_vals) = self.aggregate_validators_on_subnet.as_mut() { - if let Poll::Ready(Some(Err(e))) = tracked_vals.poll_next_unpin(cx) { - error!( - error = e, - "Failed to check for aggregate validator on subnet expirations" - ); - } - } - - Poll::Pending - } -} diff --git a/beacon_node/network/src/subnet_service/sync_subnets.rs b/beacon_node/network/src/subnet_service/sync_subnets.rs deleted file mode 100644 index 6b3834e1958..00000000000 --- a/beacon_node/network/src/subnet_service/sync_subnets.rs +++ /dev/null @@ -1,345 +0,0 @@ -//! This service keeps track of which sync committee subnet the beacon node should be subscribed to at any -//! given time. It schedules subscriptions to sync committee subnets and requests peer discoveries. 
- -use std::collections::{hash_map::Entry, HashMap, VecDeque}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; -use std::time::Duration; - -use futures::prelude::*; -use tracing::{debug, error, trace, warn}; - -use super::SubnetServiceMessage; -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use delay_map::HashSetDelay; -use lighthouse_network::{NetworkConfig, Subnet, SubnetDiscovery}; -use slot_clock::SlotClock; -use types::{Epoch, EthSpec, SyncCommitteeSubscription, SyncSubnetId}; - -use crate::metrics; - -/// The minimum number of slots ahead that we attempt to discover peers for a subscription. If the -/// slot is less than this number, skip the peer discovery process. -/// Subnet discovery query takes at most 30 secs, 2 slots take 24s. -const MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD: u64 = 2; - -/// A particular subnet at a given slot. -#[derive(PartialEq, Eq, Hash, Clone, Debug)] -pub struct ExactSubnet { - /// The `SyncSubnetId` associated with this subnet. - pub subnet_id: SyncSubnetId, - /// The epoch until which we need to stay subscribed to the subnet. - pub until_epoch: Epoch, -} -pub struct SyncCommitteeService { - /// Queued events to return to the driving service. - events: VecDeque, - - /// A reference to the beacon chain to process received attestations. - pub(crate) beacon_chain: Arc>, - - /// The collection of all currently subscribed subnets. - subscriptions: HashMap, - - /// A collection of timeouts for when to unsubscribe from a subnet. - unsubscriptions: HashSetDelay, - - /// The waker for the current thread. - waker: Option, - - /// The discovery mechanism of lighthouse is disabled. - discovery_disabled: bool, - - /// We are always subscribed to all subnets. - subscribe_all_subnets: bool, - - /// Whether this node is a block proposer-only node. 
- proposer_only: bool, -} - -impl SyncCommitteeService { - /* Public functions */ - - pub fn new(beacon_chain: Arc>, config: &NetworkConfig) -> Self { - let spec = &beacon_chain.spec; - let epoch_duration_secs = - beacon_chain.slot_clock.slot_duration().as_secs() * T::EthSpec::slots_per_epoch(); - let default_timeout = - epoch_duration_secs.saturating_mul(spec.epochs_per_sync_committee_period.as_u64()); - - SyncCommitteeService { - events: VecDeque::with_capacity(10), - beacon_chain, - subscriptions: HashMap::new(), - unsubscriptions: HashSetDelay::new(Duration::from_secs(default_timeout)), - waker: None, - subscribe_all_subnets: config.subscribe_all_subnets, - discovery_disabled: config.disable_discovery, - proposer_only: config.proposer_only, - } - } - - /// Return count of all currently subscribed subnets. - #[cfg(test)] - pub fn subscription_count(&self) -> usize { - use types::consts::altair::SYNC_COMMITTEE_SUBNET_COUNT; - if self.subscribe_all_subnets { - SYNC_COMMITTEE_SUBNET_COUNT as usize - } else { - self.subscriptions.len() - } - } - - /// Processes a list of sync committee subscriptions. - /// - /// This will: - /// - Search for peers for required subnets. - /// - Request subscriptions required subnets. - /// - Build the timeouts for each of these events. - /// - /// This returns a result simply for the ergonomics of using ?. The result can be - /// safely dropped. - pub fn validator_subscriptions( - &mut self, - subscriptions: Vec, - ) -> Result<(), String> { - // A proposer-only node does not subscribe to any sync-committees - if self.proposer_only { - return Ok(()); - } - - let mut subnets_to_discover = Vec::new(); - for subscription in subscriptions { - metrics::inc_counter(&metrics::SYNC_COMMITTEE_SUBSCRIPTION_REQUESTS); - //NOTE: We assume all subscriptions have been verified before reaching this service - - // Registers the validator with the subnet service. - // This will subscribe to long-lived random subnets if required. 
- trace!(?subscription, "Sync committee subscription"); - - let subnet_ids = match SyncSubnetId::compute_subnets_for_sync_committee::( - &subscription.sync_committee_indices, - ) { - Ok(subnet_ids) => subnet_ids, - Err(e) => { - warn!( - error = ?e, - validator_index = subscription.validator_index, - "Failed to compute subnet id for sync committee subscription" - ); - continue; - } - }; - - for subnet_id in subnet_ids { - let exact_subnet = ExactSubnet { - subnet_id, - until_epoch: subscription.until_epoch, - }; - subnets_to_discover.push(exact_subnet.clone()); - if let Err(e) = self.subscribe_to_subnet(exact_subnet.clone()) { - warn!( - error = e, - validator_index = subscription.validator_index, - "Subscription to sync subnet error" - ); - } else { - trace!( - ?exact_subnet, - validator_index = subscription.validator_index, - "Subscribed to subnet for sync committee duties" - ); - } - } - } - // If the discovery mechanism isn't disabled, attempt to set up a peer discovery for the - // required subnets. - if !self.discovery_disabled { - if let Err(e) = self.discover_peers_request(subnets_to_discover.iter()) { - warn!(error = e, "Discovery lookup request error"); - }; - } - - // pre-emptively wake the thread to check for new events - if let Some(waker) = &self.waker { - waker.wake_by_ref(); - } - Ok(()) - } - - /* Internal private functions */ - - /// Checks if there are currently queued discovery requests and the time required to make the - /// request. - /// - /// If there is sufficient time, queues a peer discovery request for all the required subnets. 
- fn discover_peers_request<'a>( - &mut self, - exact_subnets: impl Iterator, - ) -> Result<(), &'static str> { - let current_slot = self - .beacon_chain - .slot_clock - .now() - .ok_or("Could not get the current slot")?; - - let slots_per_epoch = T::EthSpec::slots_per_epoch(); - - let discovery_subnets: Vec = exact_subnets - .filter_map(|exact_subnet| { - let until_slot = exact_subnet.until_epoch.end_slot(slots_per_epoch); - // check if there is enough time to perform a discovery lookup - if until_slot >= current_slot.saturating_add(MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD) { - // if the slot is more than epoch away, add an event to start looking for peers - // add one slot to ensure we keep the peer for the subscription slot - let min_ttl = self - .beacon_chain - .slot_clock - .duration_to_slot(until_slot + 1) - .map(|duration| std::time::Instant::now() + duration); - Some(SubnetDiscovery { - subnet: Subnet::SyncCommittee(exact_subnet.subnet_id), - min_ttl, - }) - } else { - // We may want to check the global PeerInfo to see estimated timeouts for each - // peer before they can be removed. - warn!( - subnet_id = ?exact_subnet, - "Not enough time for a discovery search" - ); - None - } - }) - .collect(); - - if !discovery_subnets.is_empty() { - self.events - .push_back(SubnetServiceMessage::DiscoverPeers(discovery_subnets)); - } - Ok(()) - } - - /// Adds a subscription event and an associated unsubscription event if required. - fn subscribe_to_subnet(&mut self, exact_subnet: ExactSubnet) -> Result<(), &'static str> { - // Return if we have subscribed to all subnets - if self.subscribe_all_subnets { - return Ok(()); - } - - // Return if we already have a subscription for exact_subnet - if self.subscriptions.get(&exact_subnet.subnet_id) == Some(&exact_subnet.until_epoch) { - return Ok(()); - } - - // Return if we already have subscription set to expire later than the current request. 
- if let Some(until_epoch) = self.subscriptions.get(&exact_subnet.subnet_id) { - if *until_epoch >= exact_subnet.until_epoch { - return Ok(()); - } - } - - // initialise timing variables - let current_slot = self - .beacon_chain - .slot_clock - .now() - .ok_or("Could not get the current slot")?; - - let slots_per_epoch = T::EthSpec::slots_per_epoch(); - let until_slot = exact_subnet.until_epoch.end_slot(slots_per_epoch); - // Calculate the duration to the unsubscription event. - let expected_end_subscription_duration = if current_slot >= until_slot { - warn!( - %current_slot, - ?exact_subnet, - "Sync committee subscription is past expiration" - ); - return Ok(()); - } else { - let slot_duration = self.beacon_chain.slot_clock.slot_duration(); - - // the duration until we no longer need this subscription. We assume a single slot is - // sufficient. - self.beacon_chain - .slot_clock - .duration_to_slot(until_slot) - .ok_or("Unable to determine duration to unsubscription slot")? - + slot_duration - }; - - if let Entry::Vacant(e) = self.subscriptions.entry(exact_subnet.subnet_id) { - // We are not currently subscribed and have no waiting subscription, create one - debug!(subnet = *exact_subnet.subnet_id, until_epoch = ?exact_subnet.until_epoch, "Subscribing to subnet"); - e.insert(exact_subnet.until_epoch); - self.events - .push_back(SubnetServiceMessage::Subscribe(Subnet::SyncCommittee( - exact_subnet.subnet_id, - ))); - - // add the subnet to the ENR bitfield - self.events - .push_back(SubnetServiceMessage::EnrAdd(Subnet::SyncCommittee( - exact_subnet.subnet_id, - ))); - - // add an unsubscription event to remove ourselves from the subnet once completed - self.unsubscriptions - .insert_at(exact_subnet.subnet_id, expected_end_subscription_duration); - } else { - // We are already subscribed, extend the unsubscription duration - self.unsubscriptions - .update_timeout(&exact_subnet.subnet_id, expected_end_subscription_duration); - } - - Ok(()) - } - - /// A queued 
unsubscription is ready. - fn handle_unsubscriptions(&mut self, subnet_id: SyncSubnetId) { - debug!(subnet = *subnet_id, "Unsubscribing from subnet"); - - self.subscriptions.remove(&subnet_id); - self.events - .push_back(SubnetServiceMessage::Unsubscribe(Subnet::SyncCommittee( - subnet_id, - ))); - - self.events - .push_back(SubnetServiceMessage::EnrRemove(Subnet::SyncCommittee( - subnet_id, - ))); - } -} - -impl Stream for SyncCommitteeService { - type Item = SubnetServiceMessage; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - // update the waker if needed - if let Some(waker) = &self.waker { - if !waker.will_wake(cx.waker()) { - self.waker = Some(cx.waker().clone()); - } - } else { - self.waker = Some(cx.waker().clone()); - } - - // process any un-subscription events - match self.unsubscriptions.poll_next_unpin(cx) { - Poll::Ready(Some(Ok(exact_subnet))) => self.handle_unsubscriptions(exact_subnet), - Poll::Ready(Some(Err(e))) => { - error!(error = e, "Failed to check for subnet unsubscription times"); - } - Poll::Ready(None) | Poll::Pending => {} - } - - // process any generated events - if let Some(event) = self.events.pop_front() { - return Poll::Ready(Some(event)); - } - - Poll::Pending - } -} diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index b60c21972fb..f8ffd298caf 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -59,7 +59,7 @@ mod single_block_lookup; /// reaches the maximum depth it will force trigger range sync. 
pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE; -const FAILED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60; +const IGNORED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60; pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 4; /// Maximum time we allow a lookup to exist before assuming it is stuck and will never make @@ -110,8 +110,10 @@ enum Action { } pub struct BlockLookups { - /// A cache of failed chain lookups to prevent duplicate searches. - failed_chains: LRUTimeCache, + /// A cache of block roots that must be ignored for some time to prevent useless searches. For + /// example if a chain is too long, its lookup chain is dropped, and range sync is expected to + /// eventually sync those blocks + ignored_chains: LRUTimeCache, // TODO: Why not index lookups by block_root? single_block_lookups: FnvHashMap>, @@ -128,21 +130,21 @@ pub(crate) type BlockLookupSummary = (Id, Hash256, Option, Vec) impl BlockLookups { pub fn new() -> Self { Self { - failed_chains: LRUTimeCache::new(Duration::from_secs( - FAILED_CHAINS_CACHE_EXPIRY_SECONDS, + ignored_chains: LRUTimeCache::new(Duration::from_secs( + IGNORED_CHAINS_CACHE_EXPIRY_SECONDS, )), single_block_lookups: Default::default(), } } #[cfg(test)] - pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) { - self.failed_chains.insert(block_root); + pub(crate) fn insert_ignored_chain(&mut self, block_root: Hash256) { + self.ignored_chains.insert(block_root); } #[cfg(test)] - pub(crate) fn get_failed_chains(&mut self) -> Vec { - self.failed_chains.keys().cloned().collect() + pub(crate) fn get_ignored_chains(&mut self) -> Vec { + self.ignored_chains.keys().cloned().collect() } #[cfg(test)] @@ -184,7 +186,7 @@ impl BlockLookups { self.search_parent_of_child(parent_root, block_root, &[peer_id], cx); // Only create the child lookup if the parent exists if parent_lookup_exists { - // `search_parent_of_child` ensures that parent root is not a failed chain + // `search_parent_of_child` ensures that the parent 
lookup exists so we can safely wait for it self.new_current_lookup( block_root, Some(block_component), @@ -244,8 +246,8 @@ impl BlockLookups { debug!(block_root = ?block_root_to_search, "Parent lookup chain too long"); // Searching for this parent would extend a parent chain over the max - // Insert the tip only to failed chains - self.failed_chains.insert(parent_chain.tip); + // Insert the tip only to chains to ignore + self.ignored_chains.insert(parent_chain.tip); // Note: Drop only the chain that's too long until it merges with another chain // that's not too long. Consider this attack: there's a chain of valid unknown @@ -330,12 +332,9 @@ impl BlockLookups { peers: &[PeerId], cx: &mut SyncNetworkContext, ) -> bool { - // If this block or it's parent is part of a known failed chain, ignore it. - if self.failed_chains.contains(&block_root) { - debug!(?block_root, "Block is from a past failed chain. Dropping"); - for peer_id in peers { - cx.report_peer(*peer_id, PeerAction::MidToleranceError, "failed_chain"); - } + // If this block or it's parent is part of a known ignored chain, ignore it. + if self.ignored_chains.contains(&block_root) { + debug!(?block_root, "Dropping lookup for block marked ignored"); return false; } diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 36509d2563e..8fb3248a871 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -219,7 +219,7 @@ impl SingleBlockLookup { // can assert that this is the correct value of `blob_kzg_commitments_count`. 
match cx.chain.get_block_process_status(&self.block_root) { BlockProcessStatus::Unknown => None, - BlockProcessStatus::NotValidated(block) + BlockProcessStatus::NotValidated(block, _) | BlockProcessStatus::ExecutionValidated(block) => Some(block.clone()), } }) { diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 448e784ab6d..d7ba0280542 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -328,13 +328,13 @@ impl SyncManager { } #[cfg(test)] - pub(crate) fn get_failed_chains(&mut self) -> Vec { - self.block_lookups.get_failed_chains() + pub(crate) fn get_ignored_chains(&mut self) -> Vec { + self.block_lookups.get_ignored_chains() } #[cfg(test)] - pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) { - self.block_lookups.insert_failed_chain(block_root); + pub(crate) fn insert_ignored_chain(&mut self, block_root: Hash256) { + self.block_lookups.insert_ignored_chain(block_root); } #[cfg(test)] diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 17a42957009..ac2991c1474 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -49,8 +49,8 @@ use tokio::sync::mpsc; use tracing::{Span, debug, debug_span, error, warn}; use types::blob_sidecar::FixedBlobSidecarList; use types::{ - BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, ForkContext, - Hash256, SignedBeaconBlock, Slot, + BlobSidecar, BlockImportSource, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, + ForkContext, Hash256, SignedBeaconBlock, Slot, }; pub mod custody; @@ -835,14 +835,26 @@ impl SyncNetworkContext { match self.chain.get_block_process_status(&block_root) { // Unknown block, continue request to download BlockProcessStatus::Unknown => {} - // Block is known are currently processing, expect a future event with the result of - // 
processing. - BlockProcessStatus::NotValidated { .. } => { - // Lookup sync event safety: If the block is currently in the processing cache, we - // are guaranteed to receive a `SyncMessage::GossipBlockProcessResult` that will - // make progress on this lookup - return Ok(LookupRequestResult::Pending("block in processing cache")); - } + // Block is known and currently processing. Imports from gossip and HTTP API insert the + // block in the da_cache. However, HTTP API is unable to notify sync when it completes + // block import. Returning `Pending` here will result in stuck lookups if the block is + // importing from sync. + BlockProcessStatus::NotValidated(_, source) => match source { + BlockImportSource::Gossip => { + // Lookup sync event safety: If the block is currently in the processing cache, we + // are guaranteed to receive a `SyncMessage::GossipBlockProcessResult` that will + // make progress on this lookup + return Ok(LookupRequestResult::Pending("block in processing cache")); + } + BlockImportSource::Lookup + | BlockImportSource::RangeSync + | BlockImportSource::HttpApi => { + // Lookup, RangeSync or HttpApi block import don't emit the GossipBlockProcessResult + // event. If a lookup happens to be created during block import from one of + // those sources just import the block twice. Otherwise the lookup will get + // stuck. Double imports are fine, they just waste resources. + } + }, // Block is fully validated. If it's not yet imported it's waiting for missing block // components. Consider this request completed and do nothing. BlockProcessStatus::ExecutionValidated { .. 
} => { diff --git a/beacon_node/network/src/sync/tests/lookups.rs b/beacon_node/network/src/sync/tests/lookups.rs index b5bc10851dc..fc641861754 100644 --- a/beacon_node/network/src/sync/tests/lookups.rs +++ b/beacon_node/network/src/sync/tests/lookups.rs @@ -41,8 +41,8 @@ use slot_clock::{SlotClock, TestingSlotClock}; use tokio::sync::mpsc; use tracing::info; use types::{ - BeaconState, BeaconStateBase, BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, ForkName, - Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot, + BeaconState, BeaconStateBase, BlobSidecar, BlockImportSource, DataColumnSidecar, EthSpec, + ForkContext, ForkName, Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot, data_column_sidecar::ColumnIndex, test_utils::{SeedableRng, TestRandom, XorShiftRng}, }; @@ -285,21 +285,21 @@ impl TestRig { ); } - fn insert_failed_chain(&mut self, block_root: Hash256) { - self.sync_manager.insert_failed_chain(block_root); + fn insert_ignored_chain(&mut self, block_root: Hash256) { + self.sync_manager.insert_ignored_chain(block_root); } - fn assert_not_failed_chain(&mut self, chain_hash: Hash256) { - let failed_chains = self.sync_manager.get_failed_chains(); - if failed_chains.contains(&chain_hash) { - panic!("failed chains contain {chain_hash:?}: {failed_chains:?}"); + fn assert_not_ignored_chain(&mut self, chain_hash: Hash256) { + let chains = self.sync_manager.get_ignored_chains(); + if chains.contains(&chain_hash) { + panic!("ignored chains contain {chain_hash:?}: {chains:?}"); } } - fn assert_failed_chain(&mut self, chain_hash: Hash256) { - let failed_chains = self.sync_manager.get_failed_chains(); - if !failed_chains.contains(&chain_hash) { - panic!("expected failed chains to contain {chain_hash:?}: {failed_chains:?}"); + fn assert_ignored_chain(&mut self, chain_hash: Hash256) { + let chains = self.sync_manager.get_ignored_chains(); + if !chains.contains(&chain_hash) { + panic!("expected ignored chains to contain {chain_hash:?}: {chains:?}"); } } @@ 
-1021,11 +1021,6 @@ impl TestRig { self.log(&format!("Found expected penalty {penalty_msg}")); } - pub fn expect_single_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) { - self.expect_penalty(peer_id, expect_penalty_msg); - self.expect_no_penalty_for(peer_id); - } - pub fn block_with_parent_and_blobs( &mut self, parent_root: Hash256, @@ -1084,7 +1079,7 @@ impl TestRig { .harness .chain .data_availability_checker - .put_pending_executed_block(executed_block) + .put_executed_block(executed_block) .unwrap() { Availability::Available(_) => panic!("block removed from da_checker, available"), @@ -1114,20 +1109,19 @@ impl TestRig { }; } - fn insert_block_to_processing_cache(&mut self, block: Arc>) { + fn insert_block_to_availability_cache(&mut self, block: Arc>) { self.harness .chain - .reqresp_pre_import_cache - .write() - .insert(block.canonical_root(), block); + .data_availability_checker + .put_pre_execution_block(block.canonical_root(), block, BlockImportSource::Gossip) + .unwrap(); } fn simulate_block_gossip_processing_becomes_invalid(&mut self, block_root: Hash256) { self.harness .chain - .reqresp_pre_import_cache - .write() - .remove(&block_root); + .data_availability_checker + .remove_block_on_execution_error(&block_root); self.send_sync_message(SyncMessage::GossipBlockProcessResult { block_root, @@ -1140,11 +1134,6 @@ impl TestRig { block: Arc>, ) { let block_root = block.canonical_root(); - self.harness - .chain - .reqresp_pre_import_cache - .write() - .remove(&block_root); self.insert_block_to_da_checker(block); @@ -1461,7 +1450,7 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { // Trigger the request rig.trigger_unknown_parent_block(peer_id, block.into()); for i in 1..=PARENT_FAIL_TOLERANCE { - rig.assert_not_failed_chain(block_root); + rig.assert_not_ignored_chain(block_root); let id = rig.expect_block_parent_request(parent_root); if i % 2 != 0 { // The request fails. It should be tried again. 
@@ -1474,8 +1463,8 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { } } - rig.assert_not_failed_chain(block_root); - rig.assert_not_failed_chain(parent.canonical_root()); + rig.assert_not_ignored_chain(block_root); + rig.assert_not_ignored_chain(parent.canonical_root()); rig.expect_no_active_lookups_empty_network(); } @@ -1500,7 +1489,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { for _ in 0..PROCESSING_FAILURES { let id = rig.expect_block_parent_request(parent_root); // Blobs are only requested in the previous first iteration as this test only retries blocks - rig.assert_not_failed_chain(block_root); + rig.assert_not_ignored_chain(block_root); // send the right parent but fail processing rig.parent_lookup_block_response(id, peer_id, Some(parent.clone().into())); rig.parent_block_processed(block_root, BlockError::BlockSlotLimitReached.into()); @@ -1508,7 +1497,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { rig.expect_penalty(peer_id, "lookup_block_processing_failure"); } - rig.assert_not_failed_chain(block_root); + rig.assert_not_ignored_chain(block_root); rig.expect_no_active_lookups_empty_network(); } @@ -1551,12 +1540,14 @@ fn test_parent_lookup_too_deep_grow_ancestor() { ); // Should not penalize peer, but network is not clear because of the blocks_by_range requests rig.expect_no_penalty_for(peer_id); - rig.assert_failed_chain(chain_hash); + rig.assert_ignored_chain(chain_hash); } // Regression test for https://github.com/sigp/lighthouse/pull/7118 +// 8042 UPDATE: block was previously added to the failed_chains cache, now it's inserted into the +// ignored chains cache. The regression test still applies as the chaild lookup is not created #[test] -fn test_child_lookup_not_created_for_failed_chain_parent_after_processing() { +fn test_child_lookup_not_created_for_ignored_chain_parent_after_processing() { // GIVEN: A parent chain longer than PARENT_DEPTH_TOLERANCE. 
let mut rig = TestRig::test_setup(); let mut blocks = rig.rand_blockchain(PARENT_DEPTH_TOLERANCE + 1); @@ -1586,8 +1577,8 @@ fn test_child_lookup_not_created_for_failed_chain_parent_after_processing() { } // At this point, the chain should have been deemed too deep and pruned. - // The tip root should have been inserted into failed chains. - rig.assert_failed_chain(tip_root); + // The tip root should have been inserted into ignored chains. + rig.assert_ignored_chain(tip_root); rig.expect_no_penalty_for(peer_id); // WHEN: Trigger the extending block that points to the tip. @@ -1604,10 +1595,10 @@ fn test_child_lookup_not_created_for_failed_chain_parent_after_processing() { }), ); - // THEN: The extending block should not create a lookup because the tip was inserted into failed chains. + // THEN: The extending block should not create a lookup because the tip was inserted into + // ignored chains. rig.expect_no_active_lookups(); - // AND: The peer should be penalized for extending a failed chain. 
- rig.expect_single_penalty(peer_id, "failed_chain"); + rig.expect_no_penalty_for(peer_id); rig.expect_empty_network(); } @@ -1646,7 +1637,7 @@ fn test_parent_lookup_too_deep_grow_tip() { ); // Should not penalize peer, but network is not clear because of the blocks_by_range requests rig.expect_no_penalty_for(peer_id); - rig.assert_failed_chain(tip.canonical_root()); + rig.assert_ignored_chain(tip.canonical_root()); } #[test] @@ -1699,15 +1690,14 @@ fn test_lookup_add_peers_to_parent() { } #[test] -fn test_skip_creating_failed_parent_lookup() { +fn test_skip_creating_ignored_parent_lookup() { let mut rig = TestRig::test_setup(); let (_, block, parent_root, _) = rig.rand_block_and_parent(); let peer_id = rig.new_connected_peer(); - rig.insert_failed_chain(parent_root); + rig.insert_ignored_chain(parent_root); rig.trigger_unknown_parent_block(peer_id, block.into()); - // Expect single penalty for peer, despite dropping two lookups - rig.expect_single_penalty(peer_id, "failed_chain"); - // Both current and parent lookup should be rejected + rig.expect_no_penalty_for(peer_id); + // Both current and parent lookup should not be created rig.expect_no_active_lookups(); } @@ -1845,7 +1835,7 @@ fn block_in_processing_cache_becomes_invalid() { let (block, blobs) = r.rand_block_and_blobs(NumBlobs::Number(1)); let block_root = block.canonical_root(); let peer_id = r.new_connected_peer(); - r.insert_block_to_processing_cache(block.clone().into()); + r.insert_block_to_availability_cache(block.clone().into()); r.trigger_unknown_block_from_attestation(block_root, peer_id); // Should trigger blob request let id = r.expect_blob_lookup_request(block_root); @@ -1871,7 +1861,7 @@ fn block_in_processing_cache_becomes_valid_imported() { let (block, blobs) = r.rand_block_and_blobs(NumBlobs::Number(1)); let block_root = block.canonical_root(); let peer_id = r.new_connected_peer(); - r.insert_block_to_processing_cache(block.clone().into()); + 
r.insert_block_to_availability_cache(block.clone().into()); r.trigger_unknown_block_from_attestation(block_root, peer_id); // Should trigger blob request let id = r.expect_blob_lookup_request(block_root); diff --git a/beacon_node/operation_pool/src/lib.rs b/beacon_node/operation_pool/src/lib.rs index dd01f568fa3..24e2cfbbb5d 100644 --- a/beacon_node/operation_pool/src/lib.rs +++ b/beacon_node/operation_pool/src/lib.rs @@ -457,32 +457,35 @@ impl OperationPool { .collect() } - /// Prune proposer slashings for validators which are exited in the finalized epoch. - pub fn prune_proposer_slashings(&self, head_state: &BeaconState) { + /// Prune proposer slashings for validators which are already slashed or exited in the finalized + /// epoch. + pub fn prune_proposer_slashings(&self, finalized_state: &BeaconState) { prune_validator_hash_map( &mut self.proposer_slashings.write(), - |_, validator| validator.exit_epoch <= head_state.finalized_checkpoint().epoch, - head_state, + |_, validator| { + validator.slashed || validator.exit_epoch <= finalized_state.current_epoch() + }, + finalized_state, ); } /// Prune attester slashings for all slashed or withdrawn validators, or attestations on another /// fork. - pub fn prune_attester_slashings(&self, head_state: &BeaconState) { + pub fn prune_attester_slashings(&self, finalized_state: &BeaconState) { self.attester_slashings.write().retain(|slashing| { // Check that the attestation's signature is still valid wrt the fork version. - let signature_ok = slashing.signature_is_still_valid(&head_state.fork()); + // We might be a bit slower to detect signature staleness by using the finalized state + // here, but we filter when proposing anyway, so in the worst case we just keep some + // stuff around until we finalize. + let signature_ok = slashing.signature_is_still_valid(&finalized_state.fork()); // Slashings that don't slash any validators can also be dropped. 
let slashing_ok = get_slashable_indices_modular( - head_state, + finalized_state, slashing.as_inner().to_ref(), |_, validator| { - // Declare that a validator is still slashable if they have not exited prior - // to the finalized epoch. - // - // We cannot check the `slashed` field since the `head` is not finalized and - // a fork could un-slash someone. - validator.exit_epoch > head_state.finalized_checkpoint().epoch + // Declare that a validator is still slashable if they have not been slashed in + // the finalized state, and have not exited at the finalized epoch. + !validator.slashed && validator.exit_epoch > finalized_state.current_epoch() }, ) .is_ok_and(|indices| !indices.is_empty()); @@ -531,17 +534,12 @@ impl OperationPool { ) } - /// Prune if validator has already exited at or before the finalized checkpoint of the head. - pub fn prune_voluntary_exits(&self, head_state: &BeaconState) { + /// Prune if validator has already exited in the finalized state. + pub fn prune_voluntary_exits(&self, finalized_state: &BeaconState, spec: &ChainSpec) { prune_validator_hash_map( &mut self.voluntary_exits.write(), - // This condition is slightly too loose, since there will be some finalized exits that - // are missed here. - // - // We choose simplicity over the gain of pruning more exits since they are small and - // should not be seen frequently. 
- |_, validator| validator.exit_epoch <= head_state.finalized_checkpoint().epoch, - head_state, + |_, validator| validator.exit_epoch != spec.far_future_epoch, + finalized_state, ); } @@ -642,14 +640,15 @@ impl OperationPool { &self, head_block: &SignedBeaconBlock, head_state: &BeaconState, + finalized_state: &BeaconState, current_epoch: Epoch, spec: &ChainSpec, ) { self.prune_attestations(current_epoch); self.prune_sync_contributions(head_state.slot()); - self.prune_proposer_slashings(head_state); - self.prune_attester_slashings(head_state); - self.prune_voluntary_exits(head_state); + self.prune_proposer_slashings(finalized_state); + self.prune_attester_slashings(finalized_state); + self.prune_voluntary_exits(finalized_state, spec); self.prune_bls_to_execution_changes(head_block, head_state, spec); } @@ -758,14 +757,14 @@ where fn prune_validator_hash_map( map: &mut HashMap>, prune_if: F, - head_state: &BeaconState, + state: &BeaconState, ) where F: Fn(u64, &Validator) -> bool, T: VerifyOperation, { map.retain(|&validator_index, op| { - op.signature_is_still_valid(&head_state.fork()) - && head_state + op.signature_is_still_valid(&state.fork()) + && state .validators() .get(validator_index as usize) .is_none_or(|validator| !prune_if(validator_index, validator)) diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 386eb721a04..2e3b3fde4b0 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -47,16 +47,17 @@ pub fn cli_app() -> Command { * Network parameters. */ .arg( - Arg::new("subscribe-all-data-column-subnets") - .long("subscribe-all-data-column-subnets") + Arg::new("supernode") + .long("supernode") + .alias("subscribe-all-data-column-subnets") .action(ArgAction::SetTrue) .help_heading(FLAG_HEADER) - .help("Subscribe to all data column subnets and participate in data custody for \ - all columns. This will also advertise the beacon node as being long-lived \ - subscribed to all data column subnets. 
\ - NOTE: this is an experimental flag and may change any time without notice!") + .help("Run as a voluntary supernode. This node will subscribe to all data column \ + subnets, custody all data columns, and perform reconstruction and cross-seeding. \ + This requires significantly more bandwidth, storage, and computation requirements but \ + the node will have direct access to all blobs via the beacon API and it \ + helps network resilience by serving all data columns to syncing peers.") .display_order(0) - .hide(true) ) .arg( // TODO(das): remove this before PeerDAS release @@ -401,6 +402,16 @@ pub fn cli_app() -> Command { .help_heading(FLAG_HEADER) .display_order(0) ) + .arg( + Arg::new("complete-blob-backfill") + .long("complete-blob-backfill") + .help("Download all blobs back to the Deneb fork epoch. This will likely result in \ + the node banning most of its peers.") + .action(ArgAction::SetTrue) + .help_heading(FLAG_HEADER) + .display_order(0) + .hide(true) + ) .arg( Arg::new("enable-private-discovery") .long("enable-private-discovery") @@ -688,38 +699,6 @@ pub fn cli_app() -> Command { .help_heading(FLAG_HEADER) .display_order(0) ) - - /* - * Eth1 Integration - */ - .arg( - Arg::new("eth1-purge-cache") - .long("eth1-purge-cache") - .value_name("PURGE-CACHE") - .help("DEPRECATED") - .action(ArgAction::SetTrue) - .help_heading(FLAG_HEADER) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("eth1-blocks-per-log-query") - .long("eth1-blocks-per-log-query") - .value_name("BLOCKS") - .help("DEPRECATED") - .action(ArgAction::Set) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("eth1-cache-follow-distance") - .long("eth1-cache-follow-distance") - .value_name("BLOCKS") - .help("DEPRECATED") - .action(ArgAction::Set) - .display_order(0) - .hide(true) - ) .arg( Arg::new("slots-per-restore-point") .long("slots-per-restore-point") @@ -769,7 +748,7 @@ pub fn cli_app() -> Command { .long("block-cache-size") .value_name("SIZE") .help("Specifies how many 
blocks the database should cache in memory") - .default_value("5") + .default_value("0") .action(ArgAction::Set) .display_order(0) ) @@ -1487,16 +1466,6 @@ pub fn cli_app() -> Command { .help_heading(FLAG_HEADER) .display_order(0) ) - .arg( - Arg::new("disable-deposit-contract-sync") - .long("disable-deposit-contract-sync") - .help("DEPRECATED") - .action(ArgAction::SetTrue) - .help_heading(FLAG_HEADER) - .conflicts_with("staking") - .display_order(0) - .hide(true) - ) .arg( Arg::new("disable-optimistic-finalized-sync") .long("disable-optimistic-finalized-sync") @@ -1507,15 +1476,6 @@ pub fn cli_app() -> Command { Lighthouse and only passed to the EL if initial verification fails.") .display_order(0) ) - .arg( - Arg::new("light-client-server") - .long("light-client-server") - .help("DEPRECATED") - .action(ArgAction::SetTrue) - - .help_heading(FLAG_HEADER) - .display_order(0) - ) .arg( Arg::new("disable-light-client-server") .long("disable-light-client-server") diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 1b5f25b3175..c2599ec0cd9 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -170,13 +170,6 @@ pub fn get_config( parse_required(cli_args, "http-duplicate-block-status")?; } - if cli_args.get_flag("light-client-server") { - warn!( - "The --light-client-server flag is deprecated. 
The light client server is enabled \ - by default" - ); - } - if cli_args.get_flag("disable-light-client-server") { client_config.chain.enable_light_client_server = false; } @@ -262,24 +255,6 @@ pub fn get_config( client_config.http_metrics.allocator_metrics_enabled = false; } - /* - * Deprecated Eth1 flags (can be removed in the next minor release after v7.1.0) - */ - if cli_args - .get_one::("eth1-blocks-per-log-query") - .is_some() - { - warn!("The eth1-blocks-per-log-query flag is deprecated"); - } - - if cli_args.get_flag("eth1-purge-cache") { - warn!("The eth1-purge-cache flag is deprecated"); - } - - if clap_utils::parse_optional::(cli_args, "eth1-cache-follow-distance")?.is_some() { - warn!("The eth1-cache-follow-distance flag is deprecated"); - } - // `--execution-endpoint` is required now. let endpoints: String = clap_utils::parse_required(cli_args, "execution-endpoint")?; let mut el_config = execution_layer::Config::default(); @@ -773,10 +748,6 @@ pub fn get_config( } } - if cli_args.get_flag("disable-deposit-contract-sync") { - warn!("The disable-deposit-contract-sync flag is deprecated"); - } - client_config.chain.prepare_payload_lookahead = clap_utils::parse_optional(cli_args, "prepare-payload-lookahead")? .map(Duration::from_millis) @@ -825,6 +796,14 @@ pub fn get_config( client_config.chain.genesis_backfill = true; } + client_config.chain.complete_blob_backfill = cli_args.get_flag("complete-blob-backfill"); + + // Ensure `prune_blobs` is false whenever complete-blob-backfill is set. This overrides any + // setting of `--prune-blobs true` applied earlier in flag parsing. 
+ if client_config.chain.complete_blob_backfill { + client_config.store.prune_blobs = false; + } + // Backfill sync rate-limiting client_config.beacon_processor.enable_backfill_rate_limiting = !cli_args.get_flag("disable-backfill-rate-limiting"); @@ -1154,7 +1133,7 @@ pub fn set_network_config( config.network_dir = data_dir.join(DEFAULT_NETWORK_DIR); }; - if parse_flag(cli_args, "subscribe-all-data-column-subnets") { + if parse_flag(cli_args, "supernode") { config.subscribe_all_data_column_subnets = true; } diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index ad81fa6076a..c0f15f2417b 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -19,7 +19,7 @@ pub const DEFAULT_BACKEND: DatabaseBackend = DatabaseBackend::LevelDb; pub const PREV_DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 2048; pub const DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 8192; pub const DEFAULT_EPOCHS_PER_STATE_DIFF: u64 = 8; -pub const DEFAULT_BLOCK_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(64); +pub const DEFAULT_BLOCK_CACHE_SIZE: usize = 0; pub const DEFAULT_STATE_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(128); pub const DEFAULT_STATE_CACHE_HEADROOM: NonZeroUsize = new_non_zero_usize(1); pub const DEFAULT_COMPRESSION_LEVEL: i32 = 1; @@ -34,7 +34,7 @@ pub const DEFAULT_BLOB_PUNE_MARGIN_EPOCHS: u64 = 0; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct StoreConfig { /// Maximum number of blocks to store in the in-memory block cache. - pub block_cache_size: NonZeroUsize, + pub block_cache_size: usize, /// Maximum number of states to store in the in-memory state cache. pub state_cache_size: NonZeroUsize, /// Minimum number of states to cull from the state cache upon fullness. 
diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 7156c75f114..0d8a65e0644 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -70,7 +70,7 @@ pub struct HotColdDB, Cold: ItemStore> { /// The hot database also contains all blocks. pub hot_db: Hot, /// LRU cache of deserialized blocks and blobs. Updated whenever a block or blob is loaded. - block_cache: Mutex>, + block_cache: Option>>, /// Cache of beacon states. /// /// LOCK ORDERING: this lock must always be locked *after* the `split` if both are required. @@ -229,7 +229,9 @@ impl HotColdDB, MemoryStore> { cold_db: MemoryStore::open(), blobs_db: MemoryStore::open(), hot_db: MemoryStore::open(), - block_cache: Mutex::new(BlockCache::new(config.block_cache_size)), + block_cache: NonZeroUsize::new(config.block_cache_size) + .map(BlockCache::new) + .map(Mutex::new), state_cache: Mutex::new(StateCache::new( config.state_cache_size, config.state_cache_headroom, @@ -281,7 +283,9 @@ impl HotColdDB, BeaconNodeBackend> { blobs_db: BeaconNodeBackend::open(&config, blobs_db_path)?, cold_db: BeaconNodeBackend::open(&config, cold_path)?, hot_db, - block_cache: Mutex::new(BlockCache::new(config.block_cache_size)), + block_cache: NonZeroUsize::new(config.block_cache_size) + .map(BlockCache::new) + .map(Mutex::new), state_cache: Mutex::new(StateCache::new( config.state_cache_size, config.state_cache_headroom, @@ -488,14 +492,17 @@ impl, Cold: ItemStore> HotColdDB pub fn register_metrics(&self) { let hsc_metrics = self.historic_state_cache.lock().metrics(); - metrics::set_gauge( - &metrics::STORE_BEACON_BLOCK_CACHE_SIZE, - self.block_cache.lock().block_cache.len() as i64, - ); - metrics::set_gauge( - &metrics::STORE_BEACON_BLOB_CACHE_SIZE, - self.block_cache.lock().blob_cache.len() as i64, - ); + if let Some(block_cache) = &self.block_cache { + let cache = block_cache.lock(); + metrics::set_gauge( + 
&metrics::STORE_BEACON_BLOCK_CACHE_SIZE, + cache.block_cache.len() as i64, + ); + metrics::set_gauge( + &metrics::STORE_BEACON_BLOB_CACHE_SIZE, + cache.blob_cache.len() as i64, + ); + } let state_cache = self.state_cache.lock(); metrics::set_gauge( &metrics::STORE_BEACON_STATE_CACHE_SIZE, @@ -553,7 +560,9 @@ impl, Cold: ItemStore> HotColdDB let block = self.block_as_kv_store_ops(block_root, block, &mut ops)?; self.hot_db.do_atomically(ops)?; // Update cache. - self.block_cache.lock().put_block(*block_root, block); + self.block_cache + .as_ref() + .inspect(|cache| cache.lock().put_block(*block_root, block)); Ok(()) } @@ -605,7 +614,9 @@ impl, Cold: ItemStore> HotColdDB metrics::inc_counter(&metrics::BEACON_BLOCK_GET_COUNT); // Check the cache. - if let Some(block) = self.block_cache.lock().get_block(block_root) { + if let Some(cache) = &self.block_cache + && let Some(block) = cache.lock().get_block(block_root) + { metrics::inc_counter(&metrics::BEACON_BLOCK_CACHE_HIT_COUNT); return Ok(Some(DatabaseBlock::Full(block.clone()))); } @@ -630,8 +641,8 @@ impl, Cold: ItemStore> HotColdDB // Add to cache. self.block_cache - .lock() - .put_block(*block_root, full_block.clone()); + .as_ref() + .inspect(|cache| cache.lock().put_block(*block_root, full_block.clone())); DatabaseBlock::Full(full_block) } else if !self.config.prune_payloads { @@ -656,6 +667,7 @@ impl, Cold: ItemStore> HotColdDB } /// Fetch a full block with execution payload from the store. + #[instrument(skip_all)] pub fn get_full_block( &self, block_root: &Hash256, @@ -901,7 +913,9 @@ impl, Cold: ItemStore> HotColdDB /// Delete a block from the store and the block cache. 
pub fn delete_block(&self, block_root: &Hash256) -> Result<(), Error> { - self.block_cache.lock().delete(block_root); + self.block_cache + .as_ref() + .inspect(|cache| cache.lock().delete(block_root)); self.hot_db .key_delete(DBColumn::BeaconBlock, block_root.as_slice())?; self.hot_db @@ -916,7 +930,9 @@ impl, Cold: ItemStore> HotColdDB block_root.as_slice(), &blobs.as_ssz_bytes(), )?; - self.block_cache.lock().put_blobs(*block_root, blobs); + self.block_cache + .as_ref() + .inspect(|cache| cache.lock().put_blobs(*block_root, blobs)); Ok(()) } @@ -944,9 +960,11 @@ impl, Cold: ItemStore> HotColdDB self.blobs_db .put(&DATA_COLUMN_CUSTODY_INFO_KEY, &data_column_custody_info)?; - self.block_cache - .lock() - .put_data_column_custody_info(Some(data_column_custody_info)); + self.block_cache.as_ref().inspect(|cache| { + cache + .lock() + .put_data_column_custody_info(Some(data_column_custody_info)) + }); Ok(()) } @@ -963,8 +981,8 @@ impl, Cold: ItemStore> HotColdDB &data_column.as_ssz_bytes(), )?; self.block_cache - .lock() - .put_data_column(*block_root, data_column); + .as_ref() + .inspect(|cache| cache.lock().put_data_column(*block_root, data_column)); } Ok(()) } @@ -1398,7 +1416,7 @@ impl, Cold: ItemStore> HotColdDB // Update database whilst holding a lock on cache, to ensure that the cache updates // atomically with the database. - let mut guard = self.block_cache.lock(); + let guard = self.block_cache.as_ref().map(|cache| cache.lock()); let blob_cache_ops = blobs_ops.clone(); // Try to execute blobs store ops. @@ -1445,57 +1463,68 @@ impl, Cold: ItemStore> HotColdDB return Err(e); } - for op in hot_db_cache_ops { + // Delete from the state cache. 
+ for op in &hot_db_cache_ops { match op { - StoreOp::PutBlock(block_root, block) => { - guard.put_block(block_root, (*block).clone()); + StoreOp::DeleteBlock(block_root) => { + self.state_cache.lock().delete_block_states(block_root); } + StoreOp::DeleteState(state_root, _) => { + self.state_cache.lock().delete_state(state_root) + } + _ => (), + } + } - StoreOp::PutBlobs(_, _) => (), + // If the block cache is enabled, also delete from the block cache. + if let Some(mut guard) = guard { + for op in hot_db_cache_ops { + match op { + StoreOp::PutBlock(block_root, block) => { + guard.put_block(block_root, (*block).clone()); + } - StoreOp::PutDataColumns(_, _) => (), + StoreOp::PutBlobs(_, _) => (), - StoreOp::PutState(_, _) => (), + StoreOp::PutDataColumns(_, _) => (), - StoreOp::PutStateSummary(_, _) => (), + StoreOp::PutState(_, _) => (), - StoreOp::DeleteBlock(block_root) => { - guard.delete_block(&block_root); - self.state_cache.lock().delete_block_states(&block_root); - } + StoreOp::PutStateSummary(_, _) => (), - StoreOp::DeleteState(state_root, _) => { - self.state_cache.lock().delete_state(&state_root) - } + StoreOp::DeleteBlock(block_root) => { + guard.delete_block(&block_root); + } - StoreOp::DeleteBlobs(_) => (), + StoreOp::DeleteState(_, _) => (), - StoreOp::DeleteDataColumns(_, _) => (), + StoreOp::DeleteBlobs(_) => (), - StoreOp::DeleteExecutionPayload(_) => (), + StoreOp::DeleteDataColumns(_, _) => (), - StoreOp::DeleteSyncCommitteeBranch(_) => (), + StoreOp::DeleteExecutionPayload(_) => (), - StoreOp::KeyValueOp(_) => (), - } - } + StoreOp::DeleteSyncCommitteeBranch(_) => (), - for op in blob_cache_ops { - match op { - StoreOp::PutBlobs(block_root, blobs) => { - guard.put_blobs(block_root, blobs); + StoreOp::KeyValueOp(_) => (), } + } - StoreOp::DeleteBlobs(block_root) => { - guard.delete_blobs(&block_root); - } + for op in blob_cache_ops { + match op { + StoreOp::PutBlobs(block_root, blobs) => { + guard.put_blobs(block_root, blobs); + } - _ => (), + 
StoreOp::DeleteBlobs(block_root) => { + guard.delete_blobs(&block_root); + } + + _ => (), + } } } - drop(guard); - Ok(()) } @@ -2424,21 +2453,23 @@ impl, Cold: ItemStore> HotColdDB /// If custody info doesn't exist in the cache, /// try to fetch from the DB and prime the cache. pub fn get_data_column_custody_info(&self) -> Result, Error> { - let Some(data_column_custody_info) = self.block_cache.lock().get_data_column_custody_info() - else { - let data_column_custody_info = self - .blobs_db - .get::(&DATA_COLUMN_CUSTODY_INFO_KEY)?; + if let Some(cache) = &self.block_cache + && let Some(data_column_custody_info) = cache.lock().get_data_column_custody_info() + { + return Ok(Some(data_column_custody_info)); + } + let data_column_custody_info = self + .blobs_db + .get::(&DATA_COLUMN_CUSTODY_INFO_KEY)?; - // Update the cache - self.block_cache + // Update the cache + self.block_cache.as_ref().inspect(|cache| { + cache .lock() - .put_data_column_custody_info(data_column_custody_info.clone()); - - return Ok(data_column_custody_info); - }; + .put_data_column_custody_info(data_column_custody_info.clone()) + }); - Ok(Some(data_column_custody_info)) + Ok(data_column_custody_info) } /// Fetch all columns for a given block from the store. @@ -2459,9 +2490,13 @@ impl, Cold: ItemStore> HotColdDB /// Fetch blobs for a given block from the store. pub fn get_blobs(&self, block_root: &Hash256) -> Result, Error> { // Check the cache. 
- if let Some(blobs) = self.block_cache.lock().get_blobs(block_root) { + if let Some(blobs) = self + .block_cache + .as_ref() + .and_then(|cache| cache.lock().get_blobs(block_root).cloned()) + { metrics::inc_counter(&metrics::BEACON_BLOBS_CACHE_HIT_COUNT); - return Ok(blobs.clone().into()); + return Ok(blobs.into()); } match self @@ -2480,8 +2515,8 @@ impl, Cold: ItemStore> HotColdDB { let blobs = BlobSidecarList::new(blobs, max_blobs_per_block as usize)?; self.block_cache - .lock() - .put_blobs(*block_root, blobs.clone()); + .as_ref() + .inspect(|cache| cache.lock().put_blobs(*block_root, blobs.clone())); Ok(BlobSidecarListFromRoot::Blobs(blobs)) } else { @@ -2514,8 +2549,8 @@ impl, Cold: ItemStore> HotColdDB // Check the cache. if let Some(data_column) = self .block_cache - .lock() - .get_data_column(block_root, column_index) + .as_ref() + .and_then(|cache| cache.lock().get_data_column(block_root, column_index)) { metrics::inc_counter(&metrics::BEACON_DATA_COLUMNS_CACHE_HIT_COUNT); return Ok(Some(data_column)); @@ -2527,9 +2562,11 @@ impl, Cold: ItemStore> HotColdDB )? { Some(ref data_column_bytes) => { let data_column = Arc::new(DataColumnSidecar::from_ssz_bytes(data_column_bytes)?); - self.block_cache - .lock() - .put_data_column(*block_root, data_column.clone()); + self.block_cache.as_ref().inspect(|cache| { + cache + .lock() + .put_data_column(*block_root, data_column.clone()) + }); Ok(Some(data_column)) } None => Ok(None), @@ -3263,11 +3300,11 @@ impl, Cold: ItemStore> HotColdDB } // Remove deleted blobs from the cache. 
- let mut block_cache = self.block_cache.lock(); - for block_root in removed_block_roots { - block_cache.delete_blobs(&block_root); + if let Some(mut block_cache) = self.block_cache.as_ref().map(|cache| cache.lock()) { + for block_root in removed_block_roots { + block_cache.delete_blobs(&block_root); + } } - drop(block_cache); let new_blob_info = BlobInfo { oldest_blob_slot: Some(end_slot + 1), diff --git a/book/src/advanced_database_migrations.md b/book/src/advanced_database_migrations.md index e29397619cf..3552a90b0e8 100644 --- a/book/src/advanced_database_migrations.md +++ b/book/src/advanced_database_migrations.md @@ -17,6 +17,7 @@ validator client or the slasher**. | Lighthouse version | Release date | Schema version | Downgrade available? | |--------------------|--------------|----------------|----------------------| +| v8.0.0-rc.0 | Sep 2025 | v28 | yes before Fulu | | v7.1.0 | Jul 2025 | v26 | yes | | v7.0.0 | Apr 2025 | v22 | no | | v6.0.0 | Nov 2024 | v22 | no | @@ -207,6 +208,7 @@ Here are the steps to prune historic states: | Lighthouse version | Release date | Schema version | Downgrade available? | |--------------------|--------------|----------------|-------------------------------------| +| v8.0.0-rc.0 | Sep 2025 | v28 | yes before Fulu | | v7.1.0 | Jul 2025 | v26 | yes | | v7.0.0 | Apr 2025 | v22 | no | | v6.0.0 | Nov 2024 | v22 | no | diff --git a/book/src/help_bn.md b/book/src/help_bn.md index ea02b39bee6..6680202a277 100644 --- a/book/src/help_bn.md +++ b/book/src/help_bn.md @@ -22,7 +22,7 @@ Options: Data directory for the blobs database. --block-cache-size Specifies how many blocks the database should cache in memory - [default: 5] + [default: 0] --boot-nodes One or more comma-delimited base64-encoded ENR's to bootstrap the p2p network. Multiaddr is also supported. @@ -513,8 +513,6 @@ Flags: subscriptions. 
This will only import attestations from already-subscribed subnets, use with --subscribe-all-subnets to ensure all attestations are received for import. - --light-client-server - DEPRECATED --log-color [] Enables/Disables colors for logs in terminal. Set it to false to disable colors. [default: true] [possible values: true, false] @@ -571,6 +569,13 @@ Flags: Subscribe to all subnets regardless of validator count. This will also advertise the beacon node as being long-lived subscribed to all subnets. + --supernode + Run as a voluntary supernode. This node will subscribe to all data + column subnets, custody all data columns, and perform reconstruction + and cross-seeding. This requires significantly more bandwidth, + storage, and computation requirements but the node will have direct + access to all blobs via the beacon API and it helps network resilience + by serving all data columns to syncing peers. --validator-monitor-auto Enables the automatic detection and monitoring of validators connected to the HTTP API and using the subnet subscription endpoint. This diff --git a/boot_node/Cargo.toml b/boot_node/Cargo.toml index d431dbeadfe..70d76ad483d 100644 --- a/boot_node/Cargo.toml +++ b/boot_node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "boot_node" -version = "7.1.0" +version = "8.0.0-rc.0" authors = ["Sigma Prime "] edition = { workspace = true } diff --git a/common/eth2/src/lib.rs b/common/eth2/src/lib.rs index 3368569d59f..0423794d0d5 100644 --- a/common/eth2/src/lib.rs +++ b/common/eth2/src/lib.rs @@ -1336,7 +1336,7 @@ impl BeaconNodeHttpClient { } /// Path for `v1/beacon/blob_sidecars/{block_id}` - pub fn get_blobs_path(&self, block_id: BlockId) -> Result { + pub fn get_blob_sidecars_path(&self, block_id: BlockId) -> Result { let mut path = self.eth_path(V1)?; path.path_segments_mut() .map_err(|()| Error::InvalidUrl(self.server.clone()))? 
@@ -1346,6 +1346,17 @@ impl BeaconNodeHttpClient { Ok(path) } + /// Path for `v1/beacon/blobs/{blob_id}` + pub fn get_blobs_path(&self, block_id: BlockId) -> Result { + let mut path = self.eth_path(V1)?; + path.path_segments_mut() + .map_err(|()| Error::InvalidUrl(self.server.clone()))? + .push("beacon") + .push("blobs") + .push(&block_id.to_string()); + Ok(path) + } + /// Path for `v1/beacon/blinded_blocks/{block_id}` pub fn get_beacon_blinded_blocks_path(&self, block_id: BlockId) -> Result { let mut path = self.eth_path(V1)?; @@ -1374,13 +1385,13 @@ impl BeaconNodeHttpClient { /// `GET v1/beacon/blob_sidecars/{block_id}` /// /// Returns `Ok(None)` on a 404 error. - pub async fn get_blobs( + pub async fn get_blob_sidecars( &self, block_id: BlockId, indices: Option<&[u64]>, spec: &ChainSpec, ) -> Result>>, Error> { - let mut path = self.get_blobs_path(block_id)?; + let mut path = self.get_blob_sidecars_path(block_id)?; if let Some(indices) = indices { let indices_string = indices .iter() @@ -1400,6 +1411,31 @@ impl BeaconNodeHttpClient { .map(|opt| opt.map(BeaconResponse::ForkVersioned)) } + /// `GET v1/beacon/blobs/{block_id}` + /// + /// Returns `Ok(None)` on a 404 error. + pub async fn get_blobs( + &self, + block_id: BlockId, + versioned_hashes: Option<&[Hash256]>, + ) -> Result>>>, Error> + { + let mut path = self.get_blobs_path(block_id)?; + if let Some(hashes) = versioned_hashes { + let hashes_string = hashes + .iter() + .map(|hash| hash.to_string()) + .collect::>() + .join(","); + path.query_pairs_mut() + .append_pair("versioned_hashes", &hashes_string); + } + + self.get_opt(path) + .await + .map(|opt| opt.map(BeaconResponse::Unversioned)) + } + /// `GET v1/beacon/blinded_blocks/{block_id}` /// /// Returns `Ok(None)` on a 404 error. 
diff --git a/common/eth2/src/types.rs b/common/eth2/src/types.rs index b72ab293801..8f553b57d9c 100644 --- a/common/eth2/src/types.rs +++ b/common/eth2/src/types.rs @@ -716,6 +716,13 @@ pub struct BlobIndicesQuery { pub indices: Option>, } +#[derive(Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct BlobsVersionedHashesQuery { + #[serde(default, deserialize_with = "option_query_vec")] + pub versioned_hashes: Option>, +} + #[derive(Clone, Deserialize)] #[serde(deny_unknown_fields)] pub struct DataColumnIndicesQuery { @@ -2317,6 +2324,14 @@ pub struct StandardAttestationRewards { pub total_rewards: Vec, } +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] +#[serde(bound = "E: EthSpec")] +#[serde(transparent)] +pub struct BlobWrapper { + #[serde(with = "ssz_types::serde_utils::hex_fixed_vec")] + pub blob: Blob, +} + #[cfg(test)] mod test { use std::fmt::Debug; diff --git a/common/eth2_network_config/built_in_network_configs/holesky/config.yaml b/common/eth2_network_config/built_in_network_configs/holesky/config.yaml index ab5f0f3bde0..b1e9faea1d6 100644 --- a/common/eth2_network_config/built_in_network_configs/holesky/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/holesky/config.yaml @@ -38,7 +38,7 @@ ELECTRA_FORK_VERSION: 0x06017000 ELECTRA_FORK_EPOCH: 115968 # Fulu FULU_FORK_VERSION: 0x07017000 -FULU_FORK_EPOCH: 18446744073709551615 +FULU_FORK_EPOCH: 165120 # Gloas GLOAS_FORK_VERSION: 0x08017000 GLOAS_FORK_EPOCH: 18446744073709551615 @@ -47,6 +47,8 @@ GLOAS_FORK_EPOCH: 18446744073709551615 # --------------------------------------------------------------- # 12 seconds SECONDS_PER_SLOT: 12 +# 12000 milliseconds +SLOT_DURATION_MS: 12000 # 14 (estimate from Eth1 mainnet) SECONDS_PER_ETH1_BLOCK: 14 # 2**8 (= 256) epochs ~27 hours @@ -55,6 +57,18 @@ MIN_VALIDATOR_WITHDRAWABILITY_DELAY: 256 SHARD_COMMITTEE_PERIOD: 256 # 2**11 (= 2,048) Eth1 blocks ~8 hours ETH1_FOLLOW_DISTANCE: 2048 +# 1667 basis points, ~17% of 
SLOT_DURATION_MS +PROPOSER_REORG_CUTOFF_BPS: 1667 +# 3333 basis points, ~33% of SLOT_DURATION_MS +ATTESTATION_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +AGGREGATE_DUE_BPS: 6667 + +# Altair +# 3333 basis points, ~33% of SLOT_DURATION_MS +SYNC_MESSAGE_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +CONTRIBUTION_DUE_BPS: 6667 # Validator cycle # --------------------------------------------------------------- @@ -141,13 +155,30 @@ MAX_BLOBS_PER_BLOCK_ELECTRA: 9 MAX_REQUEST_BLOB_SIDECARS_ELECTRA: 1152 # Fulu +# 2**7 (= 128) groups NUMBER_OF_CUSTODY_GROUPS: 128 +# 2**7 (= 128) subnets DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 +# MAX_REQUEST_BLOCKS_DENEB * NUMBER_OF_COLUMNS (= 128 * 128) sidecars MAX_REQUEST_DATA_COLUMN_SIDECARS: 16384 +# 2**3 (= 8) samples SAMPLES_PER_SLOT: 8 +# 2**2 (= 4) sidecars CUSTODY_REQUIREMENT: 4 +# 2**3 (= 8) sidecars VALIDATOR_CUSTODY_REQUIREMENT: 8 +# 2**5 * 10**9 (= 32,000,000,000) Gwei BALANCE_PER_ADDITIONAL_CUSTODY_GROUP: 32000000000 +# 2**12 (= 4,096) epochs MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS: 4096 +# Blob Scheduling +# --------------------------------------------------------------- + +BLOB_SCHEDULE: + - EPOCH: 166400 + MAX_BLOBS_PER_BLOCK: 15 + - EPOCH: 167936 + MAX_BLOBS_PER_BLOCK: 21 + # Gloas \ No newline at end of file diff --git a/common/eth2_network_config/built_in_network_configs/hoodi/config.yaml b/common/eth2_network_config/built_in_network_configs/hoodi/config.yaml index 01322974c8e..256957e1197 100644 --- a/common/eth2_network_config/built_in_network_configs/hoodi/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/hoodi/config.yaml @@ -42,7 +42,7 @@ ELECTRA_FORK_EPOCH: 2048 # Fulu FULU_FORK_VERSION: 0x70000910 -FULU_FORK_EPOCH: 18446744073709551615 +FULU_FORK_EPOCH: 50688 # Gloas GLOAS_FORK_VERSION: 0x80000910 @@ -53,6 +53,8 @@ GLOAS_FORK_EPOCH: 18446744073709551615 # --------------------------------------------------------------- # 12 seconds SECONDS_PER_SLOT: 12 +# 
12000 milliseconds +SLOT_DURATION_MS: 12000 # 14 (estimate from Eth1 mainnet) SECONDS_PER_ETH1_BLOCK: 12 # 2**8 (= 256) epochs ~27 hours @@ -61,6 +63,18 @@ MIN_VALIDATOR_WITHDRAWABILITY_DELAY: 256 SHARD_COMMITTEE_PERIOD: 256 # 2**11 (= 2,048) Eth1 blocks ~8 hours ETH1_FOLLOW_DISTANCE: 2048 +# 1667 basis points, ~17% of SLOT_DURATION_MS +PROPOSER_REORG_CUTOFF_BPS: 1667 +# 3333 basis points, ~33% of SLOT_DURATION_MS +ATTESTATION_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +AGGREGATE_DUE_BPS: 6667 + +# Altair +# 3333 basis points, ~33% of SLOT_DURATION_MS +SYNC_MESSAGE_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +CONTRIBUTION_DUE_BPS: 6667 # Validator cycle # --------------------------------------------------------------- @@ -154,15 +168,33 @@ WHISK_EPOCHS_PER_SHUFFLING_PHASE: 256 WHISK_PROPOSER_SELECTION_GAP: 2 # Fulu +# 2**7 (= 128) groups NUMBER_OF_CUSTODY_GROUPS: 128 +# 2**7 (= 128) subnets DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 +# MAX_REQUEST_BLOCKS_DENEB * NUMBER_OF_COLUMNS (= 128 * 128) sidecars MAX_REQUEST_DATA_COLUMN_SIDECARS: 16384 +# 2**3 (= 8) samples SAMPLES_PER_SLOT: 8 +# 2**2 (= 4) sidecars CUSTODY_REQUIREMENT: 4 +# 2**3 (= 8) sidecars VALIDATOR_CUSTODY_REQUIREMENT: 8 +# 2**5 * 10**9 (= 32,000,000,000) Gwei BALANCE_PER_ADDITIONAL_CUSTODY_GROUP: 32000000000 +# 2**12 (= 4,096) epochs MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS: 4096 + +# Blob Scheduling +# --------------------------------------------------------------- + +BLOB_SCHEDULE: + - EPOCH: 52480 + MAX_BLOBS_PER_BLOCK: 15 + - EPOCH: 54016 + MAX_BLOBS_PER_BLOCK: 21 + # Gloas # EIP7732 diff --git a/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml b/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml index 9802e409fbf..b1a01933d70 100644 --- a/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml @@ -42,7 +42,7 @@ 
ELECTRA_FORK_EPOCH: 222464 # Fulu FULU_FORK_VERSION: 0x90000075 -FULU_FORK_EPOCH: 18446744073709551615 +FULU_FORK_EPOCH: 272640 # Gloas GLOAS_FORK_VERSION: 0x90000076 @@ -52,6 +52,8 @@ GLOAS_FORK_EPOCH: 18446744073709551615 # --------------------------------------------------------------- # 12 seconds SECONDS_PER_SLOT: 12 +# 12000 milliseconds +SLOT_DURATION_MS: 12000 # 14 (estimate from Eth1 mainnet) SECONDS_PER_ETH1_BLOCK: 14 # 2**8 (= 256) epochs ~27 hours @@ -60,6 +62,18 @@ MIN_VALIDATOR_WITHDRAWABILITY_DELAY: 256 SHARD_COMMITTEE_PERIOD: 256 # 2**11 (= 2,048) Eth1 blocks ~8 hours ETH1_FOLLOW_DISTANCE: 2048 +# 1667 basis points, ~17% of SLOT_DURATION_MS +PROPOSER_REORG_CUTOFF_BPS: 1667 +# 3333 basis points, ~33% of SLOT_DURATION_MS +ATTESTATION_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +AGGREGATE_DUE_BPS: 6667 + +# Altair +# 3333 basis points, ~33% of SLOT_DURATION_MS +SYNC_MESSAGE_DUE_BPS: 3333 +# 6667 basis points, ~67% of SLOT_DURATION_MS +CONTRIBUTION_DUE_BPS: 6667 # Validator cycle @@ -147,13 +161,31 @@ MAX_BLOBS_PER_BLOCK_ELECTRA: 9 MAX_REQUEST_BLOB_SIDECARS_ELECTRA: 1152 # Fulu +# 2**7 (= 128) groups NUMBER_OF_CUSTODY_GROUPS: 128 +# 2**7 (= 128) subnets DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 +# MAX_REQUEST_BLOCKS_DENEB * NUMBER_OF_COLUMNS (= 128 * 128) sidecars MAX_REQUEST_DATA_COLUMN_SIDECARS: 16384 +# 2**3 (= 8) samples SAMPLES_PER_SLOT: 8 +# 2**2 (= 4) sidecars CUSTODY_REQUIREMENT: 4 +# 2**3 (= 8) sidecars VALIDATOR_CUSTODY_REQUIREMENT: 8 +# 2**5 * 10**9 (= 32,000,000,000) Gwei BALANCE_PER_ADDITIONAL_CUSTODY_GROUP: 32000000000 +# 2**12 (= 4,096) epochs MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS: 4096 + +# Blob Scheduling +# --------------------------------------------------------------- + +BLOB_SCHEDULE: + - EPOCH: 274176 + MAX_BLOBS_PER_BLOCK: 15 + - EPOCH: 275712 + MAX_BLOBS_PER_BLOCK: 21 + # Gloas \ No newline at end of file diff --git a/common/lighthouse_version/src/lib.rs b/common/lighthouse_version/src/lib.rs index 
c45dbac4d3a..574fdfea35f 100644 --- a/common/lighthouse_version/src/lib.rs +++ b/common/lighthouse_version/src/lib.rs @@ -17,8 +17,8 @@ pub const VERSION: &str = git_version!( // NOTE: using --match instead of --exclude for compatibility with old Git "--match=thiswillnevermatchlol" ], - prefix = "Lighthouse/v7.1.0-", - fallback = "Lighthouse/v7.1.0" + prefix = "Lighthouse/v8.0.0-rc.0-", + fallback = "Lighthouse/v8.0.0-rc.0" ); /// Returns the first eight characters of the latest commit hash for this build. @@ -54,7 +54,7 @@ pub fn version_with_platform() -> String { /// /// `1.5.1` pub fn version() -> &'static str { - "7.1.0" + "8.0.0-rc.0" } /// Returns the name of the current client running. diff --git a/common/network_utils/src/discovery_metrics.rs b/common/network_utils/src/discovery_metrics.rs index d105dee57af..26a9e8a45f5 100644 --- a/common/network_utils/src/discovery_metrics.rs +++ b/common/network_utils/src/discovery_metrics.rs @@ -35,8 +35,7 @@ pub static DISCOVERY_SESSIONS: LazyLock> = LazyLock::new(|| { }); pub fn scrape_discovery_metrics() { - let metrics = - discv5::metrics::Metrics::from(discv5::Discv5::::raw_metrics()); + let metrics = discv5::metrics::Metrics::from(discv5::Discv5::raw_metrics()); set_float_gauge(&DISCOVERY_REQS, metrics.unsolicited_requests_per_second); set_gauge(&DISCOVERY_SESSIONS, metrics.active_sessions as i64); set_gauge_vec(&DISCOVERY_BYTES, &["inbound"], metrics.bytes_recv as i64); diff --git a/common/network_utils/src/unused_port.rs b/common/network_utils/src/unused_port.rs index 212ae963e3c..de22869a862 100644 --- a/common/network_utils/src/unused_port.rs +++ b/common/network_utils/src/unused_port.rs @@ -22,25 +22,57 @@ static FOUND_PORTS_CACHE: LazyLock>> = LazyLock::new(|| Mutex::new(LRUTimeCache::new(CACHED_PORTS_TTL))); /// A convenience wrapper over [`zero_port`]. 
+#[deprecated(note = "Use bind_tcp4_any() which returns a bound socket to avoid TOCTOU")] pub fn unused_tcp4_port() -> Result { zero_port(Transport::Tcp, IpVersion::Ipv4) } /// A convenience wrapper over [`zero_port`]. +#[deprecated(note = "Use bind_udp4_any() which returns a bound socket to avoid TOCTOU")] pub fn unused_udp4_port() -> Result { zero_port(Transport::Udp, IpVersion::Ipv4) } /// A convenience wrapper over [`zero_port`]. +#[deprecated(note = "Use bind_tcp6_any() which returns a bound socket to avoid TOCTOU")] pub fn unused_tcp6_port() -> Result { zero_port(Transport::Tcp, IpVersion::Ipv6) } /// A convenience wrapper over [`zero_port`]. +#[deprecated(note = "Use bind_udp6_any() which returns a bound socket to avoid TOCTOU")] pub fn unused_udp6_port() -> Result { zero_port(Transport::Udp, IpVersion::Ipv6) } +/// Bind a TCPv4 listener on localhost with an ephemeral port (port 0) and return it. +/// Safe against TOCTOU: the socket remains open and reserved by the OS. +pub fn bind_tcp4_any() -> Result { + let addr = std::net::SocketAddr::new(std::net::Ipv4Addr::LOCALHOST.into(), 0); + TcpListener::bind(addr).map_err(|e| format!("Failed to bind TCPv4 listener: {:?}", e)) +} + +/// Bind a TCPv6 listener on localhost with an ephemeral port (port 0) and return it. +/// Safe against TOCTOU: the socket remains open and reserved by the OS. +pub fn bind_tcp6_any() -> Result { + let addr = std::net::SocketAddr::new(std::net::Ipv6Addr::LOCALHOST.into(), 0); + TcpListener::bind(addr).map_err(|e| format!("Failed to bind TCPv6 listener: {:?}", e)) +} + +/// Bind a UDPv4 socket on localhost with an ephemeral port (port 0) and return it. +/// Safe against TOCTOU: the socket remains open and reserved by the OS. 
+pub fn bind_udp4_any() -> Result { + let addr = std::net::SocketAddr::new(std::net::Ipv4Addr::LOCALHOST.into(), 0); + UdpSocket::bind(addr).map_err(|e| format!("Failed to bind UDPv4 socket: {:?}", e)) +} + +/// Bind a UDPv6 socket on localhost with an ephemeral port (port 0) and return it. +/// Safe against TOCTOU: the socket remains open and reserved by the OS. +pub fn bind_udp6_any() -> Result { + let addr = std::net::SocketAddr::new(std::net::Ipv6Addr::LOCALHOST.into(), 0); + UdpSocket::bind(addr).map_err(|e| format!("Failed to bind UDPv6 socket: {:?}", e)) +} + /// A bit of hack to find an unused port. /// /// Does not guarantee that the given port is unused after the function exits, just that it was @@ -51,6 +83,7 @@ pub fn unused_udp6_port() -> Result { /// It is possible that users are unable to bind to the ports returned by this function as the OS /// has a buffer period where it doesn't allow binding to the same port even after the socket is /// closed. We might have to use SO_REUSEADDR socket option from `std::net2` crate in that case. 
+#[deprecated(note = "Use bind_*_any() functions that return a bound socket to avoid TOCTOU")] pub fn zero_port(transport: Transport, ipv: IpVersion) -> Result { let localhost = match ipv { IpVersion::Ipv4 => std::net::Ipv4Addr::LOCALHOST.into(), diff --git a/common/task_executor/Cargo.toml b/common/task_executor/Cargo.toml index d4faf1e4b82..92a4fc4b596 100644 --- a/common/task_executor/Cargo.toml +++ b/common/task_executor/Cargo.toml @@ -8,6 +8,8 @@ edition = { workspace = true } async-channel = { workspace = true } futures = { workspace = true } metrics = { workspace = true } +num_cpus = { workspace = true } +rayon = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } tracing = { workspace = true } diff --git a/common/task_executor/src/lib.rs b/common/task_executor/src/lib.rs index 5f0c822b03f..0b8e9f8eba5 100644 --- a/common/task_executor/src/lib.rs +++ b/common/task_executor/src/lib.rs @@ -1,12 +1,15 @@ mod metrics; +mod rayon_pool_provider; pub mod test_utils; use futures::channel::mpsc::Sender; use futures::prelude::*; -use std::sync::Weak; +use std::sync::{Arc, Weak}; use tokio::runtime::{Handle, Runtime}; use tracing::debug; +use crate::rayon_pool_provider::RayonPoolProvider; +pub use crate::rayon_pool_provider::RayonPoolType; pub use tokio::task::JoinHandle; /// Provides a reason when Lighthouse is shut down. @@ -84,6 +87,8 @@ pub struct TaskExecutor { // FIXME(sproul): delete? 
#[allow(dead_code)] service_name: String, + + rayon_pool_provider: Arc, } impl TaskExecutor { @@ -105,6 +110,7 @@ impl TaskExecutor { exit, signal_tx, service_name, + rayon_pool_provider: Arc::new(RayonPoolProvider::default()), } } @@ -115,6 +121,7 @@ impl TaskExecutor { exit: self.exit.clone(), signal_tx: self.signal_tx.clone(), service_name, + rayon_pool_provider: self.rayon_pool_provider.clone(), } } @@ -226,6 +233,47 @@ impl TaskExecutor { } } + /// Spawns a blocking task on a dedicated tokio thread pool and installs a rayon context within it. + pub fn spawn_blocking_with_rayon( + self, + task: F, + rayon_pool_type: RayonPoolType, + name: &'static str, + ) where + F: FnOnce() + Send + 'static, + { + let thread_pool = self.rayon_pool_provider.get_thread_pool(rayon_pool_type); + self.spawn_blocking( + move || { + thread_pool.install(|| { + task(); + }); + }, + name, + ) + } + + /// Spawns a blocking computation on a rayon thread pool and awaits the result. + pub async fn spawn_blocking_with_rayon_async( + &self, + rayon_pool_type: RayonPoolType, + task: F, + ) -> Result + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + let thread_pool = self.rayon_pool_provider.get_thread_pool(rayon_pool_type); + let (tx, rx) = tokio::sync::oneshot::channel(); + + thread_pool.spawn(move || { + let result = task(); + let _ = tx.send(result); + }); + + rx.await + } + /// Spawn a future on the tokio runtime wrapped in an `async-channel::Receiver` returning an optional /// join handle to the future. /// The task is cancelled when the corresponding async-channel is dropped. 
diff --git a/common/task_executor/src/rayon_pool_provider.rs b/common/task_executor/src/rayon_pool_provider.rs new file mode 100644 index 00000000000..8e12f7eaa49 --- /dev/null +++ b/common/task_executor/src/rayon_pool_provider.rs @@ -0,0 +1,58 @@ +use rayon::{ThreadPool, ThreadPoolBuilder}; +use std::sync::Arc; + +const DEFAULT_LOW_PRIORITY_CPU_PERCENTAGE: usize = 25; +const DEFAULT_HIGH_PRIORITY_CPU_PERCENTAGE: usize = 80; +const MINIMUM_THREAD_COUNT: usize = 1; + +pub enum RayonPoolType { + HighPriority, + LowPriority, +} + +pub struct RayonPoolProvider { + /// Smaller rayon thread pool for lower-priority, compute-intensive tasks. + /// By default ~25% of CPUs or a minimum of 1 thread. + low_priority_thread_pool: Arc, + /// Larger rayon thread pool for high-priority, compute-intensive tasks. + /// By default ~80% of CPUs or a minimum of 1 thread. Critical/highest + /// priority tasks should use the global pool instead. + high_priority_thread_pool: Arc, +} + +impl Default for RayonPoolProvider { + fn default() -> Self { + let low_prio_threads = + (num_cpus::get() * DEFAULT_LOW_PRIORITY_CPU_PERCENTAGE / 100).max(MINIMUM_THREAD_COUNT); + let low_priority_thread_pool = Arc::new( + ThreadPoolBuilder::new() + .num_threads(low_prio_threads) + .build() + .expect("failed to build low-priority rayon pool"), + ); + + let high_prio_threads = (num_cpus::get() * DEFAULT_HIGH_PRIORITY_CPU_PERCENTAGE / 100) + .max(MINIMUM_THREAD_COUNT); + let high_priority_thread_pool = Arc::new( + ThreadPoolBuilder::new() + .num_threads(high_prio_threads) + .build() + .expect("failed to build high-priority rayon pool"), + ); + Self { + low_priority_thread_pool, + high_priority_thread_pool, + } + } +} + +impl RayonPoolProvider { + /// Get a scoped thread pool by priority level. + /// For critical/highest priority tasks, use the global pool instead. 
+ pub fn get_thread_pool(&self, rayon_pool_type: RayonPoolType) -> Arc { + match rayon_pool_type { + RayonPoolType::HighPriority => self.high_priority_thread_pool.clone(), + RayonPoolType::LowPriority => self.low_priority_thread_pool.clone(), + } + } +} diff --git a/consensus/proto_array/src/proto_array_fork_choice.rs b/consensus/proto_array/src/proto_array_fork_choice.rs index 4b31dc60bd3..dea853d245d 100644 --- a/consensus/proto_array/src/proto_array_fork_choice.rs +++ b/consensus/proto_array/src/proto_array_fork_choice.rs @@ -160,6 +160,56 @@ pub struct Block { pub unrealized_finalized_checkpoint: Option, } +impl Block { + /// Compute the proposer shuffling decision root of a child block in `child_block_epoch`. + /// + /// This function assumes that `child_block_epoch >= self.epoch`. It is the responsibility of + /// the caller to check this condition, or else incorrect results will be produced. + pub fn proposer_shuffling_root_for_child_block( + &self, + child_block_epoch: Epoch, + spec: &ChainSpec, + ) -> Hash256 { + let block_epoch = self.current_epoch_shuffling_id.shuffling_epoch; + + // For child blocks in the Fulu fork epoch itself, we want to use the old logic. There is no + // lookahead in the first Fulu epoch. So we check whether Fulu is enabled at + // `child_block_epoch - 1`, i.e. whether `child_block_epoch > fulu_fork_epoch`. + if !spec + .fork_name_at_epoch(child_block_epoch.saturating_sub(1_u64)) + .fulu_enabled() + { + // Prior to Fulu the proposer shuffling decision root for the current epoch is the same + // as the attestation shuffling for the *next* epoch, i.e. it is determined at the start + // of the current epoch. + if block_epoch == child_block_epoch { + self.next_epoch_shuffling_id.shuffling_decision_block + } else { + // Otherwise, the child block epoch is greater, so its decision root is its parent + // root itself (this block's root). 
+ self.root + } + } else { + // After Fulu the proposer shuffling is determined with lookahead, so if the block + // lies in the same epoch as its parent, its decision root is the same as the + // parent's current epoch attester shuffling + // + // i.e. the block from the end of epoch N - 2. + if child_block_epoch == block_epoch { + self.current_epoch_shuffling_id.shuffling_decision_block + } else if child_block_epoch == block_epoch + 1 { + // If the block is the next epoch, then it instead shares its decision root with + // the parent's *next epoch* attester shuffling. + self.next_epoch_shuffling_id.shuffling_decision_block + } else { + // The child block lies in the future beyond the lookahead, at the point where this + // block (its parent) will be the decision block. + self.root + } + } + } +} + /// A Vec-wrapper which will grow to match any request. /// /// E.g., a `get` or `insert` to an out-of-bounds element will cause the Vec to grow (using diff --git a/consensus/state_processing/src/all_caches.rs b/consensus/state_processing/src/all_caches.rs index e49eb395c40..0381bb820f2 100644 --- a/consensus/state_processing/src/all_caches.rs +++ b/consensus/state_processing/src/all_caches.rs @@ -1,8 +1,7 @@ use crate::common::update_progressive_balances_cache::initialize_progressive_balances_cache; use crate::epoch_cache::initialize_epoch_cache; -use types::{ - BeaconState, ChainSpec, EpochCacheError, EthSpec, FixedBytesExtended, Hash256, RelativeEpoch, -}; +use tracing::instrument; +use types::{BeaconState, ChainSpec, EpochCacheError, EthSpec, Hash256, RelativeEpoch}; /// Mixin trait for the beacon state that provides operations on *all* caches. 
/// @@ -23,6 +22,7 @@ pub trait AllCaches { } impl AllCaches for BeaconState { + #[instrument(skip_all)] fn build_all_caches(&mut self, spec: &ChainSpec) -> Result<(), EpochCacheError> { self.build_caches(spec)?; initialize_epoch_cache(self, spec)?; @@ -32,8 +32,7 @@ impl AllCaches for BeaconState { fn all_caches_built(&self) -> bool { let current_epoch = self.current_epoch(); - let Ok(epoch_cache_decision_block_root) = - self.proposer_shuffling_decision_root(Hash256::zero()) + let Ok(epoch_cache_decision_block_root) = self.epoch_cache_decision_root(Hash256::ZERO) else { return false; }; diff --git a/consensus/state_processing/src/epoch_cache.rs b/consensus/state_processing/src/epoch_cache.rs index 6654c6a7ef8..86db037446b 100644 --- a/consensus/state_processing/src/epoch_cache.rs +++ b/consensus/state_processing/src/epoch_cache.rs @@ -123,7 +123,7 @@ pub fn is_epoch_cache_initialized( let current_epoch = state.current_epoch(); let epoch_cache: &EpochCache = state.epoch_cache(); let decision_block_root = state - .proposer_shuffling_decision_root(Hash256::zero()) + .epoch_cache_decision_root(Hash256::zero()) .map_err(EpochCacheError::BeaconState)?; Ok(epoch_cache @@ -146,7 +146,7 @@ pub fn initialize_epoch_cache( let current_epoch = state.current_epoch(); let next_epoch = state.next_epoch().map_err(EpochCacheError::BeaconState)?; let decision_block_root = state - .proposer_shuffling_decision_root(Hash256::zero()) + .epoch_cache_decision_root(Hash256::zero()) .map_err(EpochCacheError::BeaconState)?; state.build_total_active_balance_cache(spec)?; diff --git a/consensus/state_processing/src/upgrade/fulu.rs b/consensus/state_processing/src/upgrade/fulu.rs index 6b038ad73a1..c2aced7047a 100644 --- a/consensus/state_processing/src/upgrade/fulu.rs +++ b/consensus/state_processing/src/upgrade/fulu.rs @@ -33,9 +33,7 @@ fn initialize_proposer_lookahead( ); } - Vector::new(lookahead).map_err(|e| { - Error::PleaseNotifyTheDevs(format!("Failed to initialize proposer lookahead: 
{:?}", e))
-    })
+    Vector::new(lookahead).map_err(|e| e.into())
 }
 
 pub fn upgrade_state_to_fulu(
diff --git a/consensus/types/src/beacon_block.rs b/consensus/types/src/beacon_block.rs
index f4e4e369661..61c32dd4ac9 100644
--- a/consensus/types/src/beacon_block.rs
+++ b/consensus/types/src/beacon_block.rs
@@ -843,6 +843,7 @@ impl<'de, E: EthSpec, Payload: AbstractExecPayload> ContextDeserialize<'de, F
     }
 }
 
+#[derive(Clone, Copy)]
 pub enum BlockImportSource {
     Gossip,
     Lookup,
diff --git a/consensus/types/src/beacon_response.rs b/consensus/types/src/beacon_response.rs
index 2e458543649..fc59fc94329 100644
--- a/consensus/types/src/beacon_response.rs
+++ b/consensus/types/src/beacon_response.rs
@@ -25,6 +25,7 @@ pub struct ForkVersionedResponse {
 /// `Deserialize`.
 #[derive(Debug, PartialEq, Clone, Serialize)]
 pub struct UnversionedResponse {
+    #[serde(flatten)]
     pub metadata: M,
     pub data: T,
 }
@@ -195,9 +196,10 @@ impl From> for BeaconResponse {
 
 #[cfg(test)]
 mod fork_version_response_tests {
+    use crate::beacon_response::ExecutionOptimisticFinalizedMetadata;
     use crate::{
         ExecutionPayload, ExecutionPayloadBellatrix, ForkName, ForkVersionedResponse,
-        MainnetEthSpec,
+        MainnetEthSpec, UnversionedResponse,
     };
     use serde_json::json;
@@ -236,4 +238,24 @@ mod fork_version_response_tests {
 
         assert!(result.is_err());
     }
+
+    // The following test should only pass by having the attribute #[serde(flatten)] on the metadata
+    #[test]
+    fn unversioned_response_serialize_deserialize_round_trip_test() {
+        // Create an UnversionedResponse with some data
+        let data = UnversionedResponse {
+            metadata: ExecutionOptimisticFinalizedMetadata {
+                execution_optimistic: Some(false),
+                finalized: Some(false),
+            },
+            data: "some_test_data".to_string(),
+        };
+
+        let serialized = serde_json::to_string(&data);
+
+        let deserialized =
+            serde_json::from_str(&serialized.unwrap()).expect("Failed to deserialize");
+
+        assert_eq!(data, deserialized);
+    }
 }
diff --git a/consensus/types/src/beacon_state.rs
b/consensus/types/src/beacon_state.rs index d2efbfe9095..1bd4927fe87 100644 --- a/consensus/types/src/beacon_state.rs +++ b/consensus/types/src/beacon_state.rs @@ -173,7 +173,21 @@ pub enum Error { AggregatorNotInCommittee { aggregator_index: u64, }, - PleaseNotifyTheDevs(String), + ComputeProposerIndicesPastEpoch { + current_epoch: Epoch, + request_epoch: Epoch, + }, + ComputeProposerIndicesInsufficientLookahead { + current_epoch: Epoch, + request_epoch: Epoch, + }, + ComputeProposerIndicesExcessiveLookahead { + current_epoch: Epoch, + request_epoch: Epoch, + }, + ProposerLookaheadOutOfBounds { + i: usize, + }, } /// Control whether an epoch-indexed field can be indexed at the next epoch or not. @@ -578,6 +592,7 @@ where #[compare_fields(as_iter)] #[test_random(default)] #[superstruct(only(Fulu, Gloas))] + #[serde(with = "ssz_types::serde_utils::quoted_u64_fixed_vec")] pub proposer_lookahead: Vector, // Gloas @@ -886,8 +901,9 @@ impl BeaconState { &self, epoch: Epoch, block_root: Hash256, + spec: &ChainSpec, ) -> Result { - let decision_slot = self.proposer_shuffling_decision_slot(epoch); + let decision_slot = spec.proposer_shuffling_decision_slot::(epoch); if self.slot() <= decision_slot { Ok(block_root) } else { @@ -902,19 +918,18 @@ impl BeaconState { /// /// The `block_root` covers the one-off scenario where the genesis block decides its own /// shuffling. It should be set to the latest block applied to `self` or the genesis block root. 
- pub fn proposer_shuffling_decision_root(&self, block_root: Hash256) -> Result { - let decision_slot = self.proposer_shuffling_decision_slot(self.current_epoch()); - if self.slot() == decision_slot { - Ok(block_root) - } else { - self.get_block_root(decision_slot).copied() - } + pub fn proposer_shuffling_decision_root( + &self, + block_root: Hash256, + spec: &ChainSpec, + ) -> Result { + self.proposer_shuffling_decision_root_at_epoch(self.current_epoch(), block_root, spec) } - /// Returns the slot at which the proposer shuffling was decided. The block root at this slot - /// can be used to key the proposer shuffling for the given epoch. - fn proposer_shuffling_decision_slot(&self, epoch: Epoch) -> Slot { - epoch.start_slot(E::slots_per_epoch()).saturating_sub(1_u64) + pub fn epoch_cache_decision_root(&self, block_root: Hash256) -> Result { + // Epoch cache decision root for the current epoch (N) is the block root at the end of epoch + // N - 1. This is the same as the root that determines the next epoch attester shuffling. + self.attester_shuffling_decision_root(block_root, RelativeEpoch::Next) } /// Returns the block root which decided the attester shuffling for the given `relative_epoch`. @@ -998,6 +1013,45 @@ impl BeaconState { indices: &[usize], spec: &ChainSpec, ) -> Result, Error> { + // Regardless of fork, we never support computing proposer indices for past epochs. + let current_epoch = self.current_epoch(); + if epoch < current_epoch { + return Err(Error::ComputeProposerIndicesPastEpoch { + current_epoch, + request_epoch: epoch, + }); + } + + if spec.fork_name_at_epoch(epoch).fulu_enabled() { + // Post-Fulu we must never compute proposer indices using insufficient lookahead. This + // would be very dangerous as it would lead to conflicts between the *true* proposer as + // defined by `self.proposer_lookahead` and the output of this function. 
+ // With MIN_SEED_LOOKAHEAD=1 (common config), this is equivalent to checking that the + // requested epoch is not the current epoch. + // + // We do not run this check if this function is called from `upgrade_to_fulu`, + // which runs *after* the slot is incremented, and needs to compute the proposer + // shuffling for the epoch that was just transitioned into. + if self.fork_name_unchecked().fulu_enabled() + && epoch < current_epoch.safe_add(spec.min_seed_lookahead)? + { + return Err(Error::ComputeProposerIndicesInsufficientLookahead { + current_epoch, + request_epoch: epoch, + }); + } + } else { + // Pre-Fulu the situation is reversed, we *should not* compute proposer indices using + // too much lookahead. To do so would make us vulnerable to changes in the proposer + // indices caused by effective balance changes. + if epoch >= current_epoch.safe_add(spec.min_seed_lookahead)? { + return Err(Error::ComputeProposerIndicesExcessiveLookahead { + current_epoch, + request_epoch: epoch, + }); + } + } + epoch .slot_iter(E::slots_per_epoch()) .map(|slot| { @@ -1146,10 +1200,7 @@ impl BeaconState { let index = slot.as_usize().safe_rem(E::slots_per_epoch() as usize)?; proposer_lookahead .get(index) - .ok_or(Error::PleaseNotifyTheDevs(format!( - "Proposer lookahead out of bounds: {} for slot: {}", - index, slot - ))) + .ok_or(Error::ProposerLookaheadOutOfBounds { i: index }) .map(|index| *index as usize) } else { // Pre-Fulu @@ -1168,6 +1219,25 @@ impl BeaconState { epoch: Epoch, spec: &ChainSpec, ) -> Result, Error> { + // This isn't in the spec, but we remove the footgun that is requesting the current epoch + // for a Fulu state. + if let Ok(proposer_lookahead) = self.proposer_lookahead() + && epoch >= self.current_epoch() + && epoch <= self.next_epoch()? + { + let slots_per_epoch = E::slots_per_epoch() as usize; + let start_offset = if epoch == self.current_epoch() { + 0 + } else { + slots_per_epoch + }; + return Ok(proposer_lookahead + .iter_from(start_offset)? 
+ .take(slots_per_epoch) + .map(|x| *x as usize) + .collect()); + } + // Not using the cached validator indices since they are shuffled. let indices = self.get_active_validator_indices(epoch, spec)?; diff --git a/consensus/types/src/chain_spec.rs b/consensus/types/src/chain_spec.rs index a1005d904ae..50a2f268e00 100644 --- a/consensus/types/src/chain_spec.rs +++ b/consensus/types/src/chain_spec.rs @@ -227,7 +227,7 @@ pub struct ChainSpec { pub ttfb_timeout: u64, pub resp_timeout: u64, pub attestation_propagation_slot_range: u64, - pub maximum_gossip_clock_disparity_millis: u64, + pub maximum_gossip_clock_disparity: u64, pub message_domain_invalid_snappy: [u8; 4], pub message_domain_valid_snappy: [u8; 4], pub subnets_per_node: u8, @@ -670,7 +670,7 @@ impl ChainSpec { } pub fn maximum_gossip_clock_disparity(&self) -> Duration { - Duration::from_millis(self.maximum_gossip_clock_disparity_millis) + Duration::from_millis(self.maximum_gossip_clock_disparity) } pub fn ttfb_timeout(&self) -> Duration { @@ -865,6 +865,34 @@ impl ChainSpec { ) } + /// Returns the slot at which the proposer shuffling was decided. + /// + /// The block root at this slot can be used to key the proposer shuffling for the given epoch. + pub fn proposer_shuffling_decision_slot(&self, epoch: Epoch) -> Slot { + // At the Fulu fork epoch itself, the shuffling is computed "the old way" with no lookahead. + // Therefore for `epoch == fulu_fork_epoch` we must take the `else` branch. Checking if Fulu + // is enabled at `epoch - 1` accomplishes this neatly. + if self + .fork_name_at_epoch(epoch.saturating_sub(1_u64)) + .fulu_enabled() + { + // Post-Fulu the proposer shuffling decision slot for epoch N is the slot at the end + // of epoch N - 2 (note: min_seed_lookahead=1 in all current configs). 
+ epoch + .saturating_sub(self.min_seed_lookahead) + .start_slot(E::slots_per_epoch()) + .saturating_sub(1_u64) + } else { + // Pre-Fulu the proposer shuffling decision slot for epoch N is the slot at the end of + // epoch N - 1 (note: +1 -1 for min_seed_lookahead=1 in all current configs). + epoch + .saturating_add(Epoch::new(1)) + .saturating_sub(self.min_seed_lookahead) + .start_slot(E::slots_per_epoch()) + .saturating_sub(1_u64) + } + } + /// Returns a `ChainSpec` compatible with the Ethereum Foundation specification. pub fn mainnet() -> Self { Self { @@ -1084,7 +1112,7 @@ impl ChainSpec { attestation_propagation_slot_range: default_attestation_propagation_slot_range(), attestation_subnet_count: 64, subnets_per_node: 2, - maximum_gossip_clock_disparity_millis: default_maximum_gossip_clock_disparity_millis(), + maximum_gossip_clock_disparity: default_maximum_gossip_clock_disparity(), target_aggregators_per_committee: 16, max_payload_size: default_max_payload_size(), min_epochs_for_block_requests: default_min_epochs_for_block_requests(), @@ -1430,7 +1458,7 @@ impl ChainSpec { attestation_propagation_slot_range: default_attestation_propagation_slot_range(), attestation_subnet_count: 64, subnets_per_node: 4, // Make this larger than usual to avoid network damage - maximum_gossip_clock_disparity_millis: default_maximum_gossip_clock_disparity_millis(), + maximum_gossip_clock_disparity: default_maximum_gossip_clock_disparity(), target_aggregators_per_committee: 16, max_payload_size: default_max_payload_size(), min_epochs_for_block_requests: 33024, @@ -1751,9 +1779,9 @@ pub struct Config { #[serde(default = "default_attestation_propagation_slot_range")] #[serde(with = "serde_utils::quoted_u64")] attestation_propagation_slot_range: u64, - #[serde(default = "default_maximum_gossip_clock_disparity_millis")] + #[serde(default = "default_maximum_gossip_clock_disparity")] #[serde(with = "serde_utils::quoted_u64")] - maximum_gossip_clock_disparity_millis: u64, + 
maximum_gossip_clock_disparity: u64, #[serde(default = "default_message_domain_invalid_snappy")] #[serde(with = "serde_utils::bytes_4_hex")] message_domain_invalid_snappy: [u8; 4], @@ -1967,7 +1995,7 @@ const fn default_attestation_propagation_slot_range() -> u64 { 32 } -const fn default_maximum_gossip_clock_disparity_millis() -> u64 { +const fn default_maximum_gossip_clock_disparity() -> u64 { 500 } @@ -2186,7 +2214,7 @@ impl Config { ttfb_timeout: spec.ttfb_timeout, resp_timeout: spec.resp_timeout, attestation_propagation_slot_range: spec.attestation_propagation_slot_range, - maximum_gossip_clock_disparity_millis: spec.maximum_gossip_clock_disparity_millis, + maximum_gossip_clock_disparity: spec.maximum_gossip_clock_disparity, message_domain_invalid_snappy: spec.message_domain_invalid_snappy, message_domain_valid_snappy: spec.message_domain_valid_snappy, max_request_blocks_deneb: spec.max_request_blocks_deneb, @@ -2274,7 +2302,7 @@ impl Config { message_domain_valid_snappy, max_request_blocks, attestation_propagation_slot_range, - maximum_gossip_clock_disparity_millis, + maximum_gossip_clock_disparity, max_request_blocks_deneb, max_request_blob_sidecars, max_request_data_column_sidecars, @@ -2350,7 +2378,7 @@ impl Config { attestation_subnet_prefix_bits, max_request_blocks, attestation_propagation_slot_range, - maximum_gossip_clock_disparity_millis, + maximum_gossip_clock_disparity, max_request_blocks_deneb, max_request_blob_sidecars, max_request_data_column_sidecars, @@ -2977,4 +3005,32 @@ mod yaml_tests { spec.min_epoch_data_availability_boundary(current_epoch) ); } + + #[test] + fn proposer_shuffling_decision_root_around_epoch_boundary() { + type E = MainnetEthSpec; + let fulu_fork_epoch = 5; + let spec = { + let mut spec = ForkName::Electra.make_genesis_spec(E::default_spec()); + spec.fulu_fork_epoch = Some(Epoch::new(fulu_fork_epoch)); + Arc::new(spec) + }; + + // For epochs prior to AND including the Fulu fork epoch, the decision slot is the end + // of the 
previous epoch (i.e. only 1 slot lookahead). + for epoch in (0..=fulu_fork_epoch).map(Epoch::new) { + assert_eq!( + spec.proposer_shuffling_decision_slot::(epoch), + epoch.start_slot(E::slots_per_epoch()) - 1 + ); + } + + // For epochs after Fulu, the decision slot is the end of the epoch two epochs prior. + for epoch in ((fulu_fork_epoch + 1)..(fulu_fork_epoch + 10)).map(Epoch::new) { + assert_eq!( + spec.proposer_shuffling_decision_slot::(epoch), + (epoch - 1).start_slot(E::slots_per_epoch()) - 1 + ); + } + } } diff --git a/consensus/types/src/data_column_sidecar.rs b/consensus/types/src/data_column_sidecar.rs index 57f7a88e193..2272b1695c9 100644 --- a/consensus/types/src/data_column_sidecar.rs +++ b/consensus/types/src/data_column_sidecar.rs @@ -143,6 +143,7 @@ pub enum DataColumnSidecarError { PreDeneb, SszError(SszError), BuildSidecarFailed(String), + InvalidCellProofLength { expected: usize, actual: usize }, } impl From for DataColumnSidecarError { diff --git a/consensus/types/src/epoch_cache.rs b/consensus/types/src/epoch_cache.rs index ef91c20d753..9956cb400a7 100644 --- a/consensus/types/src/epoch_cache.rs +++ b/consensus/types/src/epoch_cache.rs @@ -5,9 +5,13 @@ use std::sync::Arc; /// Cache of values which are uniquely determined at the start of an epoch. /// /// The values are fixed with respect to the last block of the _prior_ epoch, which we refer -/// to as the "decision block". This cache is very similar to the `BeaconProposerCache` in that -/// beacon proposers are determined at exactly the same time as the values in this cache, so -/// the keys for the two caches are identical. +/// to as the "decision block". +/// +/// Prior to Fulu this cache was similar to the `BeaconProposerCache` in that beacon proposers were +/// determined at exactly the same time as the values in this cache, so the keys for the two caches +/// were identical. +/// +/// Post-Fulu, we use a different key (the proposers have more lookahead). 
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] #[derive(Debug, PartialEq, Eq, Clone, Default)] pub struct EpochCache { diff --git a/consensus/types/src/fork_name.rs b/consensus/types/src/fork_name.rs index f12b14ff6ed..338e2b1e759 100644 --- a/consensus/types/src/fork_name.rs +++ b/consensus/types/src/fork_name.rs @@ -51,7 +51,7 @@ impl ForkName { /// This fork serves as the baseline for many tests, and the goal /// is to ensure features are passing on this fork. pub fn latest_stable() -> ForkName { - ForkName::Electra + ForkName::Fulu } /// Set the activation slots in the given `ChainSpec` so that the fork named by `self` @@ -201,6 +201,46 @@ impl ForkName { pub fn gloas_enabled(self) -> bool { self >= ForkName::Gloas } + + pub fn fork_ascii(self) { + if self == ForkName::Fulu { + println!( + r#" + ╔═══════════════════════════════════════╗ + ║ ║ + ║ TO FULU, MOAR BLOBS TO ETHEREUM ║ + ║ ║ + ║ III DECEMBER MMXXV ║ + ║ ║ + ╚═══════════════════════════════════════╝ + + ============================================================================= + |||| |||| + |---------------------------------------------------------------------------| + |___-----___-----___-----___-----___-----___-----___-----___-----___-----___| + / _ \===/ _ \ / _ \===/ _ \ / _ \===/ _ \ / _ \===/ _ \ + ( (.\ oOo /.) ) ( (.\ oOo /.) ) ( (.\ oOo /.) ) ( (.\ oOo /.) ) + \__/=====\__/ \__/=====\__/ \__/=====\__/ \__/=====\__/ + ||||||| ||||||| ||||||| ||||||| + ||||||| ||||||| \\/), ||||||| ||||||| + ||||||| ||||||| ,'.' /, ||||||| ||||||| + ||||||| ||||||| (_)- / /, ||||||| ||||||| + ||||||| ||||||| /\_/ |__..--, * ||||||| ||||||| + ||||||| ||||||| (\___/\ \ \ / ).' ||||||| ||||||| + ||||||| ||||||| \____/ / (_ // ||||||| ||||||| + ||||||| ||||||| \\_ ,'--'\_( ||||||| ||||||| + (oOoOo) (oOoOo) )_)_/ )_/ )_) (oOoOo) (oOoOo) + J%%%%%L J%%%%%L (_(_.'(_.'(_.' 
J%%%%%L J%%%%%L + ZZZZZZZZZ ZZZZZZZZZ ZZZZZZZZZ ZZZZZZZZZ + =========================================================================== + |_________________________________________________________________________| + |___________________________________________________________________________| + |_____________________________________________________________________________| + |_______________________________________________________________________________| + "# + ); + } + } } /// Map a fork name into a fork-versioned superstruct type like `BeaconBlock`. diff --git a/consensus/types/src/preset.rs b/consensus/types/src/preset.rs index c31183192f2..ab54c0345f7 100644 --- a/consensus/types/src/preset.rs +++ b/consensus/types/src/preset.rs @@ -208,6 +208,8 @@ pub struct DenebPreset { #[serde(with = "serde_utils::quoted_u64")] pub max_blob_commitments_per_block: u64, #[serde(with = "serde_utils::quoted_u64")] + pub kzg_commitment_inclusion_proof_depth: u64, + #[serde(with = "serde_utils::quoted_u64")] pub field_elements_per_blob: u64, } @@ -215,6 +217,7 @@ impl DenebPreset { pub fn from_chain_spec(_spec: &ChainSpec) -> Self { Self { max_blob_commitments_per_block: E::max_blob_commitments_per_block() as u64, + kzg_commitment_inclusion_proof_depth: E::KzgCommitmentInclusionProofDepth::to_u64(), field_elements_per_blob: E::field_elements_per_blob() as u64, } } diff --git a/lcli/Cargo.toml b/lcli/Cargo.toml index 2eed9da4c05..8f020e03876 100644 --- a/lcli/Cargo.toml +++ b/lcli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "lcli" description = "Lighthouse CLI (modeled after zcli)" -version = "7.1.0" +version = "8.0.0-rc.0" authors = ["Paul Hauner "] edition = { workspace = true } diff --git a/lcli/src/http_sync.rs b/lcli/src/http_sync.rs index 2e36eadf235..6f7dcdb5956 100644 --- a/lcli/src/http_sync.rs +++ b/lcli/src/http_sync.rs @@ -124,7 +124,7 @@ async fn get_block_from_source( .unwrap() .unwrap(); let blobs_from_source = source - .get_blobs::(block_id, None, spec) + 
.get_blob_sidecars::(block_id, None, spec) .await .unwrap() .unwrap() diff --git a/lighthouse/Cargo.toml b/lighthouse/Cargo.toml index bf8241f8a2d..4139286b532 100644 --- a/lighthouse/Cargo.toml +++ b/lighthouse/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lighthouse" -version = "7.1.0" +version = "8.0.0-rc.0" authors = ["Sigma Prime "] edition = { workspace = true } autotests = false diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index 8660074e91d..c93016a0f54 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -28,7 +28,7 @@ use std::path::PathBuf; use std::process::exit; use std::sync::LazyLock; use task_executor::ShutdownReason; -use tracing::{Level, info, warn}; +use tracing::{Level, info}; use tracing_subscriber::{Layer, filter::EnvFilter, layer::SubscriberExt, util::SubscriberInitExt}; use types::{EthSpec, EthSpecId}; use validator_client::ProductionValidatorClient; @@ -126,16 +126,6 @@ fn main() { .global(true) .display_order(0), ) - .arg( - Arg::new("logfile") - .long("logfile") - .value_name("PATH") - .help("DEPRECATED") - .action(ArgAction::Set) - .global(true) - .hide(true) - .display_order(0) - ) .arg( Arg::new("logfile-dir") .long("logfile-dir") @@ -385,48 +375,6 @@ fn main() { .global(true) .display_order(0) ) - .arg( - Arg::new("terminal-total-difficulty-override") - .long("terminal-total-difficulty-override") - .value_name("INTEGER") - .help("DEPRECATED") - .action(ArgAction::Set) - .global(true) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("terminal-block-hash-override") - .long("terminal-block-hash-override") - .value_name("TERMINAL_BLOCK_HASH") - .help("DEPRECATED") - .requires("terminal-block-hash-epoch-override") - .action(ArgAction::Set) - .global(true) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("terminal-block-hash-epoch-override") - .long("terminal-block-hash-epoch-override") - .value_name("EPOCH") - .help("DEPRECATED") - .requires("terminal-block-hash-override") - 
.action(ArgAction::Set) - .global(true) - .display_order(0) - .hide(true) - ) - .arg( - Arg::new("safe-slots-to-import-optimistically") - .long("safe-slots-to-import-optimistically") - .value_name("INTEGER") - .help("DEPRECATED") - .action(ArgAction::Set) - .global(true) - .display_order(0) - .hide(true) - ) .arg( Arg::new("genesis-state-url") .long("genesis-state-url") @@ -780,11 +728,6 @@ fn run( // Allow Prometheus access to the version and commit of the Lighthouse build. metrics::expose_lighthouse_version(); - // DEPRECATED: can be removed in v7.2.0/v8.0.0. - if clap_utils::parse_optional::(matches, "logfile")?.is_some() { - warn!("The --logfile flag is deprecated and replaced by --logfile-dir"); - } - #[cfg(all(feature = "modern", target_arch = "x86_64"))] if !std::is_x86_feature_detected!("adx") { tracing::warn!( @@ -793,20 +736,6 @@ fn run( ); } - // Warn for DEPRECATED global flags. This code should be removed when we finish deleting these - // flags. - let deprecated_flags = [ - "terminal-total-difficulty-override", - "terminal-block-hash-override", - "terminal-block-hash-epoch-override", - "safe-slots-to-import-optimistically", - ]; - for flag in deprecated_flags { - if matches.get_one::(flag).is_some() { - warn!("The {} flag is deprecated and does nothing", flag); - } - } - // Note: the current code technically allows for starting a beacon node _and_ a validator // client at the same time. // diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 1fd3cc1b792..c3b0581bd9c 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -35,6 +35,12 @@ const DUMMY_ENR_TCP_PORT: u16 = 7777; const DUMMY_ENR_UDP_PORT: u16 = 8888; const DUMMY_ENR_QUIC_PORT: u16 = 9999; +// Fixed test ports for config-only assertions (no actual bind occurs in these tests). 
+const TEST_TCP4_PORT: u16 = 39001; +const TEST_TCP6_PORT: u16 = 39011; +const TEST_UDP4_PORT: u16 = 39002; +const TEST_UDP6_PORT: u16 = 39012; + const _: () = assert!(DUMMY_ENR_QUIC_PORT != 0 && DUMMY_ENR_TCP_PORT != 0 && DUMMY_ENR_UDP_PORT != 0); @@ -392,27 +398,35 @@ fn genesis_backfill_with_historic_flag() { .with_config(|config| assert!(config.chain.genesis_backfill)); } -// Tests for Eth1 flags. -// DEPRECATED but should not crash #[test] -fn eth1_blocks_per_log_query_flag() { +fn complete_blob_backfill_default() { CommandLineTest::new() - .flag("eth1-blocks-per-log-query", Some("500")) - .run_with_zero_port(); + .run_with_zero_port() + .with_config(|config| assert!(!config.chain.complete_blob_backfill)); } -// DEPRECATED but should not crash + #[test] -fn eth1_purge_cache_flag() { +fn complete_blob_backfill_flag() { CommandLineTest::new() - .flag("eth1-purge-cache", None) - .run_with_zero_port(); + .flag("complete-blob-backfill", None) + .run_with_zero_port() + .with_config(|config| { + assert!(config.chain.complete_blob_backfill); + assert!(!config.store.prune_blobs); + }); } -// DEPRECATED but should not crash + +// Even if `--prune-blobs true` is provided, `--complete-blob-backfill` should override it to false. #[test] -fn eth1_cache_follow_distance_manual() { +fn complete_blob_backfill_and_prune_blobs_true() { CommandLineTest::new() - .flag("eth1-cache-follow-distance", Some("128")) - .run_with_zero_port(); + .flag("complete-blob-backfill", None) + .flag("prune-blobs", Some("true")) + .run_with_zero_port() + .with_config(|config| { + assert!(config.chain.complete_blob_backfill); + assert!(!config.store.prune_blobs); + }); } // Tests for Bellatrix flags. @@ -750,31 +764,6 @@ fn jwt_optional_flags() { fn jwt_optional_alias_flags() { run_jwt_optional_flags_test("jwt-secrets", "jwt-id", "jwt-version"); } -// DEPRECATED. This flag is deprecated but should not cause a crash. 
-#[test] -fn terminal_total_difficulty_override_flag() { - CommandLineTest::new() - .flag("terminal-total-difficulty-override", Some("1337424242")) - .run_with_zero_port(); -} -// DEPRECATED. This flag is deprecated but should not cause a crash. -#[test] -fn terminal_block_hash_and_activation_epoch_override_flags() { - CommandLineTest::new() - .flag("terminal-block-hash-epoch-override", Some("1337")) - .flag( - "terminal-block-hash-override", - Some("0x4242424242424242424242424242424242424242424242424242424242424242"), - ) - .run_with_zero_port(); -} -// DEPRECATED. This flag is deprecated but should not cause a crash. -#[test] -fn safe_slots_to_import_optimistically_flag() { - CommandLineTest::new() - .flag("safe-slots-to-import-optimistically", Some("421337")) - .run_with_zero_port(); -} // Tests for Network flags. #[test] @@ -802,6 +791,19 @@ fn network_subscribe_all_data_column_subnets_flag() { .with_config(|config| assert!(config.network.subscribe_all_data_column_subnets)); } #[test] +fn network_supernode_flag() { + CommandLineTest::new() + .flag("supernode", None) + .run_with_zero_port() + .with_config(|config| assert!(config.network.subscribe_all_data_column_subnets)); +} +#[test] +fn network_subscribe_all_data_column_subnets_default() { + CommandLineTest::new() + .run_with_zero_port() + .with_config(|config| assert!(!config.network.subscribe_all_data_column_subnets)); +} +#[test] fn blob_publication_batches() { CommandLineTest::new() .flag("blob-publication-batches", Some("3")) @@ -1029,8 +1031,8 @@ fn network_port_flag_over_ipv4_and_ipv6() { ); }); - let port = unused_tcp4_port().expect("Unable to find unused port."); - let port6 = unused_tcp6_port().expect("Unable to find unused port."); + let port = TEST_TCP4_PORT; + let port6 = TEST_TCP6_PORT; CommandLineTest::new() .flag("listen-address", Some("127.0.0.1")) .flag("listen-address", Some("::1")) @@ -1412,8 +1414,8 @@ fn enr_match_flag_over_ipv6() { const ADDR: &str = "::1"; let addr = 
ADDR.parse::().unwrap(); - let udp6_port = unused_udp6_port().expect("Unable to find unused port."); - let tcp6_port = unused_tcp6_port().expect("Unable to find unused port."); + let udp6_port = TEST_UDP6_PORT; + let tcp6_port = TEST_TCP6_PORT; CommandLineTest::new() .flag("enr-match", None) @@ -1442,13 +1444,13 @@ fn enr_match_flag_over_ipv6() { fn enr_match_flag_over_ipv4_and_ipv6() { const IPV6_ADDR: &str = "::1"; - let udp6_port = unused_udp6_port().expect("Unable to find unused port."); - let tcp6_port = unused_tcp6_port().expect("Unable to find unused port."); + let udp6_port = TEST_UDP6_PORT; + let tcp6_port = TEST_TCP6_PORT; let ipv6_addr = IPV6_ADDR.parse::().unwrap(); const IPV4_ADDR: &str = "127.0.0.1"; - let udp4_port = unused_udp4_port().expect("Unable to find unused port."); - let tcp4_port = unused_tcp4_port().expect("Unable to find unused port."); + let udp4_port = TEST_UDP4_PORT; + let tcp4_port = TEST_TCP4_PORT; let ipv4_addr = IPV4_ADDR.parse::().unwrap(); CommandLineTest::new() @@ -1808,12 +1810,25 @@ fn slots_per_restore_point_flag() { .run_with_zero_port(); } +#[test] +fn block_cache_size_default() { + CommandLineTest::new() + .run_with_zero_port() + .with_config(|config| assert_eq!(config.store.block_cache_size, 0)); +} #[test] fn block_cache_size_flag() { CommandLineTest::new() .flag("block-cache-size", Some("4")) .run_with_zero_port() - .with_config(|config| assert_eq!(config.store.block_cache_size, new_non_zero_usize(4))); + .with_config(|config| assert_eq!(config.store.block_cache_size, 4)); +} +#[test] +fn block_cache_size_zero() { + CommandLineTest::new() + .flag("block-cache-size", Some("0")) + .run_with_zero_port() + .with_config(|config| assert_eq!(config.store.block_cache_size, 0)); } #[test] fn state_cache_size_default() { @@ -2466,42 +2481,6 @@ fn logfile_format_flag() { ) }); } -// DEPRECATED but should not crash. 
-#[test]
-fn deprecated_logfile() {
-    CommandLineTest::new()
-        .flag("logfile", Some("test.txt"))
-        .run_with_zero_port();
-}
-
-// DEPRECATED but should not crash.
-#[test]
-fn sync_eth1_chain_disable_deposit_contract_sync_flag() {
-    let dir = TempDir::new().expect("Unable to create temporary directory");
-    CommandLineTest::new_with_no_execution_endpoint()
-        .flag("disable-deposit-contract-sync", None)
-        .flag("execution-endpoints", Some("http://localhost:8551/"))
-        .flag(
-            "execution-jwt",
-            dir.path().join("jwt-file").as_os_str().to_str(),
-        )
-        .run_with_zero_port();
-}
-
-#[test]
-#[should_panic]
-fn disable_deposit_contract_sync_conflicts_with_staking() {
-    let dir = TempDir::new().expect("Unable to create temporary directory");
-    CommandLineTest::new_with_no_execution_endpoint()
-        .flag("disable-deposit-contract-sync", None)
-        .flag("staking", None)
-        .flag("execution-endpoints", Some("http://localhost:8551/"))
-        .flag(
-            "execution-jwt",
-            dir.path().join("jwt-file").as_os_str().to_str(),
-        )
-        .run_with_zero_port();
-}
 
 #[test]
 fn light_client_server_default() {
@@ -2516,7 +2495,6 @@ fn light_client_server_default() {
 #[test]
 fn light_client_server_enabled() {
     CommandLineTest::new()
-        .flag("light-client-server", None)
         .run_with_zero_port()
         .with_config(|config| {
             assert!(config.network.enable_light_client_server);
diff --git a/lighthouse/tests/boot_node.rs b/lighthouse/tests/boot_node.rs
index 38111ca0efd..0c4ef4077ea 100644
--- a/lighthouse/tests/boot_node.rs
+++ b/lighthouse/tests/boot_node.rs
@@ -15,6 +15,9 @@ use tempfile::TempDir;
 
 const IP_ADDRESS: &str = "192.168.2.108";
 
+// Fixed test port for config-only assertions (no actual bind occurs in these tests).
+const TEST_UDP4_PORT: u16 = 39102;
+
 /// Returns the `lighthouse boot_node` command.
fn base_cmd() -> Command { let lighthouse_bin = env!("CARGO_BIN_EXE_lighthouse"); @@ -61,7 +64,7 @@ fn enr_address_arg() { #[test] fn port_flag() { - let port = unused_udp4_port().unwrap(); + let port = TEST_UDP4_PORT; CommandLineTest::new() .flag("port", Some(port.to_string().as_str())) .run_with_ip() @@ -133,7 +136,7 @@ fn boot_nodes_flag() { #[test] fn enr_port_flag() { - let port = unused_udp4_port().unwrap(); + let port = TEST_UDP4_PORT; CommandLineTest::new() .flag("enr-port", Some(port.to_string().as_str())) .run_with_ip() diff --git a/scripts/print_release_diffs.py b/scripts/print_release_diffs.py new file mode 100644 index 00000000000..d910b1be5bd --- /dev/null +++ b/scripts/print_release_diffs.py @@ -0,0 +1,72 @@ +""" +Summarise pull requests between two Lighthouse releases. + +Usage: + export GITHUB_TOKEN=your_token + python -m pip install requests==2.32.4 + python print_release_diffs.py --base v7.0.1 --head release-v7.1.0 + +Shows commit SHA, PR number, 'backwards-incompat' label status, and PR title. 
+""" + +import requests +import re +import argparse +import os + +GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN") +if not GITHUB_TOKEN: + raise SystemExit("Error: Please set the GITHUB_TOKEN environment variable.") + +parser = argparse.ArgumentParser(description="Summarise PRs between two Lighthouse versions.") +parser.add_argument("--base", required=True, help="Base tag or branch (older release)") +parser.add_argument("--head", required=True, help="Head tag or branch (newer release)") +args = parser.parse_args() + +BASE = args.base +HEAD = args.head +OWNER = 'sigp' +REPO = 'lighthouse' + +HEADERS = { + 'Authorization': f'token {GITHUB_TOKEN}', + 'Accept': 'application/vnd.github+json' +} + +def get_commits_between(base, head): + url = f'https://api.github.com/repos/{OWNER}/{REPO}/compare/{base}...{head}' + response = requests.get(url, headers=HEADERS) + response.raise_for_status() + return response.json()['commits'] + +def has_backwards_incompat_label(pr_number): + url = f'https://api.github.com/repos/{OWNER}/{REPO}/issues/{pr_number}' + response = requests.get(url, headers=HEADERS) + if response.status_code != 200: + raise Exception(f"Failed to fetch PR #{pr_number}") + labels = response.json().get('labels', []) + return any(label['name'] == 'backwards-incompat' for label in labels) + +def main(): + commits = get_commits_between(BASE, HEAD) + print(" # Commit SHA PR Number Has backwards-incompat Label PR Title") + print("--- ------------ ----------- ------------------------------ --------------------------------------------") + + for i, commit in enumerate(commits, 1): + sha = commit['sha'][:12] + message = commit['commit']['message'] + pr_match = re.search(r"\(#(\d+)\)", message) + + if not pr_match: + print(f"{i:<3} {sha} {'-':<11} {'-':<30} [NO PR MATCH]: {message.splitlines()[0]}") + continue + + pr_number = int(pr_match.group(1)) + try: + has_label = has_backwards_incompat_label(pr_number) + print(f"{i:<3} {sha} {pr_number:<11} {str(has_label):<30} 
{message.splitlines()[0]}") + except Exception as e: + print(f"{i:<3} {sha} {pr_number:<11} {'ERROR':<30} [ERROR FETCHING PR]: {e}") + +if __name__ == '__main__': + main() diff --git a/scripts/tests/checkpoint-sync-config-devnet.yaml b/scripts/tests/checkpoint-sync-config-devnet.yaml index f1b96dc9e52..2392011ed33 100644 --- a/scripts/tests/checkpoint-sync-config-devnet.yaml +++ b/scripts/tests/checkpoint-sync-config-devnet.yaml @@ -4,11 +4,15 @@ participants: cl_image: lighthouse:local el_type: geth el_image: ethpandaops/geth:master + cl_extra_params: + - --disable-backfill-rate-limiting supernode: true - cl_type: lighthouse cl_image: lighthouse:local el_type: geth el_image: ethpandaops/geth:master + cl_extra_params: + - --disable-backfill-rate-limiting supernode: false checkpoint_sync_enabled: true diff --git a/scripts/tests/checkpoint-sync.sh b/scripts/tests/checkpoint-sync.sh index a170d1e94dc..df03da042e5 100755 --- a/scripts/tests/checkpoint-sync.sh +++ b/scripts/tests/checkpoint-sync.sh @@ -15,7 +15,7 @@ CONFIG=${2:-$SCRIPT_DIR/checkpoint-sync-config-sepolia.yaml} # Interval for polling the /lighthouse/syncing endpoint for sync status POLL_INTERVAL_SECS=5 # Target number of slots to backfill to complete this test. -TARGET_BACKFILL_SLOTS=1024 +TARGET_BACKFILL_SLOTS=256 # Timeout for this test, if the node(s) fail to backfill `TARGET_BACKFILL_SLOTS` slots, fail the test. 
TIMEOUT_MINS=10 TIMEOUT_SECS=$((TIMEOUT_MINS * 60)) diff --git a/testing/ef_tests/src/cases/fork.rs b/testing/ef_tests/src/cases/fork.rs index 78d802c2283..54efb9f9cec 100644 --- a/testing/ef_tests/src/cases/fork.rs +++ b/testing/ef_tests/src/cases/fork.rs @@ -60,7 +60,7 @@ impl Case for ForkTest { fn result(&self, _case_index: usize, fork_name: ForkName) -> Result<(), Error> { let mut result_state = self.pre.clone(); let mut expected = Some(self.post.clone()); - let spec = &E::default_spec(); + let spec = &fork_name.make_genesis_spec(E::default_spec()); let mut result = match fork_name { ForkName::Base => panic!("phase0 not supported"), diff --git a/testing/simulator/src/checks.rs b/testing/simulator/src/checks.rs index 1368c495cd8..1240785121a 100644 --- a/testing/simulator/src/checks.rs +++ b/testing/simulator/src/checks.rs @@ -424,7 +424,7 @@ pub async fn verify_full_blob_production_up_to( // the `verify_full_block_production_up_to` function. if block.is_some() { remote_node - .get_blobs::<E>(BlockId::Slot(Slot::new(slot)), None, &E::default_spec()) + .get_blobs::<E>(BlockId::Slot(Slot::new(slot)), None) .await .map_err(|e| format!("Failed to get blobs at slot {slot:?}: {e:?}"))? .ok_or_else(|| format!("No blobs available at slot {slot:?}"))?; diff --git a/wordlist.txt b/wordlist.txt index 57674cf9749..58c4cf6db1e 100644 --- a/wordlist.txt +++ b/wordlist.txt @@ -39,6 +39,7 @@ EthStaker Exercism Extractable FFG +Fulu Geth GiB Gitcoin