Skip to content

Commit 7cee5d6

Browse files
authored
Optimise pubkey cache initialisation during beacon node startup (#8451)
Instrument beacon node startup and parallelise pubkey cache initialisation. I instrumented beacon node startup and noticed that the pubkey cache takes a long time to initialise, mostly due to decompressing all the validator pubkeys. This PR uses rayon to parallelise the decompression on initial checkpoint sync. The pubkeys are stored uncompressed, so the decompression time is not a problem on subsequent restarts. On restarts, we still deserialize pubkeys, but the timing is quite minimal on Sepolia so I didn't investigate further. `validator_pubkey_cache_new` timing on Sepolia: * before: 109.64ms * with parallelization: 21ms on Hoodi: * before: times out with Kurtosis after 120s * with parallelization: 12.77s to import keys **UPDATE**: downloading checkpoint state + genesis state takes about 2 minutes on my laptop, so it seems like the BN managed to start the http server just before timing out (after the optimisation). <img width="1380" height="625" alt="image" src="https://github.com/user-attachments/assets/4c548c14-57dd-4b47-af9a-115b15791940" /> Co-Authored-By: Jimmy Chen <[email protected]>
1 parent 9394663 commit 7cee5d6

File tree

2 files changed

+94
-22
lines changed

2 files changed

+94
-22
lines changed

beacon_node/beacon_chain/src/validator_pubkey_cache.rs

Lines changed: 90 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
use crate::errors::BeaconChainError;
22
use crate::{BeaconChainTypes, BeaconStore};
33
use bls::PUBLIC_KEY_UNCOMPRESSED_BYTES_LEN;
4+
use rayon::prelude::*;
45
use smallvec::SmallVec;
56
use ssz::{Decode, Encode};
67
use ssz_derive::{Decode, Encode};
78
use std::collections::HashMap;
89
use std::marker::PhantomData;
910
use store::{DBColumn, Error as StoreError, StoreItem, StoreOp};
11+
use tracing::instrument;
1012
use types::{BeaconState, FixedBytesExtended, Hash256, PublicKey, PublicKeyBytes};
1113

1214
/// Provides a mapping of `validator_index -> validator_publickey`.
@@ -28,6 +30,7 @@ impl<T: BeaconChainTypes> ValidatorPubkeyCache<T> {
2830
/// Create a new public key cache using the keys in `state.validators`.
2931
///
3032
/// The new cache will be updated with the keys from `state` and immediately written to disk.
33+
#[instrument(name = "validator_pubkey_cache_new", skip_all)]
3134
pub fn new(
3235
state: &BeaconState<T::EthSpec>,
3336
store: BeaconStore<T>,
@@ -46,6 +49,7 @@ impl<T: BeaconChainTypes> ValidatorPubkeyCache<T> {
4649
}
4750

4851
/// Load the pubkey cache from the given on-disk database.
52+
#[instrument(name = "validator_pubkey_cache_load_from_store", skip_all)]
4953
pub fn load_from_store(store: BeaconStore<T>) -> Result<Self, BeaconChainError> {
5054
let mut pubkeys = vec![];
5155
let mut indices = HashMap::new();
@@ -77,6 +81,7 @@ impl<T: BeaconChainTypes> ValidatorPubkeyCache<T> {
7781
/// Does not delete any keys from `self` if they don't appear in `state`.
7882
///
7983
/// NOTE: The caller *must* commit the returned I/O batch as part of the block import process.
84+
#[instrument(skip_all)]
8085
pub fn import_new_pubkeys(
8186
&mut self,
8287
state: &BeaconState<T::EthSpec>,
@@ -106,29 +111,58 @@ impl<T: BeaconChainTypes> ValidatorPubkeyCache<T> {
106111
self.indices.reserve(validator_keys.len());
107112

108113
let mut store_ops = Vec::with_capacity(validator_keys.len());
109-
for pubkey_bytes in validator_keys {
110-
let i = self.pubkeys.len();
111114

112-
if self.indices.contains_key(&pubkey_bytes) {
113-
return Err(BeaconChainError::DuplicateValidatorPublicKey);
115+
let is_initial_import = self.pubkeys.is_empty();
116+
117+
// Helper to insert a decompressed key
118+
let mut insert_key =
119+
|pubkey_bytes: PublicKeyBytes, pubkey: PublicKey| -> Result<(), BeaconChainError> {
120+
let i = self.pubkeys.len();
121+
122+
if self.indices.contains_key(&pubkey_bytes) {
123+
return Err(BeaconChainError::DuplicateValidatorPublicKey);
124+
}
125+
126+
// Stage the new validator key for writing to disk.
127+
// It will be committed atomically when the block that introduced it is written to disk.
128+
// Notably it is NOT written while the write lock on the cache is held.
129+
// See: https://github.com/sigp/lighthouse/issues/2327
130+
store_ops.push(StoreOp::KeyValueOp(
131+
DatabasePubkey::from_pubkey(&pubkey)
132+
.as_kv_store_op(DatabasePubkey::key_for_index(i)),
133+
));
134+
135+
self.pubkeys.push(pubkey);
136+
self.pubkey_bytes.push(pubkey_bytes);
137+
self.indices.insert(pubkey_bytes, i);
138+
Ok(())
139+
};
140+
141+
if is_initial_import {
142+
// On first startup, decompress keys in parallel for better performance
143+
let validator_keys_vec: Vec<PublicKeyBytes> = validator_keys.collect();
144+
145+
let decompressed: Vec<(PublicKeyBytes, PublicKey)> = validator_keys_vec
146+
.into_par_iter()
147+
.map(|pubkey_bytes| {
148+
let pubkey = (&pubkey_bytes)
149+
.try_into()
150+
.map_err(BeaconChainError::InvalidValidatorPubkeyBytes)?;
151+
Ok((pubkey_bytes, pubkey))
152+
})
153+
.collect::<Result<Vec<_>, BeaconChainError>>()?;
154+
155+
for (pubkey_bytes, pubkey) in decompressed {
156+
insert_key(pubkey_bytes, pubkey)?;
157+
}
158+
} else {
159+
// Sequential path for incremental updates
160+
for pubkey_bytes in validator_keys {
161+
let pubkey = (&pubkey_bytes)
162+
.try_into()
163+
.map_err(BeaconChainError::InvalidValidatorPubkeyBytes)?;
164+
insert_key(pubkey_bytes, pubkey)?;
114165
}
115-
116-
let pubkey = (&pubkey_bytes)
117-
.try_into()
118-
.map_err(BeaconChainError::InvalidValidatorPubkeyBytes)?;
119-
120-
// Stage the new validator key for writing to disk.
121-
// It will be committed atomically when the block that introduced it is written to disk.
122-
// Notably it is NOT written while the write lock on the cache is held.
123-
// See: https://github.com/sigp/lighthouse/issues/2327
124-
store_ops.push(StoreOp::KeyValueOp(
125-
DatabasePubkey::from_pubkey(&pubkey)
126-
.as_kv_store_op(DatabasePubkey::key_for_index(i)),
127-
));
128-
129-
self.pubkeys.push(pubkey);
130-
self.pubkey_bytes.push(pubkey_bytes);
131-
self.indices.insert(pubkey_bytes, i);
132166
}
133167

134168
Ok(store_ops)
@@ -324,4 +358,39 @@ mod test {
324358
let cache = ValidatorPubkeyCache::load_from_store(store).expect("should open cache");
325359
check_cache_get(&cache, &keypairs[..]);
326360
}
361+
362+
#[test]
363+
fn parallel_import_maintains_order() {
364+
// Test that parallel decompression on first startup maintains correct order and indices
365+
let (state, keypairs) = get_state(100);
366+
let store = get_store();
367+
368+
// Create cache from empty state (triggers parallel path)
369+
let cache: ValidatorPubkeyCache<T> =
370+
ValidatorPubkeyCache::new(&state, store).expect("should create cache");
371+
372+
check_cache_get(&cache, &keypairs[..]);
373+
}
374+
375+
#[test]
376+
fn incremental_import_maintains_order() {
377+
// Test that incremental imports maintain correct order (triggers sequential path)
378+
let store = get_store();
379+
380+
// Start with 50 validators
381+
let (state1, keypairs1) = get_state(50);
382+
let mut cache =
383+
ValidatorPubkeyCache::new(&state1, store.clone()).expect("should create cache");
384+
check_cache_get(&cache, &keypairs1[..]);
385+
386+
// Add 50 more validators
387+
let (state2, keypairs2) = get_state(100);
388+
let ops = cache
389+
.import_new_pubkeys(&state2)
390+
.expect("should import pubkeys");
391+
store.do_atomically_with_block_and_blobs_cache(ops).unwrap();
392+
393+
// Verify all 100 validators are correctly indexed
394+
check_cache_get(&cache, &keypairs2[..]);
395+
}
327396
}

beacon_node/client/src/builder.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ use std::time::Duration;
4242
use std::time::{SystemTime, UNIX_EPOCH};
4343
use store::database::interface::BeaconNodeBackend;
4444
use timer::spawn_timer;
45-
use tracing::{debug, info, warn};
45+
use tracing::{debug, info, instrument, warn};
4646
use types::data_column_custody_group::compute_ordered_custody_column_indices;
4747
use types::{
4848
BeaconState, BlobSidecarList, ChainSpec, EthSpec, ExecutionBlockHash, Hash256,
@@ -151,6 +151,7 @@ where
151151

152152
/// Initializes the `BeaconChainBuilder`. The `build_beacon_chain` method will need to be
153153
/// called later in order to actually instantiate the `BeaconChain`.
154+
#[instrument(skip_all)]
154155
pub async fn beacon_chain_builder(
155156
mut self,
156157
client_genesis: ClientGenesis,
@@ -613,6 +614,7 @@ where
613614
///
614615
/// If type inference errors are being raised, see the comment on the definition of `Self`.
615616
#[allow(clippy::type_complexity)]
617+
#[instrument(name = "build_client", skip_all)]
616618
pub fn build(
617619
mut self,
618620
) -> Result<Client<Witness<TSlotClock, E, THotStore, TColdStore>>, String> {
@@ -813,6 +815,7 @@ where
813815
TColdStore: ItemStore<E> + 'static,
814816
{
815817
/// Consumes the internal `BeaconChainBuilder`, attaching the resulting `BeaconChain` to self.
818+
#[instrument(skip_all)]
816819
pub fn build_beacon_chain(mut self) -> Result<Self, String> {
817820
let context = self
818821
.runtime_context

0 commit comments

Comments
 (0)