Skip to content

Commit e6c7b66

Browse files
committed
feat: parse BlockComponents during replay
1 parent 780e94b commit e6c7b66

File tree

8 files changed

+538
-21
lines changed

8 files changed

+538
-21
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/src/replay_stage.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3534,6 +3534,34 @@ impl ReplayStage {
35343534
let _ = cluster_slots_update_sender.send(vec![bank_slot]);
35353535
}
35363536

3537+
// Verify block components (header, footer) before freezing
3538+
// Only verify blocks that were replayed from blockstore (not leader blocks)
3539+
if !is_leader_block {
3540+
if let Err(err) = bank
3541+
.block_component_verifier
3542+
.read()
3543+
.unwrap()
3544+
.finish(migration_status)
3545+
{
3546+
warn!("Block component verification failed for slot {bank_slot}: {err:?}",);
3547+
let root = bank_forks.read().unwrap().root();
3548+
Self::mark_dead_slot(
3549+
blockstore,
3550+
bank,
3551+
root,
3552+
&BlockstoreProcessorError::BlockComponentVerifier(err),
3553+
rpc_subscriptions,
3554+
slot_status_notifier,
3555+
progress,
3556+
duplicate_slots_to_repair,
3557+
ancestor_hashes_replay_update_sender,
3558+
purge_repair_slot_counter,
3559+
&mut tbft_structs,
3560+
);
3561+
continue;
3562+
}
3563+
}
3564+
35373565
bank.freeze();
35383566
datapoint_info!(
35393567
"bank_frozen",

ledger/src/blockstore.rs

Lines changed: 96 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3928,14 +3928,13 @@ impl Blockstore {
39283928
.map(|x| x.0)
39293929
}
39303930

3931-
/// Returns the entry vector for the slot starting with `shred_start_index`, the number of
3932-
/// shreds that comprise the entry vector, and whether the slot is full (consumed all shreds).
3933-
pub fn get_slot_entries_with_shred_info(
3931+
/// Helper function that contains the common logic for getting slot data with shred info
3932+
fn get_slot_data_with_shred_info_common(
39343933
&self,
39353934
slot: Slot,
39363935
start_index: u64,
39373936
allow_dead_slots: bool,
3938-
) -> Result<(Vec<Entry>, u64, bool)> {
3937+
) -> Result<(CompletedRanges, SlotMeta, u64)> {
39393938
let (completed_ranges, slot_meta) = self.get_completed_ranges(slot, start_index)?;
39403939

39413940
// Check if the slot is dead *after* fetching completed ranges to avoid a race
@@ -3945,7 +3944,7 @@ impl Blockstore {
39453944
if self.is_dead(slot) && !allow_dead_slots {
39463945
return Err(BlockstoreError::DeadSlot);
39473946
} else if completed_ranges.is_empty() {
3948-
return Ok((vec![], 0, false));
3947+
return Err(BlockstoreError::SlotUnavailable);
39493948
}
39503949

39513950
let slot_meta = slot_meta.unwrap();
@@ -3954,10 +3953,49 @@ impl Blockstore {
39543953
.map(|&Range { end, .. }| u64::from(end) - start_index)
39553954
.unwrap_or(0);
39563955

3956+
Ok((completed_ranges, slot_meta, num_shreds))
3957+
}
3958+
3959+
/// Returns the entry vector for the slot starting with `shred_start_index`, the number of
3960+
/// shreds that comprise the entry vector, and whether the slot is full (consumed all shreds).
3961+
pub fn get_slot_entries_with_shred_info(
3962+
&self,
3963+
slot: Slot,
3964+
start_index: u64,
3965+
allow_dead_slots: bool,
3966+
) -> Result<(Vec<Entry>, u64, bool)> {
3967+
let (completed_ranges, slot_meta, num_shreds) =
3968+
match self.get_slot_data_with_shred_info_common(slot, start_index, allow_dead_slots) {
3969+
Ok(data) => data,
3970+
Err(BlockstoreError::SlotUnavailable) => return Ok((vec![], 0, false)),
3971+
Err(e) => return Err(e),
3972+
};
3973+
39573974
let entries = self.get_slot_entries_in_block(slot, completed_ranges, Some(&slot_meta))?;
39583975
Ok((entries, num_shreds, slot_meta.is_full()))
39593976
}
39603977

3978+
/// Returns the components vector for the slot starting with `shred_start_index`, the number of
3979+
/// shreds that comprise the components vector, and whether the slot is full (consumed all
3980+
/// shreds).
3981+
pub fn get_slot_components_with_shred_info(
3982+
&self,
3983+
slot: Slot,
3984+
start_index: u64,
3985+
allow_dead_slots: bool,
3986+
) -> Result<(Vec<BlockComponent>, u64, bool)> {
3987+
let (completed_ranges, slot_meta, num_shreds) =
3988+
match self.get_slot_data_with_shred_info_common(slot, start_index, allow_dead_slots) {
3989+
Ok(data) => data,
3990+
Err(BlockstoreError::SlotUnavailable) => return Ok((vec![], 0, false)),
3991+
Err(e) => return Err(e),
3992+
};
3993+
3994+
let components =
3995+
self.get_slot_components_in_block(slot, completed_ranges, Some(&slot_meta))?;
3996+
Ok((components, num_shreds, slot_meta.is_full()))
3997+
}
3998+
39613999
/// Gets accounts used in transactions in the slot range [starting_slot, ending_slot].
39624000
/// Additionally returns a bool indicating if the set may be incomplete.
39634001
/// Used by ledger-tool to create a minimized snapshot
@@ -4061,18 +4099,24 @@ impl Blockstore {
40614099
.collect()
40624100
}
40634101

4064-
/// Fetch the entries corresponding to all of the shred indices in `completed_ranges`
4102+
/// Helper function to process shreds in `completed_ranges` and apply a transformation
4103+
/// to the resulting block components.
40654104
/// This function takes advantage of the fact that `completed_ranges` are both
40664105
/// contiguous and in sorted order. To clarify, suppose completed_ranges is as follows:
40674106
/// completed_ranges = [..., (s_i..e_i), (s_i+1..e_i+1), ...]
40684107
/// Then, the following statements are true:
40694108
/// s_i < e_i == s_i+1 < e_i+1
4070-
fn get_slot_entries_in_block(
4109+
fn process_slot_data_in_block<T, I, F>(
40714110
&self,
40724111
slot: Slot,
40734112
completed_ranges: CompletedRanges,
40744113
slot_meta: Option<&SlotMeta>,
4075-
) -> Result<Vec<Entry>> {
4114+
transform: F,
4115+
) -> Result<Vec<T>>
4116+
where
4117+
I: IntoIterator<Item = T>,
4118+
F: Fn(Vec<BlockComponent>) -> Result<I>,
4119+
{
40764120
debug_assert!(completed_ranges
40774121
.iter()
40784122
.tuple_windows()
@@ -4118,26 +4162,60 @@ impl Blockstore {
41184162
.and_then(|payload| {
41194163
// TODO(karthik): if Alpenglow flag is disabled, return an error on special
41204164
// EntryBatches.
4121-
BlockComponent::from_bytes_multiple(&payload)
4122-
.map(|cs| {
4123-
cs.into_iter()
4124-
.filter_map(|bc| bc.as_entry_batch_owned())
4125-
.flatten()
4126-
.collect_vec()
4127-
})
4128-
.map_err(|e| {
4165+
let components =
4166+
BlockComponent::from_bytes_multiple(&payload).map_err(|e| {
41294167
BlockstoreError::InvalidShredData(Box::new(
41304168
bincode::ErrorKind::Custom(format!(
4131-
"could not reconstruct entries: {e:?}"
4169+
"could not reconstruct block components: {e:?}"
41324170
)),
41334171
))
4134-
})
4172+
})?;
4173+
transform(components).map_err(|e| {
4174+
BlockstoreError::InvalidShredData(Box::new(bincode::ErrorKind::Custom(
4175+
format!("could not transform block components: {e:?}"),
4176+
)))
4177+
})
41354178
})
41364179
})
41374180
.flatten_ok()
41384181
.collect()
41394182
}
41404183

4184+
/// Fetch the components corresponding to all of the shred indices in `completed_ranges`
4185+
/// This function takes advantage of the fact that `completed_ranges` are both
4186+
/// contiguous and in sorted order. To clarify, suppose completed_ranges is as follows:
4187+
/// completed_ranges = [..., (s_i..e_i), (s_i+1..e_i+1), ...]
4188+
/// Then, the following statements are true:
4189+
/// s_i < e_i == s_i+1 < e_i+1
4190+
fn get_slot_components_in_block(
4191+
&self,
4192+
slot: Slot,
4193+
completed_ranges: CompletedRanges,
4194+
slot_meta: Option<&SlotMeta>,
4195+
) -> Result<Vec<BlockComponent>> {
4196+
self.process_slot_data_in_block(slot, completed_ranges, slot_meta, |cs| Ok(cs.into_iter()))
4197+
}
4198+
4199+
/// Fetch the entries corresponding to all of the shred indices in `completed_ranges`
4200+
/// This function takes advantage of the fact that `completed_ranges` are both
4201+
/// contiguous and in sorted order. To clarify, suppose completed_ranges is as follows:
4202+
/// completed_ranges = [..., (s_i..e_i), (s_i+1..e_i+1), ...]
4203+
/// Then, the following statements are true:
4204+
/// s_i < e_i == s_i+1 < e_i+1
4205+
fn get_slot_entries_in_block(
4206+
&self,
4207+
slot: Slot,
4208+
completed_ranges: CompletedRanges,
4209+
slot_meta: Option<&SlotMeta>,
4210+
) -> Result<Vec<Entry>> {
4211+
self.process_slot_data_in_block(slot, completed_ranges, slot_meta, |cs| {
4212+
Ok(cs
4213+
.into_iter()
4214+
.filter_map(|bc| bc.as_entry_batch_owned())
4215+
.flatten())
4216+
})
4217+
}
4218+
41414219
pub fn get_entries_in_data_block(
41424220
&self,
41434221
slot: Slot,

ledger/src/blockstore_processor.rs

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use {
3232
bank::{Bank, PreCommitResult, TransactionBalancesSet},
3333
bank_forks::{BankForks, SetRootError},
3434
bank_utils,
35+
block_component_verifier::BlockComponentVerifierError,
3536
commitment::VOTE_THRESHOLD_SIZE,
3637
dependency_tracker::DependencyTracker,
3738
installed_scheduler_pool::BankWithScheduler,
@@ -836,6 +837,9 @@ pub enum BlockstoreProcessorError {
836837

837838
#[error("user transactions found in vote only mode bank at slot {0}")]
838839
UserTransactionsInVoteOnlyBank(Slot),
840+
841+
#[error("block component verifier error: {0}")]
842+
BlockComponentVerifier(#[from] BlockComponentVerifierError),
839843
}
840844

841845
/// Callback for accessing bank state after each slot is confirmed while
@@ -1511,10 +1515,10 @@ pub fn confirm_slot(
15111515
) -> result::Result<(), BlockstoreProcessorError> {
15121516
let slot = bank.slot();
15131517

1514-
let slot_entries_load_result = {
1518+
let (slot_components, num_shreds, slot_full) = {
15151519
let mut load_elapsed = Measure::start("load_elapsed");
15161520
let load_result = blockstore
1517-
.get_slot_entries_with_shred_info(slot, progress.num_shreds, allow_dead_slots)
1521+
.get_slot_components_with_shred_info(slot, progress.num_shreds, allow_dead_slots)
15181522
.map_err(BlockstoreProcessorError::FailedToLoadEntries);
15191523
load_elapsed.stop();
15201524
if load_result.is_err() {
@@ -1525,10 +1529,37 @@ pub fn confirm_slot(
15251529
load_result
15261530
}?;
15271531

1532+
// Process block component markers for Alpenglow slots. Note that we don't need to run migration
1533+
// checks here. Here's why:
1534+
//
1535+
// Post-Alpenglow migration - validators that have Alpenglow enabled can parse BlockComponents.
1536+
// Things just work.
1537+
//
1538+
// Pre-Alpenglow migration, suppose a validator receives a BlockMarker:
1539+
//
1540+
// (1) validators *incapable* of processing BlockMarkers will mark the slot as dead on shred
1541+
// ingest in blockstore.
1542+
//
1543+
// (2) validators *capable* of processing BlockMarkers will store the BlockMarkers in shred
1544+
// ingest, run through this verifying code here, and then error out when finish() is invoked
1545+
// during replay, resulting in the slot being marked as dead.
1546+
if let Some(parent_bank) = bank.parent() {
1547+
let mut verifier = bank.block_component_verifier.write().unwrap();
1548+
for marker in slot_components.iter().filter_map(|bc| bc.as_marker()) {
1549+
verifier.on_marker(bank.clone_without_scheduler(), parent_bank.clone(), marker)?;
1550+
}
1551+
}
1552+
1553+
let slot_entries = slot_components
1554+
.into_iter()
1555+
.filter_map(|c| c.as_entry_batch_owned())
1556+
.flatten()
1557+
.collect_vec();
1558+
15281559
confirm_slot_entries(
15291560
bank,
15301561
replay_tx_thread_pool,
1531-
slot_entries_load_result,
1562+
(slot_entries, num_shreds, slot_full),
15321563
timing,
15331564
progress,
15341565
skip_verification,

runtime/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ solana-compute-budget-interface = { workspace = true }
107107
solana-cost-model = { workspace = true }
108108
solana-cpi = { workspace = true }
109109
solana-ed25519-program = { workspace = true }
110+
solana-entry = { workspace = true }
110111
solana-epoch-info = { workspace = true }
111112
solana-epoch-rewards-hasher = { workspace = true }
112113
solana-epoch-schedule = { workspace = true }
@@ -200,6 +201,7 @@ solana-instruction-error = { workspace = true }
200201
solana-logger = { workspace = true }
201202
solana-program = { workspace = true }
202203
solana-program-binaries = { workspace = true }
204+
solana-votor-messages = { workspace = true, features = ["dev-context-only-utils"] }
203205
# See order-crates-for-publishing.py for using this unusual `path = "."`
204206
solana-runtime = { path = ".", features = ["dev-context-only-utils"] }
205207
solana-runtime-transaction = { workspace = true, features = [

runtime/src/bank.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ use {
4141
partitioned_epoch_rewards::{EpochRewardStatus, VoteRewardsAccounts},
4242
},
4343
bank_forks::BankForks,
44+
block_component_verifier::BlockComponentVerifier,
4445
epoch_stakes::{NodeVoteAccounts, VersionedEpochStakes},
4546
inflation_rewards::points::InflationPointCalculationEvent,
4647
installed_scheduler_pool::{BankWithScheduler, InstalledSchedulerRwLock},
@@ -556,6 +557,7 @@ impl PartialEq for Bank {
556557
block_id,
557558
bank_hash_stats: _,
558559
epoch_rewards_calculation_cache: _,
560+
block_component_verifier: _,
559561
// Ignore new fields explicitly if they do not impact PartialEq.
560562
// Adding ".." will remove compile-time checks that if a new field
561563
// is added to the struct, this PartialEq is accordingly updated.
@@ -903,6 +905,9 @@ pub struct Bank {
903905
/// This is used to avoid recalculating the same epoch rewards at epoch boundary.
904906
/// The hashmap is keyed by parent_hash.
905907
epoch_rewards_calculation_cache: Arc<Mutex<HashMap<Hash, Arc<PartitionedRewardsCalculation>>>>,
908+
909+
/// Block component verifier for validating block headers/footers and clock bounds
910+
pub block_component_verifier: RwLock<BlockComponentVerifier>,
906911
}
907912

908913
#[derive(Debug)]
@@ -1099,6 +1104,7 @@ impl Bank {
10991104
block_id: RwLock::new(None),
11001105
bank_hash_stats: AtomicBankHashStats::default(),
11011106
epoch_rewards_calculation_cache: Arc::new(Mutex::new(HashMap::default())),
1107+
block_component_verifier: RwLock::new(BlockComponentVerifier::default()),
11021108
};
11031109

11041110
bank.transaction_processor =
@@ -1347,6 +1353,7 @@ impl Bank {
13471353
block_id: RwLock::new(None),
13481354
bank_hash_stats: AtomicBankHashStats::default(),
13491355
epoch_rewards_calculation_cache: parent.epoch_rewards_calculation_cache.clone(),
1356+
block_component_verifier: RwLock::new(BlockComponentVerifier::default()),
13501357
};
13511358

13521359
let (_, ancestors_time_us) = measure_us!({
@@ -1815,6 +1822,7 @@ impl Bank {
18151822
block_id: RwLock::new(None),
18161823
bank_hash_stats: AtomicBankHashStats::new(&fields.bank_hash_stats),
18171824
epoch_rewards_calculation_cache: Arc::new(Mutex::new(HashMap::default())),
1825+
block_component_verifier: RwLock::new(BlockComponentVerifier::default()),
18181826
};
18191827

18201828
// Sanity assertions between bank snapshot and genesis config

0 commit comments

Comments (0)