Skip to content

Commit ffa7b2b

Browse files
authored
Only mark block lookups as pending if block is importing from gossip (#8112)
- PR #8045 introduced a regression of how lookup sync interacts with the da_checker. Now in unstable block import from the HTTP API also insert the block in the da_checker while the block is being execution verified. If lookup sync finds the block in the da_checker in `NotValidated` state it expects a `GossipBlockProcessResult` message sometime later. That message is only sent after block import in gossip. I confirmed in our node's logs for 4/4 cases of stuck lookups are caused by this sequence of events: - Receive block through API, insert into da_checker in fn process_block in put_pre_execution_block - Create lookup and leave in AwaitingDownload(block in processing cache) state - Block from HTTP API finishes importing - Lookup is left stuck Closes #8104 - #8110 was my initial solution attempt but we can't send the `GossipBlockProcessResult` event from the `http_api` crate without adding new channels, which seems messy. For a given node it's rare that a lookup is created at the same time that a block is being published. This PR solves #8104 by allowing lookup sync to import the block twice in that case. Co-Authored-By: dapplion <[email protected]>
1 parent 79b3321 commit ffa7b2b

File tree

8 files changed

+63
-33
lines changed

8 files changed

+63
-33
lines changed

beacon_node/beacon_chain/src/beacon_block_streamer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ impl<T: BeaconChainTypes> BeaconBlockStreamer<T> {
404404
if self.check_caches == CheckCaches::Yes {
405405
match self.beacon_chain.get_block_process_status(&root) {
406406
BlockProcessStatus::Unknown => None,
407-
BlockProcessStatus::NotValidated(block)
407+
BlockProcessStatus::NotValidated(block, _)
408408
| BlockProcessStatus::ExecutionValidated(block) => {
409409
metrics::inc_counter(&metrics::BEACON_REQRESP_PRE_IMPORT_CACHE_HITS);
410410
Some(block)

beacon_node/beacon_chain/src/beacon_chain.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ pub enum BlockProcessStatus<E: EthSpec> {
334334
/// Block is not in any pre-import cache. Block may be in the data-base or in the fork-choice.
335335
Unknown,
336336
/// Block is currently processing but not yet validated.
337-
NotValidated(Arc<SignedBeaconBlock<E>>),
337+
NotValidated(Arc<SignedBeaconBlock<E>>, BlockImportSource),
338338
/// Block is fully valid, but not yet imported. It's cached in the da_checker while awaiting
339339
/// missing block components.
340340
ExecutionValidated(Arc<SignedBeaconBlock<E>>),
@@ -3351,8 +3351,11 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
33513351
);
33523352
}
33533353

3354-
self.data_availability_checker
3355-
.put_pre_execution_block(block_root, unverified_block.block_cloned())?;
3354+
self.data_availability_checker.put_pre_execution_block(
3355+
block_root,
3356+
unverified_block.block_cloned(),
3357+
block_source,
3358+
)?;
33563359

33573360
// Start the Prometheus timer.
33583361
let _full_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES);

beacon_node/beacon_chain/src/data_availability_checker.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ use task_executor::TaskExecutor;
2121
use tracing::{debug, error, instrument};
2222
use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList};
2323
use types::{
24-
BlobSidecarList, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256,
25-
SignedBeaconBlock, Slot,
24+
BlobSidecarList, BlockImportSource, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch,
25+
EthSpec, Hash256, SignedBeaconBlock, Slot,
2626
};
2727

2828
mod error;
@@ -354,9 +354,10 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
354354
&self,
355355
block_root: Hash256,
356356
block: Arc<SignedBeaconBlock<T::EthSpec>>,
357+
source: BlockImportSource,
357358
) -> Result<(), Error> {
358359
self.availability_cache
359-
.put_pre_execution_block(block_root, block)
360+
.put_pre_execution_block(block_root, block, source)
360361
}
361362

362363
/// Removes a pre-execution block from the cache.

beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,14 @@ use tracing::{Span, debug, debug_span};
1919
use types::beacon_block_body::KzgCommitments;
2020
use types::blob_sidecar::BlobIdentifier;
2121
use types::{
22-
BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec,
23-
Hash256, RuntimeFixedVector, RuntimeVariableList, SignedBeaconBlock,
22+
BlobSidecar, BlockImportSource, ChainSpec, ColumnIndex, DataColumnSidecar,
23+
DataColumnSidecarList, Epoch, EthSpec, Hash256, RuntimeFixedVector, RuntimeVariableList,
24+
SignedBeaconBlock,
2425
};
2526

2627
#[derive(Clone)]
2728
pub enum CachedBlock<E: EthSpec> {
28-
PreExecution(Arc<SignedBeaconBlock<E>>),
29+
PreExecution(Arc<SignedBeaconBlock<E>>, BlockImportSource),
2930
Executed(Box<DietAvailabilityPendingExecutedBlock<E>>),
3031
}
3132

@@ -42,7 +43,7 @@ impl<E: EthSpec> CachedBlock<E> {
4243

4344
fn as_block(&self) -> &SignedBeaconBlock<E> {
4445
match self {
45-
CachedBlock::PreExecution(b) => b,
46+
CachedBlock::PreExecution(b, _) => b,
4647
CachedBlock::Executed(b) => b.as_block(),
4748
}
4849
}
@@ -135,9 +136,13 @@ impl<E: EthSpec> PendingComponents<E> {
135136

136137
/// Inserts a pre-execution block into the cache.
137138
/// This does NOT override an existing executed block.
138-
pub fn insert_pre_execution_block(&mut self, block: Arc<SignedBeaconBlock<E>>) {
139+
pub fn insert_pre_execution_block(
140+
&mut self,
141+
block: Arc<SignedBeaconBlock<E>>,
142+
source: BlockImportSource,
143+
) {
139144
if self.block.is_none() {
140-
self.block = Some(CachedBlock::PreExecution(block))
145+
self.block = Some(CachedBlock::PreExecution(block, source))
141146
}
142147
}
143148

@@ -433,7 +438,9 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
433438
.peek(block_root)
434439
.and_then(|pending_components| {
435440
pending_components.block.as_ref().map(|block| match block {
436-
CachedBlock::PreExecution(b) => BlockProcessStatus::NotValidated(b.clone()),
441+
CachedBlock::PreExecution(b, source) => {
442+
BlockProcessStatus::NotValidated(b.clone(), *source)
443+
}
437444
CachedBlock::Executed(b) => {
438445
BlockProcessStatus::ExecutionValidated(b.block_cloned())
439446
}
@@ -693,11 +700,12 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
693700
&self,
694701
block_root: Hash256,
695702
block: Arc<SignedBeaconBlock<T::EthSpec>>,
703+
source: BlockImportSource,
696704
) -> Result<(), AvailabilityCheckError> {
697705
let epoch = block.epoch();
698706
let pending_components =
699707
self.update_or_insert_pending_components(block_root, epoch, |pending_components| {
700-
pending_components.insert_pre_execution_block(block);
708+
pending_components.insert_pre_execution_block(block, source);
701709
Ok(())
702710
})?;
703711

@@ -718,7 +726,7 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
718726
/// This does NOT remove an existing executed block.
719727
pub fn remove_pre_execution_block(&self, block_root: &Hash256) {
720728
// The read lock is immediately dropped so we can safely remove the block from the cache.
721-
if let Some(BlockProcessStatus::NotValidated(_)) = self.get_cached_block(block_root) {
729+
if let Some(BlockProcessStatus::NotValidated(_, _)) = self.get_cached_block(block_root) {
722730
self.critical.write().pop(block_root);
723731
}
724732
}
@@ -1459,9 +1467,13 @@ mod pending_components_tests {
14591467
let mut pending_component = <PendingComponents<E>>::empty(block_root, max_len);
14601468

14611469
let pre_execution_block = Arc::new(pre_execution_block);
1462-
pending_component.insert_pre_execution_block(pre_execution_block.clone());
1470+
pending_component
1471+
.insert_pre_execution_block(pre_execution_block.clone(), BlockImportSource::Gossip);
14631472
assert!(
1464-
matches!(pending_component.block, Some(CachedBlock::PreExecution(_))),
1473+
matches!(
1474+
pending_component.block,
1475+
Some(CachedBlock::PreExecution(_, _))
1476+
),
14651477
"pre execution block inserted"
14661478
);
14671479

@@ -1471,7 +1483,8 @@ mod pending_components_tests {
14711483
"executed block inserted"
14721484
);
14731485

1474-
pending_component.insert_pre_execution_block(pre_execution_block);
1486+
pending_component
1487+
.insert_pre_execution_block(pre_execution_block, BlockImportSource::Gossip);
14751488
assert!(
14761489
matches!(pending_component.block, Some(CachedBlock::Executed(_))),
14771490
"executed block should remain"

beacon_node/network/src/sync/block_lookups/single_block_lookup.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
219219
// can assert that this is the correct value of `blob_kzg_commitments_count`.
220220
match cx.chain.get_block_process_status(&self.block_root) {
221221
BlockProcessStatus::Unknown => None,
222-
BlockProcessStatus::NotValidated(block)
222+
BlockProcessStatus::NotValidated(block, _)
223223
| BlockProcessStatus::ExecutionValidated(block) => Some(block.clone()),
224224
}
225225
}) {

beacon_node/network/src/sync/network_context.rs

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ use tokio::sync::mpsc;
4949
use tracing::{Span, debug, debug_span, error, warn};
5050
use types::blob_sidecar::FixedBlobSidecarList;
5151
use types::{
52-
BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, ForkContext,
53-
Hash256, SignedBeaconBlock, Slot,
52+
BlobSidecar, BlockImportSource, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec,
53+
ForkContext, Hash256, SignedBeaconBlock, Slot,
5454
};
5555

5656
pub mod custody;
@@ -835,14 +835,26 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
835835
match self.chain.get_block_process_status(&block_root) {
836836
// Unknown block, continue request to download
837837
BlockProcessStatus::Unknown => {}
838-
// Block is known are currently processing, expect a future event with the result of
839-
// processing.
840-
BlockProcessStatus::NotValidated { .. } => {
841-
// Lookup sync event safety: If the block is currently in the processing cache, we
842-
// are guaranteed to receive a `SyncMessage::GossipBlockProcessResult` that will
843-
// make progress on this lookup
844-
return Ok(LookupRequestResult::Pending("block in processing cache"));
845-
}
838+
// Block is known and currently processing. Imports from gossip and HTTP API insert the
839+
// block in the da_cache. However, HTTP API is unable to notify sync when it completes
840+
// block import. Returning `Pending` here will result in stuck lookups if the block is
841+
// importing from sync.
842+
BlockProcessStatus::NotValidated(_, source) => match source {
843+
BlockImportSource::Gossip => {
844+
// Lookup sync event safety: If the block is currently in the processing cache, we
845+
// are guaranteed to receive a `SyncMessage::GossipBlockProcessResult` that will
846+
// make progress on this lookup
847+
return Ok(LookupRequestResult::Pending("block in processing cache"));
848+
}
849+
BlockImportSource::Lookup
850+
| BlockImportSource::RangeSync
851+
| BlockImportSource::HttpApi => {
852+
// Lookup, RangeSync or HttpApi block import don't emit the GossipBlockProcessResult
853+
// event. If a lookup happens to be created during block import from one of
854+
// those sources just import the block twice. Otherwise the lookup will get
855+
// stuck. Double imports are fine, they just waste resources.
856+
}
857+
},
846858
// Block is fully validated. If it's not yet imported it's waiting for missing block
847859
// components. Consider this request completed and do nothing.
848860
BlockProcessStatus::ExecutionValidated { .. } => {

beacon_node/network/src/sync/tests/lookups.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ use slot_clock::{SlotClock, TestingSlotClock};
4141
use tokio::sync::mpsc;
4242
use tracing::info;
4343
use types::{
44-
BeaconState, BeaconStateBase, BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, ForkName,
45-
Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot,
44+
BeaconState, BeaconStateBase, BlobSidecar, BlockImportSource, DataColumnSidecar, EthSpec,
45+
ForkContext, ForkName, Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot,
4646
data_column_sidecar::ColumnIndex,
4747
test_utils::{SeedableRng, TestRandom, XorShiftRng},
4848
};
@@ -1113,7 +1113,7 @@ impl TestRig {
11131113
self.harness
11141114
.chain
11151115
.data_availability_checker
1116-
.put_pre_execution_block(block.canonical_root(), block)
1116+
.put_pre_execution_block(block.canonical_root(), block, BlockImportSource::Gossip)
11171117
.unwrap();
11181118
}
11191119

consensus/types/src/beacon_block.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,7 @@ impl<'de, E: EthSpec, Payload: AbstractExecPayload<E>> ContextDeserialize<'de, F
843843
}
844844
}
845845

846+
#[derive(Clone, Copy)]
846847
pub enum BlockImportSource {
847848
Gossip,
848849
Lookup,

0 commit comments

Comments
 (0)