Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions prdoc/pr_11332.prdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
title: 'client/db: Close missing body gaps for non archive nodes'
doc:
- audience: Node Dev
description: |-
This PR closes missing body gaps in the database for non-archive nodes.


Effectively, a missing body gap cannot be closed on the DB side if the node is non-archive. Since execution is already skipped, the node will close the memory gap in the sync engine; however, the gap remains open in the db.

This leads to wasting resources at every startup:
- client info contains a gap that cannot be filled (since we don't have the state around for execution)
- blocks are fetched from the connected peers
- gap is filled by ignoring blocks in the sync engine

Further, for collators running the current `master` branch, this causes an infinite loop of sync-engine restarts that are punished via banning and disconnecting. For more details and the root cause, check:
- https://github.com/paritytech/polkadot-sdk/pull/11330

Part of:
- https://github.com/paritytech/polkadot-sdk/issues/11299
crates:
- name: sc-client-db
bump: patch
365 changes: 365 additions & 0 deletions substrate/client/db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,30 @@ impl<Block: BlockT> Backend<Block> {
});
}

// Non archive nodes cannot fill the missing block gap with bodies.
// If the gap is present, it means that every restart will try to fill the gap:
// - a block request is made for each and every block in the gap
// - the request is fulfilled putting pressure on the network and other nodes
// - upon receiving the block, the block cannot be executed since the state
//   of the parent block might have been discarded
// - then the sync engine closes the gap in memory, but never in DB.
//
// This leads to inefficient syncing and high CPU usage on every restart. To mitigate this,
// remove the gap from the DB if we detect it and the current node is not an archive.
match (backend.is_archive, info.block_gap) {
	(false, Some(gap)) if matches!(gap.gap_type, BlockGapType::MissingBody) => {
		warn!(
			"Detected a missing body gap for non-archive nodes. Removing the gap={:?}",
			gap
		);

		// Remove both the gap entry and its version marker so the DB no longer
		// reports a gap on subsequent startups.
		db_init_transaction.remove(columns::META, meta_keys::BLOCK_GAP);
		db_init_transaction.remove(columns::META, meta_keys::BLOCK_GAP_VERSION);
		// Keep the in-memory blockchain metadata consistent with the pending DB change;
		// the transaction itself is committed by the caller just below.
		backend.blockchain.update_block_gap(None);
	},
	// Archive nodes keep the gap (they can fill it), and `MissingHeaderAndBody`
	// gaps (e.g. created by warp sync) are intentionally left untouched.
	_ => {},
}

db.commit(db_init_transaction)?;

Ok(backend)
Expand Down Expand Up @@ -5015,4 +5039,345 @@ pub(crate) mod tests {
backend.unpin_block(fork_hash_3);
assert!(bc.body(fork_hash_3).unwrap().is_none());
}

#[test]
fn prune_blocks_with_empty_predicates_prunes_all() {
	// Test backward compatibility: empty predicates means all blocks are pruned,
	// even when a block carries a justification.
	let backend = Backend::<Block>::new_test_with_tx_storage_and_filters(
		BlocksPruning::Some(2),
		0,
		vec![], // Empty predicates
	);

	let mut blocks = Vec::new();
	let mut prev_hash = Default::default();

	// Create 5 blocks, each with one extrinsic so bodies are non-empty.
	for i in 0..5 {
		let hash = insert_block(
			&backend,
			i,
			prev_hash,
			None,
			Default::default(),
			vec![UncheckedXt::new_transaction(i.into(), ())],
			None,
		)
		.unwrap();
		blocks.push(hash);
		prev_hash = hash;
	}

	// Justification - but no predicate to preserve it.
	let justification = (CONS0_ENGINE_ID, vec![1, 2, 3]);

	// Finalize blocks, adding the justification to block 1.
	{
		let mut op = backend.begin_operation().unwrap();
		backend.begin_state_operation(&mut op, blocks[4]).unwrap();
		// `justification` is not used again, so move it instead of cloning.
		op.mark_finalized(blocks[1], Some(justification)).unwrap();
		op.mark_finalized(blocks[2], None).unwrap();
		op.mark_finalized(blocks[3], None).unwrap();
		op.mark_finalized(blocks[4], None).unwrap();
		backend.commit_operation(op).unwrap();
	}

	let bc = backend.blockchain();

	// All blocks outside the pruning window should be pruned, even with a justification.
	assert_eq!(None, bc.body(blocks[0]).unwrap());
	assert_eq!(None, bc.body(blocks[1]).unwrap()); // Has justification but no predicate
	assert_eq!(None, bc.body(blocks[2]).unwrap());

	// Blocks 3 and 4 are within the pruning window.
	assert!(bc.body(blocks[3]).unwrap().is_some());
	assert!(bc.body(blocks[4]).unwrap().is_some());
}

#[test]
fn prune_blocks_multiple_filters_or_logic() {
	// Test that multiple filters use OR logic: if ANY filter matches, the block is kept.
	let backend = Backend::<Block>::new_test_with_tx_storage_and_filters(
		BlocksPruning::Some(2),
		0,
		vec![
			Arc::new(|j: &Justifications| j.get(CONS0_ENGINE_ID).is_some()),
			Arc::new(|j: &Justifications| j.get(CONS1_ENGINE_ID).is_some()),
		],
	);

	let mut blocks = Vec::new();
	let mut prev_hash = Default::default();

	// Create 7 blocks, each with one extrinsic so bodies are non-empty.
	for i in 0..7 {
		let hash = insert_block(
			&backend,
			i,
			prev_hash,
			None,
			Default::default(),
			vec![UncheckedXt::new_transaction(i.into(), ())],
			None,
		)
		.unwrap();
		blocks.push(hash);
		prev_hash = hash;
	}

	let cons0_justification = (CONS0_ENGINE_ID, vec![1, 2, 3]);
	let cons1_justification = (CONS1_ENGINE_ID, vec![4, 5, 6]);

	// Finalize blocks with different justification patterns. Each justification is
	// used exactly once, so they are moved instead of cloned.
	{
		let mut op = backend.begin_operation().unwrap();
		backend.begin_state_operation(&mut op, blocks[6]).unwrap();
		// Block 1: CONS0 only - should be preserved.
		op.mark_finalized(blocks[1], Some(cons0_justification)).unwrap();
		// Block 2: CONS1 only - should be preserved.
		op.mark_finalized(blocks[2], Some(cons1_justification)).unwrap();
		// Block 3: No justification - should be pruned.
		op.mark_finalized(blocks[3], None).unwrap();
		// Block 4: Random/unknown engine ID - should be pruned.
		op.mark_finalized(blocks[4], Some(([9, 9, 9, 9], vec![7, 8, 9]))).unwrap();
		op.mark_finalized(blocks[5], None).unwrap();
		op.mark_finalized(blocks[6], None).unwrap();
		backend.commit_operation(op).unwrap();
	}

	let bc = backend.blockchain();

	// Block 0 should be pruned (outside window, no justification).
	assert_eq!(None, bc.body(blocks[0]).unwrap());

	// Block 1 should be preserved (has CONS0 justification).
	assert!(bc.body(blocks[1]).unwrap().is_some());

	// Block 2 should be preserved (has CONS1 justification).
	assert!(bc.body(blocks[2]).unwrap().is_some());

	// Block 3 should be pruned (no justification).
	assert_eq!(None, bc.body(blocks[3]).unwrap());

	// Block 4 should be pruned (unknown engine ID).
	assert_eq!(None, bc.body(blocks[4]).unwrap());

	// Blocks 5 and 6 are within the pruning window.
	assert!(bc.body(blocks[5]).unwrap().is_some());
	assert!(bc.body(blocks[6]).unwrap().is_some());
}

#[test]
fn prune_blocks_filter_only_matches_specific_engine() {
	// Test that a filter for one engine ID does NOT preserve blocks with a different engine ID.
	let backend = Backend::<Block>::new_test_with_tx_storage_and_filters(
		BlocksPruning::Some(2),
		0,
		vec![Arc::new(|j: &Justifications| j.get(CONS0_ENGINE_ID).is_some())],
	);

	let mut blocks = Vec::new();
	let mut prev_hash = Default::default();

	// Create 5 blocks, each with one extrinsic so bodies are non-empty.
	for i in 0..5 {
		let hash = insert_block(
			&backend,
			i,
			prev_hash,
			None,
			Default::default(),
			vec![UncheckedXt::new_transaction(i.into(), ())],
			None,
		)
		.unwrap();
		blocks.push(hash);
		prev_hash = hash;
	}

	let cons1_justification = (CONS1_ENGINE_ID, vec![4, 5, 6]);

	// Finalize blocks, adding the CONS1 justification to block 1.
	{
		let mut op = backend.begin_operation().unwrap();
		backend.begin_state_operation(&mut op, blocks[4]).unwrap();
		// Block 1 gets the CONS1 justification - should NOT be preserved by the CONS0
		// filter. The justification is not used again, so move it instead of cloning.
		op.mark_finalized(blocks[1], Some(cons1_justification)).unwrap();
		op.mark_finalized(blocks[2], None).unwrap();
		op.mark_finalized(blocks[3], None).unwrap();
		op.mark_finalized(blocks[4], None).unwrap();
		backend.commit_operation(op).unwrap();
	}

	let bc = backend.blockchain();

	// Block 0 should be pruned.
	assert_eq!(None, bc.body(blocks[0]).unwrap());

	// Block 1 should also be pruned (CONS1 justification, but only a CONS0 filter).
	assert_eq!(None, bc.body(blocks[1]).unwrap());

	// Block 2 should be pruned.
	assert_eq!(None, bc.body(blocks[2]).unwrap());

	// Blocks 3 and 4 are within the pruning window.
	assert!(bc.body(blocks[3]).unwrap().is_some());
	assert!(bc.body(blocks[4]).unwrap().is_some());
}

/// Insert a header without body as best block. This triggers `MissingBody` gap creation
/// when the parent header exists and `create_gap` is true.
fn insert_header_no_body_as_best(
	backend: &Backend<Block>,
	number: u64,
	parent_hash: H256,
) -> H256 {
	// All roots and the digest are defaulted; only number and parent matter here.
	let header = Header {
		number,
		parent_hash,
		state_root: Default::default(),
		digest: Default::default(),
		extrinsics_root: Default::default(),
	};
	let hash = header.hash();

	let mut op = backend.begin_operation().unwrap();
	// `body = None` is what triggers the `MissingBody` gap when the parent exists.
	op.set_block_data(header, None, None, None, NewBlockState::Best, true).unwrap();
	backend.commit_operation(op).unwrap();

	hash
}

/// Re-open a backend from an existing database with the given blocks pruning mode.
fn reopen_backend(
	db: Arc<dyn sp_database::Database<DbHash>>,
	blocks_pruning: BlocksPruning,
) -> Backend<Block> {
	// Pick a state pruning mode consistent with the requested block pruning.
	let state_pruning = Some(match blocks_pruning {
		BlocksPruning::KeepAll => PruningMode::ArchiveAll,
		BlocksPruning::KeepFinalized => PruningMode::ArchiveCanonical,
		BlocksPruning::Some(n) => PruningMode::blocks_pruning(n),
	});

	let settings = DatabaseSettings {
		trie_cache_maximum_size: Some(16 * 1024 * 1024),
		state_pruning,
		source: DatabaseSource::Custom { db, require_create_flag: false },
		blocks_pruning,
		pruning_filters: Default::default(),
		metrics_registry: None,
	};

	Backend::<Block>::new(settings, 0).unwrap()
}

#[test]
fn missing_body_gap_is_removed_for_non_archive_node() {
	// Create a non-archive backend and produce a multi-block `MissingBody` gap.
	let backend = Backend::<Block>::new_test_with_tx_storage(BlocksPruning::Some(100), 0);
	assert!(!backend.is_archive);

	let genesis = insert_header(&backend, 0, Default::default(), None, Default::default());

	// Insert headers 1..=3 without bodies — this opens a `MissingBody` gap over blocks 1 to 3.
	let mut parent = genesis;
	for number in 1..=3 {
		parent = insert_header_no_body_as_best(&backend, number, parent);
	}
	let _ = parent;

	let gap = backend
		.blockchain()
		.info()
		.block_gap
		.expect("MissingBody gap should have been created");
	assert!(matches!(gap.gap_type, BlockGapType::MissingBody));
	assert_eq!(gap.start, 1);
	assert_eq!(gap.end, 3);

	// Re-open the same database as a non-archive node.
	let reopened = reopen_backend(backend.storage.db.clone(), BlocksPruning::Some(100));
	assert!(!reopened.is_archive);

	// The multi-block gap should have been removed on re-open.
	let info = reopened.blockchain().info();
	assert!(
		info.block_gap.is_none(),
		"MissingBody gap should be removed for non-archive nodes, got: {:?}",
		info.block_gap,
	);
}

#[test]
fn missing_body_gap_is_preserved_for_archive_node() {
	// Create a backend with archive pruning and produce a multi-block `MissingBody` gap.
	let backend = Backend::<Block>::new_test_with_tx_storage(BlocksPruning::KeepAll, 0);
	assert!(backend.is_archive);

	let genesis = insert_header(&backend, 0, Default::default(), None, Default::default());

	// Insert headers 1..=3 without bodies — this opens a `MissingBody` gap over blocks 1 to 3.
	let mut parent = genesis;
	for number in 1..=3 {
		parent = insert_header_no_body_as_best(&backend, number, parent);
	}
	let _ = parent;

	let gap = backend
		.blockchain()
		.info()
		.block_gap
		.expect("MissingBody gap should have been created");
	assert!(matches!(gap.gap_type, BlockGapType::MissingBody));
	assert_eq!(gap.start, 1);
	assert_eq!(gap.end, 3);

	// Re-open the same database as an archive node.
	let reopened = reopen_backend(backend.storage.db.clone(), BlocksPruning::KeepAll);
	assert!(reopened.is_archive);

	// The gap must survive the re-open untouched for archive nodes.
	let gap = reopened
		.blockchain()
		.info()
		.block_gap
		.expect("MissingBody gap should be preserved for archive nodes");
	assert!(matches!(gap.gap_type, BlockGapType::MissingBody));
	assert_eq!(gap.start, 1);
	assert_eq!(gap.end, 3);
}

#[test]
fn missing_header_and_body_gap_is_preserved_for_non_archive_node() {
	// Create a non-archive backend and produce a `MissingHeaderAndBody` gap (from warp sync).
	let backend = Backend::<Block>::new_test_with_tx_storage(BlocksPruning::Some(100), 0);
	assert!(!backend.is_archive);

	let _genesis = insert_header(&backend, 0, Default::default(), None, Default::default());

	// A disconnected block at height 3 with a fake parent creates a
	// `MissingHeaderAndBody` gap (blocks 1..2 are missing).
	insert_disconnected_header(&backend, 3, H256::from([200; 32]), Default::default(), true);

	let gap = backend
		.blockchain()
		.info()
		.block_gap
		.expect("Gap should have been created");
	assert!(matches!(gap.gap_type, BlockGapType::MissingHeaderAndBody));
	assert_eq!(gap.start, 1);
	assert_eq!(gap.end, 2);

	// Re-open the same database as a non-archive node.
	let reopened = reopen_backend(backend.storage.db.clone(), BlocksPruning::Some(100));
	assert!(!reopened.is_archive);

	// Only `MissingBody` gaps are removed — this gap type must be preserved.
	let gap = reopened
		.blockchain()
		.info()
		.block_gap
		.expect("MissingHeaderAndBody gap should be preserved for non-archive nodes");
	assert!(matches!(gap.gap_type, BlockGapType::MissingHeaderAndBody));
	assert_eq!(gap.start, 1);
	assert_eq!(gap.end, 2);
}
}
Loading