Skip to content

Commit cf680ec

Browse files
paritytech-release-backport-bot[bot]lexnvgithub-actions[bot]
authored
[stable2603] Backport #11332 (#11452)
Backport #11332 into `stable2603` from lexnv. See the [documentation](https://github.com/paritytech/polkadot-sdk/blob/master/docs/BACKPORT.md) on how to use this bot. <!-- # To be used by other automation, do not modify: original-pr-number: #${pull_number} --> Signed-off-by: Alexandru Vasile <alexandru.vasile@parity.io> Co-authored-by: Alexandru Vasile <60601340+lexnv@users.noreply.github.com> Co-authored-by: cmd[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 4064890 commit cf680ec

File tree

2 files changed

+202
-0
lines changed

2 files changed

+202
-0
lines changed

prdoc/pr_11332.prdoc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
title: 'client/db: Close missing body gaps for non archive nodes'
2+
doc:
3+
- audience: Node Dev
4+
description: |-
5+
This PR closes missing body gaps in the database for non-archive nodes.
6+
7+
8+
Effectively, a missing body gap cannot be closed on the DB side if the node is non-archive. Since execution is already skipped, the node will close the memory gap in the sync engine; however, the gap remains open in the db.
9+
10+
This leads to wasting resources at every startup:
11+
- client info contains a gap that cannot be filled (since we don't have the state around for execution)
12+
- blocks are fetched from the connected peers
13+
- gap is filled by ignoring blocks in the sync engine
14+
15+
Further, for collators on origin master this causes an infinite loop of sync engine restarts that get punished via banning and disconnecting. For more details and root cause check:
16+
- https://github.com/paritytech/polkadot-sdk/pull/11330
17+
18+
Part of:
19+
- https://github.com/paritytech/polkadot-sdk/issues/11299
20+
crates:
21+
- name: sc-client-db
22+
bump: patch

substrate/client/db/src/lib.rs

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,30 @@ impl<Block: BlockT> Backend<Block> {
13631363
});
13641364
}
13651365

1366+
// Non archive nodes cannot fill the missing block gap with bodies.
1367+
// If the gap is present, it means that every restart will try to fill the gap:
1368+
// - a block request is made for each and every block in the gap
1369+
// - the request is fulfilled putting pressure on the network and other nodes
1370+
// - upon receiving the block, the block cannot be executed since the state
1371+
// of the parent block might have been discarded
1372+
// - then the sync engine closes the gap in memory, but never in DB.
1373+
//
1374+
// This leads to inefficient syncing and high CPU usage on every restart. To mitigate this,
1375+
// remove the gap from the DB if we detect it and the current node is not an archive.
1376+
match (backend.is_archive, info.block_gap) {
1377+
(false, Some(gap)) if matches!(gap.gap_type, BlockGapType::MissingBody) => {
1378+
warn!(
1379+
"Detected a missing body gap for non-archive nodes. Removing the gap={:?}",
1380+
gap
1381+
);
1382+
1383+
db_init_transaction.remove(columns::META, meta_keys::BLOCK_GAP);
1384+
db_init_transaction.remove(columns::META, meta_keys::BLOCK_GAP_VERSION);
1385+
backend.blockchain.update_block_gap(None);
1386+
},
1387+
_ => {},
1388+
}
1389+
13661390
db.commit(db_init_transaction)?;
13671391

13681392
Ok(backend)
@@ -5495,4 +5519,160 @@ pub(crate) mod tests {
54955519
assert!(bc.body(blocks[3]).unwrap().is_some());
54965520
assert!(bc.body(blocks[4]).unwrap().is_some());
54975521
}
5522+
5523+
/// Insert a header without body as best block. This triggers `MissingBody` gap creation
5524+
/// when the parent header exists and `create_gap` is true.
5525+
fn insert_header_no_body_as_best(
5526+
backend: &Backend<Block>,
5527+
number: u64,
5528+
parent_hash: H256,
5529+
) -> H256 {
5530+
use sp_runtime::testing::Digest;
5531+
5532+
let digest = Digest::default();
5533+
let header = Header {
5534+
number,
5535+
parent_hash,
5536+
state_root: Default::default(),
5537+
digest,
5538+
extrinsics_root: Default::default(),
5539+
};
5540+
5541+
let mut op = backend.begin_operation().unwrap();
5542+
// body = None triggers MissingBody gap when parent exists
5543+
op.set_block_data(header.clone(), None, None, None, NewBlockState::Best, true)
5544+
.unwrap();
5545+
backend.commit_operation(op).unwrap();
5546+
5547+
header.hash()
5548+
}
5549+
5550+
/// Re-open a backend from an existing database with the given blocks pruning mode.
5551+
fn reopen_backend(
5552+
db: Arc<dyn sp_database::Database<DbHash>>,
5553+
blocks_pruning: BlocksPruning,
5554+
) -> Backend<Block> {
5555+
let state_pruning = match blocks_pruning {
5556+
BlocksPruning::KeepAll => PruningMode::ArchiveAll,
5557+
BlocksPruning::KeepFinalized => PruningMode::ArchiveCanonical,
5558+
BlocksPruning::Some(n) => PruningMode::blocks_pruning(n),
5559+
};
5560+
Backend::<Block>::new(
5561+
DatabaseSettings {
5562+
trie_cache_maximum_size: Some(16 * 1024 * 1024),
5563+
state_pruning: Some(state_pruning),
5564+
source: DatabaseSource::Custom { db, require_create_flag: false },
5565+
blocks_pruning,
5566+
pruning_filters: Default::default(),
5567+
metrics_registry: None,
5568+
},
5569+
0,
5570+
)
5571+
.unwrap()
5572+
}
5573+
5574+
#[test]
5575+
fn missing_body_gap_is_removed_for_non_archive_node() {
5576+
// Create a non-archive backend and produce a multi-block MissingBody gap.
5577+
let backend = Backend::<Block>::new_test_with_tx_storage(BlocksPruning::Some(100), 0);
5578+
assert!(!backend.is_archive);
5579+
5580+
let genesis_hash = insert_header(&backend, 0, Default::default(), None, Default::default());
5581+
5582+
// Insert blocks 1..3 without bodies — creates a MissingBody gap spanning blocks 1 to 3.
5583+
let hash_1 = insert_header_no_body_as_best(&backend, 1, genesis_hash);
5584+
let hash_2 = insert_header_no_body_as_best(&backend, 2, hash_1);
5585+
insert_header_no_body_as_best(&backend, 3, hash_2);
5586+
5587+
let info = backend.blockchain().info();
5588+
assert!(info.block_gap.is_some(), "MissingBody gap should have been created");
5589+
let gap = info.block_gap.unwrap();
5590+
assert!(matches!(gap.gap_type, BlockGapType::MissingBody));
5591+
assert_eq!(gap.start, 1);
5592+
assert_eq!(gap.end, 3);
5593+
5594+
// Re-open the same database as a non-archive node.
5595+
let db = backend.storage.db.clone();
5596+
let backend = reopen_backend(db, BlocksPruning::Some(100));
5597+
assert!(!backend.is_archive);
5598+
5599+
// The multi-block gap should have been removed on re-open.
5600+
let info = backend.blockchain().info();
5601+
assert!(
5602+
info.block_gap.is_none(),
5603+
"MissingBody gap should be removed for non-archive nodes, got: {:?}",
5604+
info.block_gap,
5605+
);
5606+
}
5607+
5608+
#[test]
5609+
fn missing_body_gap_is_preserved_for_archive_node() {
5610+
// Create a backend with archive pruning and produce a multi-block MissingBody gap.
5611+
let backend = Backend::<Block>::new_test_with_tx_storage(BlocksPruning::KeepAll, 0);
5612+
assert!(backend.is_archive);
5613+
5614+
let genesis_hash = insert_header(&backend, 0, Default::default(), None, Default::default());
5615+
5616+
// Insert blocks 1..3 without bodies — creates a MissingBody gap spanning blocks 1 to 3.
5617+
let hash_1 = insert_header_no_body_as_best(&backend, 1, genesis_hash);
5618+
let hash_2 = insert_header_no_body_as_best(&backend, 2, hash_1);
5619+
insert_header_no_body_as_best(&backend, 3, hash_2);
5620+
5621+
let info = backend.blockchain().info();
5622+
assert!(info.block_gap.is_some(), "MissingBody gap should have been created");
5623+
let gap = info.block_gap.unwrap();
5624+
assert!(matches!(gap.gap_type, BlockGapType::MissingBody));
5625+
assert_eq!(gap.start, 1);
5626+
assert_eq!(gap.end, 3);
5627+
5628+
// Re-open the same database as an archive node.
5629+
let db = backend.storage.db.clone();
5630+
let backend = reopen_backend(db, BlocksPruning::KeepAll);
5631+
assert!(backend.is_archive);
5632+
5633+
// The gap should be preserved for archive nodes.
5634+
let info = backend.blockchain().info();
5635+
assert!(info.block_gap.is_some(), "MissingBody gap should be preserved for archive nodes",);
5636+
let gap = info.block_gap.unwrap();
5637+
assert!(matches!(gap.gap_type, BlockGapType::MissingBody));
5638+
assert_eq!(gap.start, 1);
5639+
assert_eq!(gap.end, 3);
5640+
}
5641+
5642+
#[test]
5643+
fn missing_header_and_body_gap_is_preserved_for_non_archive_node() {
5644+
// Create a non-archive backend and produce a MissingHeaderAndBody gap (from warp sync).
5645+
let backend = Backend::<Block>::new_test_with_tx_storage(BlocksPruning::Some(100), 0);
5646+
assert!(!backend.is_archive);
5647+
5648+
let _genesis_hash =
5649+
insert_header(&backend, 0, Default::default(), None, Default::default());
5650+
5651+
// Insert a disconnected block at height 3 with a fake parent to create a
5652+
// MissingHeaderAndBody gap (blocks 1..2 are missing).
5653+
insert_disconnected_header(&backend, 3, H256::from([200; 32]), Default::default(), true);
5654+
5655+
let info = backend.blockchain().info();
5656+
assert!(info.block_gap.is_some(), "Gap should have been created");
5657+
let gap = info.block_gap.unwrap();
5658+
assert!(matches!(gap.gap_type, BlockGapType::MissingHeaderAndBody));
5659+
assert_eq!(gap.start, 1);
5660+
assert_eq!(gap.end, 2);
5661+
5662+
// Re-open the same database as a non-archive node.
5663+
let db = backend.storage.db.clone();
5664+
let backend = reopen_backend(db, BlocksPruning::Some(100));
5665+
assert!(!backend.is_archive);
5666+
5667+
// The MissingHeaderAndBody gap should NOT be removed — only MissingBody gaps are removed.
5668+
let info = backend.blockchain().info();
5669+
assert!(
5670+
info.block_gap.is_some(),
5671+
"MissingHeaderAndBody gap should be preserved for non-archive nodes",
5672+
);
5673+
let gap = info.block_gap.unwrap();
5674+
assert!(matches!(gap.gap_type, BlockGapType::MissingHeaderAndBody));
5675+
assert_eq!(gap.start, 1);
5676+
assert_eq!(gap.end, 2);
5677+
}
54985678
}

0 commit comments

Comments
 (0)