Skip to content

Commit 528101f

Browse files
paritytech-release-backport-bot[bot]lexnvgithub-actions[bot]
authored
[stable2506] Backport #11330 (#11356)
Backport #11330 into `stable2506` from lexnv. See the [documentation](https://github.com/paritytech/polkadot-sdk/blob/master/docs/BACKPORT.md) on how to use this bot. <!-- # To be used by other automation, do not modify: original-pr-number: #${pull_number} --> Signed-off-by: Alexandru Vasile <alexandru.vasile@parity.io> Co-authored-by: Alexandru Vasile <60601340+lexnv@users.noreply.github.com> Co-authored-by: cmd[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent fbc1a5c commit 528101f

File tree

2 files changed

+153
-3
lines changed

2 files changed

+153
-3
lines changed

cumulus/client/consensus/aura/src/collators/slot_based/block_import.rs

Lines changed: 143 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,16 @@ where
9797
) -> Result<sc_consensus::ImportResult, Self::Error> {
9898
// If the channel exists and it is required to execute the block, we will execute the block
9999
// here. This is done to collect the storage proof and to prevent re-execution, we push
100-
// downwards the state changes. `StateAction::ApplyChanges` is ignored, because it either
101-
// means that the node produced the block itself or the block was imported via state sync.
102-
if !self.sender.is_closed() && !matches!(params.state_action, StateAction::ApplyChanges(_))
100+
// downwards the state changes.
101+
//
102+
// The following states are ignored:
103+
// - `StateAction::ApplyChanges`: means that the node produced the block itself or the
104+
// block was imported via state sync.
105+
// - `StateAction::Skip`: means that the block should be skipped. This is evident in the
106+
// context of gap-sync with collators running in non-archive modes. The state of the
107+
// parent block has already been discarded and therefore any import would fail.
108+
if !self.sender.is_closed() &&
109+
!matches!(params.state_action, StateAction::ApplyChanges(_) | StateAction::Skip)
103110
{
104111
let mut runtime_api = self.client.runtime_api();
105112

@@ -143,3 +150,136 @@ where
143150
self.inner.import_block(params).await.map_err(Into::into)
144151
}
145152
}
153+
154+
#[cfg(test)]
155+
mod tests {
156+
use super::*;
157+
use codec::Encode;
158+
use cumulus_test_client::{
159+
runtime::Block, DefaultTestClientBuilderExt, InitBlockBuilder, TestClientBuilder,
160+
TestClientBuilderExt,
161+
};
162+
use cumulus_test_relay_sproof_builder::RelayStateSproofBuilder;
163+
use polkadot_primitives::HeadData;
164+
use sc_consensus::{BlockImportParams, ImportResult, StateAction};
165+
use sp_blockchain::HeaderBackend;
166+
use sp_consensus::BlockOrigin;
167+
168+
fn sproof_with_best_parent(client: &cumulus_test_client::Client) -> RelayStateSproofBuilder {
169+
let best_hash = client.info().best_hash;
170+
let header = client.header(best_hash).ok().flatten().expect("No header for best block");
171+
let mut builder = RelayStateSproofBuilder::default();
172+
builder.para_id = cumulus_test_client::runtime::PARACHAIN_ID.into();
173+
builder.included_para_head = Some(HeadData(header.encode()));
174+
builder
175+
}
176+
177+
/// Mock inner block import that always succeeds.
178+
#[derive(Clone)]
179+
struct MockBlockImport;
180+
181+
#[async_trait::async_trait]
182+
impl BlockImport<Block> for MockBlockImport {
183+
type Error = sp_consensus::Error;
184+
185+
async fn check_block(
186+
&self,
187+
_block: sc_consensus::BlockCheckParams<Block>,
188+
) -> Result<ImportResult, Self::Error> {
189+
Ok(ImportResult::imported(false))
190+
}
191+
192+
async fn import_block(
193+
&self,
194+
_block: BlockImportParams<Block>,
195+
) -> Result<ImportResult, Self::Error> {
196+
Ok(ImportResult::imported(true))
197+
}
198+
}
199+
200+
/// Regression test for the gap-sync infinite loop issue.
201+
///
202+
/// When a non-archive collator has a block gap of size 1, gap-sync downloads
203+
/// the block and marks it with `skip_execution: true` (which translates to
204+
/// `StateAction::Skip`). Before the fix, `SlotBasedBlockImport` would attempt
205+
/// to execute such blocks, fail with a consensus error ("State already
206+
/// discarded for parent"), and trigger a chain-sync restart that re-creates
207+
/// the same gap — leading to an infinite retry loop.
208+
///
209+
/// This test verifies that `StateAction::Skip` blocks are forwarded to the
210+
/// inner block import without attempting runtime execution.
211+
#[tokio::test]
212+
async fn gap_sync_block_with_skip_execution_does_not_attempt_runtime_call() {
213+
sp_tracing::try_init_simple();
214+
215+
let client = Arc::new(TestClientBuilder::new().build());
216+
217+
// Build a valid block so we have realistic headers/bodies.
218+
let sproof = sproof_with_best_parent(&client);
219+
let block_builder_data = client.init_block_builder(None, sproof);
220+
let block = block_builder_data.block_builder.build().unwrap().block;
221+
222+
let (slot_based_import, mut handle) =
223+
SlotBasedBlockImport::new(MockBlockImport, client.clone());
224+
225+
// Simulate the gap-sync scenario: a block arrives with StateAction::Skip
226+
// because the parent state has been pruned.
227+
let mut params = BlockImportParams::new(BlockOrigin::NetworkInitialSync, block.header);
228+
params.body = Some(block.extrinsics);
229+
params.state_action = StateAction::Skip;
230+
params.import_existing = true;
231+
232+
// Before the fix, this would fail with a consensus error because
233+
// SlotBasedBlockImport would try to call `runtime_api.execute_block()`
234+
// on the parent hash whose state is no longer available.
235+
//
236+
// After the fix, StateAction::Skip is recognized and the block is
237+
// forwarded directly to the inner import without execution.
238+
let result = slot_based_import.import_block(params).await;
239+
assert!(result.is_ok(), "Gap-sync block with StateAction::Skip must not fail: {result:?}");
240+
241+
// The channel must be empty — execution should have been skipped entirely,
242+
// so no (block, proof) was sent. This is the key assertion: without the
243+
// StateAction::Skip guard, execute_block() would run and send a message.
244+
//
245+
// Drop the sender side so the channel closes, then verify no message was queued.
246+
drop(slot_based_import);
247+
assert!(
248+
handle.receiver.next().await.is_none(),
249+
"No block+proof should be sent through the channel for StateAction::Skip"
250+
);
251+
}
252+
253+
/// Verify that `StateAction::Execute` still triggers runtime execution.
254+
///
255+
/// This complements the gap-sync regression test by ensuring we did not
256+
/// accidentally disable execution for normal blocks.
257+
#[tokio::test]
258+
async fn normal_block_with_execute_action_triggers_runtime_execution() {
259+
sp_tracing::try_init_simple();
260+
261+
let client = Arc::new(TestClientBuilder::new().build());
262+
263+
let sproof = sproof_with_best_parent(&client);
264+
let block_builder_data = client.init_block_builder(None, sproof);
265+
let block = block_builder_data.block_builder.build().unwrap().block;
266+
267+
let (slot_based_import, mut handle) =
268+
SlotBasedBlockImport::new(MockBlockImport, client.clone());
269+
270+
// Normal import with StateAction::Execute should trigger execution
271+
// and send the block + proof through the channel.
272+
let mut params =
273+
BlockImportParams::new(BlockOrigin::NetworkInitialSync, block.header.clone());
274+
params.body = Some(block.extrinsics.clone());
275+
params.state_action = StateAction::Execute;
276+
277+
let result = slot_based_import.import_block(params).await;
278+
assert!(result.is_ok(), "Normal block import should succeed: {result:?}");
279+
280+
// The block and proof should have been sent through the channel,
281+
// confirming that execution actually happened.
282+
let (received_block, _proof) = handle.next().await;
283+
assert_eq!(*received_block.header(), block.header);
284+
}
285+
}

prdoc/pr_11330.prdoc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
title: 'aura/import: Skip block execution when collators have no parent block state'
2+
doc:
3+
- audience: Node Dev
4+
description: "Skip block execution in `SlotBasedBlockImport` when gap-sync marks
5+
blocks with `StateAction::Skip`. This fixes an infinite retry loop where non-archive
6+
collators failed to import gap-sync blocks because the parent state was already
7+
pruned, causing repeated sync restarts and eventual peer bans."
8+
crates:
9+
- name: cumulus-client-consensus-aura
10+
bump: patch

0 commit comments

Comments
 (0)