|
97 | 97 | ) -> Result<sc_consensus::ImportResult, Self::Error> { |
98 | 98 | // If the channel exists and it is required to execute the block, we will execute the block |
99 | 99 | // here. This is done to collect the storage proof and to prevent re-execution, we push |
100 | | - // downwards the state changes. `StateAction::ApplyChanges` is ignored, because it either |
101 | | - // means that the node produced the block itself or the block was imported via state sync. |
102 | | - if !self.sender.is_closed() && !matches!(params.state_action, StateAction::ApplyChanges(_)) |
| 100 | + // downwards the state changes. |
| 101 | + // |
| 102 | + // The following states are ignored: |
| 103 | + // - `StateAction::ApplyChanges`: means that the node produced the block itself or the |
| 104 | + // block was imported via state sync. |
| 105 | + // - `StateAction::Skip`: means that the block should be skipped. This is evident in the |
| 106 | + // context of gap-sync with collators running in non-archive modes. The state of the |
| 107 | + // parent block has already been discarded and therefore any import would fail. |
| 108 | + if !self.sender.is_closed() && |
| 109 | + !matches!(params.state_action, StateAction::ApplyChanges(_) | StateAction::Skip) |
103 | 110 | { |
104 | 111 | let mut runtime_api = self.client.runtime_api(); |
105 | 112 |
|
@@ -143,3 +150,136 @@ where |
143 | 150 | self.inner.import_block(params).await.map_err(Into::into) |
144 | 151 | } |
145 | 152 | } |
| 153 | + |
| 154 | +#[cfg(test)] |
| 155 | +mod tests { |
| 156 | + use super::*; |
| 157 | + use codec::Encode; |
| 158 | + use cumulus_test_client::{ |
| 159 | + runtime::Block, DefaultTestClientBuilderExt, InitBlockBuilder, TestClientBuilder, |
| 160 | + TestClientBuilderExt, |
| 161 | + }; |
| 162 | + use cumulus_test_relay_sproof_builder::RelayStateSproofBuilder; |
| 163 | + use polkadot_primitives::HeadData; |
| 164 | + use sc_consensus::{BlockImportParams, ImportResult, StateAction}; |
| 165 | + use sp_blockchain::HeaderBackend; |
| 166 | + use sp_consensus::BlockOrigin; |
| 167 | + |
| 168 | + fn sproof_with_best_parent(client: &cumulus_test_client::Client) -> RelayStateSproofBuilder { |
| 169 | + let best_hash = client.info().best_hash; |
| 170 | + let header = client.header(best_hash).ok().flatten().expect("No header for best block"); |
| 171 | + let mut builder = RelayStateSproofBuilder::default(); |
| 172 | + builder.para_id = cumulus_test_client::runtime::PARACHAIN_ID.into(); |
| 173 | + builder.included_para_head = Some(HeadData(header.encode())); |
| 174 | + builder |
| 175 | + } |
| 176 | + |
| 177 | + /// Mock inner block import that always succeeds. |
| 178 | + #[derive(Clone)] |
| 179 | + struct MockBlockImport; |
| 180 | + |
| 181 | + #[async_trait::async_trait] |
| 182 | + impl BlockImport<Block> for MockBlockImport { |
| 183 | + type Error = sp_consensus::Error; |
| 184 | + |
| 185 | + async fn check_block( |
| 186 | + &self, |
| 187 | + _block: sc_consensus::BlockCheckParams<Block>, |
| 188 | + ) -> Result<ImportResult, Self::Error> { |
| 189 | + Ok(ImportResult::imported(false)) |
| 190 | + } |
| 191 | + |
| 192 | + async fn import_block( |
| 193 | + &self, |
| 194 | + _block: BlockImportParams<Block>, |
| 195 | + ) -> Result<ImportResult, Self::Error> { |
| 196 | + Ok(ImportResult::imported(true)) |
| 197 | + } |
| 198 | + } |
| 199 | + |
| 200 | + /// Regression test for the gap-sync infinite loop issue. |
| 201 | + /// |
| 202 | + /// When a non-archive collator has a block gap of size 1, gap-sync downloads |
| 203 | + /// the block and marks it with `skip_execution: true` (which translates to |
| 204 | + /// `StateAction::Skip`). Before the fix, `SlotBasedBlockImport` would attempt |
| 205 | + /// to execute such blocks, fail with a consensus error ("State already |
| 206 | + /// discarded for parent"), and trigger a chain-sync restart that re-creates |
| 207 | + /// the same gap — leading to an infinite retry loop. |
| 208 | + /// |
| 209 | + /// This test verifies that `StateAction::Skip` blocks are forwarded to the |
| 210 | + /// inner block import without attempting runtime execution. |
| 211 | + #[tokio::test] |
| 212 | + async fn gap_sync_block_with_skip_execution_does_not_attempt_runtime_call() { |
| 213 | + sp_tracing::try_init_simple(); |
| 214 | + |
| 215 | + let client = Arc::new(TestClientBuilder::new().build()); |
| 216 | + |
| 217 | + // Build a valid block so we have realistic headers/bodies. |
| 218 | + let sproof = sproof_with_best_parent(&client); |
| 219 | + let block_builder_data = client.init_block_builder(None, sproof); |
| 220 | + let block = block_builder_data.block_builder.build().unwrap().block; |
| 221 | + |
| 222 | + let (slot_based_import, mut handle) = |
| 223 | + SlotBasedBlockImport::new(MockBlockImport, client.clone()); |
| 224 | + |
| 225 | + // Simulate the gap-sync scenario: a block arrives with StateAction::Skip |
| 226 | + // because the parent state has been pruned. |
| 227 | + let mut params = BlockImportParams::new(BlockOrigin::NetworkInitialSync, block.header); |
| 228 | + params.body = Some(block.extrinsics); |
| 229 | + params.state_action = StateAction::Skip; |
| 230 | + params.import_existing = true; |
| 231 | + |
| 232 | + // Before the fix, this would fail with a consensus error because |
| 233 | + // SlotBasedBlockImport would try to call `runtime_api.execute_block()` |
| 234 | + // on the parent hash whose state is no longer available. |
| 235 | + // |
| 236 | + // After the fix, StateAction::Skip is recognized and the block is |
| 237 | + // forwarded directly to the inner import without execution. |
| 238 | + let result = slot_based_import.import_block(params).await; |
| 239 | + assert!(result.is_ok(), "Gap-sync block with StateAction::Skip must not fail: {result:?}"); |
| 240 | + |
| 241 | + // The channel must be empty — execution should have been skipped entirely, |
| 242 | + // so no (block, proof) was sent. This is the key assertion: without the |
| 243 | + // StateAction::Skip guard, execute_block() would run and send a message. |
| 244 | + // |
| 245 | + // Drop the sender side so the channel closes, then verify no message was queued. |
| 246 | + drop(slot_based_import); |
| 247 | + assert!( |
| 248 | + handle.receiver.next().await.is_none(), |
| 249 | + "No block+proof should be sent through the channel for StateAction::Skip" |
| 250 | + ); |
| 251 | + } |
| 252 | + |
| 253 | + /// Verify that `StateAction::Execute` still triggers runtime execution. |
| 254 | + /// |
| 255 | + /// This complements the gap-sync regression test by ensuring we did not |
| 256 | + /// accidentally disable execution for normal blocks. |
| 257 | + #[tokio::test] |
| 258 | + async fn normal_block_with_execute_action_triggers_runtime_execution() { |
| 259 | + sp_tracing::try_init_simple(); |
| 260 | + |
| 261 | + let client = Arc::new(TestClientBuilder::new().build()); |
| 262 | + |
| 263 | + let sproof = sproof_with_best_parent(&client); |
| 264 | + let block_builder_data = client.init_block_builder(None, sproof); |
| 265 | + let block = block_builder_data.block_builder.build().unwrap().block; |
| 266 | + |
| 267 | + let (slot_based_import, mut handle) = |
| 268 | + SlotBasedBlockImport::new(MockBlockImport, client.clone()); |
| 269 | + |
| 270 | + // Normal import with StateAction::Execute should trigger execution |
| 271 | + // and send the block + proof through the channel. |
| 272 | + let mut params = |
| 273 | + BlockImportParams::new(BlockOrigin::NetworkInitialSync, block.header.clone()); |
| 274 | + params.body = Some(block.extrinsics.clone()); |
| 275 | + params.state_action = StateAction::Execute; |
| 276 | + |
| 277 | + let result = slot_based_import.import_block(params).await; |
| 278 | + assert!(result.is_ok(), "Normal block import should succeed: {result:?}"); |
| 279 | + |
| 280 | + // The block and proof should have been sent through the channel, |
| 281 | + // confirming that execution actually happened. |
| 282 | + let (received_block, _proof) = handle.next().await; |
| 283 | + assert_eq!(*received_block.header(), block.header); |
| 284 | + } |
| 285 | +} |
0 commit comments