Skip to content

Commit a5c2afa

Browse files
authored
Fix sending sparse sender chains. (#4787)
## Motivation

`test_end_to_end_repeated_transfers` is flaky, especially when running against the testnet. This is because the following case results in a local node error:

* We are making a block proposal with an incoming message.
* Some validators don't have the sender block.
* We have the sender block, but not the sender chain description.

## Proposal

Fix this case, and add some logging.

## Test Plan

CI, especially against the testnet: #4786

## Release Plan

- These changes should be backported to `testnet_conway` and
- released in a new SDK.

## Links

- [reviewer checklist](https://github.com/linera-io/linera-protocol/blob/main/CONTRIBUTING.md#reviewer-checklist)
1 parent 675be78 commit a5c2afa

File tree

1 file changed

+33
-3
lines changed

1 file changed

+33
-3
lines changed

linera-core/src/updater.rs

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ use crate::{
3636
local_node::LocalNodeClient,
3737
node::{CrossChainMessageDelivery, NodeError, ValidatorNode},
3838
remote_node::RemoteNode,
39+
LocalNodeError,
3940
};
4041

4142
/// The default amount of time we wait for additional validators to contribute
@@ -361,6 +362,10 @@ where
361362
Err(NodeError::WrongRound(_round)) => {
362363
// The proposal is for a different round, so we need to update the validator.
363364
// TODO: this should probably be more specific as to which rounds are retried.
365+
tracing::debug!(
366+
"Wrong round; sending chain {chain_id} to validator {}.",
367+
self.remote_node.public_key
368+
);
364369
self.send_chain_information(
365370
chain_id,
366371
proposal.content.block.height,
@@ -371,7 +376,13 @@ where
371376
Err(NodeError::UnexpectedBlockHeight {
372377
expected_block_height,
373378
found_block_height,
374-
}) if expected_block_height < found_block_height => {
379+
}) if expected_block_height < found_block_height
380+
&& found_block_height == proposal.content.block.height =>
381+
{
382+
tracing::debug!(
383+
"Wrong height; sending chain {chain_id} to validator {}.",
384+
self.remote_node.public_key
385+
);
375386
// The proposal is for a later block height, so we need to update the validator.
376387
self.send_chain_information(
377388
chain_id,
@@ -389,6 +400,10 @@ where
389400
.get(&origin)
390401
.is_none_or(|h| *h < height) =>
391402
{
403+
tracing::debug!(
404+
"Missing cross-chain update; sending chain {origin} to validator {}.",
405+
self.remote_node.public_key
406+
);
392407
sent_cross_chain_updates.insert(origin, height);
393408
// Some received certificates may be missing for this validator
394409
// (e.g. to create the chain or make the balance sufficient) so we are going to
@@ -407,6 +422,10 @@ where
407422
.map(|event_id| event_id.chain_id)
408423
.filter(|chain_id| !publisher_chain_ids_sent.contains(chain_id))
409424
.collect::<BTreeSet<_>>();
425+
tracing::debug!(
426+
"Missing events; sending chains {new_chain_ids:?} to validator {}",
427+
self.remote_node.public_key
428+
);
410429
ensure!(
411430
!new_chain_ids.is_empty(),
412431
NodeError::EventsNotFound(event_ids)
@@ -430,6 +449,7 @@ where
430449
Err(NodeError::BlobsNotFound(_) | NodeError::InactiveChain(_))
431450
if !blob_ids.is_empty() =>
432451
{
452+
tracing::debug!("Missing blobs");
433453
// For `BlobsNotFound`, we assume that the local node should already be
434454
// updated with the needed blobs, so sending the chain information about the
435455
// certificates that last used the blobs to the validator node should be enough.
@@ -465,6 +485,7 @@ where
465485
.and_modify(|h| *h = block_height.max(*h))
466486
.or_insert(block_height);
467487
}
488+
tracing::debug!("Sending chains {chain_heights:?}");
468489

469490
self.send_chain_info_up_to_heights(
470491
chain_heights,
@@ -523,7 +544,11 @@ where
523544
&& event_id.chain_id == self.admin_id
524545
}) =>
525546
{
526-
// The chain is missing epoch events. Send all blocks.
547+
if chain_id != self.admin_id {
548+
tracing::error!(
549+
"Missing epochs were not handled by send_confirmed_certificate."
550+
);
551+
}
527552
let query = ChainInfoQuery::new(chain_id);
528553
self.remote_node.handle_chain_info_query(query).await?
529554
}
@@ -586,7 +611,12 @@ where
586611
};
587612
let (remote_height, remote_round) = (info.next_block_height, info.manager.current_round);
588613
let query = ChainInfoQuery::new(chain_id).with_manager_values();
589-
let local_info = self.local_node.handle_chain_info_query(query).await?.info;
614+
let local_info = match self.local_node.handle_chain_info_query(query).await {
615+
Ok(response) => response.info,
616+
// We don't have the full chain description.
617+
Err(LocalNodeError::BlobsNotFound(_)) => return Ok(()),
618+
Err(error) => return Err(error.into()),
619+
};
590620
let manager = local_info.manager;
591621
if local_info.next_block_height != remote_height || manager.current_round <= remote_round {
592622
return Ok(());

0 commit comments

Comments
 (0)