Skip to content

Commit e92fd4f

Browse files
authored
Sync if we learn about a new round while requesting a timeout. (#4912)
## Motivation If a client requests a timeout certificate and the validators return a `WrongRound` error with a _higher_ round, the client currently fails to update its local node about the new round, and thus cannot propose a block. ## Proposal Synchronize the chain state if we get a `WrongRound` error with a round higher than the one we see locally. ## Test Plan A regression test was added (by Claude). ## Release Plan - These changes should be ported to `testnet_conway` and * released in a new SDK. ## Links - Fixes #4909. - [reviewer checklist](https://github.com/linera-io/linera-protocol/blob/main/CONTRIBUTING.md#reviewer-checklist)
1 parent 09ad671 commit e92fd4f

File tree

3 files changed

+226
-39
lines changed

3 files changed

+226
-39
lines changed

linera-core/src/client/mod.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ pub struct Client<Env: Environment> {
259259
environment: Env,
260260
/// Local node to manage the execution state and the local storage of the chains that we are
261261
/// tracking.
262-
local_node: LocalNodeClient<Env::Storage>,
262+
pub local_node: LocalNodeClient<Env::Storage>,
263263
/// Manages the requests sent to validator nodes.
264264
requests_scheduler: RequestsScheduler<Env>,
265265
/// The admin chain ID.
@@ -673,7 +673,7 @@ impl<Env: Environment> Client<Env> {
673673
/// Submits a validated block for finalization and returns the confirmed block certificate.
674674
#[instrument(level = "trace", skip_all)]
675675
async fn finalize_block(
676-
&self,
676+
self: &Arc<Self>,
677677
committee: &Committee,
678678
certificate: ValidatedBlockCertificate,
679679
) -> Result<ConfirmedBlockCertificate, chain_client::Error> {
@@ -694,7 +694,7 @@ impl<Env: Environment> Client<Env> {
694694
/// Submits a block proposal to the validators.
695695
#[instrument(level = "trace", skip_all)]
696696
async fn submit_block_proposal<T: ProcessableCertificate>(
697-
&self,
697+
self: &Arc<Self>,
698698
committee: &Committee,
699699
proposal: Box<BlockProposal>,
700700
value: T,
@@ -718,7 +718,7 @@ impl<Env: Environment> Client<Env> {
718718
/// Broadcasts certified blocks to validators.
719719
#[instrument(level = "trace", skip_all, fields(chain_id, block_height, delivery))]
720720
async fn communicate_chain_updates(
721-
&self,
721+
self: &Arc<Self>,
722722
committee: &Committee,
723723
chain_id: ChainId,
724724
height: BlockHeight,
@@ -732,7 +732,7 @@ impl<Env: Environment> Client<Env> {
732732
|remote_node| {
733733
let mut updater = ValidatorUpdater {
734734
remote_node,
735-
local_node: self.local_node.clone(),
735+
client: self.clone(),
736736
admin_id: self.admin_id,
737737
};
738738
Box::pin(async move {
@@ -754,7 +754,7 @@ impl<Env: Environment> Client<Env> {
754754
/// and returns a certificate.
755755
#[instrument(level = "trace", skip_all)]
756756
async fn communicate_chain_action<T: CertificateValue>(
757-
&self,
757+
self: &Arc<Self>,
758758
committee: &Committee,
759759
action: CommunicateAction,
760760
value: T,
@@ -767,7 +767,7 @@ impl<Env: Environment> Client<Env> {
767767
|remote_node| {
768768
let mut updater = ValidatorUpdater {
769769
remote_node,
770-
local_node: self.local_node.clone(),
770+
client: self.clone(),
771771
admin_id: self.admin_id,
772772
};
773773
let action = action.clone();
@@ -1202,7 +1202,7 @@ impl<Env: Environment> Client<Env> {
12021202
/// Downloads any certificates from the specified validator that we are missing for the given
12031203
/// chain, and processes them.
12041204
#[instrument(level = "trace", skip(self, remote_node, chain_id))]
1205-
async fn synchronize_chain_state_from(
1205+
pub(crate) async fn synchronize_chain_state_from(
12061206
&self,
12071207
remote_node: &RemoteNode<Env::ValidatorNode>,
12081208
chain_id: ChainId,

linera-core/src/unit_tests/client_tests.rs

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2198,6 +2198,111 @@ where
21982198
Ok(())
21992199
}
22002200

2201+
#[test_case(MemoryStorageBuilder::default(); "memory")]
2202+
#[cfg_attr(feature = "storage-service", test_case(ServiceStorageBuilder::new(); "storage_service"))]
2203+
#[cfg_attr(feature = "rocksdb", test_case(RocksDbStorageBuilder::new().await; "rocks_db"))]
2204+
#[cfg_attr(feature = "dynamodb", test_case(DynamoDbStorageBuilder::default(); "dynamo_db"))]
2205+
#[cfg_attr(feature = "scylladb", test_case(ScyllaDbStorageBuilder::default(); "scylla_db"))]
2206+
#[test_log::test(tokio::test)]
2207+
async fn test_request_leader_timeout_client_behind_validators<B>(
2208+
storage_builder: B,
2209+
) -> anyhow::Result<()>
2210+
where
2211+
B: StorageBuilder,
2212+
{
2213+
let signer = InMemorySigner::new(None);
2214+
let clock = storage_builder.clock().clone();
2215+
let mut builder = TestBuilder::new(storage_builder, 4, 1, signer).await?;
2216+
let client = builder.add_root_chain(1, Amount::from_tokens(3)).await?;
2217+
let observer = builder.add_root_chain(2, Amount::ZERO).await?;
2218+
let chain_id = client.chain_id();
2219+
let observer_id = observer.chain_id();
2220+
let owner0 = client.identity().await.unwrap();
2221+
let owner1 = AccountSecretKey::generate().public().into();
2222+
2223+
// Set up multi-owner chain.
2224+
let owners = [(owner0, 100), (owner1, 100)];
2225+
let ownership = ChainOwnership::multiple(owners, 0, TimeoutConfig::default());
2226+
client.change_ownership(ownership.clone()).await.unwrap();
2227+
2228+
// Advance to a round where owner1 is the leader (so owner0 is not).
2229+
let round_where_owner0_not_leader = loop {
2230+
let manager = client.chain_info().await.unwrap().manager;
2231+
if manager.leader == Some(owner1) {
2232+
break manager.current_round;
2233+
}
2234+
clock.set(manager.round_timeout.unwrap());
2235+
client.request_leader_timeout().await.unwrap();
2236+
};
2237+
let round_number = match round_where_owner0_not_leader {
2238+
Round::SingleLeader(n) => n,
2239+
round => panic!("Unexpected round {round:?}"),
2240+
};
2241+
2242+
// Now create a second client on the same chain to advance validators ahead
2243+
// to the next round, where owner0 will be the leader.
2244+
let client2 = builder
2245+
.make_client(chain_id, None, BlockHeight::ZERO)
2246+
.await?;
2247+
2248+
// Sync client2 to get the current state.
2249+
client2.synchronize_from_validators().await?;
2250+
2251+
// Advance validators one more round using client2.
2252+
let timeout = client2
2253+
.chain_info()
2254+
.await
2255+
.unwrap()
2256+
.manager
2257+
.round_timeout
2258+
.expect("round_timeout should be set after sync");
2259+
clock.set(timeout);
2260+
client2.request_leader_timeout().await.unwrap();
2261+
2262+
// Validators are now in round_number + 1.
2263+
let validator_round = Round::SingleLeader(round_number + 1);
2264+
builder
2265+
.check_that_validators_are_in_round(chain_id, BlockHeight::from(1), validator_round, 3)
2266+
.await;
2267+
2268+
// At this point:
2269+
// - The client's local state shows it's in round_number (where owner1 is the leader).
2270+
// - The validators are in round_number + 1 (where owner0 is the leader).
2271+
// When the client tries to transfer, it will see it's not the leader in its current round
2272+
// and will try to request a timeout. That timeout request for round_number will be rejected
2273+
// by validators (who are in round_number + 1). The client should handle this error,
2274+
// sync to the actual round, and complete the transfer.
2275+
2276+
let result = client
2277+
.transfer(
2278+
AccountOwner::CHAIN,
2279+
Amount::ONE,
2280+
Account::chain(observer_id),
2281+
)
2282+
.await;
2283+
2284+
// The transfer should succeed after the client discovers it's actually the leader in the validator's current round.
2285+
match result {
2286+
Ok(ClientOutcome::Committed(_)) => {
2287+
// Success! The client handled the round mismatch and completed the transfer.
2288+
}
2289+
Ok(ClientOutcome::WaitForTimeout(_)) => {
2290+
panic!(
2291+
"Transfer returned WaitForTimeout, but the client should have discovered \
2292+
it's the leader in the validator's current round and completed the transfer."
2293+
);
2294+
}
2295+
Err(e) => {
2296+
panic!(
2297+
"Transfer failed with error: {e:?}. The client should have handled the \
2298+
round mismatch automatically by syncing with validators."
2299+
);
2300+
}
2301+
}
2302+
2303+
Ok(())
2304+
}
2305+
22012306
#[test_case(MemoryStorageBuilder::default(); "memory")]
22022307
#[cfg_attr(feature = "storage-service", test_case(ServiceStorageBuilder::new(); "storage_service"))]
22032308
#[cfg_attr(feature = "rocksdb", test_case(RocksDbStorageBuilder::new().await; "rocks_db"))]

0 commit comments

Comments
 (0)