Skip to content

Commit 366cf06

Browse files
authored
Make fault_type part of LocalValidatorClient. (#4455)
## Motivation The `linera-core` unit tests don't run a whole network, and instead wrap a worker in a mutex inside a clonable `LocalValidatorClient` struct. To simulate faults, a `FaultType` enum value is stored alongside the worker inside the mutex. Since `ChainClient` sometimes cancels tasks that wait for a response from the validator, and we don't want to cancel the actual worker code in random places, we `spawn` tasks with the calls to the worker. (Maybe with the chain state actors that isn't necessary anymore? But that's a bigger discussion, and such a change could cause new flakiness if we're missing something.) These spawned tasks have a handle on the mutex with the worker state and the fault type. That means the spawned tasks can outlive calls to `ChainClient` functions in the tests. A `set_fault_type` call after such a function has returned can still affect how the spawned tasks behave. I strongly suspect this is what causes the very rare `test_memory_skipping_proposal` failure in CI, where we observe that a validator that should have had fault type `DontProcessValidated` ends up processing a validated block certificate anyway, because _after_ the client call that had spawned that task, we call `set_fault_type(… Honest)`. ## Proposal Put the fault type in the `LocalValidatorClient`, so it is copied before the task is spawned. ## Test Plan CI I will close #4422; let's reopen it if it happens again anyway. ## Release Plan - These changes should be backported to the testnet and devnet. ## Links - Fixes #4422. - [reviewer checklist](https://github.com/linera-io/linera-protocol/blob/main/CONTRIBUTING.md#reviewer-checklist)
1 parent b448c8f commit 366cf06

File tree

3 files changed

+134
-190
lines changed

3 files changed

+134
-190
lines changed

linera-core/src/unit_tests/client_tests.rs

Lines changed: 45 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -418,16 +418,16 @@ where
418418
);
419419

420420
// We need at least three validators for making an operation.
421-
builder.set_fault_type([0, 1], FaultType::Offline).await;
421+
builder.set_fault_type([0, 1], FaultType::Offline);
422422
let result = client.burn(AccountOwner::CHAIN, Amount::ONE).await;
423423
assert_matches!(
424424
result,
425425
Err(ChainClientError::CommunicationError(
426426
CommunicationError::Trusted(ClientIoError { .. }),
427427
))
428428
);
429-
builder.set_fault_type([0, 1], FaultType::Honest).await;
430-
builder.set_fault_type([2, 3], FaultType::Offline).await;
429+
builder.set_fault_type([0, 1], FaultType::Honest);
430+
builder.set_fault_type([2, 3], FaultType::Offline);
431431
assert_matches!(
432432
sender.burn(AccountOwner::CHAIN, Amount::ONE).await,
433433
Err(ChainClientError::CommunicationError(
@@ -437,9 +437,7 @@ where
437437

438438
// Half the validators voted for one block, half for the other. We need to make a proposal in
439439
// the next round to succeed.
440-
builder
441-
.set_fault_type([0, 1, 2, 3], FaultType::Honest)
442-
.await;
440+
builder.set_fault_type([0, 1, 2, 3], FaultType::Honest);
443441
client.synchronize_from_validators().await.unwrap();
444442
client.process_inbox().await.unwrap();
445443
assert_eq!(
@@ -805,7 +803,7 @@ where
805803
{
806804
let signer = InMemorySigner::new(None);
807805
let mut builder = TestBuilder::new(storage_builder, 4, 0, signer).await?;
808-
builder.set_fault_type([0, 1], FaultType::Malicious).await;
806+
builder.set_fault_type([0, 1], FaultType::Malicious);
809807
let chain_1 = builder.add_root_chain(1, Amount::from_tokens(4)).await?;
810808
let chain_2 = builder.add_root_chain(2, Amount::from_tokens(4)).await?;
811809
let result = chain_1
@@ -1404,7 +1402,7 @@ where
14041402
);
14051403

14061404
// Take one validator down
1407-
builder.set_fault_type([2], FaultType::Offline).await;
1405+
builder.set_fault_type([2], FaultType::Offline);
14081406

14091407
// Publish blob on chain 1
14101408
let publish_certificate = client_1a
@@ -1416,9 +1414,9 @@ where
14161414
.requires_or_creates_blob(&blob0_id));
14171415

14181416
// Validators goes back up
1419-
builder.set_fault_type([2], FaultType::Honest).await;
1417+
builder.set_fault_type([2], FaultType::Honest);
14201418
// But another one goes down
1421-
builder.set_fault_type([3], FaultType::Offline).await;
1419+
builder.set_fault_type([3], FaultType::Offline);
14221420

14231421
// Try to read the blob. This is a different client but on the same chain, so when we
14241422
// synchronize this with the validators before executing the block, we'll actually download
@@ -1435,9 +1433,7 @@ where
14351433
// Validators 0, 1, 2 now don't process validated block certificates. Client 2A tries to
14361434
// commit a block that reads blob 0 and publishes blob 1. Client 2A will have that block
14371435
// locked now, but the validators won't.
1438-
builder
1439-
.set_fault_type([0, 1, 2], FaultType::DontProcessValidated)
1440-
.await;
1436+
builder.set_fault_type([0, 1, 2], FaultType::DontProcessValidated);
14411437

14421438
client_2a.synchronize_from_validators().await.unwrap();
14431439
let blob1 = Blob::new_data(b"blob1".to_vec());
@@ -1465,8 +1461,8 @@ where
14651461
}
14661462

14671463
// Now 2 goes offline and the other validators are working again.
1468-
builder.set_fault_type([2], FaultType::Offline).await;
1469-
builder.set_fault_type([0, 1, 3], FaultType::Honest).await;
1464+
builder.set_fault_type([2], FaultType::Offline);
1465+
builder.set_fault_type([0, 1, 3], FaultType::Honest);
14701466

14711467
// We make validator 3 (who does not have the block proposal) process the validated block.
14721468
let info2_a = client_2a.chain_info_with_manager_values().await?;
@@ -1575,7 +1571,7 @@ where
15751571
client2_b.set_preferred_owner(owner2_b);
15761572

15771573
// Take one validator down
1578-
builder.set_fault_type([3], FaultType::Offline).await;
1574+
builder.set_fault_type([3], FaultType::Offline);
15791575

15801576
let blob0_bytes = b"blob0".to_vec();
15811577
let blob0_id = Blob::new(BlobContent::new_data(blob0_bytes.clone())).id();
@@ -1589,9 +1585,7 @@ where
15891585
.block()
15901586
.requires_or_creates_blob(&blob0_id));
15911587

1592-
builder
1593-
.set_fault_type([0, 1, 2], FaultType::DontProcessValidated)
1594-
.await;
1588+
builder.set_fault_type([0, 1, 2], FaultType::DontProcessValidated);
15951589

15961590
client2_a.synchronize_from_validators().await.unwrap();
15971591
let blob1 = Blob::new_data(b"blob1".to_vec());
@@ -1630,8 +1624,8 @@ where
16301624
assert!(validator_manager.requested_locking.is_none());
16311625
}
16321626

1633-
builder.set_fault_type([2], FaultType::Offline).await;
1634-
builder.set_fault_type([0, 1, 3], FaultType::Honest).await;
1627+
builder.set_fault_type([2], FaultType::Offline);
1628+
builder.set_fault_type([0, 1, 3], FaultType::Honest);
16351629

16361630
client2_b.prepare_chain().await.unwrap();
16371631
let recipient = Account::burn_address(client2_b.chain_id());
@@ -1700,19 +1694,15 @@ where
17001694
client2.synchronize_from_validators().await.unwrap();
17011695

17021696
// Client 1 makes a proposal to only validators 0 and 1.
1703-
builder
1704-
.set_fault_type([2, 3], FaultType::OfflineWithInfo)
1705-
.await;
1697+
builder.set_fault_type([2, 3], FaultType::OfflineWithInfo);
17061698
assert!(client1
17071699
.burn(AccountOwner::CHAIN, Amount::from_millis(1))
17081700
.await
17091701
.is_err());
17101702

17111703
// Client 2's proposal reaches only 2 and 3.
1712-
builder
1713-
.set_fault_type([0, 1], FaultType::OfflineWithInfo)
1714-
.await;
1715-
builder.set_fault_type([2, 3], FaultType::Honest).await;
1704+
builder.set_fault_type([0, 1], FaultType::OfflineWithInfo);
1705+
builder.set_fault_type([2, 3], FaultType::Honest);
17161706
assert!(client2
17171707
.burn(AccountOwner::CHAIN, Amount::from_millis(2))
17181708
.await
@@ -1734,9 +1724,7 @@ where
17341724
// }
17351725

17361726
// Once all validators are functional again, a new proposal should succeed.
1737-
builder
1738-
.set_fault_type([0, 1, 2, 3], FaultType::Honest)
1739-
.await;
1727+
builder.set_fault_type([0, 1, 2, 3], FaultType::Honest);
17401728

17411729
client1.synchronize_from_validators().await.unwrap();
17421730
client1.publish_data_blob(b"foo".to_vec()).await?;
@@ -1796,7 +1784,7 @@ where
17961784
client3_c.set_preferred_owner(owner3_c);
17971785

17981786
// Take one validator down
1799-
builder.set_fault_type([3], FaultType::Offline).await;
1787+
builder.set_fault_type([3], FaultType::Offline);
18001788

18011789
let blob0_bytes = b"blob0".to_vec();
18021790
let blob0_id = Blob::new(BlobContent::new_data(blob0_bytes.clone())).id();
@@ -1824,12 +1812,8 @@ where
18241812
.block()
18251813
.requires_or_creates_blob(&blob2_id));
18261814

1827-
builder
1828-
.set_fault_type([0, 1], FaultType::DontProcessValidated)
1829-
.await;
1830-
builder
1831-
.set_fault_type([2], FaultType::DontSendConfirmVote)
1832-
.await;
1815+
builder.set_fault_type([0, 1], FaultType::DontProcessValidated);
1816+
builder.set_fault_type([2], FaultType::DontSendConfirmVote);
18331817

18341818
client3_a.synchronize_from_validators().await.unwrap();
18351819
let blob1 = Blob::new_data(b"blob1".to_vec());
@@ -1898,10 +1882,8 @@ where
18981882
}
18991883
}
19001884

1901-
builder.set_fault_type([2], FaultType::Offline).await;
1902-
builder
1903-
.set_fault_type([3], FaultType::DontSendConfirmVote)
1904-
.await;
1885+
builder.set_fault_type([2], FaultType::Offline);
1886+
builder.set_fault_type([3], FaultType::DontSendConfirmVote);
19051887

19061888
client3_b.synchronize_from_validators().await.unwrap();
19071889
let blob3 = Blob::new_data(b"blob3".to_vec());
@@ -1961,8 +1943,8 @@ where
19611943
blob_2_3_operations.iter().collect::<Vec<_>>()
19621944
);
19631945

1964-
builder.set_fault_type([1], FaultType::Offline).await;
1965-
builder.set_fault_type([0, 2, 3], FaultType::Honest).await;
1946+
builder.set_fault_type([1], FaultType::Offline);
1947+
builder.set_fault_type([0, 2, 3], FaultType::Honest);
19661948

19671949
client3_c.synchronize_from_validators().await.unwrap();
19681950
let blob4_data = b"blob4".to_vec();
@@ -2171,19 +2153,15 @@ where
21712153

21722154
// Client 0 tries to burn 3 tokens. Two validators are offline, so nothing will get
21732155
// validated or confirmed. However, client 0 now has a pending block.
2174-
builder
2175-
.set_fault_type([2], FaultType::OfflineWithInfo)
2176-
.await;
2156+
builder.set_fault_type([2], FaultType::OfflineWithInfo);
21772157
let result = client0
21782158
.burn(AccountOwner::CHAIN, Amount::from_tokens(3))
21792159
.await;
21802160
assert!(result.is_err());
21812161

21822162
// Client 1 thinks it is madness to burn 3 tokens! They want to publish a blob instead.
21832163
// The validators are still faulty: They validate blocks but don't confirm them.
2184-
builder
2185-
.set_fault_type([2], FaultType::DontSendConfirmVote)
2186-
.await;
2164+
builder.set_fault_type([2], FaultType::DontSendConfirmVote);
21872165
client1.synchronize_from_validators().await.unwrap();
21882166
let manager = client1
21892167
.chain_info_with_manager_values()
@@ -2202,7 +2180,7 @@ where
22022180
.is_empty());
22032181

22042182
// Finally, the validators are online and honest again.
2205-
builder.set_fault_type([1, 2], FaultType::Honest).await;
2183+
builder.set_fault_type([1, 2], FaultType::Honest);
22062184
client0.synchronize_from_validators().await.unwrap();
22072185
let manager = client0
22082186
.chain_info_with_manager_values()
@@ -2261,16 +2239,14 @@ where
22612239

22622240
// The client tries to burn 3 tokens. Two validators are offline, so nothing will get
22632241
// validated or confirmed. However, the client now has a pending block.
2264-
builder
2265-
.set_fault_type([2], FaultType::OfflineWithInfo)
2266-
.await;
2242+
builder.set_fault_type([2], FaultType::OfflineWithInfo);
22672243
let result = client
22682244
.burn(AccountOwner::CHAIN, Amount::from_tokens(3))
22692245
.await;
22702246
assert!(result.is_err());
22712247

22722248
// Now three validators are online again.
2273-
builder.set_fault_type([2], FaultType::Honest).await;
2249+
builder.set_fault_type([2], FaultType::Honest);
22742250

22752251
// The client tries to burn another token. Before that, they automatically finalize the
22762252
// pending block, which transfers 3 tokens, leaving 10 - 3 - 1 = 6.
@@ -2320,10 +2296,8 @@ where
23202296

23212297
// Client 0 tries to burn 3 tokens. Three validators are faulty: 1 and 2 will validate the
23222298
// block but not receive it for confirmation. Validator 3 is offline.
2323-
builder
2324-
.set_fault_type([1, 2], FaultType::DontProcessValidated)
2325-
.await;
2326-
builder.set_fault_type([3], FaultType::Offline).await;
2299+
builder.set_fault_type([1, 2], FaultType::DontProcessValidated);
2300+
builder.set_fault_type([3], FaultType::Offline);
23272301

23282302
let result = client0
23292303
.burn(AccountOwner::CHAIN, Amount::from_tokens(3))
@@ -2350,10 +2324,8 @@ where
23502324
// Client 1 wants to burn 2 tokens. They learn about the proposal in round 0, but now the
23512325
// validator 0 is offline, so they don't learn about the validated block and make their own
23522326
// proposal in round 1.
2353-
builder.set_fault_type([0], FaultType::Offline).await;
2354-
builder
2355-
.set_fault_type([3], FaultType::OfflineWithInfo)
2356-
.await;
2327+
builder.set_fault_type([0], FaultType::Offline);
2328+
builder.set_fault_type([3], FaultType::OfflineWithInfo);
23572329
client1.synchronize_from_validators().await.unwrap();
23582330
let manager = client1
23592331
.chain_info_with_manager_values()
@@ -2370,8 +2342,8 @@ where
23702342

23712343
// Finally, three validators are online and honest again. Client 1 realizes there has been a
23722344
// validated block in round 0, and re-proposes it when it tries to burn 4 tokens.
2373-
builder.set_fault_type([0, 1, 2], FaultType::Honest).await;
2374-
builder.set_fault_type([3], FaultType::Offline).await;
2345+
builder.set_fault_type([0, 1, 2], FaultType::Honest);
2346+
builder.set_fault_type([3], FaultType::Offline);
23752347
client1.synchronize_from_validators().await.unwrap();
23762348
let manager = client1
23772349
.chain_info_with_manager_values()
@@ -2448,9 +2420,7 @@ where
24482420
.await?;
24492421

24502422
// Client 0 tries to burn 3 of their own tokens, but three validators are faulty.
2451-
builder
2452-
.set_fault_type([1, 2, 3], FaultType::OfflineWithInfo)
2453-
.await;
2423+
builder.set_fault_type([1, 2, 3], FaultType::OfflineWithInfo);
24542424

24552425
let result = client0.burn(owner0, Amount::from_tokens(3)).await;
24562426
assert!(result.is_err());
@@ -2474,22 +2444,22 @@ where
24742444

24752445
// Round 0 times out.
24762446
clock.add(TimeDelta::from_secs(5));
2477-
builder.set_fault_type([0], FaultType::Offline).await;
2478-
builder.set_fault_type([1, 2, 3], FaultType::Honest).await;
2447+
builder.set_fault_type([0], FaultType::Offline);
2448+
builder.set_fault_type([1, 2, 3], FaultType::Honest);
24792449
client1.synchronize_from_validators().await.unwrap();
24802450
client1.request_leader_timeout().await.unwrap();
24812451

24822452
// Client 1 wants to burn 2 tokens. But now validators 0 and 3 is offline, so they don't learn
24832453
// about the proposed fast block and make their own instead.
2484-
builder.set_fault_type([3], FaultType::Offline).await;
2454+
builder.set_fault_type([3], FaultType::Offline);
24852455
let result = client1
24862456
.burn(AccountOwner::CHAIN, Amount::from_tokens(2))
24872457
.await;
24882458
assert!(result.is_err());
24892459

24902460
// Finally, three validators are online and honest again. Client 1 realizes there has been a
24912461
// validated block in round 0, and re-proposes it when it tries to burn 4 tokens.
2492-
builder.set_fault_type([0, 1, 2], FaultType::Honest).await;
2462+
builder.set_fault_type([0, 1, 2], FaultType::Honest);
24932463
client1.synchronize_from_validators().await.unwrap();
24942464
assert!(client1.pending_proposal().is_some());
24952465
client1
@@ -2600,7 +2570,7 @@ where
26002570
}
26012571

26022572
// Take one validator down
2603-
builder.set_fault_type([3], FaultType::Offline).await;
2573+
builder.set_fault_type([3], FaultType::Offline);
26042574

26052575
// Publish a blob on chain 1.
26062576
let blob_id = Blob::new(BlobContent::new_data(blob_bytes.clone())).id();
@@ -2622,8 +2592,8 @@ where
26222592
client3.synchronize_from_validators().await.unwrap();
26232593
assert_eq!(certificate.round, Round::Fast);
26242594

2625-
builder.set_fault_type([2], FaultType::Offline).await;
2626-
builder.set_fault_type([3], FaultType::Honest).await;
2595+
builder.set_fault_type([2], FaultType::Offline);
2596+
builder.set_fault_type([3], FaultType::Honest);
26272597

26282598
// Client 3 should be able to update validator 3 about the blob and the message.
26292599
let certificate = client3

0 commit comments

Comments
 (0)