Skip to content

Commit edc559c

Browse files
A0-0000 Fixed performance e2e flakiness (#1963)
# Description The way `one_validator_is_dead` is written now, it's disabling both AURA and Aleph keys. Since the default production ban config threshold is 3, sometimes we kicked the disabled node from the committee, which then resulted in a perfect score, hence the test failed. This PR fixes that by extending the production ban config threshold so that the kick does not happen. There's a tiny bug in the test which does not influence the test run - it printed out the wrong account id to stdout. Also, I decreased waiting time here and there so both performance tests should run faster now (around 3 minutes). ## Type of change Please delete options that are not relevant. - Bug fix (non-breaking change which fixes an issue) ## Testing * https://github.com/Cardinal-Cryptography/aleph-node/actions/runs/14104180901
1 parent aa9b1a3 commit edc559c

File tree

2 files changed

+47
-32
lines changed

2 files changed

+47
-32
lines changed

.github/workflows/_run-e2e-tests.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,7 @@ jobs:
707707
uses: ./.github/actions/run-e2e-test
708708
with:
709709
test-case: all_validators_have_ideal_performance
710+
node-count: '4'
710711
artifact-aleph-e2e-client-image: ${{ inputs.artifact-aleph-e2e-client-image }}
711712
artifact-aleph-node-image: ${{ inputs.artifact-aleph-node-image }}
712713
artifact-chain-bootstrapper-image: ${{ inputs.artifact-chain-bootstrapper-image }}
@@ -723,6 +724,7 @@ jobs:
723724
uses: ./.github/actions/run-e2e-test
724725
with:
725726
test-case: one_validator_is_dead
727+
node-count: '4'
726728
artifact-aleph-e2e-client-image: ${{ inputs.artifact-aleph-e2e-client-image }}
727729
artifact-aleph-node-image: ${{ inputs.artifact-aleph-node-image }}
728730
artifact-chain-bootstrapper-image: ${{ inputs.artifact-chain-bootstrapper-image }}

e2e-tests/src/test/performance.rs

Lines changed: 45 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use aleph_client::{
1212
waiting::{AlephWaiting, BlockStatus, WaitingExt},
1313
AccountId, RootConnection, SignedConnection, TxStatus,
1414
};
15+
use aleph_client::pallets::committee_management::CommitteeManagementSudoApi;
1516
use log::info;
1617
use primitives::{
1718
SessionCount, DEFAULT_FINALITY_BAN_MINIMAL_EXPECTED_PERFORMANCE,
@@ -25,11 +26,17 @@ use crate::{
2526
validators::get_test_validators,
2627
};
2728

29+
// all below consts are related to each other, and they correspond to local chain setup:
30+
// there are exactly 4 validator nodes run locally, //1 and //2 are reserved and //3 and //4 are non-reserved
31+
// we're going to disable in one of the tests exactly validator with seed //3, which has RPC address port 9948
2832
const RESERVED_SEATS: u32 = 2;
2933
const NON_RESERVED_SEATS: u32 = 2;
30-
const DEAD_INDEX: usize = 1;
34+
// since we keep non-reserved account ids in a separate array, node //3 is the first account on that list
35+
const NON_RESERVED_DEAD_INDEX: usize = 0;
3136
const NODE_TO_DISABLE_ADDRESS: &str = "ws://127.0.0.1:9948";
3237
const VALIDATOR_TO_DISABLE_OVERALL_INDEX: u32 = 3;
38+
39+
// version which is required for scores to be enabled
3340
const ABFT_PERFORMANCE_VERSION: u32 = 5;
3441

3542
#[tokio::test]
@@ -41,23 +48,15 @@ async fn all_validators_have_ideal_performance() -> anyhow::Result<()> {
4148
.iter()
4249
.chain(non_reserved_validators.iter());
4350

44-
let current_finality_version = root_connection.finality_version(None).await;
45-
if current_finality_version < ABFT_PERFORMANCE_VERSION {
46-
change_finality_version(&root_connection).await?
47-
}
48-
// In this session first performance metrics are sent, we have to wait some time
49-
// to make sure that we don't check storage before first score is sent.
50-
root_connection
51-
.wait_for_n_sessions(1, BlockStatus::Best)
52-
.await;
51+
set_finality_version(ABFT_PERFORMANCE_VERSION, &root_connection).await?;
5352

5453
check_validators(
5554
&reserved_validators,
5655
&non_reserved_validators,
5756
root_connection.get_current_era_validators(None).await,
5857
);
5958

60-
check_ban_config(
59+
check_finality_ban_config(
6160
&root_connection,
6261
DEFAULT_FINALITY_BAN_MINIMAL_EXPECTED_PERFORMANCE,
6362
DEFAULT_FINALITY_BAN_SESSION_COUNT_THRESHOLD,
@@ -93,35 +92,34 @@ async fn one_validator_is_dead() -> anyhow::Result<()> {
9392
let (root_connection, reserved_validators, non_reserved_validators, _) =
9493
setup_test(config).await?;
9594

96-
let current_finality_version = root_connection.finality_version(None).await;
97-
if current_finality_version < ABFT_PERFORMANCE_VERSION {
98-
change_finality_version(&root_connection).await?
99-
}
100-
101-
// In this session first performance metrics are sent, we have to wait some time
102-
// to make sure that we don't check storage before first score is sent.
103-
root_connection
104-
.wait_for_n_sessions(1, BlockStatus::Best)
105-
.await;
95+
set_finality_version(ABFT_PERFORMANCE_VERSION, &root_connection).await?;
10696

10797
check_validators(
10898
&reserved_validators,
10999
&non_reserved_validators,
110100
root_connection.get_current_era_validators(None).await,
111101
);
112-
check_ban_config(
102+
103+
let production_underperformed_threshold = 9;
104+
info!("Increasing production ban config threshold to {} sessions", production_underperformed_threshold);
105+
root_connection
106+
.set_ban_config(None, Some(production_underperformed_threshold), None, None, TxStatus::InBlock)
107+
.await?;
108+
let ban_config = root_connection.get_ban_config(None).await;
109+
assert_eq!(
110+
ban_config.underperformed_session_count_threshold,
111+
production_underperformed_threshold
112+
);
113+
check_finality_ban_config(
113114
&root_connection,
114115
DEFAULT_FINALITY_BAN_MINIMAL_EXPECTED_PERFORMANCE,
115116
DEFAULT_FINALITY_BAN_SESSION_COUNT_THRESHOLD,
116117
)
117118
.await;
118119

119-
let validator_to_disable = &non_reserved_validators[DEAD_INDEX];
120-
120+
let validator_to_disable = &non_reserved_validators[NON_RESERVED_DEAD_INDEX];
121121
info!("Validator to disable: {}", validator_to_disable);
122-
123122
check_underperformed_validator_session_count(&root_connection, validator_to_disable, 0).await;
124-
125123
disable_validator( NODE_TO_DISABLE_ADDRESS, VALIDATOR_TO_DISABLE_OVERALL_INDEX).await?;
126124

127125
// Validator has been disabled, let's wait one session in which it's disabled.
@@ -140,6 +138,20 @@ async fn one_validator_is_dead() -> anyhow::Result<()> {
140138
Ok(())
141139
}
142140

141+
async fn set_finality_version(finality_version: u32, root_connection: &RootConnection) -> anyhow::Result<()> {
142+
let current_finality_version = root_connection.finality_version(None).await;
143+
if current_finality_version < ABFT_PERFORMANCE_VERSION {
144+
change_finality_version(finality_version, root_connection).await?
145+
}
146+
147+
// In this session first performance metrics are sent, we have to wait some time
148+
// to make sure that we don't check storage before first score is sent.
149+
root_connection
150+
.wait_for_n_sessions(1, BlockStatus::Best)
151+
.await;
152+
Ok(())
153+
}
154+
143155
async fn setup_test(
144156
config: &Config,
145157
) -> anyhow::Result<(
@@ -152,7 +164,7 @@ async fn setup_test(
152164

153165
let validator_keys = get_test_validators(config);
154166
let reserved_validators = account_ids_from_keys(&validator_keys.reserved);
155-
let non_reserved_validators = account_ids_from_keys(&validator_keys.non_reserved);
167+
let non_reserved_validators = account_ids_from_keys(&validator_keys.non_reserved).into_iter().take(2).collect::<Vec<_>>();
156168
let seats = CommitteeSeats {
157169
reserved_seats: RESERVED_SEATS,
158170
non_reserved_seats: NON_RESERVED_SEATS,
@@ -199,7 +211,7 @@ async fn setup_test(
199211
)
200212
.await?;
201213

202-
root_connection.wait_for_n_eras(2, BlockStatus::Best).await;
214+
root_connection.wait_for_n_eras(1, BlockStatus::Best).await;
203215
info!("Validators are changed.");
204216
}
205217

@@ -229,26 +241,27 @@ fn check_validators(
229241
}
230242

231243
async fn change_finality_version<C: SessionApi + AlephSudoApi + AlephWaiting>(
244+
finality_version: u32,
232245
connection: &C,
233246
) -> anyhow::Result<()> {
234247
info!("Changing finality version to 5.");
235-
let session_for_upgrade = connection.get_session(None).await + 3;
248+
let session_for_upgrade = connection.get_session(None).await + 2;
236249
connection
237250
.schedule_finality_version_change(
238-
ABFT_PERFORMANCE_VERSION,
251+
finality_version,
239252
session_for_upgrade,
240253
TxStatus::InBlock,
241254
)
242255
.await?;
243256
connection
244-
.wait_for_session(session_for_upgrade + 1, BlockStatus::Best)
257+
.wait_for_session(session_for_upgrade, BlockStatus::Best)
245258
.await;
246259
info!("Finality version is changed.");
247260

248261
Ok(())
249262
}
250263

251-
async fn check_ban_config<C: CommitteeManagementApi>(
264+
async fn check_finality_ban_config<C: CommitteeManagementApi>(
252265
connection: &C,
253266
expected_minimal_expected_performance: u16,
254267
expected_session_count_threshold: SessionCount,

0 commit comments

Comments
 (0)