Skip to content

Commit a10d4c9

Browse files
authored
Merge pull request #6305 from obycode/fix/miner-stall-flakiness
test: fix flakiness in tests that use `TEST_MINE_STALL`
2 parents ac5b257 + cc6d7ee commit a10d4c9

File tree

4 files changed

+133
-102
lines changed

4 files changed

+133
-102
lines changed

stacks-node/src/nakamoto_node/miner.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,19 @@ use crate::nakamoto_node::VRF_MOCK_MINER_KEY;
6767
use crate::neon_node;
6868
use crate::run_loop::nakamoto::Globals;
6969
use crate::run_loop::RegisteredKey;
70+
71+
#[cfg(test)]
72+
#[derive(Default, Clone, PartialEq)]
73+
enum TestMineStall {
74+
#[default]
75+
NotStalled,
76+
Pending,
77+
Stalled,
78+
}
79+
7080
#[cfg(test)]
7181
/// Test flag to stall the miner thread
72-
pub static TEST_MINE_STALL: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
82+
static TEST_MINE_STALL: LazyLock<TestFlag<TestMineStall>> = LazyLock::new(TestFlag::default);
7383
#[cfg(test)]
7484
/// Test flag to stall block proposal broadcasting for the specified miner keys
7585
pub static TEST_BROADCAST_PROPOSAL_STALL: LazyLock<TestFlag<Vec<Secp256k1PublicKey>>> =
@@ -90,6 +100,23 @@ pub static TEST_P2P_BROADCAST_STALL: LazyLock<TestFlag<bool>> = LazyLock::new(Te
90100
// Test flag to skip pushing blocks to the signers
91101
pub static TEST_BLOCK_PUSH_SKIP: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
92102

103+
#[cfg(test)]
104+
/// Set the `TEST_MINE_STALL` flag to `Pending` and block until the miner is stalled.
105+
pub fn fault_injection_stall_miner() {
106+
if TEST_MINE_STALL.get() == TestMineStall::NotStalled {
107+
TEST_MINE_STALL.set(TestMineStall::Pending);
108+
}
109+
while TEST_MINE_STALL.get() != TestMineStall::Stalled {
110+
std::thread::sleep(std::time::Duration::from_millis(10));
111+
}
112+
}
113+
114+
#[cfg(test)]
115+
/// Unstall the miner by setting the `TEST_MINE_STALL` flag to `NotStalled`.
116+
pub fn fault_injection_unstall_miner() {
117+
TEST_MINE_STALL.set(TestMineStall::NotStalled);
118+
}
119+
93120
/// If the miner was interrupted while mining a block, how long should the
94121
/// miner thread sleep before trying again?
95122
const ABORT_TRY_AGAIN_MS: u64 = 200;
@@ -394,21 +421,23 @@ impl BlockMinerThread {
394421
false
395422
}
396423

397-
/// Stop a miner tenure by blocking the miner and then joining the tenure thread
424+
/// Stall the miner based on the `TEST_MINE_STALL` flag.
425+
/// Block the miner until the test flag is set to `NotStalled`.
398426
#[cfg(test)]
399-
fn fault_injection_stall_miner() {
400-
if TEST_MINE_STALL.get() {
427+
fn fault_injection_miner_stall() {
428+
if TEST_MINE_STALL.get() != TestMineStall::NotStalled {
401429
// Do an extra check just so we don't log EVERY time.
402430
warn!("Mining is stalled due to testing directive");
403-
while TEST_MINE_STALL.get() {
431+
TEST_MINE_STALL.set(TestMineStall::Stalled);
432+
while TEST_MINE_STALL.get() != TestMineStall::NotStalled {
404433
std::thread::sleep(std::time::Duration::from_millis(10));
405434
}
406435
warn!("Mining is no longer stalled due to testing directive. Continuing...");
407436
}
408437
}
409438

410439
#[cfg(not(test))]
411-
fn fault_injection_stall_miner() {}
440+
fn fault_injection_miner_stall() {}
412441

413442
pub fn run_miner(
414443
mut self,
@@ -520,7 +549,7 @@ impl BlockMinerThread {
520549
last_block_rejected: &mut bool,
521550
reward_set: &RewardSet,
522551
) -> Result<(), NakamotoNodeError> {
523-
Self::fault_injection_stall_miner();
552+
Self::fault_injection_miner_stall();
524553
let mut chain_state =
525554
neon_node::open_chainstate_with_faults(&self.config).map_err(|e| {
526555
NakamotoNodeError::SigningCoordinatorFailure(format!(

stacks-node/src/tests/nakamoto_integrations.rs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@ use stacks_signer::v0::SpawnedSigner;
109109

110110
use super::bitcoin_regtest::BitcoinCoreController;
111111
use crate::nakamoto_node::miner::{
112-
TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_PROPOSAL_STALL, TEST_MINE_STALL,
113-
TEST_P2P_BROADCAST_SKIP, TEST_P2P_BROADCAST_STALL,
112+
fault_injection_stall_miner, fault_injection_unstall_miner, TEST_BLOCK_ANNOUNCE_STALL,
113+
TEST_BROADCAST_PROPOSAL_STALL, TEST_P2P_BROADCAST_SKIP, TEST_P2P_BROADCAST_STALL,
114114
};
115115
use crate::nakamoto_node::relayer::TEST_MINER_THREAD_STALL;
116116
use crate::neon::Counters;
@@ -6020,7 +6020,7 @@ fn nakamoto_attempt_time() {
60206020

60216021
// Stall the miner to make sure it waits until all transactions are
60226022
// submitted before it mines a block
6023-
TEST_MINE_STALL.set(true);
6023+
fault_injection_stall_miner();
60246024

60256025
let mut sender_nonce = account.nonce;
60266026
for _ in 0..txs_per_block {
@@ -6036,7 +6036,7 @@ fn nakamoto_attempt_time() {
60366036
submit_tx(&http_origin, &transfer_tx);
60376037
}
60386038

6039-
TEST_MINE_STALL.set(false);
6039+
fault_injection_unstall_miner();
60406040

60416041
// Miner should have made a new block by now
60426042
let wait_start = Instant::now();
@@ -6304,7 +6304,7 @@ fn clarity_burn_state() {
63046304
result.expect_result_ok().expect("Read-only call failed");
63056305

63066306
// Pause mining to prevent the stacks block from being mined before the tenure change is processed
6307-
TEST_MINE_STALL.set(true);
6307+
fault_injection_stall_miner();
63086308
// Submit a tx for the next block (the next block will be a new tenure, so the burn block height will increment)
63096309
let call_tx = make_contract_call(
63106310
&sender_sk,
@@ -6329,7 +6329,7 @@ fn clarity_burn_state() {
63296329
Ok(commits_submitted.load(Ordering::SeqCst) > commits_before)
63306330
})
63316331
.unwrap();
6332-
TEST_MINE_STALL.set(false);
6332+
fault_injection_unstall_miner();
63336333
wait_for(20, || {
63346334
Ok(coord_channel
63356335
.lock()
@@ -10368,7 +10368,7 @@ fn clarity_cost_spend_down() {
1036810368
.expect("Mutex poisoned")
1036910369
.get_stacks_blocks_processed();
1037010370
// Pause mining so we can add all our transactions to the mempool at once.
10371-
TEST_MINE_STALL.set(true);
10371+
fault_injection_stall_miner();
1037210372
for _nmb_tx in 0..nmb_txs_per_signer {
1037310373
for sender_sk in sender_sks.iter() {
1037410374
let sender_nonce = get_and_increment_nonce(sender_sk, &mut sender_nonces);
@@ -10394,7 +10394,7 @@ fn clarity_cost_spend_down() {
1039410394
}
1039510395
}
1039610396
}
10397-
TEST_MINE_STALL.set(false);
10397+
fault_injection_unstall_miner();
1039810398
wait_for(120, || {
1039910399
let blocks_processed = coord_channel
1040010400
.lock()
@@ -10672,7 +10672,7 @@ fn test_tenure_extend_from_flashblocks() {
1067210672
assert_eq!(sort_tip.consensus_hash, election_tip.consensus_hash);
1067310673

1067410674
// stop the relayer thread from starting a miner thread, and stop the miner thread from mining
10675-
TEST_MINE_STALL.set(true);
10675+
fault_injection_stall_miner();
1067610676
TEST_MINER_THREAD_STALL.set(true);
1067710677

1067810678
// mine another Bitcoin block right away, and force it to be a flash block
@@ -10738,7 +10738,7 @@ fn test_tenure_extend_from_flashblocks() {
1073810738

1073910739
// unstall miner thread and allow block-commits again
1074010740
counters.naka_skip_commit_op.set(false);
10741-
TEST_MINE_STALL.set(false);
10741+
fault_injection_unstall_miner();
1074210742

1074310743
// wait for the miner directive to be processed
1074410744
wait_for(60, || {
@@ -11509,7 +11509,7 @@ fn large_mempool_base(strategy: MemPoolWalkStrategy, set_fee: impl Fn() -> u64)
1150911509
info!("Pause mining and fill the mempool with the transfers");
1151011510

1151111511
// Pause block mining
11512-
TEST_MINE_STALL.set(true);
11512+
fault_injection_stall_miner();
1151311513

1151411514
let db_tx = conn.transaction().unwrap();
1151511515
let timer = Instant::now();
@@ -11549,7 +11549,7 @@ fn large_mempool_base(strategy: MemPoolWalkStrategy, set_fee: impl Fn() -> u64)
1154911549
let proposed_blocks_before = test_observer::get_mined_nakamoto_blocks().len();
1155011550

1155111551
// Unpause block mining
11552-
TEST_MINE_STALL.set(false);
11552+
fault_injection_unstall_miner();
1155311553

1155411554
// Wait for the first block to be proposed.
1155511555
wait_for(30, || {
@@ -11846,7 +11846,7 @@ fn larger_mempool() {
1184611846
info!("Pause mining and fill the mempool with the transfers");
1184711847

1184811848
// Pause block mining
11849-
TEST_MINE_STALL.set(true);
11849+
fault_injection_stall_miner();
1185011850

1185111851
let timer = Instant::now();
1185211852

@@ -11890,7 +11890,7 @@ fn larger_mempool() {
1189011890
let timer = Instant::now();
1189111891

1189211892
// Unpause block mining
11893-
TEST_MINE_STALL.set(false);
11893+
fault_injection_unstall_miner();
1189411894

1189511895
// Wait for the first block to be proposed.
1189611896
wait_for(10, || {
@@ -12184,7 +12184,7 @@ fn handle_considered_txs_foreign_key_failure() {
1218412184
let height_before = get_chain_info(&naka_conf).stacks_tip_height;
1218512185

1218612186
// Initiate the transaction stall, then submit transactions.
12187-
TEST_MINE_STALL.set(true);
12187+
fault_injection_stall_miner();
1218812188
TEST_TX_STALL.set(true);
1218912189

1219012190
let bad_transfer_tx = make_stacks_transfer_serialized(
@@ -12198,7 +12198,7 @@ fn handle_considered_txs_foreign_key_failure() {
1219812198
let txid = submit_tx(&http_origin, &bad_transfer_tx);
1219912199
info!("Bad transaction submitted: {txid}");
1220012200

12201-
TEST_MINE_STALL.set(false);
12201+
fault_injection_unstall_miner();
1220212202

1220312203
// Sleep long enough to ensure that the miner has started processing the tx
1220412204
sleep_ms(5_000);
@@ -12432,7 +12432,7 @@ fn miner_constructs_replay_block() {
1243212432

1243312433
// Pause mining to prevent any of the submitted txs getting mined.
1243412434
info!("Stalling mining...");
12435-
TEST_MINE_STALL.set(true);
12435+
fault_injection_stall_miner();
1243612436
let burn_height_before = get_chain_info(&naka_conf).burn_block_height;
1243712437
// Mine 1 bitcoin block to trigger a new block found transaction
1243812438
next_block_and(&mut btc_regtest_controller, 60, || {
@@ -12515,7 +12515,7 @@ fn miner_constructs_replay_block() {
1251512515
let blocks_before = test_observer::get_blocks().len();
1251612516
assert_eq!(observed_before, 0);
1251712517
info!("Resuming mining...");
12518-
TEST_MINE_STALL.set(false);
12518+
fault_injection_unstall_miner();
1251912519

1252012520
info!("Waiting for two stacks block to be mined...");
1252112521
wait_for(30, || {

stacks-node/src/tests/signer/commands/stacks_mining.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use madhouse::{Command, CommandWrapper};
44
use proptest::prelude::{prop_oneof, Just, Strategy};
55

66
use super::context::{SignerTestContext, SignerTestState};
7+
use crate::nakamoto_node::miner::{fault_injection_stall_miner, fault_injection_unstall_miner};
78

89
/// Command to globally pause or resume Stacks block mining within the test environment.
910
/// This command is used to simulate network-wide conditions where Stacks block production might halt or resume.
@@ -46,7 +47,11 @@ impl Command<SignerTestState, SignerTestContext> for ChainStacksMining {
4647
"Resuming Stacks mining"
4748
};
4849
info!("Applying: {}", operation_desc);
49-
crate::tests::signer::v0::test_mine_stall_set(self.should_pause);
50+
if self.should_pause {
51+
fault_injection_stall_miner();
52+
} else {
53+
fault_injection_unstall_miner();
54+
}
5055
state.mining_stalled = self.should_pause;
5156
}
5257

0 commit comments

Comments
 (0)