Skip to content

Commit 983f9ce

Browse files
committed
feat: add failsafe to transaction replay
1 parent fd029be commit 983f9ce

File tree

3 files changed

+299
-7
lines changed

3 files changed

+299
-7
lines changed

stacks-signer/src/v0/signer_state.rs

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::time::{Duration, UNIX_EPOCH};
2121
use blockstack_lib::chainstate::burn::ConsensusHashExtensions;
2222
use blockstack_lib::chainstate::nakamoto::{NakamotoBlock, NakamotoBlockHeader};
2323
use blockstack_lib::chainstate::stacks::{StacksTransaction, TransactionPayload};
24+
use blockstack_lib::net::api::get_tenures_fork_info::TenureForkingInfo;
2425
use blockstack_lib::net::api::postblock_proposal::NakamotoBlockProposal;
2526
use clarity::types::chainstate::StacksAddress;
2627
#[cfg(any(test, feature = "testing"))]
@@ -620,7 +621,7 @@ impl LocalStateMachine {
620621
client,
621622
&expected_burn_block,
622623
&prior_state_machine,
623-
replay_state,
624+
&replay_state,
624625
)? {
625626
match new_replay_state {
626627
ReplayState::Unset => {
@@ -632,6 +633,18 @@ impl LocalStateMachine {
632633
*tx_replay_scope = Some(new_scope);
633634
}
634635
}
636+
} else {
637+
if Self::handle_possible_replay_failsafe(
638+
&replay_state,
639+
&expected_burn_block,
640+
client,
641+
)? {
642+
info!(
643+
"Signer state: replay set is stalled after 2 tenures. Clearing the replay set."
644+
);
645+
tx_replay_set = ReplayTransactionSet::none();
646+
*tx_replay_scope = None;
647+
}
635648
}
636649
}
637650

@@ -981,7 +994,7 @@ impl LocalStateMachine {
981994
client: &StacksClient,
982995
expected_burn_block: &NewBurnBlock,
983996
prior_state_machine: &SignerStateMachine,
984-
replay_state: ReplayState,
997+
replay_state: &ReplayState,
985998
) -> Result<Option<ReplayState>, SignerChainstateError> {
986999
if expected_burn_block.burn_block_height > prior_state_machine.burn_block_height {
9871000
// no bitcoin fork, because we're advancing the burn block height
@@ -1088,7 +1101,7 @@ impl LocalStateMachine {
10881101
client: &StacksClient,
10891102
expected_burn_block: &NewBurnBlock,
10901103
prior_state_machine: &SignerStateMachine,
1091-
scope: ReplayScope,
1104+
scope: &ReplayScope,
10921105
) -> Result<Option<ReplayState>, SignerChainstateError> {
10931106
info!("Tx Replay: detected bitcoin fork while in replay mode. Tryng to handle the fork";
10941107
"expected_burn_block.height" => expected_burn_block.burn_block_height,
@@ -1182,6 +1195,10 @@ impl LocalStateMachine {
11821195
return Ok(None);
11831196
}
11841197

1198+
Ok(Some(Self::get_forked_txs_from_fork_info(&fork_info)))
1199+
}
1200+
1201+
fn get_forked_txs_from_fork_info(fork_info: &Vec<TenureForkingInfo>) -> Vec<StacksTransaction> {
11851202
// Collect transactions to be replayed across the forked blocks
11861203
let mut forked_blocks = fork_info
11871204
.iter()
@@ -1201,6 +1218,70 @@ impl LocalStateMachine {
12011218
))
12021219
.cloned()
12031220
.collect::<Vec<_>>();
1204-
Ok(Some(forked_txs))
1221+
forked_txs
1222+
}
1223+
1224+
/// If it has been 2 burn blocks since the origin of our replay set, and
1225+
/// we haven't produced any replay blocks since then, we should reset our replay set
1226+
///
1227+
/// Returns a `bool` indicating whether the replay set should be reset.
1228+
fn handle_possible_replay_failsafe(
1229+
replay_state: &ReplayState,
1230+
new_burn_block: &NewBurnBlock,
1231+
client: &StacksClient,
1232+
) -> Result<bool, SignerChainstateError> {
1233+
let ReplayState::InProgress(_, replay_scope) = replay_state else {
1234+
// Not in replay - skip
1235+
return Ok(false);
1236+
};
1237+
1238+
// if replay_scope.fork_origin.burn_block_height + 2 >= new_burn_block.burn_block_height {
1239+
if new_burn_block.burn_block_height < replay_scope.fork_origin.burn_block_height + 2 {
1240+
// We havent' had two burn blocks yet - skip
1241+
return Ok(false);
1242+
}
1243+
1244+
info!("Signer state: checking for replay set failsafe";
1245+
"replay_scope.fork_origin.burn_block_height" => replay_scope.fork_origin.burn_block_height,
1246+
"new_burn_block.burn_block_height" => new_burn_block.burn_block_height,
1247+
);
1248+
let Ok(fork_info) = client.get_tenure_forking_info(
1249+
&replay_scope.fork_origin.consensus_hash,
1250+
&new_burn_block.consensus_hash,
1251+
) else {
1252+
warn!("Signer state: failed to get fork info");
1253+
return Ok(false);
1254+
};
1255+
1256+
let tenures_with_sortition = fork_info
1257+
.iter()
1258+
.filter(|fork_info| {
1259+
fork_info.was_sortition
1260+
&& fork_info
1261+
.nakamoto_blocks
1262+
.as_ref()
1263+
.map(|b| b.len())
1264+
.unwrap_or(0)
1265+
> 0
1266+
})
1267+
.count();
1268+
1269+
info!("Signer state: fork info in failsafe check";
1270+
"tenures_with_sortition" => tenures_with_sortition,
1271+
"fork_info" => ?fork_info,
1272+
);
1273+
1274+
if tenures_with_sortition < 2 {
1275+
// We might have had 2 burn blocks, but not 2 tenures.
1276+
return Ok(false);
1277+
}
1278+
1279+
let forked_txs = Self::get_forked_txs_from_fork_info(&fork_info);
1280+
1281+
info!("Signer state: forked txs in failsafe check";
1282+
"forked_txs_len" => forked_txs.len(),
1283+
);
1284+
1285+
Ok(forked_txs.is_empty())
12051286
}
12061287
}

stackslib/src/net/api/postblock_proposal.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ pub static TEST_REPLAY_TRANSACTIONS: LazyLock<
6868
TestFlag<std::collections::VecDeque<StacksTransaction>>,
6969
> = LazyLock::new(TestFlag::default);
7070

71+
/// Test-only flag: whether to reject any transaction while we're in a
/// replay set. Read by `fault_injection_reject_replay_txs`.
#[cfg(any(test, feature = "testing"))]
pub static TEST_REJECT_REPLAY_TXS: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
74+
7175
// This enum is used to supply a `reason_code` for validation
7276
// rejection responses. This is serialized as an enum with string
7377
// type (in jsonschema terminology).
@@ -200,6 +204,24 @@ fn fault_injection_validation_delay() {
200204
#[cfg(not(any(test, feature = "testing")))]
201205
fn fault_injection_validation_delay() {}
202206

207+
/// Fault-injection hook for test builds.
///
/// Returns an `InvalidTransactionReplay` rejection while the
/// `TEST_REJECT_REPLAY_TXS` flag is set, and `Ok(())` otherwise.
#[cfg(any(test, feature = "testing"))]
fn fault_injection_reject_replay_txs() -> Result<(), BlockValidateRejectReason> {
    // Flag not set: behave as if the hook were absent.
    if !TEST_REJECT_REPLAY_TXS.get() {
        return Ok(());
    }

    Err(BlockValidateRejectReason {
        reason_code: ValidateRejectCode::InvalidTransactionReplay,
        reason: "Rejected by test flag".into(),
    })
}
219+
220+
#[cfg(not(any(test, feature = "testing")))]
221+
fn fault_injection_reject_replay_txs() -> Result<(), BlockValidateRejectReason> {
222+
Ok(())
223+
}
224+
203225
/// Represents a block proposed to the `v3/block_proposal` endpoint for validation
204226
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
205227
pub struct NakamotoBlockProposal {
@@ -723,6 +745,7 @@ impl NakamotoBlockProposal {
723745
// Allow this to happen, tenure extend checks happen elsewhere.
724746
break;
725747
}
748+
fault_injection_reject_replay_txs()?;
726749
let Some(replay_tx) = replay_txs.pop_front() else {
727750
// During transaction replay, we expect that the block only
728751
// contains transactions from the replay set. Thus, if we're here,

testnet/stacks-node/src/tests/signer/v0.rs

Lines changed: 191 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ use stacks::chainstate::stacks::boot::MINERS_NAME;
4949
use stacks::chainstate::stacks::db::{StacksBlockHeaderTypes, StacksChainState, StacksHeaderInfo};
5050
use stacks::chainstate::stacks::miner::{
5151
TransactionEvent, TransactionSuccessEvent, TEST_EXCLUDE_REPLAY_TXS,
52-
TEST_MINE_ALLOWED_REPLAY_TXS,
5352
};
5453
use stacks::chainstate::stacks::{
5554
StacksTransaction, TenureChangeCause, TenureChangePayload, TransactionPayload,
@@ -65,8 +64,8 @@ use stacks::core::{StacksEpochId, CHAIN_ID_TESTNET, HELIUM_BLOCK_LIMIT_20};
6564
use stacks::libstackerdb::StackerDBChunkData;
6665
use stacks::net::api::getsigner::GetSignerResponse;
6766
use stacks::net::api::postblock_proposal::{
68-
BlockValidateResponse, ValidateRejectCode, TEST_VALIDATE_DELAY_DURATION_SECS,
69-
TEST_VALIDATE_STALL,
67+
BlockValidateResponse, ValidateRejectCode, TEST_REJECT_REPLAY_TXS,
68+
TEST_VALIDATE_DELAY_DURATION_SECS, TEST_VALIDATE_STALL,
7069
};
7170
use stacks::net::relay::fault_injection::{clear_ignore_block, set_ignore_block};
7271
use stacks::types::chainstate::{
@@ -3802,6 +3801,195 @@ fn tx_replay_btc_on_stx_invalidation() {
38023801
signer_test.shutdown();
38033802
}
38043803

3804+
/// Test scenario to ensure that the replay set is cleared
/// if there have been multiple tenures with a stalled replay set.
///
/// This test is executed by triggering a fork, and then using
/// a test flag to reject any transaction replay blocks.
///
/// The test mines a number of burn blocks during replay before
/// validating that the replay set is eventually cleared.
#[ignore]
#[test]
fn tx_replay_failsafe() {
    // Only runs when explicitly enabled through the environment.
    if env::var("BITCOIND_TEST") != Ok("1".into()) {
        return;
    }

    let num_signers = 5;
    let sender_sk = Secp256k1PrivateKey::from_seed("sender_1".as_bytes());
    let sender_addr = tests::to_addr(&sender_sk);
    let send_amt = 100;
    let send_fee = 180;
    let signer_test: SignerTest<SpawnedSigner> =
        SignerTest::new_with_config_modifications_and_snapshot(
            num_signers,
            vec![(sender_addr, (send_amt + send_fee) * 10)],
            |c| {
                c.validate_with_replay_tx = true;
            },
            |node_config| {
                node_config.miner.block_commit_delay = Duration::from_secs(1);
                node_config.miner.replay_transactions = true;
                node_config.miner.activated_vrf_key_path =
                    Some(format!("{}/vrf_key", node_config.node.working_dir));
            },
            None,
            None,
            Some(function_name!()),
        );

    let conf = &signer_test.running_nodes.conf;
    let btc_controller = &signer_test.running_nodes.btc_regtest_controller;

    // When this run only bootstraps the snapshot, stop after saving it.
    if signer_test.bootstrap_snapshot() {
        signer_test.shutdown_and_snapshot();
        return;
    }

    info!("------------------------- Beginning test -------------------------");

    let burnchain = conf.get_burnchain();

    let tip = signer_test.get_peer_info();
    let pox_info = signer_test.get_pox_data();

    info!("---- Burnchain ----";
        "pox_constants" => ?burnchain.pox_constants,
        "cycle" => burnchain.pox_constants.reward_cycle_index(0, tip.burn_block_height),
        "pox_info" => ?pox_info,
    );

    let pre_fork_tenures = 11;
    for i in 0..pre_fork_tenures {
        info!("Mining pre-fork tenure {} of {pre_fork_tenures}", i + 1);
        signer_test.mine_nakamoto_block(Duration::from_secs(30), true);
    }

    info!("---- Submitting STX transfer ----");

    let tip = get_chain_info(&conf);
    // Make a transfer tx (this will get forked)
    let (txid, nonce) = signer_test
        .submit_transfer_tx(&sender_sk, send_fee, send_amt)
        .unwrap();

    // Ensure we got a new block with this tx
    signer_test
        .wait_for_nonce_increase(&sender_addr, nonce)
        .expect("Timed out waiting for transfer tx to be mined");

    wait_for(30, || {
        let new_tip = get_chain_info(&conf);
        Ok(new_tip.stacks_tip_height > tip.stacks_tip_height)
    })
    .expect("Timed out waiting for transfer tx to be mined");

    let tip = get_chain_info(&conf);

    info!("---- Triggering Bitcoin fork ----";
        "tip.stacks_tip_height" => tip.stacks_tip_height,
        "tip.burn_block_height" => tip.burn_block_height,
    );

    // Invalidate the burn block two below the tip, then build 3 new blocks on the fork.
    let burn_header_hash_to_fork = btc_controller.get_block_hash(tip.burn_block_height - 2);
    btc_controller.invalidate_block(&burn_header_hash_to_fork);
    btc_controller.build_next_block(3);

    TEST_MINE_STALL.set(true);

    let submitted_commits = signer_test
        .running_nodes
        .counters
        .naka_submitted_commits
        .clone();

    // we need to mine some blocks to get back to being considered a frequent miner
    for i in 0..3 {
        let current_burn_height = get_chain_info(&conf).burn_block_height;
        info!(
            "Mining block #{i} to be considered a frequent miner";
            "current_burn_height" => current_burn_height,
        );
        let commits_count = submitted_commits.load(Ordering::SeqCst);
        next_block_and(&btc_controller, 60, || {
            Ok(submitted_commits.load(Ordering::SeqCst) > commits_count)
        })
        .unwrap();
    }

    info!("---- Wait for tx replay set to be updated ----");

    signer_test
        .wait_for_signer_state_check(30, |state| {
            let Some(tx_replay_set) = state.get_tx_replay_set() else {
                return Ok(false);
            };
            let len_ok = tx_replay_set.len() == 1;
            // Only index into the set once we know it holds exactly one tx;
            // otherwise an empty set would panic instead of retrying.
            let txid_ok = len_ok && tx_replay_set[0].txid().to_hex() == txid;
            info!("---- Signer state check ----";
                "tx_replay_set" => ?tx_replay_set,
                "len_ok" => len_ok,
                "txid_ok" => txid_ok,
            );
            Ok(len_ok && txid_ok)
        })
        .expect("Timed out waiting for tx replay set to be updated");

    let tip_after_fork = get_chain_info(&conf);

    info!("---- Waiting for two tenures, without replay set cleared ----";
        "tip_after_fork.stacks_tip_height" => tip_after_fork.stacks_tip_height,
        "tip_after_fork.burn_block_height" => tip_after_fork.burn_block_height
    );

    // Start rejecting replay transactions before letting the miner resume.
    TEST_REJECT_REPLAY_TXS.set(true);
    TEST_MINE_STALL.set(false);

    wait_for(30, || {
        let tip = get_chain_info(&conf);
        Ok(tip.stacks_tip_height > tip_after_fork.stacks_tip_height)
    })
    .expect("Timed out waiting for one TenureChange block to be mined");

    signer_test
        .wait_for_signer_state_check(30, |state| Ok(state.get_tx_replay_set().is_some()))
        .expect("Expected replay set to still be set");

    info!("---- Mining a second tenure ----");

    signer_test.mine_nakamoto_block(Duration::from_secs(30), true);

    signer_test
        .wait_for_signer_state_check(30, |state| Ok(state.get_tx_replay_set().is_some()))
        .expect("Expected replay set to still be set");

    wait_for(30, || {
        let tip = get_chain_info(&conf);
        Ok(tip.stacks_tip_height > tip_after_fork.stacks_tip_height + 1)
    })
    .expect("Timed out waiting for a TenureChange block to be mined");

    info!("---- Mining a third tenure ----");
    signer_test.mine_nakamoto_block(Duration::from_secs(30), true);

    // NOTE(review): this threshold is identical to the wait after the second
    // tenure, so it is already satisfied on entry - confirm whether `+ 2`
    // was intended here.
    wait_for(30, || {
        let tip = get_chain_info(&conf);
        Ok(tip.stacks_tip_height > tip_after_fork.stacks_tip_height + 1)
    })
    .expect("Timed out waiting for a TenureChange block to be mined");

    info!("---- Waiting for tx replay set to be cleared ----");

    signer_test
        .wait_for_signer_state_check(30, |state| Ok(state.get_tx_replay_set().is_none()))
        .expect("Expected replay set to be cleared");

    signer_test.shutdown();
}
3992+
38053993
/// Test scenario where two signers disagree on the tx replay set,
38063994
/// which means there is no consensus on the tx replay set.
38073995
#[test]

0 commit comments

Comments
 (0)