Skip to content

Commit 3bf47dd

Browse files
committed
Remove drive_chain_liveness, which was meant to address a potential bug in the affirmation map flow around failure to announce blocks
Signed-off-by: Jacinta Ferrant <[email protected]>
1 parent c4fad7f commit 3bf47dd

File tree

1 file changed

+1
-159
lines changed

1 file changed

+1
-159
lines changed

stacks-node/src/run_loop/neon.rs

Lines changed: 1 addition & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ use std::sync::atomic::AtomicU64;
33
use std::sync::atomic::{AtomicBool, Ordering};
44
use std::sync::mpsc::sync_channel;
55
use std::sync::{Arc, Mutex};
6+
use std::thread;
67
use std::thread::JoinHandle;
7-
use std::{cmp, thread};
88

99
use libc;
1010
use stacks::burnchains::bitcoin::address::{BitcoinAddress, LegacyBitcoinAddressType};
@@ -27,7 +27,6 @@ use stacks_common::deps_common::ctrlc as termination;
2727
use stacks_common::deps_common::ctrlc::SignalId;
2828
use stacks_common::types::PublicKey;
2929
use stacks_common::util::hash::Hash160;
30-
use stacks_common::util::{get_epoch_time_secs, sleep_ms};
3130
use stx_genesis::GenesisData;
3231

3332
use super::RunLoopCallbacks;
@@ -65,12 +64,6 @@ pub struct RunLoopCounter(pub Arc<AtomicU64>);
6564
#[derive(Clone)]
6665
pub struct RunLoopCounter();
6766

68-
#[cfg(test)]
69-
const UNCONDITIONAL_CHAIN_LIVENESS_CHECK: u64 = 30;
70-
71-
#[cfg(not(test))]
72-
const UNCONDITIONAL_CHAIN_LIVENESS_CHECK: u64 = 300;
73-
7467
impl Default for RunLoopCounter {
7568
#[cfg(test)]
7669
fn default() -> Self {
@@ -781,153 +774,6 @@ impl RunLoop {
781774
)
782775
}
783776

784-
/// Wake up and drive stacks block processing if there's been a PoX reorg.
785-
/// Be careful not to saturate calls to announce new stacks blocks, because that will disable
786-
/// mining (which would prevent a miner attempting to fix a hidden PoX anchor block from making
787-
/// progress).
788-
fn drive_pox_reorg_stacks_block_processing(
789-
globals: &Globals,
790-
config: &Config,
791-
last_stacks_pox_reorg_recover_time: &mut u128,
792-
) {
793-
let miner_config = config.get_miner_config();
794-
let delay = cmp::max(
795-
config.node.chain_liveness_poll_time_secs,
796-
cmp::max(
797-
miner_config.first_attempt_time_ms,
798-
miner_config.subsequent_attempt_time_ms,
799-
) / 1000,
800-
);
801-
802-
if *last_stacks_pox_reorg_recover_time + (delay as u128) >= get_epoch_time_secs().into() {
803-
// too soon
804-
return;
805-
}
806-
807-
// announce a new stacks block to force the chains coordinator
808-
// to wake up anyways. this isn't free, so we have to make sure
809-
// the chain-liveness thread doesn't wake up too often
810-
globals.coord().announce_new_stacks_block();
811-
812-
*last_stacks_pox_reorg_recover_time = get_epoch_time_secs().into();
813-
}
814-
815-
/// Wake up and drive sortition processing if there's been a PoX reorg.
816-
/// Be careful not to saturate calls to announce new burn blocks, because that will disable
817-
/// mining (which would prevent a miner attempting to fix a hidden PoX anchor block from making
818-
/// progress).
819-
///
820-
/// only call if not in ibd
821-
fn drive_pox_reorg_burn_block_processing(
822-
globals: &Globals,
823-
config: &Config,
824-
burnchain: &Burnchain,
825-
sortdb: &SortitionDB,
826-
last_burn_pox_reorg_recover_time: &mut u128,
827-
last_announce_time: &mut u128,
828-
) {
829-
let miner_config = config.get_miner_config();
830-
let delay = cmp::max(
831-
config.node.chain_liveness_poll_time_secs,
832-
cmp::max(
833-
miner_config.first_attempt_time_ms,
834-
miner_config.subsequent_attempt_time_ms,
835-
) / 1000,
836-
);
837-
838-
if *last_burn_pox_reorg_recover_time + (delay as u128) >= get_epoch_time_secs().into() {
839-
// too soon
840-
return;
841-
}
842-
843-
// compare sortition and heaviest AMs
844-
let burnchain_db = burnchain
845-
.open_burnchain_db(false)
846-
.expect("FATAL: failed to open burnchain DB");
847-
848-
let highest_sn = SortitionDB::get_highest_known_burn_chain_tip(sortdb.conn())
849-
.expect("FATAL: could not read sortition DB");
850-
851-
let canonical_burnchain_tip = burnchain_db
852-
.get_canonical_chain_tip()
853-
.expect("FATAL: could not read burnchain DB");
854-
855-
if canonical_burnchain_tip.block_height > highest_sn.block_height {
856-
// still processing sortitions
857-
test_debug!(
858-
"Drive burn block processing: still processing sortitions ({} > {})",
859-
canonical_burnchain_tip.block_height,
860-
highest_sn.block_height
861-
);
862-
return;
863-
}
864-
865-
*last_burn_pox_reorg_recover_time = get_epoch_time_secs().into();
866-
867-
// unconditionally bump every 5 minutes, just in case.
868-
// this can get the node un-stuck if we're short on sortition processing but are unable to
869-
// sync with the remote node because it keeps NACK'ing us, leading to a runloop stall.
870-
if *last_announce_time + (UNCONDITIONAL_CHAIN_LIVENESS_CHECK as u128)
871-
< get_epoch_time_secs().into()
872-
{
873-
debug!("Drive burnchain processing: unconditional bump");
874-
globals.coord().announce_new_burn_block();
875-
globals.coord().announce_new_stacks_block();
876-
*last_announce_time = get_epoch_time_secs().into();
877-
}
878-
}
879-
880-
/// In a separate thread, periodically drive coordinator liveness by checking to see if there's
881-
/// a pending reorg and if so, waking up the coordinator to go and process new blocks
882-
fn drive_chain_liveness(
883-
globals: Globals,
884-
config: Config,
885-
burnchain: Burnchain,
886-
sortdb: SortitionDB,
887-
) {
888-
let mut last_burn_pox_reorg_recover_time = 0;
889-
let mut last_stacks_pox_reorg_recover_time = 0;
890-
let mut last_burn_announce_time = 0;
891-
892-
debug!("Chain-liveness thread start!");
893-
894-
while globals.keep_running() {
895-
debug!("Chain-liveness checkup");
896-
Self::drive_pox_reorg_burn_block_processing(
897-
&globals,
898-
&config,
899-
&burnchain,
900-
&sortdb,
901-
&mut last_burn_pox_reorg_recover_time,
902-
&mut last_burn_announce_time,
903-
);
904-
Self::drive_pox_reorg_stacks_block_processing(
905-
&globals,
906-
&config,
907-
&mut last_stacks_pox_reorg_recover_time,
908-
);
909-
910-
sleep_ms(3000);
911-
}
912-
913-
debug!("Chain-liveness thread exit!");
914-
}
915-
916-
/// Spawn a thread to drive chain liveness
917-
fn spawn_chain_liveness_thread(&self, globals: Globals) -> JoinHandle<()> {
918-
let config = self.config.clone();
919-
let burnchain = self.get_burnchain();
920-
let sortdb = burnchain
921-
.open_sortition_db(true)
922-
.expect("FATAL: could not open sortition DB");
923-
924-
thread::Builder::new()
925-
.name(format!("chain-liveness-{}", config.node.rpc_bind))
926-
.stack_size(BLOCK_PROCESSOR_STACK_SIZE)
927-
.spawn(move || Self::drive_chain_liveness(globals, config, burnchain, sortdb))
928-
.expect("FATAL: failed to spawn chain liveness thread")
929-
}
930-
931777
/// Starts the node runloop.
932778
///
933779
/// This function will block by looping infinitely.
@@ -1025,7 +871,6 @@ impl RunLoop {
1025871
// Boot up the p2p network and relayer, and figure out how many sortitions we have so far
1026872
// (it could be non-zero if the node is resuming from chainstate)
1027873
let mut node = StacksNode::spawn(self, globals.clone(), relay_recv);
1028-
let liveness_thread = self.spawn_chain_liveness_thread(globals.clone());
1029874

1030875
// Wait for all pending sortitions to process
1031876
let burnchain_db = burnchain_config
@@ -1061,7 +906,6 @@ impl RunLoop {
1061906
globals.coord().stop_chains_coordinator();
1062907
coordinator_thread_handle.join().unwrap();
1063908
let peer_network = node.join();
1064-
liveness_thread.join().unwrap();
1065909

1066910
// Data that will be passed to Nakamoto run loop
1067911
// Only gets transferred on clean shutdown of neon run loop
@@ -1185,7 +1029,6 @@ impl RunLoop {
11851029
globals.coord().stop_chains_coordinator();
11861030
coordinator_thread_handle.join().unwrap();
11871031
let peer_network = node.join();
1188-
liveness_thread.join().unwrap();
11891032

11901033
// Data that will be passed to Nakamoto run loop
11911034
// Only gets transferred on clean shutdown of neon run loop
@@ -1258,7 +1101,6 @@ impl RunLoop {
12581101
globals.coord().stop_chains_coordinator();
12591102
coordinator_thread_handle.join().unwrap();
12601103
let peer_network = node.join();
1261-
liveness_thread.join().unwrap();
12621104

12631105
// Data that will be passed to Nakamoto run loop
12641106
// Only gets transferred on clean shutdown of neon run loop

0 commit comments

Comments
 (0)