Skip to content

Commit 5715f67

Browse files
committed
fix: a handful of issues causing timing-related test failures in CI
* add a `not_available_try_again` error to `GetStackers`, and treat it as transient in the signer binary
* make the signer binary time out on retries in the client
* update the signer outer runloop to differentiate between 'not in signer set' and 'have not loaded info yet'
* update the signer outer runloop to handle errors and non-presence differently in the signer config refresh
* update the signer outer runloop to perform the signer config refresh on the current cycle (if not loaded yet) and on the next cycle (if in the prepare phase for the next cycle). This was causing an issue on exactly the first cycle of Nakamoto, because the signer set cannot be loaded for the first cycle until after the prepare phase
* update the signer outer runloop to check the node’s block height on event receipt as well
* update the testing harnesses to wait and check more appropriately for status checks from signers, etc.
1 parent a4d3ff9 commit 5715f67

File tree

12 files changed

+450
-269
lines changed

12 files changed

+450
-269
lines changed

libsigner/src/runloop.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -246,13 +246,14 @@ impl<
246246
let (event_send, event_recv) = channel();
247247
event_receiver.add_consumer(event_send);
248248

249+
let bind_port = bind_addr.port();
249250
event_receiver.bind(bind_addr)?;
250251
let stop_signaler = event_receiver.get_stop_signaler()?;
251252
let mut ret_stop_signaler = event_receiver.get_stop_signaler()?;
252253

253254
// start a thread for the event receiver
254255
let event_thread = thread::Builder::new()
255-
.name("event_receiver".to_string())
256+
.name(format!("event_receiver:{bind_port}"))
256257
.stack_size(THREAD_STACK_SIZE)
257258
.spawn(move || event_receiver.main_loop())
258259
.map_err(|e| {
@@ -262,7 +263,7 @@ impl<
262263

263264
// start receiving events and doing stuff with them
264265
let runloop_thread = thread::Builder::new()
265-
.name(format!("signer_runloop:{}", bind_addr.port()))
266+
.name(format!("signer_runloop:{bind_port}"))
266267
.stack_size(THREAD_STACK_SIZE)
267268
.spawn(move || {
268269
signer_loop.main_loop(event_recv, command_receiver, result_sender, stop_signaler)

stacks-signer/src/client/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@ use stacks_common::debug;
3434
const BACKOFF_INITIAL_INTERVAL: u64 = 128;
3535
/// Backoff timer max interval in milliseconds
3636
const BACKOFF_MAX_INTERVAL: u64 = 16384;
37+
/// Backoff timer max elapsed seconds
38+
const BACKOFF_MAX_ELAPSED: u64 = 5;
3739

3840
#[derive(thiserror::Error, Debug)]
3941
/// Client error type
@@ -109,6 +111,7 @@ where
109111
let backoff_timer = backoff::ExponentialBackoffBuilder::new()
110112
.with_initial_interval(Duration::from_millis(BACKOFF_INITIAL_INTERVAL))
111113
.with_max_interval(Duration::from_millis(BACKOFF_MAX_INTERVAL))
114+
.with_max_elapsed_time(Some(Duration::from_secs(BACKOFF_MAX_ELAPSED)))
112115
.build();
113116

114117
backoff::retry_notify(backoff_timer, request_fn, notify).map_err(|_| ClientError::RetryTimeout)

stacks-signer/src/client/stacks_client.rs

Lines changed: 30 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ use clarity::vm::types::{PrincipalData, QualifiedContractIdentifier};
4444
use clarity::vm::{ClarityName, ContractName, Value as ClarityValue};
4545
use libsigner::v0::messages::PeerInfo;
4646
use reqwest::header::AUTHORIZATION;
47+
use serde::Deserialize;
4748
use serde_json::json;
4849
use slog::{slog_debug, slog_warn};
4950
use stacks_common::codec::StacksMessageCodec;
@@ -80,6 +81,12 @@ pub struct StacksClient {
8081
auth_password: String,
8182
}
8283

84+
#[derive(Deserialize)]
85+
struct GetStackersErrorResp {
86+
err_type: String,
87+
err_msg: String,
88+
}
89+
8390
impl From<&GlobalConfig> for StacksClient {
8491
fn from(config: &GlobalConfig) -> Self {
8592
Self {
@@ -514,23 +521,38 @@ impl StacksClient {
514521
&self,
515522
reward_cycle: u64,
516523
) -> Result<Option<Vec<NakamotoSignerEntry>>, ClientError> {
517-
debug!("Getting reward set for reward cycle {reward_cycle}...");
518524
let timer = crate::monitoring::new_rpc_call_timer(
519525
&self.reward_set_path(reward_cycle),
520526
&self.http_origin,
521527
);
522528
let send_request = || {
523-
self.stacks_node_client
529+
let response = self
530+
.stacks_node_client
524531
.get(self.reward_set_path(reward_cycle))
525532
.send()
526-
.map_err(backoff::Error::transient)
533+
.map_err(|e| backoff::Error::transient(e.into()))?;
534+
let status = response.status();
535+
if status.is_success() {
536+
return response
537+
.json()
538+
.map_err(|e| backoff::Error::permanent(e.into()));
539+
}
540+
let error_data = response.json::<GetStackersErrorResp>().map_err(|e| {
541+
warn!("Failed to parse the GetStackers error response: {e}");
542+
backoff::Error::permanent(e.into())
543+
})?;
544+
if error_data.err_type == "not_available_try_again" {
545+
return Err(backoff::Error::transient(ClientError::NoSortitionOnChain));
546+
} else {
547+
warn!("Got error response ({status}): {}", error_data.err_msg);
548+
return Err(backoff::Error::permanent(ClientError::RequestFailure(
549+
status,
550+
)));
551+
}
527552
};
528-
let response = retry_with_exponential_backoff(send_request)?;
553+
let stackers_response =
554+
retry_with_exponential_backoff::<_, ClientError, GetStackersResponse>(send_request)?;
529555
timer.stop_and_record();
530-
if !response.status().is_success() {
531-
return Err(ClientError::RequestFailure(response.status()));
532-
}
533-
let stackers_response = response.json::<GetStackersResponse>()?;
534556
Ok(stackers_response.stacker_set.signers)
535557
}
536558

stacks-signer/src/lib.rs

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,8 +61,6 @@ use crate::runloop::{RunLoop, RunLoopCommand};
6161
pub trait Signer<T: SignerEventTrait>: Debug + Display {
6262
/// Create a new `Signer` instance
6363
fn new(config: SignerConfig) -> Self;
64-
/// Update the `Signer` instance's with the next reward cycle data `SignerConfig`
65-
fn update_signer(&mut self, next_signer_config: &SignerConfig);
6664
/// Get the reward cycle of the signer
6765
fn reward_cycle(&self) -> u64;
6866
/// Process an event

0 commit comments

Comments
 (0)