Skip to content

Commit d2b52b3

Browse files
committed
feat: add signer capitulation latency metric, #5918
1 parent 66ea1e0 commit d2b52b3

File tree

7 files changed

+212
-21
lines changed

7 files changed

+212
-21
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

stacks-signer/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ rusqlite = { workspace = true, features = ["functions"] }
4949
clarity = { path = "../clarity", features = ["testing"] }
5050
stacks-common = { path = "../stacks-common", features = ["testing"] }
5151
num-traits = "0.2.18"
52+
chrono = "0.4.19"
5253

5354
[dependencies.serde_json]
5455
version = "1.0"

stacks-signer/src/monitoring/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,13 @@ pub mod actions {
157157
.inc();
158158
}
159159

160+
/// Record the time (seconds) taken to a signer to agree with the signer set
161+
pub fn record_signer_agreement_capitulation_latency(latency_s: u64) {
162+
SIGNER_AGREEMENT_CAPITULATION_LATENCIES_HISTOGRAM
163+
.with_label_values(&[])
164+
.observe(latency_s as f64);
165+
}
166+
160167
/// Start serving monitoring metrics.
161168
/// This will only serve the metrics if the `monitoring_prom` feature is enabled.
162169
pub fn start_serving_monitoring_metrics(config: GlobalConfig) -> Result<(), String> {
@@ -238,6 +245,9 @@ pub mod actions {
238245
/// Increment signer agreement state conflict counter
239246
pub fn increment_signer_agreement_state_conflict(_conflict: SignerAgreementStateConflict) {}
240247

248+
/// Record the time (seconds) taken to a signer to agree with the signer set
249+
pub fn record_signer_agreement_capitulation_latency(_latency_s: u64) {}
250+
241251
/// Start serving monitoring metrics.
242252
/// This will only serve the metrics if the `monitoring_prom` feature is enabled.
243253
pub fn start_serving_monitoring_metrics(config: GlobalConfig) -> Result<(), String> {

stacks-signer/src/monitoring/prometheus.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ lazy_static! {
9191
&["conflict"]
9292
).unwrap();
9393

94+
pub static ref SIGNER_AGREEMENT_CAPITULATION_LATENCIES_HISTOGRAM: HistogramVec = register_histogram_vec!(histogram_opts!(
95+
"stacks_signer_agreement_capitulation_latencies_histogram",
96+
"Measuring the time (in seconds) for the signer to agree (capitulate) with the signer set",
97+
vec![0.0, 1.0, 3.0, 5.0, 10.0, 20.0, 30.0, 60.0, 120.0]
98+
), &[]).unwrap();
99+
94100
pub static ref SIGNER_LOCAL_STATE_MACHINE: Mutex<Option<LocalStateMachine>> = Mutex::new(None);
95101
}
96102

stacks-signer/src/signerdb.rs

Lines changed: 147 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ CREATE TABLE IF NOT EXISTS signer_state_machine_updates (
528528
signer_addr TEXT NOT NULL,
529529
reward_cycle INTEGER NOT NULL,
530530
state_update TEXT NOT NULL,
531-
received_time TEXT NOT NULL,
531+
received_time INTEGER NOT NULL,
532532
PRIMARY KEY (signer_addr, reward_cycle)
533533
) STRICT;"#;
534534

@@ -1406,6 +1406,22 @@ impl SignerDb {
14061406
}
14071407
Ok(result)
14081408
}
1409+
1410+
/// Retrieve the elapsed time (in seconds) between
1411+
/// the oldest and the newest state machine update messages
1412+
/// produced by the signer set
1413+
pub fn get_signer_state_machine_updates_latency(
1414+
&self,
1415+
reward_cycle: u64,
1416+
) -> Result<u64, DBError> {
1417+
let query = "SELECT COALESCE( (MAX(received_time) - MIN(received_time)), 0 ) AS elapsed_time FROM signer_state_machine_updates WHERE reward_cycle = ?1";
1418+
let args = params![u64_to_sql(reward_cycle)?];
1419+
let elapsed_time_opt: Option<u64> = query_row(&self.db, query, args)?;
1420+
match elapsed_time_opt {
1421+
Some(seconds) => Ok(seconds),
1422+
None => Ok(0),
1423+
}
1424+
}
14091425
}
14101426

14111427
fn try_deserialize<T>(s: Option<String>) -> Result<Option<T>, DBError>
@@ -1551,6 +1567,48 @@ pub mod tests {
15511567
assert_eq!(BlockInfo::from(block_proposal_2), block_info);
15521568
}
15531569

1570+
fn system_time_from_ymd_hms_optional(
1571+
year: u32,
1572+
month: u32,
1573+
day: u32,
1574+
hour: u32,
1575+
minute: u32,
1576+
second: u32,
1577+
) -> Option<SystemTime> {
1578+
use std::time::{Duration, UNIX_EPOCH};
1579+
1580+
use chrono::NaiveDate;
1581+
// Make sure year is safe to cast
1582+
if year > i32::MAX as u32 {
1583+
return None;
1584+
}
1585+
// Cast year to i32 safely
1586+
let year_i32 = year as i32;
1587+
let naive =
1588+
NaiveDate::from_ymd_opt(year_i32, month, day)?.and_hms_opt(hour, minute, second)?;
1589+
1590+
let timestamp = naive.timestamp();
1591+
if timestamp < 0 {
1592+
return None;
1593+
}
1594+
1595+
// Convert to SystemTime
1596+
let duration = timestamp as u64;
1597+
Some(UNIX_EPOCH + Duration::from_secs(duration))
1598+
}
1599+
1600+
fn system_time_from_ymd_hms(
1601+
year: u32,
1602+
month: u32,
1603+
day: u32,
1604+
hour: u32,
1605+
minute: u32,
1606+
second: u32,
1607+
) -> SystemTime {
1608+
system_time_from_ymd_hms_optional(year, month, day, hour, minute, second)
1609+
.expect("Invalid system time")
1610+
}
1611+
15541612
#[test]
15551613
fn test_basic_signer_db() {
15561614
let db_path = tmp_db_path();
@@ -2500,4 +2558,92 @@ pub mod tests {
25002558
assert_eq!(updates.get(&address_2), None);
25012559
assert_eq!(updates.get(&address_3), Some(&update_3));
25022560
}
2561+
2562+
#[test]
2563+
fn retrieve_latency_for_signer_state_machine_updates() {
2564+
let db_path = tmp_db_path();
2565+
let mut db = SignerDb::new(db_path).expect("Failed to create signer db");
2566+
let reward_cycle_1 = 1;
2567+
let address_1 = StacksAddress::p2pkh(false, &StacksPublicKey::new());
2568+
let update_1 = StateMachineUpdate::new(
2569+
0,
2570+
3,
2571+
StateMachineUpdateContent::V0 {
2572+
burn_block: ConsensusHash([0x55; 20]),
2573+
burn_block_height: 100,
2574+
current_miner: StateMachineUpdateMinerState::ActiveMiner {
2575+
current_miner_pkh: Hash160([0xab; 20]),
2576+
tenure_id: ConsensusHash([0x44; 20]),
2577+
parent_tenure_id: ConsensusHash([0x22; 20]),
2578+
parent_tenure_last_block: StacksBlockId([0x33; 32]),
2579+
parent_tenure_last_block_height: 1,
2580+
},
2581+
},
2582+
)
2583+
.unwrap();
2584+
let time_1 = system_time_from_ymd_hms(2025, 04, 17, 12, 30, 00);
2585+
2586+
let address_2 = StacksAddress::p2pkh(false, &StacksPublicKey::new());
2587+
let update_2 = StateMachineUpdate::new(
2588+
0,
2589+
4,
2590+
StateMachineUpdateContent::V0 {
2591+
burn_block: ConsensusHash([0x55; 20]),
2592+
burn_block_height: 100,
2593+
current_miner: StateMachineUpdateMinerState::NoValidMiner,
2594+
},
2595+
)
2596+
.unwrap();
2597+
let time_2 = system_time_from_ymd_hms(2025, 04, 17, 12, 30, 01);
2598+
2599+
let address_3 = StacksAddress::p2pkh(false, &StacksPublicKey::new());
2600+
let update_3 = StateMachineUpdate::new(
2601+
0,
2602+
2,
2603+
StateMachineUpdateContent::V0 {
2604+
burn_block: ConsensusHash([0x66; 20]),
2605+
burn_block_height: 101,
2606+
current_miner: StateMachineUpdateMinerState::NoValidMiner,
2607+
},
2608+
)
2609+
.unwrap();
2610+
let time_3 = system_time_from_ymd_hms(2025, 04, 17, 12, 30, 10);
2611+
2612+
assert_eq!(
2613+
0,
2614+
db.get_signer_state_machine_updates_latency(reward_cycle_1)
2615+
.unwrap(),
2616+
"latency on empty database should be 0 seconds for reward_cycle {reward_cycle_1}"
2617+
);
2618+
2619+
db.insert_state_machine_update(reward_cycle_1, &address_1, &update_1, &time_1)
2620+
.expect("Unable to insert block into db");
2621+
2622+
assert_eq!(
2623+
0,
2624+
db.get_signer_state_machine_updates_latency(reward_cycle_1)
2625+
.unwrap(),
2626+
"latency between same update should be 0 seconds"
2627+
);
2628+
2629+
db.insert_state_machine_update(reward_cycle_1, &address_2, &update_2, &time_2)
2630+
.expect("Unable to insert block into db");
2631+
2632+
assert_eq!(
2633+
1,
2634+
db.get_signer_state_machine_updates_latency(reward_cycle_1)
2635+
.unwrap(),
2636+
"latency between updates should be 1 second"
2637+
);
2638+
2639+
db.insert_state_machine_update(reward_cycle_1, &address_3, &update_3, &time_3)
2640+
.expect("Unable to insert block into db");
2641+
2642+
assert_eq!(
2643+
10,
2644+
db.get_signer_state_machine_updates_latency(reward_cycle_1)
2645+
.unwrap(),
2646+
"latency between updates should be 10 second"
2647+
);
2648+
}
25032649
}

stacks-signer/src/v0/signer.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,7 @@ impl Signer {
619619
&mut self.signer_db,
620620
&mut self.global_state_evaluator,
621621
self.stacks_address,
622+
self.reward_cycle,
622623
);
623624
}
624625

stacks-signer/src/v0/signer_state.rs

Lines changed: 46 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,7 @@ impl LocalStateMachine {
773773
signerdb: &mut SignerDb,
774774
eval: &mut GlobalStateEvaluator,
775775
local_address: StacksAddress,
776+
reward_cycle: u64,
776777
) {
777778
// Before we ever access eval...we should make sure to include our own local state machine update message in the evaluation
778779
let local_update: Result<StateMachineUpdateMessage, _> = (&*self).try_into();
@@ -833,26 +834,8 @@ impl LocalStateMachine {
833834
crate::monitoring::actions::increment_signer_agreement_state_change_reason(
834835
crate::monitoring::SignerAgreementStateChangeReason::MinerViewUpdate,
835836
);
836-
#[cfg(feature = "monitoring_prom")]
837-
match (&current_miner, &new_miner) {
838-
(
839-
StateMachineUpdateMinerState::ActiveMiner {
840-
parent_tenure_id: current_parent_tenure,
841-
..
842-
},
843-
StateMachineUpdateMinerState::ActiveMiner {
844-
parent_tenure_id: new_parent_tenure,
845-
..
846-
},
847-
) => {
848-
if current_parent_tenure != new_parent_tenure {
849-
crate::monitoring::actions::increment_signer_agreement_state_change_reason(
850-
crate::monitoring::SignerAgreementStateChangeReason::MinerParentTenureUpdate,
851-
);
852-
}
853-
}
854-
_ => {}
855-
}
837+
Self::monitor_miner_parent_tenure_update(&current_miner, &new_miner);
838+
Self::monitor_capitulation_latency(signerdb, reward_cycle);
856839

857840
*self = Self::Initialized(SignerStateMachine {
858841
burn_block,
@@ -862,4 +845,47 @@ impl LocalStateMachine {
862845
});
863846
}
864847
}
848+
849+
#[allow(unused_variables)]
850+
fn monitor_miner_parent_tenure_update(
851+
current_miner: &StateMachineUpdateMinerState,
852+
new_miner: &StateMachineUpdateMinerState,
853+
) {
854+
#[cfg(feature = "monitoring_prom")]
855+
match (&current_miner, &new_miner) {
856+
(
857+
StateMachineUpdateMinerState::ActiveMiner {
858+
parent_tenure_id: current_parent_tenure,
859+
..
860+
},
861+
StateMachineUpdateMinerState::ActiveMiner {
862+
parent_tenure_id: new_parent_tenure,
863+
..
864+
},
865+
) => {
866+
if current_parent_tenure != new_parent_tenure {
867+
crate::monitoring::actions::increment_signer_agreement_state_change_reason(
868+
crate::monitoring::SignerAgreementStateChangeReason::MinerParentTenureUpdate,
869+
);
870+
}
871+
}
872+
_ => {}
873+
}
874+
}
875+
876+
#[allow(unused_variables)]
877+
fn monitor_capitulation_latency(signer_db: &SignerDb, reward_cycle: u64) {
878+
#[cfg(feature = "monitoring_prom")]
879+
{
880+
let latency_result = signer_db.get_signer_state_machine_updates_latency(reward_cycle);
881+
match latency_result {
882+
Ok(seconds) => {
883+
crate::monitoring::actions::record_signer_agreement_capitulation_latency(
884+
seconds,
885+
)
886+
}
887+
Err(e) => warn!("Failed to retrieve state updates latency in signerdb: {e}"),
888+
}
889+
}
890+
}
865891
}

0 commit comments

Comments
 (0)