Skip to content

Commit cae9099

Browse files
authored
Merge branch 'develop' into fix/burn-view
2 parents 60917a1 + 619c2fe commit cae9099

File tree

7 files changed

+191
-117
lines changed

7 files changed

+191
-117
lines changed

stacks-signer/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
1717

1818
- Introduced the `block_proposal_max_age_secs` configuration option for signers, enabling them to automatically ignore block proposals that exceed the specified age in seconds.
1919
- When a new block proposal is received while the signer is waiting for an existing proposal to be validated, the signer will wait until the existing block has finished validating before submitting the new one for validation. ([#5453](https://github.com/stacks-network/stacks-core/pull/5453))
20+
- Introduced two new Prometheus metrics:
21+
- `stacks_signer_block_validation_latencies_histogram`: the validation_time_ms reported by the node when validating a block proposal
22+
- `stacks_signer_block_response_latencies_histogram`: the "end-to-end" time it takes for the signer to issue a block response
2023

2124
## Changed
2225
- Improvements to the stale signer cleanup logic: deletes the prior signer if it has no remaining unprocessed blocks in its database

stacks-signer/src/client/stacks_client.rs

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,10 @@ impl StacksClient {
323323
block,
324324
chain_id: self.chain_id,
325325
};
326-
let timer =
327-
crate::monitoring::new_rpc_call_timer(&self.block_proposal_path(), &self.http_origin);
326+
let timer = crate::monitoring::actions::new_rpc_call_timer(
327+
&self.block_proposal_path(),
328+
&self.http_origin,
329+
);
328330
let send_request = || {
329331
self.stacks_node_client
330332
.post(self.block_proposal_path())
@@ -399,7 +401,8 @@ impl StacksClient {
399401
"{}{RPC_TENURE_FORKING_INFO_PATH}/:start/:stop",
400402
self.http_origin
401403
);
402-
let timer = crate::monitoring::new_rpc_call_timer(&metrics_path, &self.http_origin);
404+
let timer =
405+
crate::monitoring::actions::new_rpc_call_timer(&metrics_path, &self.http_origin);
403406
let send_request = || {
404407
self.stacks_node_client
405408
.get(&path)
@@ -420,7 +423,7 @@ impl StacksClient {
420423
pub fn get_current_and_last_sortition(&self) -> Result<CurrentAndLastSortition, ClientError> {
421424
debug!("StacksClient: Getting current and prior sortition");
422425
let path = format!("{}/latest_and_last", self.sortition_info_path());
423-
let timer = crate::monitoring::new_rpc_call_timer(&path, &self.http_origin);
426+
let timer = crate::monitoring::actions::new_rpc_call_timer(&path, &self.http_origin);
424427
let send_request = || {
425428
self.stacks_node_client.get(&path).send().map_err(|e| {
426429
warn!("Signer failed to request latest sortition"; "err" => ?e);
@@ -460,8 +463,10 @@ impl StacksClient {
460463
/// Get the current peer info data from the stacks node
461464
pub fn get_peer_info(&self) -> Result<PeerInfo, ClientError> {
462465
debug!("StacksClient: Getting peer info");
463-
let timer =
464-
crate::monitoring::new_rpc_call_timer(&self.core_info_path(), &self.http_origin);
466+
let timer = crate::monitoring::actions::new_rpc_call_timer(
467+
&self.core_info_path(),
468+
&self.http_origin,
469+
);
465470
let send_request = || {
466471
self.stacks_node_client
467472
.get(self.core_info_path())
@@ -485,7 +490,7 @@ impl StacksClient {
485490
debug!("StacksClient: Getting reward set signers";
486491
"reward_cycle" => reward_cycle,
487492
);
488-
let timer = crate::monitoring::new_rpc_call_timer(
493+
let timer = crate::monitoring::actions::new_rpc_call_timer(
489494
&format!("{}/v3/stacker_set/:reward_cycle", self.http_origin),
490495
&self.http_origin,
491496
);
@@ -521,7 +526,8 @@ impl StacksClient {
521526
/// Retrieve the current pox data from the stacks node
522527
pub fn get_pox_data(&self) -> Result<RPCPoxInfoData, ClientError> {
523528
debug!("StacksClient: Getting pox data");
524-
let timer = crate::monitoring::new_rpc_call_timer(&self.pox_path(), &self.http_origin);
529+
let timer =
530+
crate::monitoring::actions::new_rpc_call_timer(&self.pox_path(), &self.http_origin);
525531
let send_request = || {
526532
self.stacks_node_client
527533
.get(self.pox_path())
@@ -572,7 +578,7 @@ impl StacksClient {
572578
"address" => %address,
573579
);
574580
let timer_label = format!("{}/v2/accounts/:principal", self.http_origin);
575-
let timer = crate::monitoring::new_rpc_call_timer(&timer_label, &self.http_origin);
581+
let timer = crate::monitoring::actions::new_rpc_call_timer(&timer_label, &self.http_origin);
576582
let send_request = || {
577583
self.stacks_node_client
578584
.get(self.accounts_path(address))
@@ -628,7 +634,7 @@ impl StacksClient {
628634
"block_height" => %block.header.chain_length,
629635
);
630636
let path = format!("{}{}?broadcast=1", self.http_origin, postblock_v3::PATH);
631-
let timer = crate::monitoring::new_rpc_call_timer(&path, &self.http_origin);
637+
let timer = crate::monitoring::actions::new_rpc_call_timer(&path, &self.http_origin);
632638
let send_request = || {
633639
self.stacks_node_client
634640
.post(&path)
@@ -678,7 +684,7 @@ impl StacksClient {
678684
"{}/v2/contracts/call-read/:principal/{contract_name}/{function_name}",
679685
self.http_origin
680686
);
681-
let timer = crate::monitoring::new_rpc_call_timer(&timer_label, &self.http_origin);
687+
let timer = crate::monitoring::actions::new_rpc_call_timer(&timer_label, &self.http_origin);
682688
let response = self
683689
.stacks_node_client
684690
.post(path)

stacks-signer/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ impl<S: Signer<T> + Send + 'static, T: SignerEventTrait + 'static> SpawnedSigner
125125
);
126126
let (res_send, res_recv) = channel();
127127
let ev = SignerEventReceiver::new(config.network.is_mainnet());
128-
crate::monitoring::start_serving_monitoring_metrics(config.clone()).ok();
128+
crate::monitoring::actions::start_serving_monitoring_metrics(config.clone()).ok();
129129
let runloop = RunLoop::new(config.clone());
130130
let mut signer: RunLoopSigner<S, T> = libsigner::Signer::new(runloop, ev, res_send);
131131
let running_signer = signer.spawn(endpoint).expect("Failed to spawn signer");

stacks-signer/src/monitoring/mod.rs

Lines changed: 135 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -14,139 +14,176 @@
1414
// You should have received a copy of the GNU General Public License
1515
// along with this program. If not, see <http://www.gnu.org/licenses/>.
1616

17-
#[cfg(feature = "monitoring_prom")]
18-
use ::prometheus::HistogramTimer;
19-
#[cfg(feature = "monitoring_prom")]
20-
use slog::slog_error;
21-
#[cfg(not(feature = "monitoring_prom"))]
22-
use slog::slog_info;
23-
#[cfg(feature = "monitoring_prom")]
24-
use stacks_common::error;
25-
#[cfg(not(feature = "monitoring_prom"))]
26-
use stacks_common::info;
27-
28-
use crate::config::GlobalConfig;
29-
3017
#[cfg(feature = "monitoring_prom")]
3118
mod prometheus;
3219

3320
#[cfg(feature = "monitoring_prom")]
3421
mod server;
3522

36-
/// Update stacks tip height gauge
37-
#[allow(unused_variables)]
38-
pub fn update_stacks_tip_height(height: i64) {
39-
#[cfg(feature = "monitoring_prom")]
40-
prometheus::STACKS_TIP_HEIGHT_GAUGE.set(height);
41-
}
23+
/// Actions for updating metrics
24+
#[cfg(feature = "monitoring_prom")]
25+
pub mod actions {
26+
use ::prometheus::HistogramTimer;
27+
use blockstack_lib::chainstate::nakamoto::NakamotoBlock;
28+
use slog::slog_error;
29+
use stacks_common::error;
30+
31+
use crate::config::GlobalConfig;
32+
use crate::monitoring::prometheus::*;
33+
34+
/// Update stacks tip height gauge
35+
pub fn update_stacks_tip_height(height: i64) {
36+
STACKS_TIP_HEIGHT_GAUGE.set(height);
37+
}
4238

43-
/// Update the current reward cycle
44-
#[allow(unused_variables)]
45-
pub fn update_reward_cycle(reward_cycle: i64) {
46-
#[cfg(feature = "monitoring_prom")]
47-
prometheus::CURRENT_REWARD_CYCLE.set(reward_cycle);
48-
}
39+
/// Update the current reward cycle
40+
pub fn update_reward_cycle(reward_cycle: i64) {
41+
CURRENT_REWARD_CYCLE.set(reward_cycle);
42+
}
4943

50-
/// Increment the block validation responses counter
51-
#[allow(unused_variables)]
52-
pub fn increment_block_validation_responses(accepted: bool) {
53-
#[cfg(feature = "monitoring_prom")]
54-
{
44+
/// Increment the block validation responses counter
45+
pub fn increment_block_validation_responses(accepted: bool) {
5546
let label_value = if accepted { "accepted" } else { "rejected" };
56-
prometheus::BLOCK_VALIDATION_RESPONSES
47+
BLOCK_VALIDATION_RESPONSES
5748
.with_label_values(&[label_value])
5849
.inc();
5950
}
60-
}
6151

62-
/// Increment the block responses sent counter
63-
#[allow(unused_variables)]
64-
pub fn increment_block_responses_sent(accepted: bool) {
65-
#[cfg(feature = "monitoring_prom")]
66-
{
52+
/// Increment the block responses sent counter
53+
pub fn increment_block_responses_sent(accepted: bool) {
6754
let label_value = if accepted { "accepted" } else { "rejected" };
68-
prometheus::BLOCK_RESPONSES_SENT
69-
.with_label_values(&[label_value])
70-
.inc();
55+
BLOCK_RESPONSES_SENT.with_label_values(&[label_value]).inc();
7156
}
72-
}
7357

74-
/// Increment the number of block proposals received
75-
#[allow(unused_variables)]
76-
pub fn increment_block_proposals_received() {
77-
#[cfg(feature = "monitoring_prom")]
78-
prometheus::BLOCK_PROPOSALS_RECEIVED.inc();
79-
}
80-
81-
/// Update the stx balance of the signer
82-
#[allow(unused_variables)]
83-
pub fn update_signer_stx_balance(balance: i64) {
84-
#[cfg(feature = "monitoring_prom")]
85-
prometheus::SIGNER_STX_BALANCE.set(balance);
86-
}
58+
/// Increment the number of block proposals received
59+
pub fn increment_block_proposals_received() {
60+
BLOCK_PROPOSALS_RECEIVED.inc();
61+
}
8762

88-
/// Update the signer nonce metric
89-
#[allow(unused_variables)]
90-
pub fn update_signer_nonce(nonce: u64) {
91-
#[cfg(feature = "monitoring_prom")]
92-
prometheus::SIGNER_NONCE.set(nonce as i64);
93-
}
63+
/// Update the stx balance of the signer
64+
pub fn update_signer_stx_balance(balance: i64) {
65+
SIGNER_STX_BALANCE.set(balance);
66+
}
9467

95-
// Allow dead code because this is only used in the `monitoring_prom` feature
96-
// but we want to run it in a test
97-
#[allow(dead_code)]
98-
/// Remove the origin from the full path to avoid duplicate metrics for different origins
99-
fn remove_origin_from_path(full_path: &str, origin: &str) -> String {
100-
full_path.replace(origin, "")
101-
}
68+
/// Update the signer nonce metric
69+
pub fn update_signer_nonce(nonce: u64) {
70+
SIGNER_NONCE.set(nonce as i64);
71+
}
10272

103-
/// Start a new RPC call timer.
104-
/// The `origin` parameter is the base path of the RPC call, e.g. `http://node.com`.
105-
/// The `origin` parameter is removed from `full_path` when storing in prometheus.
106-
#[cfg(feature = "monitoring_prom")]
107-
pub fn new_rpc_call_timer(full_path: &str, origin: &str) -> HistogramTimer {
108-
let path = remove_origin_from_path(full_path, origin);
109-
let histogram = prometheus::SIGNER_RPC_CALL_LATENCIES_HISTOGRAM.with_label_values(&[&path]);
110-
histogram.start_timer()
111-
}
73+
/// Start a new RPC call timer.
74+
/// The `origin` parameter is the base path of the RPC call, e.g. `http://node.com`.
75+
/// The `origin` parameter is removed from `full_path` when storing in prometheus.
76+
pub fn new_rpc_call_timer(full_path: &str, origin: &str) -> HistogramTimer {
77+
let path = super::remove_origin_from_path(full_path, origin);
78+
let histogram = SIGNER_RPC_CALL_LATENCIES_HISTOGRAM.with_label_values(&[&path]);
79+
histogram.start_timer()
80+
}
11281

113-
/// NoOp timer uses for monitoring when the monitoring feature is not enabled.
114-
pub struct NoOpTimer;
115-
impl NoOpTimer {
116-
/// NoOp method to stop recording when the monitoring feature is not enabled.
117-
pub fn stop_and_record(&self) {}
118-
}
82+
/// Record the time taken to issue a block response for
83+
/// a given block. The block's timestamp is used to calculate the latency.
84+
///
85+
/// Call this right after broadcasting a BlockResponse
86+
pub fn record_block_response_latency(block: &NakamotoBlock) {
87+
use clarity::util::get_epoch_time_ms;
88+
89+
let diff =
90+
get_epoch_time_ms().saturating_sub(block.header.timestamp.saturating_mul(1000).into());
91+
SIGNER_BLOCK_RESPONSE_LATENCIES_HISTOGRAM
92+
.with_label_values(&[])
93+
.observe(diff as f64 / 1000.0);
94+
}
11995

120-
/// Stop and record the no-op timer.
121-
#[cfg(not(feature = "monitoring_prom"))]
122-
pub fn new_rpc_call_timer(_full_path: &str, _origin: &str) -> NoOpTimer {
123-
NoOpTimer
124-
}
96+
/// Record the time taken to validate a block, as reported by the Stacks node.
97+
pub fn record_block_validation_latency(latency_ms: u64) {
98+
SIGNER_BLOCK_VALIDATION_LATENCIES_HISTOGRAM
99+
.with_label_values(&[])
100+
.observe(latency_ms as f64 / 1000.0);
101+
}
125102

126-
/// Start serving monitoring metrics.
127-
/// This will only serve the metrics if the `monitoring_prom` feature is enabled.
128-
#[allow(unused_variables)]
129-
pub fn start_serving_monitoring_metrics(config: GlobalConfig) -> Result<(), String> {
130-
#[cfg(feature = "monitoring_prom")]
131-
{
103+
/// Start serving monitoring metrics.
104+
/// This will only serve the metrics if the `monitoring_prom` feature is enabled.
105+
pub fn start_serving_monitoring_metrics(config: GlobalConfig) -> Result<(), String> {
132106
if config.metrics_endpoint.is_none() {
133107
return Ok(());
134108
}
135-
let thread = std::thread::Builder::new()
109+
let _ = std::thread::Builder::new()
136110
.name("signer_metrics".to_string())
137111
.spawn(move || {
138-
if let Err(monitoring_err) = server::MonitoringServer::start(&config) {
112+
if let Err(monitoring_err) = super::server::MonitoringServer::start(&config) {
139113
error!("Monitoring: Error in metrics server: {:?}", monitoring_err);
140114
}
141115
});
116+
Ok(())
117+
}
118+
}
119+
120+
/// No-op actions for updating metrics
121+
#[cfg(not(feature = "monitoring_prom"))]
122+
pub mod actions {
123+
use blockstack_lib::chainstate::nakamoto::NakamotoBlock;
124+
use slog::slog_info;
125+
use stacks_common::info;
126+
127+
use crate::GlobalConfig;
128+
129+
/// Update stacks tip height gauge
130+
pub fn update_stacks_tip_height(_height: i64) {}
131+
132+
/// Update the current reward cycle
133+
pub fn update_reward_cycle(_reward_cycle: i64) {}
134+
135+
/// Increment the block validation responses counter
136+
pub fn increment_block_validation_responses(_accepted: bool) {}
137+
138+
/// Increment the block responses sent counter
139+
pub fn increment_block_responses_sent(_accepted: bool) {}
140+
141+
/// Increment the number of block proposals received
142+
pub fn increment_block_proposals_received() {}
143+
144+
/// Update the stx balance of the signer
145+
pub fn update_signer_stx_balance(_balance: i64) {}
146+
147+
/// Update the signer nonce metric
148+
pub fn update_signer_nonce(_nonce: u64) {}
149+
150+
/// NoOp timer used for monitoring when the monitoring feature is not enabled.
151+
pub struct NoOpTimer;
152+
impl NoOpTimer {
153+
/// NoOp method to stop recording when the monitoring feature is not enabled.
154+
pub fn stop_and_record(&self) {}
155+
}
156+
157+
/// Start a new no-op RPC call timer; both arguments are ignored and a `NoOpTimer` is returned.
158+
pub fn new_rpc_call_timer(_full_path: &str, _origin: &str) -> NoOpTimer {
159+
NoOpTimer
142160
}
143-
#[cfg(not(feature = "monitoring_prom"))]
144-
{
161+
162+
/// Record the time taken to issue a block response for
163+
/// a given block. The block's timestamp is used to calculate the latency.
164+
///
165+
/// Call this right after broadcasting a BlockResponse
166+
pub fn record_block_response_latency(_block: &NakamotoBlock) {}
167+
168+
/// Record the time taken to validate a block, as reported by the Stacks node.
169+
pub fn record_block_validation_latency(_latency_ms: u64) {}
170+
171+
/// Start serving monitoring metrics.
172+
/// The `monitoring_prom` feature is not enabled in this build, so no metrics server is started; an info message is logged if `metrics_endpoint` is configured.
173+
pub fn start_serving_monitoring_metrics(config: GlobalConfig) -> Result<(), String> {
145174
if config.metrics_endpoint.is_some() {
146175
info!("`metrics_endpoint` is configured for the signer, but the monitoring_prom feature is not enabled. Not starting monitoring metrics server.");
147176
}
177+
Ok(())
148178
}
149-
Ok(())
179+
}
180+
181+
// Allow dead code because this is only used in the `monitoring_prom` feature
182+
// but we want to run it in a test
183+
#[allow(dead_code)]
184+
/// Remove the origin from the full path to avoid duplicate metrics for different origins
185+
fn remove_origin_from_path(full_path: &str, origin: &str) -> String {
186+
full_path.replace(origin, "")
150187
}
151188

152189
#[test]

0 commit comments

Comments
 (0)