Skip to content

Commit c09213b

Browse files
authored
Merge pull request #1263 from wprzytula/330-make-metrics-collection-optional/faster
Make metrics collection optional/faster
2 parents 89b5af9 + a1bcaa5 commit c09213b

File tree

19 files changed

+658
-58
lines changed

19 files changed

+658
-58
lines changed

.github/workflows/rust.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ jobs:
5050
# Various (de)serialization features.
5151
- name: Cargo check with all serialization features
5252
run: cargo check --all-targets --manifest-path "scylla/Cargo.toml" --features "full-serialization"
53+
- name: Cargo check with metrics feature
54+
run: cargo check --all-targets --manifest-path "scylla/Cargo.toml" --features "metrics"
5355
- name: Cargo check with secrecy-08 feature
5456
run: cargo check --all-targets --manifest-path "scylla/Cargo.toml" --features "secrecy-08"
5557
- name: Cargo check with chrono-04 feature

Cargo.lock.msrv

Lines changed: 5 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/source/metrics/metrics.md

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Driver metrics
22

3+
This feature is available only under the crate feature `metrics`.
4+
35
During operation the driver collects various metrics.
46

57
They can be accessed at any moment using `Session::get_metrics()`
@@ -11,6 +13,9 @@ They can be accessed at any moment using `Session::get_metrics()`
1113
* Total number of paged queries
1214
* Number of errors during paged queries
1315
* Number of retries
16+
* Latency histogram statistics (min, max, mean, standard deviation, percentiles)
17+
* Rates of queries per second in various time frames
18+
* Number of active connections, and connection and request timeouts
1419

1520
### Example
1621
```rust
@@ -24,11 +29,32 @@ println!("Queries requested: {}", metrics.get_queries_num());
2429
println!("Iter queries requested: {}", metrics.get_queries_iter_num());
2530
println!("Errors occurred: {}", metrics.get_errors_num());
2631
println!("Iter errors occurred: {}", metrics.get_errors_iter_num());
27-
println!("Average latency: {}", metrics.get_latency_avg_ms().unwrap());
32+
println!("Average latency: {}", metrics.get_latency_avg_ms()?);
2833
println!(
2934
"99.9 latency percentile: {}",
30-
metrics.get_latency_percentile_ms(99.9).unwrap()
35+
metrics.get_latency_percentile_ms(99.9)?
3136
);
37+
38+
let snapshot = metrics.get_snapshot()?;
39+
println!("Min: {}", snapshot.min);
40+
println!("Max: {}", snapshot.max);
41+
println!("Mean: {}", snapshot.mean);
42+
println!("Standard deviation: {}", snapshot.stddev);
43+
println!("Median: {}", snapshot.median);
44+
println!("75th percentile: {}", snapshot.percentile_75);
45+
println!("95th percentile: {}", snapshot.percentile_95);
46+
println!("98th percentile: {}", snapshot.percentile_98);
47+
println!("99th percentile: {}", snapshot.percentile_99);
48+
println!("99.9th percentile: {}", snapshot.percentile_99_9);
49+
50+
println!("Mean rate: {}", metrics.get_mean_rate());
51+
println!("One minute rate: {}", metrics.get_one_minute_rate());
52+
println!("Five minute rate: {}", metrics.get_five_minute_rate());
53+
println!("Fifteen minute rate: {}", metrics.get_fifteen_minute_rate());
54+
55+
println!("Total connections: {}", metrics.get_total_connections());
56+
println!("Connection timeouts: {}", metrics.get_connection_timeouts());
57+
println!("Requests timeouts: {}", metrics.get_request_timeouts());
3258
# Ok(())
3359
# }
3460
```

docs/source/speculative-execution/percentile.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ This policy has access to `Metrics` shared with session, and triggers
44
speculative execution when the request to the current host is above a
55
given percentile.
66

7+
This policy requires enabling crate feature `"metrics"` to be available.
8+
79

810
### Example
911
To use this policy in `Session`:

examples/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ scylla = { path = "../scylla", features = [
1919
"num-bigint-03",
2020
"num-bigint-04",
2121
"bigdecimal-04",
22+
"metrics",
2223
] }
2324
tokio = { version = "1.34", features = ["full"] }
2425
tracing = { version = "0.1.25", features = ["log"] }

examples/basic.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,33 @@ async fn main() -> Result<()> {
9494
println!("Iter queries requested: {}", metrics.get_queries_iter_num());
9595
println!("Errors occurred: {}", metrics.get_errors_num());
9696
println!("Iter errors occurred: {}", metrics.get_errors_iter_num());
97-
println!("Average latency: {}", metrics.get_latency_avg_ms().unwrap());
97+
println!("Average latency: {}", metrics.get_latency_avg_ms()?);
9898
println!(
9999
"99.9 latency percentile: {}",
100-
metrics.get_latency_percentile_ms(99.9).unwrap()
100+
metrics.get_latency_percentile_ms(99.9)?
101101
);
102102

103+
let snapshot = metrics.get_snapshot()?;
104+
println!("Min: {}", snapshot.min);
105+
println!("Max: {}", snapshot.max);
106+
println!("Mean: {}", snapshot.mean);
107+
println!("Standard deviation: {}", snapshot.stddev);
108+
println!("Median: {}", snapshot.median);
109+
println!("75th percentile: {}", snapshot.percentile_75);
110+
println!("95th percentile: {}", snapshot.percentile_95);
111+
println!("98th percentile: {}", snapshot.percentile_98);
112+
println!("99th percentile: {}", snapshot.percentile_99);
113+
println!("99.9th percentile: {}", snapshot.percentile_99_9);
114+
115+
println!("Mean rate: {}", metrics.get_mean_rate());
116+
println!("One minute rate: {}", metrics.get_one_minute_rate());
117+
println!("Five minute rate: {}", metrics.get_five_minute_rate());
118+
println!("Fifteen minute rate: {}", metrics.get_fifteen_minute_rate());
119+
120+
println!("Total connections: {}", metrics.get_total_connections());
121+
println!("Connection timeouts: {}", metrics.get_connection_timeouts());
122+
println!("Requests timeouts: {}", metrics.get_request_timeouts());
123+
103124
println!("Ok.");
104125

105126
Ok(())

scylla/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ full-serialization = [
3939
"num-bigint-04",
4040
"bigdecimal-04",
4141
]
42+
metrics = ["dep:histogram"]
4243
unstable-testing = []
4344

4445
[dependencies]
@@ -47,7 +48,7 @@ byteorder = "1.3.4"
4748
bytes = "1.0.1"
4849
futures = "0.3.6"
4950
hashbrown = "0.14"
50-
histogram = "0.6.9"
51+
histogram = { version = "0.11.1", optional = true }
5152
tokio = { version = "1.34", features = [
5253
"net",
5354
"time",

scylla/src/client/pager.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use crate::frame::response::result;
3232
use crate::network::Connection;
3333
use crate::observability::driver_tracing::RequestSpan;
3434
use crate::observability::history::{self, HistoryListener};
35+
#[cfg(feature = "metrics")]
3536
use crate::observability::metrics::Metrics;
3637
use crate::policies::load_balancing::{self, RoutingInfo};
3738
use crate::policies::retry::{RequestInfo, RetryDecision, RetrySession};
@@ -66,6 +67,7 @@ pub(crate) struct PreparedIteratorConfig {
6667
pub(crate) values: SerializedValues,
6768
pub(crate) execution_profile: Arc<ExecutionProfileInner>,
6869
pub(crate) cluster_state: Arc<ClusterState>,
70+
#[cfg(feature = "metrics")]
6971
pub(crate) metrics: Arc<Metrics>,
7072
}
7173

@@ -139,6 +141,7 @@ struct PagerWorker<'a, QueryFunc, SpanCreatorFunc> {
139141
query_consistency: Consistency,
140142
retry_session: Box<dyn RetrySession>,
141143
execution_profile: Arc<ExecutionProfileInner>,
144+
#[cfg(feature = "metrics")]
142145
metrics: Arc<Metrics>,
143146

144147
paging_state: PagingState,
@@ -237,11 +240,13 @@ where
237240

238241
match retry_decision {
239242
RetryDecision::RetrySameTarget(cl) => {
243+
#[cfg(feature = "metrics")]
240244
self.metrics.inc_retries_num();
241245
current_consistency = cl.unwrap_or(current_consistency);
242246
continue 'same_node_retries;
243247
}
244248
RetryDecision::RetryNextTarget(cl) => {
249+
#[cfg(feature = "metrics")]
245250
self.metrics.inc_retries_num();
246251
current_consistency = cl.unwrap_or(current_consistency);
247252
continue 'nodes_in_plan;
@@ -301,6 +306,7 @@ where
301306
node: NodeRef<'_>,
302307
request_span: &RequestSpan,
303308
) -> Result<ControlFlow<PageSendAttemptedProof, ()>, RequestAttemptError> {
309+
#[cfg(feature = "metrics")]
304310
self.metrics.inc_total_paged_queries();
305311
let query_start = std::time::Instant::now();
306312

@@ -326,6 +332,7 @@ where
326332
tracing_id,
327333
..
328334
}) => {
335+
#[cfg(feature = "metrics")]
329336
let _ = self.metrics.log_query_latency(elapsed.as_millis() as u64);
330337
self.log_attempt_success();
331338
self.log_request_success();
@@ -361,6 +368,7 @@ where
361368
Ok(ControlFlow::Continue(()))
362369
}
363370
Err(err) => {
371+
#[cfg(feature = "metrics")]
364372
self.metrics.inc_failed_paged_queries();
365373
self.execution_profile
366374
.load_balancing_policy
@@ -380,6 +388,7 @@ where
380388
Ok(ControlFlow::Break(proof))
381389
}
382390
Ok(response) => {
391+
#[cfg(feature = "metrics")]
383392
self.metrics.inc_failed_paged_queries();
384393
let err =
385394
RequestAttemptError::UnexpectedResponse(response.response.to_response_kind());
@@ -664,7 +673,7 @@ impl QueryPager {
664673
statement: Statement,
665674
execution_profile: Arc<ExecutionProfileInner>,
666675
cluster_state: Arc<ClusterState>,
667-
metrics: Arc<Metrics>,
676+
#[cfg(feature = "metrics")] metrics: Arc<Metrics>,
668677
) -> Result<Self, NextPageError> {
669678
let (sender, receiver) = mpsc::channel::<Result<ReceivedPage, NextPageError>>(1);
670679

@@ -727,6 +736,7 @@ impl QueryPager {
727736
query_consistency: consistency,
728737
retry_session,
729738
execution_profile,
739+
#[cfg(feature = "metrics")]
730740
metrics,
731741
paging_state: PagingState::start(),
732742
history_listener: statement.config.history_listener.clone(),
@@ -847,6 +857,7 @@ impl QueryPager {
847857
query_consistency: consistency,
848858
retry_session,
849859
execution_profile: config.execution_profile,
860+
#[cfg(feature = "metrics")]
850861
metrics: config.metrics,
851862
paging_state: PagingState::start(),
852863
history_listener: config.prepared.config.history_listener.clone(),

0 commit comments

Comments
 (0)