Skip to content

Commit 3ffc938

Browse files
committed
[Metrics] Create a thread local version to be used in hot paths
We have lots of counters in the codebase. Typically we call `with_label_values` and then record a value. `with_label_values` has to acquire a read lock, which is usually cheap but can become nontrivial in hot paths when many threads contend for it. This change adds a thread-local alternative: the caller records the value in a thread-local counter/histogram, which is flushed to the shared metric periodically. This should reduce contention and speed things up a little.
1 parent d170aff commit 3ffc938

File tree

5 files changed

+319
-12
lines changed

5 files changed

+319
-12
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

aptos-move/block-executor/src/counters.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ pub static BLOCK_VIEW_BASE_VALUES_MEMORY_USAGE: Lazy<HistogramVec> = Lazy::new(|
241241
)
242242
});
243243

244-
fn observe_gas(counter: &Lazy<HistogramVec>, mode_str: &str, fee_statement: &FeeStatement) {
244+
fn observe_gas(counter: &'static Lazy<HistogramVec>, mode_str: &str, fee_statement: &FeeStatement) {
245245
counter.observe_with(
246246
&[mode_str, GasType::TOTAL_GAS],
247247
fee_statement.gas_used() as f64,

crates/aptos-metrics-core/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ rust-version = { workspace = true }
1414

1515
[dependencies]
1616
anyhow = { workspace = true }
17+
once_cell = { workspace = true }
18+
paste = { workspace = true }
1719
prometheus = { workspace = true }
1820

1921
[dev-dependencies]

crates/aptos-metrics-core/src/lib.rs

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
// Parts of the project are originally copyright © Meta Platforms, Inc.
33
// SPDX-License-Identifier: Apache-2.0
44

5+
pub use crate::{
6+
avg_counter::{register_avg_counter, register_avg_counter_vec},
7+
thread_local::{ThreadLocalHistogramVec, ThreadLocalIntCounter, ThreadLocalIntCounterVec},
8+
};
59
// Re-export counter types from prometheus crate
610
pub use prometheus::{
711
exponential_buckets, gather, histogram_opts, register_counter, register_gauge,
@@ -12,23 +16,29 @@ pub use prometheus::{
1216
};
1317

1418
mod avg_counter;
15-
pub use avg_counter::{register_avg_counter, register_avg_counter_vec};
1619
pub mod const_metric;
1720
pub mod op_counters;
21+
pub mod thread_local;
1822

1923
pub trait TimerHelper {
20-
fn timer_with(&self, labels: &[&str]) -> HistogramTimer;
24+
type TimerType<'a>
25+
where
26+
Self: 'a;
27+
28+
fn timer_with<'a>(&'static self, labels: &'a [&str]) -> Self::TimerType<'a>;
2129

22-
fn observe_with(&self, labels: &[&str], val: f64);
30+
fn observe_with(&'static self, labels: &[&str], val: f64);
2331
}
2432

2533
impl TimerHelper for HistogramVec {
26-
fn timer_with(&self, vals: &[&str]) -> HistogramTimer {
27-
self.with_label_values(vals).start_timer()
34+
type TimerType<'a> = HistogramTimer;
35+
36+
fn timer_with<'a>(&'static self, labels: &'a [&str]) -> Self::TimerType<'a> {
37+
self.with_label_values(labels).start_timer()
2838
}
2939

30-
fn observe_with(&self, labels: &[&str], val: f64) {
31-
self.with_label_values(labels).observe(val)
40+
fn observe_with(&'static self, labels: &[&str], val: f64) {
41+
self.with_label_values(labels).observe(val);
3242
}
3343
}
3444

@@ -68,19 +78,27 @@ impl IntGaugeVecHelper for IntGaugeVec {
6878
pub trait IntCounterVecHelper {
6979
type IntType;
7080

71-
fn inc_with(&self, labels: &[&str]);
81+
fn inc_with(&'static self, labels: &[&str]);
7282

73-
fn inc_with_by(&self, labels: &[&str], by: Self::IntType);
83+
fn inc_with_by(&'static self, labels: &[&str], by: Self::IntType);
7484
}
7585

7686
impl IntCounterVecHelper for IntCounterVec {
7787
type IntType = u64;
7888

79-
fn inc_with(&self, labels: &[&str]) {
89+
fn inc_with(&'static self, labels: &[&str]) {
8090
self.with_label_values(labels).inc()
8191
}
8292

83-
fn inc_with_by(&self, labels: &[&str], v: Self::IntType) {
93+
fn inc_with_by(&'static self, labels: &[&str], v: Self::IntType) {
8494
self.with_label_values(labels).inc_by(v)
8595
}
8696
}
97+
98+
/// Helper trait for incrementing a counter that has no labels.
///
/// Both methods take `&'static self`, so implementors are used through `'static` items.
pub trait IntCounterHelper {
99+
/// Integer type accepted by `inc_by`.
type IntType;
100+
101+
/// Increments the counter.
fn inc(&'static self);
102+
103+
/// Increments the counter by `v`.
fn inc_by(&'static self, v: Self::IntType);
104+
}
Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
// Copyright © Aptos Foundation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
pub mod __private {
5+
pub use once_cell::sync::Lazy;
6+
pub use paste::paste;
7+
}
8+
9+
use crate::{IntCounterHelper, IntCounterVecHelper, TimerHelper};
10+
use std::{
11+
cell::RefCell,
12+
thread::LocalKey,
13+
time::{Duration, Instant},
14+
};
15+
16+
// Threshold compared against the time elapsed since a metric's `last_flush`
// when `maybe_flush` decides whether to call `flush()` on the local metric.
const FLUSH_INTERVAL: Duration = Duration::from_secs(1);
17+
18+
pub struct ThreadLocalIntCounter {
19+
inner: prometheus::local::LocalIntCounter,
20+
last_flush: Instant,
21+
}
22+
23+
impl ThreadLocalIntCounter {
24+
pub fn new(shared: &prometheus::IntCounter) -> Self {
25+
Self {
26+
inner: shared.local(),
27+
last_flush: Instant::now(),
28+
}
29+
}
30+
31+
fn maybe_flush(&mut self) {
32+
let now = Instant::now();
33+
if now.duration_since(self.last_flush) > FLUSH_INTERVAL {
34+
self.inner.flush();
35+
}
36+
self.last_flush = now;
37+
}
38+
}
39+
40+
impl IntCounterHelper for LocalKey<RefCell<ThreadLocalIntCounter>> {
41+
type IntType = u64;
42+
43+
fn inc(&'static self) {
44+
self.inc_by(1);
45+
}
46+
47+
fn inc_by(&'static self, v: Self::IntType) {
48+
self.with_borrow_mut(|x| {
49+
x.inner.inc_by(v);
50+
x.maybe_flush();
51+
})
52+
}
53+
}
54+
55+
pub struct ThreadLocalIntCounterVec {
56+
inner: prometheus::local::LocalIntCounterVec,
57+
last_flush: Instant,
58+
}
59+
60+
impl ThreadLocalIntCounterVec {
61+
pub fn new(shared: &prometheus::IntCounterVec) -> Self {
62+
Self {
63+
inner: shared.local(),
64+
last_flush: Instant::now(),
65+
}
66+
}
67+
68+
fn maybe_flush(&mut self) {
69+
let now = Instant::now();
70+
if now.duration_since(self.last_flush) > FLUSH_INTERVAL {
71+
self.inner.flush();
72+
}
73+
self.last_flush = now;
74+
}
75+
}
76+
77+
impl IntCounterVecHelper for LocalKey<RefCell<ThreadLocalIntCounterVec>> {
78+
type IntType = u64;
79+
80+
fn inc_with(&'static self, labels: &[&str]) {
81+
self.inc_with_by(labels, 1);
82+
}
83+
84+
fn inc_with_by(&'static self, labels: &[&str], v: Self::IntType) {
85+
self.with_borrow_mut(|x| {
86+
x.inner.with_label_values(labels).inc_by(v);
87+
x.maybe_flush();
88+
});
89+
}
90+
}
91+
92+
pub struct ThreadLocalHistogramTimer<'a> {
93+
start: Instant,
94+
labels: &'a [&'a str],
95+
parent: &'static LocalKey<RefCell<ThreadLocalHistogramVec>>,
96+
}
97+
98+
impl<'a> ThreadLocalHistogramTimer<'a> {
99+
fn new(
100+
labels: &'a [&'a str],
101+
parent: &'static LocalKey<RefCell<ThreadLocalHistogramVec>>,
102+
) -> Self {
103+
Self {
104+
start: Instant::now(),
105+
labels,
106+
parent,
107+
}
108+
}
109+
}
110+
111+
impl<'a> Drop for ThreadLocalHistogramTimer<'a> {
112+
fn drop(&mut self) {
113+
self.parent
114+
.observe_with(self.labels, self.start.elapsed().as_secs_f64());
115+
}
116+
}
117+
118+
pub struct ThreadLocalHistogramVec {
119+
inner: prometheus::local::LocalHistogramVec,
120+
last_flush: Instant,
121+
}
122+
123+
impl ThreadLocalHistogramVec {
124+
pub fn new(shared: &prometheus::HistogramVec) -> Self {
125+
Self {
126+
inner: shared.local(),
127+
last_flush: Instant::now(),
128+
}
129+
}
130+
131+
fn maybe_flush(&mut self) {
132+
let now = Instant::now();
133+
if now.duration_since(self.last_flush) > FLUSH_INTERVAL {
134+
self.inner.flush();
135+
}
136+
self.last_flush = now;
137+
}
138+
}
139+
140+
impl TimerHelper for LocalKey<RefCell<ThreadLocalHistogramVec>> {
141+
type TimerType<'a> = ThreadLocalHistogramTimer<'a>;
142+
143+
fn timer_with<'a>(&'static self, labels: &'a [&str]) -> Self::TimerType<'a> {
144+
// We could use `self.with_borrow_mut(|x| x.inner.with_label_values(labels).start_timer())`.
145+
// However, this creates a `LocalHistogramTimer`, which internally stores a copy of
146+
// `LocalHistogram`:
147+
// https://github.com/tikv/rust-prometheus/blob/1d3174bf5ddf056dcb0fe59e06cad4ef42ebec68/src/histogram.rs#L1077-L1080.
148+
// When the timer is dropped, the copied `LocalHistogram` is also dropped, and this always
149+
// causes a flush:
150+
// https://github.com/tikv/rust-prometheus/blob/1d3174bf5ddf056dcb0fe59e06cad4ef42ebec68/src/histogram.rs#L1142-L1146
151+
ThreadLocalHistogramTimer::new(labels, self)
152+
}
153+
154+
fn observe_with(&'static self, labels: &[&str], val: f64) {
155+
self.with_borrow_mut(|x| {
156+
x.inner.with_label_values(labels).observe(val);
157+
x.maybe_flush();
158+
});
159+
}
160+
}
161+
162+
// Declares a thread-local int counter.
//
// Arguments, in order: optional attributes, a visibility followed by a comma,
// the identifier of the thread-local, the metric name, and the help text
// (trailing comma allowed).
//
// Expands to two items:
// * a `Lazy<IntCounter>` static named `__<var_name>` that registers the metric
//   via `register_int_counter!` and panics through `expect` if that fails;
// * a `thread_local!` static `<var_name>` holding a
//   `RefCell<ThreadLocalIntCounter>` built from that shared counter. The
//   attributes and visibility are applied to this item only.
#[macro_export]
163+
macro_rules! make_thread_local_int_counter {
164+
(
165+
$(#[$attr:meta])*
166+
$vis:vis,
167+
$var_name:ident,
168+
$name:expr,
169+
$help:expr $(,)?
170+
) => {
171+
$crate::thread_local::__private::paste! {
172+
static [<__ $var_name>]: $crate::thread_local::__private::Lazy<$crate::IntCounter> =
173+
$crate::thread_local::__private::Lazy::new(|| {
174+
$crate::register_int_counter!($name, $help)
175+
.expect("register_int_counter should succeed")
176+
});
177+
::std::thread_local! {
178+
$(#[$attr])*
179+
$vis static $var_name: ::std::cell::RefCell<$crate::thread_local::ThreadLocalIntCounter> =
180+
::std::cell::RefCell::new(
181+
$crate::thread_local::ThreadLocalIntCounter::new(&[<__ $var_name>]),
182+
);
183+
}
184+
}
185+
}
186+
}
187+
188+
// Declares a thread-local int counter vector.
//
// Arguments, in order: optional attributes, a visibility followed by a comma,
// the identifier of the thread-local, the metric name, the help text, and the
// label names (trailing comma allowed).
//
// Expands to two items:
// * a `Lazy<IntCounterVec>` static named `__<var_name>` that registers the
//   metric via `register_int_counter_vec!` and panics through `expect` if that
//   fails;
// * a `thread_local!` static `<var_name>` holding a
//   `RefCell<ThreadLocalIntCounterVec>` built from that shared metric. The
//   attributes and visibility are applied to this item only.
#[macro_export]
189+
macro_rules! make_thread_local_int_counter_vec {
190+
(
191+
$(#[$attr:meta])*
192+
$vis:vis,
193+
$var_name:ident,
194+
$name:expr,
195+
$help:expr,
196+
$labels_names:expr $(,)?
197+
) => {
198+
$crate::thread_local::__private::paste! {
199+
static [<__ $var_name>]: $crate::thread_local::__private::Lazy<$crate::IntCounterVec> =
200+
$crate::thread_local::__private::Lazy::new(|| {
201+
$crate::register_int_counter_vec!($name, $help, $labels_names)
202+
.expect("register_int_counter_vec should succeed")
203+
});
204+
::std::thread_local! {
205+
$(#[$attr])*
206+
$vis static $var_name: ::std::cell::RefCell<$crate::thread_local::ThreadLocalIntCounterVec> =
207+
::std::cell::RefCell::new(
208+
$crate::thread_local::ThreadLocalIntCounterVec::new(&[<__ $var_name>]),
209+
);
210+
}
211+
}
212+
}
213+
}
214+
215+
// Declares a thread-local histogram vector.
//
// Arguments, in order: optional attributes, a visibility followed by a comma,
// the identifier of the thread-local, the metric name, the help text, the
// label names, and optionally the buckets (trailing comma allowed).
//
// Expands to two items:
// * a `Lazy<HistogramVec>` static named `__<var_name>` that registers the
//   metric via `register_histogram_vec!` (forwarding the buckets when given)
//   and panics through `expect` if that fails;
// * a `thread_local!` static `<var_name>` holding a
//   `RefCell<ThreadLocalHistogramVec>` built from that shared metric. The
//   attributes and visibility are applied to this item only.
#[macro_export]
216+
macro_rules! make_thread_local_histogram_vec {
217+
(
218+
$(#[$attr:meta])*
219+
$vis:vis,
220+
$var_name:ident,
221+
$name:expr,
222+
$help:expr,
223+
$labels_names:expr
224+
$(, $buckets:expr)? $(,)?
225+
) => {
226+
$crate::thread_local::__private::paste! {
227+
static [<__ $var_name>]: $crate::thread_local::__private::Lazy<$crate::HistogramVec> =
228+
$crate::thread_local::__private::Lazy::new(|| {
229+
$crate::register_histogram_vec!($name, $help, $labels_names $(, $buckets)?)
230+
.expect("register_histogram_vec should succeed")
231+
});
232+
::std::thread_local! {
233+
$(#[$attr])*
234+
$vis static $var_name: ::std::cell::RefCell<$crate::thread_local::ThreadLocalHistogramVec> =
235+
::std::cell::RefCell::new(
236+
$crate::thread_local::ThreadLocalHistogramVec::new(&[<__ $var_name>]),
237+
);
238+
}
239+
}
240+
}
241+
}
242+
243+
// Smoke tests for the `make_thread_local_*` macros and the helper-trait impls.
// They only check that the generated items compile and can be used without
// panicking; no metric values are asserted.
#[cfg(test)]
244+
mod tests {
245+
use crate::{IntCounterHelper, IntCounterVecHelper, TimerHelper};
246+
247+
// One thread-local metric of each kind, declared through the macros under test.
make_thread_local_int_counter!(
248+
pub(self),
249+
TEST_INT_COUNTER,
250+
"aptos_test_int_counter",
251+
"this is a help message",
252+
);
253+
make_thread_local_int_counter_vec!(
254+
pub(self),
255+
TEST_INT_COUNTER_VEC,
256+
"aptos_test_int_counter_vec",
257+
"this is a help message",
258+
&["label"],
259+
);
260+
make_thread_local_histogram_vec!(
261+
pub(self),
262+
TEST_HISTOGRAM_VEC,
263+
"aptos_test_histogram_vec",
264+
"this is a help message",
265+
&["label"],
266+
);
267+
268+
// Exercises both `IntCounterHelper` methods on the thread-local counter.
#[test]
269+
fn test_thread_local_int_counter() {
270+
TEST_INT_COUNTER.inc();
271+
TEST_INT_COUNTER.inc_by(2);
272+
}
273+
274+
// Exercises both `IntCounterVecHelper` methods on the thread-local counter vector.
#[test]
275+
fn test_thread_local_int_counter_vec() {
276+
TEST_INT_COUNTER_VEC.inc_with(&["foo"]);
277+
TEST_INT_COUNTER_VEC.inc_with_by(&["foo"], 2);
278+
}
279+
280+
// Exercises both `TimerHelper` methods; `_timer` is dropped at the end of the
// function, after the explicit `observe_with` call.
#[test]
281+
fn test_thread_local_histogram_vec() {
282+
let _timer = TEST_HISTOGRAM_VEC.timer_with(&["foo"]);
283+
TEST_HISTOGRAM_VEC.observe_with(&["bar"], 1.0);
284+
}
285+
}

0 commit comments

Comments
 (0)