Skip to content

Commit bb57d28

Browse files
committed
feat(metrics/block): add file access latency metrics
These metrics measure how long the Firecracker VMM thread is blocked on the read/write syscalls when accessing data in the drive file. By looking at them, we will be able to see what portion of block latency is attributable to factors external to Firecracker. (cherry picked from commit e00d62d) Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent a7a4098 commit bb57d28

File tree

3 files changed

+27
-3
lines changed

3 files changed

+27
-3
lines changed

src/vmm/src/devices/virtio/block/virtio/metrics.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ use std::sync::{Arc, RwLock};
8383
use serde::ser::SerializeMap;
8484
use serde::{Serialize, Serializer};
8585

86-
use crate::logger::{IncMetric, SharedIncMetric};
86+
use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric};
8787

8888
/// map of block drive id and metrics
8989
/// this should be protected by a lock before accessing.
@@ -105,7 +105,7 @@ impl BlockMetricsPerDevice {
105105
.write()
106106
.unwrap()
107107
.metrics
108-
.insert(drive_id.clone(), Arc::new(BlockDeviceMetrics::default()));
108+
.insert(drive_id.clone(), Arc::new(BlockDeviceMetrics::new()));
109109
}
110110
METRICS
111111
.read()
@@ -178,6 +178,10 @@ pub struct BlockDeviceMetrics {
178178
pub read_count: SharedIncMetric,
179179
/// Number of successful write operations.
180180
pub write_count: SharedIncMetric,
181+
/// Duration of all read operations.
182+
pub read_agg: LatencyAggregateMetrics,
183+
/// Duration of all write operations.
184+
pub write_agg: LatencyAggregateMetrics,
181185
/// Number of rate limiter throttling events.
182186
pub rate_limiter_throttled_events: SharedIncMetric,
183187
/// Number of virtio events throttled because of the IO engine.
@@ -186,6 +190,15 @@ pub struct BlockDeviceMetrics {
186190
}
187191

188192
impl BlockDeviceMetrics {
193+
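/// Creates block device metrics with `read_agg` and `write_agg` built via `LatencyAggregateMetrics::new()` and every other field set to its `Default` value.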
194+
pub fn new() -> Self {
195+
Self {
196+
read_agg: LatencyAggregateMetrics::new(),
197+
write_agg: LatencyAggregateMetrics::new(),
198+
..Default::default()
199+
}
200+
}
201+
189202
/// block metrics are SharedIncMetric where the diff of current vs
190203
/// old is serialized i.e. serialize_u64(current-old).
191204
/// So to have the aggregate serialized in same way we need to
@@ -210,6 +223,10 @@ impl BlockDeviceMetrics {
210223
self.write_bytes.add(other.write_bytes.fetch_diff());
211224
self.read_count.add(other.read_count.fetch_diff());
212225
self.write_count.add(other.write_count.fetch_diff());
226+
self.read_agg.sum_us.add(other.read_agg.sum_us.fetch_diff());
227+
self.write_agg
228+
.sum_us
229+
.add(other.write_agg.sum_us.fetch_diff());
213230
self.rate_limiter_throttled_events
214231
.add(other.rate_limiter_throttled_events.fetch_diff());
215232
self.io_engine_throttled_events

src/vmm/src/devices/virtio/block/virtio/request.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,10 +373,12 @@ impl Request {
373373
let pending = self.to_pending_request(desc_idx);
374374
let res = match self.r#type {
375375
RequestType::In => {
376+
let _metric = block_metrics.read_agg.record_latency_metrics();
376377
disk.file_engine
377378
.read(self.offset(), mem, self.data_addr, self.data_len, pending)
378379
}
379380
RequestType::Out => {
381+
let _metric = block_metrics.write_agg.record_latency_metrics();
380382
disk.file_engine
381383
.write(self.offset(), mem, self.data_addr, self.data_len, pending)
382384
}

tests/host_tools/fcmetrics.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,12 @@ def validate_fc_metrics(metrics):
234234
],
235235
}
236236

237-
latency_agg_metrics = {}
237+
latency_agg_metrics = {
238+
"block": [
239+
"read_agg",
240+
"write_agg",
241+
],
242+
}
238243

239244
# validate timestamp before jsonschema validation, which takes some more time
240245
utc_time = datetime.datetime.now(datetime.timezone.utc)

0 commit comments

Comments
 (0)