Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ and this project adheres to

### Removed

- [#5439](https://github.com/firecracker-microvm/firecracker/pull/5439): Removed
the `rx_partial_writes`, `tx_partial_reads`, `sync_response_fails`,
`sync_vmm_send_timeout_count`, `deprecated_cmd_line_api_calls`, `log_fails`
and `device_events` metrics, as they were never incremented.

### Fixed

- [#5418](https://github.com/firecracker-microvm/firecracker/pull/5418): Fixed
Expand Down
8 changes: 0 additions & 8 deletions src/vmm/src/devices/virtio/net/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,6 @@ pub struct NetDeviceMetrics {
pub rx_queue_event_count: SharedIncMetric,
/// Number of events associated with the rate limiter installed on the receiving path.
pub rx_event_rate_limiter_count: SharedIncMetric,
/// Number of RX partial writes to guest.
pub rx_partial_writes: SharedIncMetric,
/// Number of RX rate limiter throttling events.
pub rx_rate_limiter_throttled: SharedIncMetric,
/// Number of events received on the associated tap.
Expand Down Expand Up @@ -191,8 +189,6 @@ pub struct NetDeviceMetrics {
pub tx_count: SharedIncMetric,
/// Number of transmitted packets.
pub tx_packets_count: SharedIncMetric,
/// Number of TX partial reads from guest.
pub tx_partial_reads: SharedIncMetric,
/// Number of events associated with the transmitting queue.
pub tx_queue_event_count: SharedIncMetric,
/// Number of events associated with the rate limiter installed on the transmitting path.
Expand Down Expand Up @@ -233,8 +229,6 @@ impl NetDeviceMetrics {
.add(other.rx_queue_event_count.fetch_diff());
self.rx_event_rate_limiter_count
.add(other.rx_event_rate_limiter_count.fetch_diff());
self.rx_partial_writes
.add(other.rx_partial_writes.fetch_diff());
self.rx_rate_limiter_throttled
.add(other.rx_rate_limiter_throttled.fetch_diff());
self.rx_tap_event_count
Expand All @@ -256,8 +250,6 @@ impl NetDeviceMetrics {
self.tx_count.add(other.tx_count.fetch_diff());
self.tx_packets_count
.add(other.tx_packets_count.fetch_diff());
self.tx_partial_reads
.add(other.tx_partial_reads.fetch_diff());
self.tx_queue_event_count
.add(other.tx_queue_event_count.fetch_diff());
self.tx_rate_limiter_event_count
Expand Down
15 changes: 0 additions & 15 deletions src/vmm/src/logger/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -337,19 +337,13 @@ pub struct ApiServerMetrics {
pub process_startup_time_us: SharedStoreMetric,
/// Measures the cpu's startup time in microseconds.
pub process_startup_time_cpu_us: SharedStoreMetric,
/// Number of failures on API requests triggered by internal errors.
pub sync_response_fails: SharedIncMetric,
/// Number of timeouts during communication with the VMM.
pub sync_vmm_send_timeout_count: SharedIncMetric,
}
impl ApiServerMetrics {
/// Const default construction.
pub const fn new() -> Self {
Self {
process_startup_time_us: SharedStoreMetric::new(),
process_startup_time_cpu_us: SharedStoreMetric::new(),
sync_response_fails: SharedIncMetric::new(),
sync_vmm_send_timeout_count: SharedIncMetric::new(),
}
}
}
Expand Down Expand Up @@ -497,15 +491,12 @@ impl PatchRequestsMetrics {
pub struct DeprecatedApiMetrics {
/// Total number of calls to deprecated HTTP endpoints.
pub deprecated_http_api_calls: SharedIncMetric,
/// Total number of calls to deprecated CMD line parameters.
pub deprecated_cmd_line_api_calls: SharedIncMetric,
}
impl DeprecatedApiMetrics {
/// Const default construction.
pub const fn new() -> Self {
Self {
deprecated_http_api_calls: SharedIncMetric::new(),
deprecated_cmd_line_api_calls: SharedIncMetric::new(),
}
}
}
Expand All @@ -519,8 +510,6 @@ pub struct LoggerSystemMetrics {
pub metrics_fails: SharedIncMetric,
/// Number of misses on logging human readable content.
pub missed_log_count: SharedIncMetric,
/// Number of errors while trying to log human readable content.
pub log_fails: SharedIncMetric,
}
impl LoggerSystemMetrics {
/// Const default construction.
Expand All @@ -529,7 +518,6 @@ impl LoggerSystemMetrics {
missed_metrics_count: SharedIncMetric::new(),
metrics_fails: SharedIncMetric::new(),
missed_log_count: SharedIncMetric::new(),
log_fails: SharedIncMetric::new(),
}
}
}
Expand Down Expand Up @@ -806,16 +794,13 @@ impl VcpuMetrics {
/// Metrics specific to the machine manager as a whole.
#[derive(Debug, Default, Serialize)]
pub struct VmmMetrics {
/// Number of device related events received for a VM.
pub device_events: SharedIncMetric,
/// Metric for signaling a panic has occurred.
pub panic_count: SharedStoreMetric,
}
impl VmmMetrics {
/// Const default construction.
pub const fn new() -> Self {
Self {
device_events: SharedIncMetric::new(),
panic_count: SharedStoreMetric::new(),
}
}
Expand Down
67 changes: 60 additions & 7 deletions tests/host_tools/fcmetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
import jsonschema
import pytest

from framework import utils
from framework.properties import global_props
from framework.utils_repo import git_repo_files
from host_tools.metrics import get_metrics_logger


Expand Down Expand Up @@ -105,7 +107,6 @@ def validate_fc_metrics(metrics):
"event_fails",
"rx_queue_event_count",
"rx_event_rate_limiter_count",
"rx_partial_writes",
"rx_rate_limiter_throttled",
"rx_tap_event_count",
"rx_bytes_count",
Expand All @@ -119,7 +120,6 @@ def validate_fc_metrics(metrics):
"tx_fails",
"tx_count",
"tx_packets_count",
"tx_partial_reads",
"tx_queue_event_count",
"tx_rate_limiter_event_count",
"tx_rate_limiter_throttled",
Expand All @@ -132,8 +132,6 @@ def validate_fc_metrics(metrics):
"api_server": [
"process_startup_time_us",
"process_startup_time_cpu_us",
"sync_response_fails",
"sync_vmm_send_timeout_count",
],
"balloon": [
"activate_fails",
Expand All @@ -146,7 +144,6 @@ def validate_fc_metrics(metrics):
"block": block_metrics,
"deprecated_api": [
"deprecated_http_api_calls",
"deprecated_cmd_line_api_calls",
],
"get_api_requests": [
"instance_info_count",
Expand Down Expand Up @@ -178,7 +175,6 @@ def validate_fc_metrics(metrics):
"missed_metrics_count",
"metrics_fails",
"missed_log_count",
"log_fails",
],
"mmds": [
"rx_accepted",
Expand Down Expand Up @@ -246,7 +242,6 @@ def validate_fc_metrics(metrics):
{"exit_mmio_write_agg": latency_agg_metrics_fields},
],
"vmm": [
"device_events",
"panic_count",
],
"uart": [
Expand Down Expand Up @@ -570,3 +565,61 @@ def run(self):
time.sleep(1)
if self.running is False:
break


def find_metrics_files():
    """Collect every Firecracker source file whose name ends in 'metrics.rs'.

    Returns a list (not a generator) so callers can take len() and iterate
    more than once.
    """
    # Materialize the repository-walker generator into a concrete list.
    return [path for path in git_repo_files(root="..", glob="*metrics.rs")]


def extract_fields(file_path):
    """List every metric declared in the given file as (name, type) pairs.

    Each pair is produced by splitting a grep match of the form
    ``name: SharedIncMetric`` / ``name: SharedStoreMetric`` /
    ``name: LatencyAggregateMetrics`` on the first ``": "``.
    """
    # The lookbehind `(?<=pub )` restricts matches to public struct fields.
    grep_output = utils.run_cmd(
        rf'grep -Po "(?<=pub )(\w+): (Shared(?:Inc|Store)Metric|LatencyAggregateMetrics)" {file_path}'
    ).stdout.strip()

    parsed = []
    for declaration in grep_output.splitlines():
        parsed.append(declaration.split(": ", maxsplit=1))
    return parsed


def is_file_production(filepath):
    """Return True iff the given file should be EXCLUDED when deciding whether
    a metric is 'used in production code'.

    Accesses in these files do not count as production use: test sources of any
    flavor, and the vmm metrics-definition files themselves (where fields are
    touched by aggregation/serialization boilerplate rather than real updates).

    NOTE(review): despite the name, a True result means "NOT production" — the
    caller in `is_metric_used` negates it. Consider renaming to something like
    `is_file_excluded` in a follow-up.
    """
    # Case-insensitive matching so e.g. "Test_Foo.rs" is still excluded.
    path = filepath.lower()
    return (
        "/test/" in path
        or "/tests/" in path
        or path.endswith("_test.rs")
        # NOTE(review): substring match also hits names like "latest_…";
        # that errs on the side of treating a hit as non-production, which
        # only makes the unused-metric check stricter.
        or "test_" in path
        or "tests.rs" in path
        # The metrics-definition files under vmm reference every field in
        # their own boilerplate, so hits there prove nothing.
        or ("metrics.rs" in path and "vmm" in path)
    )


# Metric field names that `is_metric_used`'s grep patterns cannot detect but
# that are known to be updated in production code — presumably via call sites
# that never name the field directly (e.g. aggregate/store helpers).
# TODO(review): confirm each entry still needs to be whitelisted.
KNOWN_FALSE_POSITIVES = [
    "min_us",
    "max_us",
    "sum_us",
    "process_startup_time_us",
    "process_startup_time_cpu_us",
]


def is_metric_used(field, field_type):
    """Returns True iff the given metric has a production use in the firecracker codebase"""
    # Some metrics are updated through code paths the grep patterns below
    # cannot see; treat those as used unconditionally.
    if field in KNOWN_FALSE_POSITIVES:
        return True

    if field_type in ("SharedIncMetric", "SharedStoreMetric"):
        # Match direct mutation calls (.store/.inc/.add/.fetch*) on the field,
        # or any line that reaches the field through the global METRICS object.
        pattern = rf"{field}\s*\.\s*store|{field}\s*\.\s*inc|{field}\s*\.\s*add|{field}\s*\.\s*fetch|METRICS.*{field}"
    elif field_type == "LatencyAggregateMetrics":
        # Latency aggregates are only ever updated via record_latency_metrics.
        pattern = rf"{field}\s*\.\s*record_latency_metrics"
    else:
        raise RuntimeError(f"Unknown metric type: {field_type}")

    # grep -z emits NUL-separated "<file>:<match>" records so matches spanning
    # newlines stay intact; -R recurses over ../src.
    # NOTE(review): grep exits non-zero when nothing matches — assumes
    # utils.run_cmd tolerates that without raising; confirm.
    result = utils.run_cmd(f'grep -RPzo "{pattern}" ../src')

    for line in result.stdout.strip().split("\0"):
        if not line:
            continue
        # A hit whose file is not excluded (i.e. a production file, per
        # is_file_production's inverted convention) proves the metric is used.
        if not is_file_production(line.split(":", maxsplit=1)[0]):
            return True
    return False
1 change: 0 additions & 1 deletion tests/integration_tests/functional/test_pause_resume.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def verify_net_emulation_paused(metrics):
"""Verify net emulation is paused based on provided metrics."""
net_metrics = metrics["net"]
assert net_metrics["rx_queue_event_count"] == 0
assert net_metrics["rx_partial_writes"] == 0
assert net_metrics["rx_tap_event_count"] == 0
assert net_metrics["rx_bytes_count"] == 0
assert net_metrics["rx_packets_count"] == 0
Expand Down
33 changes: 33 additions & 0 deletions tests/integration_tests/style/test_rust.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests ensuring codebase style compliance for Rust."""
from collections import defaultdict

from framework import utils
from host_tools.fcmetrics import extract_fields, find_metrics_files, is_metric_used


def test_rust_order():
Expand All @@ -24,3 +26,34 @@ def test_rust_style():

# rustfmt prepends `"Diff in"` to the reported output.
assert "Diff in" not in stdout


def test_unused_metrics():
    """Tests that all metrics defined in Firecracker's metrics.rs files actually have code
    paths that increment them."""
    metrics_files = find_metrics_files()
    # Maps metrics-definition file -> list of (field, type) with no production use.
    unused = defaultdict(list)

    # Sanity check: the glob must find at least one metrics file, otherwise
    # the test would pass vacuously.
    assert metrics_files

    for file_path in metrics_files:
        fields = extract_fields(file_path)
        if not fields:
            continue

        for field, ty in fields:
            if not is_metric_used(field, ty):
                unused[file_path].append((field, ty))

    # Grouped, human-readable report of every suspect metric (visible in the
    # captured stdout when the assertion below fails).
    for file_path, fields in unused.items():
        print(f"📄 Defined in: {file_path}")
        print("Possibly Unused: \n")
        for field, field_type in fields:
            print(f"  ❌ {field} ({field_type})")

        print()

    # Typo fix in the failure message: "founds" -> "found".
    assert (
        not unused
    ), "Unused metrics found, see stdout. Please either hook them up, or remove them"