diff --git a/CHANGELOG.md b/CHANGELOG.md index 8af38184bbc..b754729d312 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,11 @@ and this project adheres to ### Removed +- [#5439](https://github.com/firecracker-microvm/firecracker/pull/5439): Removed + the `rx_partial_writes`, `tx_partial_reads`, `sync_response_fails`, + `sync_vmm_send_timeout_count`, `deprecated_cmd_line_api_calls`, `log_fails` + and `device_events` metrics, as they were never incremented. + ### Fixed - [#5418](https://github.com/firecracker-microvm/firecracker/pull/5418): Fixed diff --git a/src/vmm/src/devices/virtio/net/metrics.rs b/src/vmm/src/devices/virtio/net/metrics.rs index 60e03f224de..a2d18c8412b 100644 --- a/src/vmm/src/devices/virtio/net/metrics.rs +++ b/src/vmm/src/devices/virtio/net/metrics.rs @@ -161,8 +161,6 @@ pub struct NetDeviceMetrics { pub rx_queue_event_count: SharedIncMetric, /// Number of events associated with the rate limiter installed on the receiving path. pub rx_event_rate_limiter_count: SharedIncMetric, - /// Number of RX partial writes to guest. - pub rx_partial_writes: SharedIncMetric, /// Number of RX rate limiter throttling events. pub rx_rate_limiter_throttled: SharedIncMetric, /// Number of events received on the associated tap. @@ -191,8 +189,6 @@ pub struct NetDeviceMetrics { pub tx_count: SharedIncMetric, /// Number of transmitted packets. pub tx_packets_count: SharedIncMetric, - /// Number of TX partial reads from guest. - pub tx_partial_reads: SharedIncMetric, /// Number of events associated with the transmitting queue. pub tx_queue_event_count: SharedIncMetric, /// Number of events associated with the rate limiter installed on the transmitting path. 
@@ -233,8 +229,6 @@ impl NetDeviceMetrics { .add(other.rx_queue_event_count.fetch_diff()); self.rx_event_rate_limiter_count .add(other.rx_event_rate_limiter_count.fetch_diff()); - self.rx_partial_writes - .add(other.rx_partial_writes.fetch_diff()); self.rx_rate_limiter_throttled .add(other.rx_rate_limiter_throttled.fetch_diff()); self.rx_tap_event_count @@ -256,8 +250,6 @@ impl NetDeviceMetrics { self.tx_count.add(other.tx_count.fetch_diff()); self.tx_packets_count .add(other.tx_packets_count.fetch_diff()); - self.tx_partial_reads - .add(other.tx_partial_reads.fetch_diff()); self.tx_queue_event_count .add(other.tx_queue_event_count.fetch_diff()); self.tx_rate_limiter_event_count diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index e10e746d3d1..294af948591 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -337,10 +337,6 @@ pub struct ApiServerMetrics { pub process_startup_time_us: SharedStoreMetric, /// Measures the cpu's startup time in microseconds. pub process_startup_time_cpu_us: SharedStoreMetric, - /// Number of failures on API requests triggered by internal errors. - pub sync_response_fails: SharedIncMetric, - /// Number of timeouts during communication with the VMM. - pub sync_vmm_send_timeout_count: SharedIncMetric, } impl ApiServerMetrics { /// Const default construction. @@ -348,8 +344,6 @@ impl ApiServerMetrics { Self { process_startup_time_us: SharedStoreMetric::new(), process_startup_time_cpu_us: SharedStoreMetric::new(), - sync_response_fails: SharedIncMetric::new(), - sync_vmm_send_timeout_count: SharedIncMetric::new(), } } } @@ -497,15 +491,12 @@ impl PatchRequestsMetrics { pub struct DeprecatedApiMetrics { /// Total number of calls to deprecated HTTP endpoints. pub deprecated_http_api_calls: SharedIncMetric, - /// Total number of calls to deprecated CMD line parameters. - pub deprecated_cmd_line_api_calls: SharedIncMetric, } impl DeprecatedApiMetrics { /// Const default construction. 
pub const fn new() -> Self { Self { deprecated_http_api_calls: SharedIncMetric::new(), - deprecated_cmd_line_api_calls: SharedIncMetric::new(), } } } @@ -519,8 +510,6 @@ pub struct LoggerSystemMetrics { pub metrics_fails: SharedIncMetric, /// Number of misses on logging human readable content. pub missed_log_count: SharedIncMetric, - /// Number of errors while trying to log human readable content. - pub log_fails: SharedIncMetric, } impl LoggerSystemMetrics { /// Const default construction. @@ -529,7 +518,6 @@ impl LoggerSystemMetrics { missed_metrics_count: SharedIncMetric::new(), metrics_fails: SharedIncMetric::new(), missed_log_count: SharedIncMetric::new(), - log_fails: SharedIncMetric::new(), } } } @@ -806,8 +794,6 @@ impl VcpuMetrics { /// Metrics specific to the machine manager as a whole. #[derive(Debug, Default, Serialize)] pub struct VmmMetrics { - /// Number of device related events received for a VM. - pub device_events: SharedIncMetric, /// Metric for signaling a panic has occurred. pub panic_count: SharedStoreMetric, } @@ -815,7 +801,6 @@ impl VmmMetrics { /// Const default construction. 
pub const fn new() -> Self { Self { - device_events: SharedIncMetric::new(), panic_count: SharedStoreMetric::new(), } } diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index e2a1862c21f..3a93ced14d3 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -18,7 +18,9 @@ import jsonschema import pytest +from framework import utils from framework.properties import global_props +from framework.utils_repo import git_repo_files from host_tools.metrics import get_metrics_logger @@ -105,7 +107,6 @@ def validate_fc_metrics(metrics): "event_fails", "rx_queue_event_count", "rx_event_rate_limiter_count", - "rx_partial_writes", "rx_rate_limiter_throttled", "rx_tap_event_count", "rx_bytes_count", @@ -119,7 +120,6 @@ def validate_fc_metrics(metrics): "tx_fails", "tx_count", "tx_packets_count", - "tx_partial_reads", "tx_queue_event_count", "tx_rate_limiter_event_count", "tx_rate_limiter_throttled", @@ -132,8 +132,6 @@ def validate_fc_metrics(metrics): "api_server": [ "process_startup_time_us", "process_startup_time_cpu_us", - "sync_response_fails", - "sync_vmm_send_timeout_count", ], "balloon": [ "activate_fails", @@ -146,7 +144,6 @@ def validate_fc_metrics(metrics): "block": block_metrics, "deprecated_api": [ "deprecated_http_api_calls", - "deprecated_cmd_line_api_calls", ], "get_api_requests": [ "instance_info_count", @@ -178,7 +175,6 @@ def validate_fc_metrics(metrics): "missed_metrics_count", "metrics_fails", "missed_log_count", - "log_fails", ], "mmds": [ "rx_accepted", @@ -246,7 +242,6 @@ def validate_fc_metrics(metrics): {"exit_mmio_write_agg": latency_agg_metrics_fields}, ], "vmm": [ - "device_events", "panic_count", ], "uart": [ @@ -570,3 +565,61 @@ def run(self): time.sleep(1) if self.running is False: break + + +def find_metrics_files(): + """Gets a list of all Firecracker source files ending with 'metrics.rs'""" + return list(git_repo_files(root="..", glob="*metrics.rs")) + + +def extract_fields(file_path): + 
"""Gets a list of all metrics defined in the given file, in the form tuples (name, type)""" + fields = utils.run_cmd( + rf'grep -Po "(?<=pub )(\w+): (Shared(?:Inc|Store)Metric|LatencyAggregateMetrics)" {file_path}' + ).stdout.strip() + + return [field.split(": ", maxsplit=1) for field in fields.splitlines()] + + +def is_file_production(filepath): + """Returns True iff accesses to metric fields in the given file should cause the metric be considered 'used in production code'. Excludes, for example, files in which the metrics are defined, where accesses happen as part of copy constructors, etc.""" + path = filepath.lower() + return ( + "/test/" in path + or "/tests/" in path + or path.endswith("_test.rs") + or "test_" in path + or "tests.rs" in path + or ("metrics.rs" in path and "vmm" in path) + ) + + +KNOWN_FALSE_POSITIVES = [ + "min_us", + "max_us", + "sum_us", + "process_startup_time_us", + "process_startup_time_cpu_us", +] + + +def is_metric_used(field, field_type): + """Returns True iff the given metric has a production use in the firecracker codebase""" + if field in KNOWN_FALSE_POSITIVES: + return True + + if field_type in ("SharedIncMetric", "SharedStoreMetric"): + pattern = rf"{field}\s*\.\s*store|{field}\s*\.\s*inc|{field}\s*\.\s*add|{field}\s*\.\s*fetch|METRICS.*{field}" + elif field_type == "LatencyAggregateMetrics": + pattern = rf"{field}\s*\.\s*record_latency_metrics" + else: + raise RuntimeError(f"Unknown metric type: {field_type}") + + result = utils.run_cmd(f'grep -RPzo "{pattern}" ../src') + + for line in result.stdout.strip().split("\0"): + if not line: + continue + if not is_file_production(line.split(":", maxsplit=1)[0]): + return True + return False diff --git a/tests/integration_tests/functional/test_pause_resume.py b/tests/integration_tests/functional/test_pause_resume.py index ca2bb6936b3..d27e21f3103 100644 --- a/tests/integration_tests/functional/test_pause_resume.py +++ b/tests/integration_tests/functional/test_pause_resume.py @@ -13,7 
+13,6 @@ def verify_net_emulation_paused(metrics): """Verify net emulation is paused based on provided metrics.""" net_metrics = metrics["net"] assert net_metrics["rx_queue_event_count"] == 0 - assert net_metrics["rx_partial_writes"] == 0 assert net_metrics["rx_tap_event_count"] == 0 assert net_metrics["rx_bytes_count"] == 0 assert net_metrics["rx_packets_count"] == 0 diff --git a/tests/integration_tests/style/test_rust.py b/tests/integration_tests/style/test_rust.py index 580c33eb03d..a2278ebf936 100644 --- a/tests/integration_tests/style/test_rust.py +++ b/tests/integration_tests/style/test_rust.py @@ -1,8 +1,10 @@ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests ensuring codebase style compliance for Rust.""" +from collections import defaultdict from framework import utils +from host_tools.fcmetrics import extract_fields, find_metrics_files, is_metric_used def test_rust_order(): @@ -24,3 +26,34 @@ def test_rust_style(): # rustfmt prepends `"Diff in"` to the reported output. assert "Diff in" not in stdout + + +def test_unused_metrics(): + """Tests that all metrics defined in Firecracker's metrics.rs files actually have code + paths that increment them.""" + metrics_files = find_metrics_files() + unused = defaultdict(list) + + assert metrics_files + + for file_path in metrics_files: + fields = extract_fields(file_path) + if not fields: + continue + + for field, ty in fields: + if not is_metric_used(field, ty): + unused[file_path].append((field, ty)) + + # Grouped output + for file_path, fields in unused.items(): + print(f"📄 Defined in: {file_path}") + print("Possibly Unused: \n") + for field, field_type in fields: + print(f" ❌ {field} ({field_type})") + + print() + + assert ( + not unused + ), "Unused metrics found, see stdout. Please either hook them up, or remove them"