|
3 | 3 | """Performance benchmark for block device emulation.""" |
4 | 4 |
|
5 | 5 | import concurrent |
| 6 | +import glob |
6 | 7 | import os |
7 | 8 | import shutil |
8 | 9 | from pathlib import Path |
@@ -68,6 +69,7 @@ def run_fio(microvm, mode, block_size): |
68 | 69 | # Instruct fio to pin one worker per vcpu |
69 | 70 | .with_arg("--cpus_allowed_policy=split") |
70 | 71 | .with_arg(f"--write_bw_log={mode}") |
| 72 | + .with_arg(f"--write_lat_log={mode}") |
71 | 73 | .with_arg("--log_avg_msec=1000") |
72 | 74 | .build() |
73 | 75 | ) |
@@ -101,37 +103,51 @@ def run_fio(microvm, mode, block_size): |
101 | 103 | return logs_path, cpu_load_future.result() |
102 | 104 |
|
103 | 105 |
|
104 | | -def process_fio_logs(vm, fio_mode, logs_dir, metrics): |
105 | | - """Parses the fio logs in `{logs_dir}/{fio_mode}_bw.*.log and emits their contents as CloudWatch metrics""" |
106 | | - |
| 106 | +def process_fio_log_files(logs_glob): |
| 107 | + """Parses all fio log files matching the given glob and yields tuples of same-timestamp read and write metrics""" |
107 | 108 | data = [ |
108 | | - Path(f"{logs_dir}/{fio_mode}_bw.{job_id + 1}.log") |
109 | | - .read_text("UTF-8") |
110 | | - .splitlines() |
111 | | - for job_id in range(vm.vcpus_count) |
| 109 | + Path(pathname).read_text("UTF-8").splitlines() |
| 110 | + for pathname in glob.glob(logs_glob) |
112 | 111 | ] |
113 | 112 |
|
| 113 | + assert data, "no log files found!" |
| 114 | + |
114 | 115 | for tup in zip(*data): |
115 | | - bw_read = 0 |
116 | | - bw_write = 0 |
| 116 | + read_values = [] |
| 117 | + write_values = [] |
117 | 118 |
|
118 | 119 | for line in tup: |
| 120 | + # See https://fio.readthedocs.io/en/latest/fio_doc.html#log-file-formats |
119 | 121 | _, value, direction, _ = line.split(",", maxsplit=3) |
120 | 122 | value = int(value.strip()) |
121 | 123 |
|
122 | | - # See https://fio.readthedocs.io/en/latest/fio_doc.html#log-file-formats |
123 | 124 | match direction.strip(): |
124 | 125 | case "0": |
125 | | - bw_read += value |
| 126 | + read_values.append(value) |
126 | 127 | case "1": |
127 | | - bw_write += value |
| 128 | + write_values.append(value) |
128 | 129 | case _: |
129 | 130 | assert False |
130 | 131 |
|
| 132 | + yield read_values, write_values |
| 133 | + |
| 134 | + |
def emit_fio_metrics(logs_dir, metrics):
    """Parses the fio logs in `{logs_dir}/*_bw.*.log` and `{logs_dir}/*_clat.*.log`
    and emits their contents as CloudWatch metrics"""

    # Bandwidth logs: one value per job per interval; sum across jobs to get
    # the aggregate throughput for each logging interval.
    for bw_read, bw_write in process_fio_log_files(f"{logs_dir}/*_bw.*.log"):
        if bw_read:
            metrics.put_metric("bw_read", sum(bw_read), "Kilobytes/Second")
        if bw_write:
            metrics.put_metric("bw_write", sum(bw_write), "Kilobytes/Second")

    # Completion-latency logs: emit each sample individually rather than
    # summing. Latency values in fio logs are in nanoseconds, but CloudWatch
    # only supports microseconds as the more granular unit, so need to divide
    # by 1000.
    for lat_read, lat_write in process_fio_log_files(f"{logs_dir}/*_clat.*.log"):
        for value in lat_read:
            metrics.put_metric("clat_read", value / 1000, "Microseconds")
        for value in lat_write:
            metrics.put_metric("clat_write", value / 1000, "Microseconds")
135 | 151 |
|
136 | 152 |
|
137 | 153 | @pytest.mark.timeout(120) |
@@ -177,7 +193,7 @@ def test_block_performance( |
177 | 193 |
|
178 | 194 | logs_dir, cpu_util = run_fio(vm, fio_mode, fio_block_size) |
179 | 195 |
|
180 | | - process_fio_logs(vm, fio_mode, logs_dir, metrics) |
| 196 | + emit_fio_metrics(logs_dir, metrics) |
181 | 197 |
|
182 | 198 | for thread_name, values in cpu_util.items(): |
183 | 199 | for value in values: |
@@ -226,7 +242,7 @@ def test_block_vhost_user_performance( |
226 | 242 |
|
227 | 243 | logs_dir, cpu_util = run_fio(vm, fio_mode, fio_block_size) |
228 | 244 |
|
229 | | - process_fio_logs(vm, fio_mode, logs_dir, metrics) |
| 245 | + emit_fio_metrics(logs_dir, metrics) |
230 | 246 |
|
231 | 247 | for thread_name, values in cpu_util.items(): |
232 | 248 | for value in values: |
|
0 commit comments