5 changes: 5 additions & 0 deletions .buildkite/pipeline_perf.py
@@ -65,6 +65,11 @@
"tests": "integration_tests/performance/test_jailer.py",
"devtool_opts": "-c 1-10 -m 0",
},
"pmem": {
"label": "💿 Pmem Performance",
"tests": "integration_tests/performance/test_pmem.py",
"devtool_opts": "-c 1-10 -m 0",
},
}

REVISION_A = os.environ.get("REVISION_A")
200 changes: 200 additions & 0 deletions tests/framework/utils_fio.py
@@ -0,0 +1,200 @@
# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""File containing utility methods for fio-based performance tests"""

import json
import os
from enum import Enum
from pathlib import Path

from framework.utils import CmdBuilder

DEFAULT_RUNTIME_SEC = 30
DEFAULT_WARMUP_SEC = 10


class Mode(str, Enum):
"""
Modes of fio operation
"""

# Sequential reads.
READ = "read"
# Sequential writes.
WRITE = "write"
# Sequential trims (Linux block devices and SCSI character devices only).
TRIM = "trim"
# Random reads.
RANDREAD = "randread"
# Random writes.
RANDWRITE = "randwrite"
# Random trims (Linux block devices and SCSI character devices only).
RANDTRIM = "randtrim"
# Sequential mixed reads and writes.
READWRITE = "readwrite"
# Random mixed reads and writes.
RANDRW = "randrw"


class Engine(str, Enum):
"""
Fio backend engines
"""

LIBAIO = "libaio"
PSYNC = "psync"


def build_cmd(
file_path: str,
file_size_mb: str | None,
block_size: int,
mode: Mode,
num_jobs: int,
io_engine: Engine,
runtime: int | None = DEFAULT_RUNTIME_SEC,
warmup_time: int | None = DEFAULT_WARMUP_SEC,
write_logs: bool = True,
) -> str:
"""Build fio cmd"""

cmd = (
CmdBuilder("fio")
.with_arg(f"--name={mode.value}-{block_size}")
.with_arg(f"--filename={file_path}")
)

if file_size_mb:
cmd = cmd.with_arg(f"--size={file_size_mb}M")

cmd = cmd.with_arg(f"--bs={block_size}")

if runtime and warmup_time:
cmd = (
cmd.with_arg("--time_based=1")
.with_arg(f"--runtime={runtime}")
.with_arg(f"--ramp_time={warmup_time}")
)

cmd = (
cmd.with_arg(f"--rw={mode.value}")
.with_arg("--direct=1")
.with_arg("--randrepeat=0")
.with_arg(f"--ioengine={io_engine.value}")
.with_arg("--iodepth=32")
.with_arg(f"--numjobs={num_jobs}")
# Set affinity of the entire fio process to a set of vCPUs equal
# in size to the number of workers
.with_arg(f"--cpus_allowed={','.join(str(i) for i in range(num_jobs))}")
# Instruct fio to pin one worker per vcpu
.with_arg("--cpus_allowed_policy=split")
.with_arg("--output-format=json+")
.with_arg("--output=./fio.json")
)

if write_logs:
cmd = cmd.with_arg("--log_avg_msec=1000").with_arg(
f"--write_bw_log={mode.value}"
)
# Latency measurements only make sense for the psync engine
if io_engine == Engine.PSYNC:
cmd = cmd.with_arg(f"--write_lat_log={mode}")

return cmd.build()
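
# Illustrative usage sketch (not part of the original change): how a
# performance test might build and run an fio command inside the guest.
# The `vm` handle and the device path are assumptions for the example;
# `vm.ssh.check_output` is used the same way it is elsewhere in this PR.
def _example_run_fio(vm, device_path="/dev/vdb"):
    """Run a short random-read fio job against `device_path` inside `vm`."""
    cmd = build_cmd(
        file_path=device_path,
        file_size_mb=None,  # no explicit --size; fio sizes the job from the target
        block_size=4096,
        mode=Mode.RANDREAD,
        num_jobs=2,
        io_engine=Engine.LIBAIO,
    )
    # Results land in ./fio.json plus per-job bandwidth logs (randread_bw.*.log).
    vm.ssh.check_output(cmd)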


class LogType(Enum):
"""Fio log types"""

BW = "_bw"
CLAT = "_clat"


def process_log_files(root_dir: str, log_type: LogType) -> tuple[list[list[int]], list[list[int]]]:
"""
Parses fio logs, which have the form:
1000, 2007920, 0, 0, 0
1000, 2005276, 1, 0, 0
2000, 1996240, 0, 0, 0
2000, 1993861, 1, 0, 0
...
where the first column is the timestamp, the second is the bw/clat value
and the third is the direction (0 = read, 1 = write).

The logs directory will look something like this:
readwrite_bw.1.log
readwrite_bw.2.log
readwrite_clat.1.log
readwrite_clat.2.log
readwrite_lat.1.log
readwrite_lat.2.log
readwrite_slat.1.log
readwrite_slat.2.log

job0 job1
read write read write
[..] [..] [..] [..]
| | | |
| --|------- ----
| | ------| |
[[], []] [[], []]
reads writes

The output is two arrays: an array of reads and an array of writes
"""
paths = []
for item in os.listdir(root_dir):
if item.endswith(".log") and log_type.value in item:
paths.append(Path(root_dir) / item)

if not paths:
return [], []

reads = []
writes = []
for path in sorted(paths):
lines = path.read_text("UTF-8").splitlines()
read_values = []
write_values = []
for line in lines:
# See https://fio.readthedocs.io/en/latest/fio_doc.html#log-file-formats
_, value, direction, _ = line.split(",", maxsplit=3)
value = int(value.strip())

match direction.strip():
case "0":
read_values.append(value)
case "1":
write_values.append(value)
case _:
assert False, f"unexpected direction {direction}"

reads.append(read_values)
writes.append(write_values)
return reads, writes
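
# Illustrative sketch (not part of the original change): one way a caller
# could reduce the per-job series returned by `process_log_files` into
# per-job averages before emitting bandwidth metrics. The log directory
# path is an assumption.
def _example_average_bw(logs_dir="/tmp/fio-logs"):
    """Return the mean of each job's bandwidth samples (reads and writes)."""
    reads, writes = process_log_files(logs_dir, LogType.BW)
    mean_reads = [sum(job) / len(job) for job in reads if job]
    mean_writes = [sum(job) / len(job) for job in writes if job]
    return mean_reads, mean_writes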


def process_json_files(root_dir: str) -> tuple[list[list[int]], list[list[int]]]:
"""
Reads `bw_bytes` values from fio*.json files and
packs them into two arrays: bw_reads and bw_writes.
Each entry is itself an array with one element per job in the file.
"""
paths = []
for item in os.listdir(root_dir):
if item.endswith(".json") and "fio" in item:
paths.append(Path(root_dir) / item)

bw_reads = []
bw_writes = []
for path in sorted(paths):
data = json.loads(path.read_text("UTF-8"))
reads = []
writes = []
for job in data["jobs"]:
if "read" in job:
reads.append(job["read"]["bw_bytes"])
if "write" in job:
writes.append(job["write"]["bw_bytes"])
bw_reads.append(reads)
bw_writes.append(writes)
return bw_reads, bw_writes
64 changes: 64 additions & 0 deletions tests/integration_tests/functional/test_pmem.py
@@ -6,6 +6,7 @@
import os

import host_tools.drive as drive_tools
from framework import utils

ALIGNMENT = 2 << 20

@@ -140,3 +141,66 @@ def test_pmem_add_as_root_ro(uvm_plain_any, rootfs, microvm_factory):
snapshot = vm.snapshot_full()
restored_vm = microvm_factory.build_from_snapshot(snapshot)
check_pmem_exist(restored_vm, 0, True, True, align(rootfs_size), "squashfs")


def inside_buff_cache(vm) -> int:
"""Get buffer/cache usage from inside the vm"""
_, stdout, _ = vm.ssh.check_output("free")
# Get the `buffer/cache` of the `free` command which represents
# kernel page cache size
return int(stdout.splitlines()[1].split()[5])


def outside_rssanon(vm) -> int:
"""Get RssAnon usage from outside the vm"""
cmd = f"cat /proc/{vm.firecracker_pid}/status | grep RssAnon"
_, stdout, _ = utils.check_output(cmd)
return int(stdout.split()[1])


def test_pmem_dax_memory_saving(
microvm_factory,
guest_kernel_acpi,
rootfs_rw,
):
"""
Test that booting from pmem with DAX enabled saves memory in the
guest by not requiring the guest to use its page cache
"""

# Boot from a block device
vm = microvm_factory.build(
guest_kernel_acpi, rootfs_rw, pci=True, monitor_memory=False
)
vm.spawn()
vm.basic_config()
vm.add_net_iface()
vm.start()
block_cache_usage = inside_buff_cache(vm)
block_rss_usage = outside_rssanon(vm)

# Boot from pmem with DAX enabled for root device
vm_pmem = microvm_factory.build(
guest_kernel_acpi, rootfs_rw, pci=True, monitor_memory=False
)
vm_pmem.spawn()
vm_pmem.basic_config(
add_root_device=False,
boot_args="reboot=k panic=1 nomodule swiotlb=noforce console=ttyS0 rootflags=dax",
)
vm_pmem.add_net_iface()
vm_pmem.add_pmem("pmem", rootfs_rw, True, False)
vm_pmem.start()
pmem_cache_usage = inside_buff_cache(vm_pmem)
pmem_rss_usage = outside_rssanon(vm_pmem)

# The pmem cache usage should be much lower than drive cache usage.
# The 50% is an arbitrary number, but does provide a good guarantee
# that DAX is working
assert (
pmem_cache_usage < block_cache_usage * 0.5
), f"{block_cache_usage} <= {pmem_cache_usage}"
# RssAnon difference will be smaller, so no multipliers
assert (
pmem_rss_usage < block_rss_usage
), f"{block_cache_usage} <= {pmem_cache_usage}"