From a90cee0d3e5280d503c449e37668c08e759ec17b Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 17 Jun 2025 12:19:06 +0100 Subject: [PATCH 1/5] test: run snapshot creation latency perf test in A/B and nightly So we track the metric better Signed-off-by: Patrick Roy --- .buildkite/pipeline_perf.py | 2 +- tests/integration_tests/performance/test_snapshot_ab.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipeline_perf.py b/.buildkite/pipeline_perf.py index ef3280b0672..e680b802dc1 100755 --- a/.buildkite/pipeline_perf.py +++ b/.buildkite/pipeline_perf.py @@ -42,7 +42,7 @@ }, "snapshot-latency": { "label": "๐Ÿ“ธ Snapshot Latency", - "tests": "integration_tests/performance/test_snapshot_ab.py::test_restore_latency integration_tests/performance/test_snapshot_ab.py::test_post_restore_latency", + "tests": "integration_tests/performance/test_snapshot_ab.py::test_restore_latency integration_tests/performance/test_snapshot_ab.py::test_post_restore_latency integration_tests/performance/test_snapshot_ab.py::test_snapshot_create_latency", "devtool_opts": "-c 1-12 -m 0", }, "population-latency": { diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index 0ce24903743..d230decba47 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -251,6 +251,7 @@ def test_population_latency( raise RuntimeError("UFFD handler did not print population latency after 5s") +@pytest.mark.nonci def test_snapshot_create_latency( microvm_factory, guest_kernel_linux_5_10, From 582a54dd6e1825daa0aebaa05a172f6277b99a5e Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 17 Jun 2025 12:20:52 +0100 Subject: [PATCH 2/5] test: drop _ab suffix from performance tests There is nothing that makes these tests specifically "ab" - any test can be run as an A/B-test as long as it produces more than one data point for a metric. So drop this suffix. It was only added originally when we transitioned to A/B-testing and the A/B-capable versions of the tests had to co-exist with the old baseline version of the tests in separate files. Signed-off-by: Patrick Roy --- .buildkite/pipeline_perf.py | 14 +++++++------- tests/README.md | 2 +- .../{test_block_ab.py => test_block.py} | 0 .../{test_network_ab.py => test_network.py} | 0 .../{test_snapshot_ab.py => test_snapshot.py} | 0 .../{test_vsock_ab.py => test_vsock.py} | 0 6 files changed, 8 insertions(+), 8 deletions(-) rename tests/integration_tests/performance/{test_block_ab.py => test_block.py} (100%) rename tests/integration_tests/performance/{test_network_ab.py => test_network.py} (100%) rename tests/integration_tests/performance/{test_snapshot_ab.py => test_snapshot.py} (100%) rename tests/integration_tests/performance/{test_vsock_ab.py => test_vsock.py} (100%) diff --git a/.buildkite/pipeline_perf.py b/.buildkite/pipeline_perf.py index e680b802dc1..7604d48982e 100755 --- a/.buildkite/pipeline_perf.py +++ b/.buildkite/pipeline_perf.py @@ -18,23 +18,23 @@ perf_test = { "virtio-block-sync": { "label": "๐Ÿ’ฟ Virtio Sync Block Performance", - "tests": "integration_tests/performance/test_block_ab.py::test_block_performance -k 'not Async'", + "tests": "integration_tests/performance/test_block.py::test_block_performance -k 'not Async'", "devtool_opts": "-c 1-10 -m 0", }, "virtio-block-async": { "label": "๐Ÿ’ฟ Virtio Async Block Performance", - "tests": "integration_tests/performance/test_block_ab.py::test_block_performance -k Async", + "tests": "integration_tests/performance/test_block.py::test_block_performance -k Async", "devtool_opts": "-c 1-10 -m 0", }, "vhost-user-block": { "label": "๐Ÿ’ฟ vhost-user Block Performance", - "tests": "integration_tests/performance/test_block_ab.py::test_block_vhost_user_performance", + "tests": "integration_tests/performance/test_block.py::test_block_vhost_user_performance", "devtool_opts": "-c 1-10 -m 0", "ab_opts": "--noise-threshold 0.1", }, "network": { "label": "๐Ÿ“  Network Latency and Throughput", - "tests": "integration_tests/performance/test_network_ab.py", + "tests": "integration_tests/performance/test_network.py", "devtool_opts": "-c 1-10 -m 0", # Triggers if delta is > 0.01ms (10ยตs) or default relative threshold (5%) # only relevant for latency test, throughput test will always be magnitudes above this anyway @@ -42,17 +42,17 @@ }, "snapshot-latency": { "label": "๐Ÿ“ธ Snapshot Latency", - "tests": "integration_tests/performance/test_snapshot_ab.py::test_restore_latency integration_tests/performance/test_snapshot_ab.py::test_post_restore_latency integration_tests/performance/test_snapshot_ab.py::test_snapshot_create_latency", + "tests": "integration_tests/performance/test_snapshot.py::test_restore_latency integration_tests/performance/test_snapshot.py::test_post_restore_latency integration_tests/performance/test_snapshot.py::test_snapshot_create_latency", "devtool_opts": "-c 1-12 -m 0", }, "population-latency": { "label": "๐Ÿ“ธ Memory Population Latency", - "tests": "integration_tests/performance/test_snapshot_ab.py::test_population_latency", + "tests": "integration_tests/performance/test_snapshot.py::test_population_latency", "devtool_opts": "-c 1-12 -m 0", }, "vsock-throughput": { "label": "๐Ÿงฆ Vsock Throughput", - "tests": "integration_tests/performance/test_vsock_ab.py", + "tests": "integration_tests/performance/test_vsock.py", "devtool_opts": "-c 1-10 -m 0", }, "memory-overhead": { diff --git a/tests/README.md b/tests/README.md index 677544889ad..e8ad62d0792 100644 --- a/tests/README.md +++ b/tests/README.md @@ -162,7 +162,7 @@ BUILDKITE_PULL_REQUEST=true BUILDKITE_PULL_REQUEST_BASE_BRANCH=main ./tools/devt Firecracker has a special framework for orchestrating long-running A/B-tests which run outside the pre-PR CI. Instead, these tests are scheduled to run post-merge. Specific tests, such as our -[snapshot restore latency tests](integration_tests/performance/test_snapshot_ab.py) +[snapshot restore latency tests](integration_tests/performance/test_snapshot.py) contain no assertions themselves, but rather they emit data series using the `aws_embedded_metrics` library. When executed by the [`tools/ab_test.py`](../tools/ab_test.py) orchestration script, these data diff --git a/tests/integration_tests/performance/test_block_ab.py b/tests/integration_tests/performance/test_block.py similarity index 100% rename from tests/integration_tests/performance/test_block_ab.py rename to tests/integration_tests/performance/test_block.py diff --git a/tests/integration_tests/performance/test_network_ab.py b/tests/integration_tests/performance/test_network.py similarity index 100% rename from tests/integration_tests/performance/test_network_ab.py rename to tests/integration_tests/performance/test_network.py diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot.py similarity index 100% rename from tests/integration_tests/performance/test_snapshot_ab.py rename to tests/integration_tests/performance/test_snapshot.py diff --git a/tests/integration_tests/performance/test_vsock_ab.py b/tests/integration_tests/performance/test_vsock.py similarity index 100% rename from tests/integration_tests/performance/test_vsock_ab.py rename to tests/integration_tests/performance/test_vsock.py From b4531c69769bb64c4c071e86530f377534bbf686 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 17 Jun 2025 12:35:11 +0100 Subject: [PATCH 3/5] test: introduce snapshot_type fixture We have a few tests that are parametrized to run with both full and diff snapshots enabled, so let's move this into a fixture. Signed-off-by: Patrick Roy --- tests/conftest.py | 8 +++++++- tests/integration_tests/functional/test_snapshot_basic.py | 2 -- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index fd941947072..1a0a6bb2e75 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,7 +34,7 @@ from framework import defs, utils from framework.artifacts import disks, kernel_params from framework.defs import DEFAULT_BINARY_DIR -from framework.microvm import MicroVMFactory +from framework.microvm import MicroVMFactory, SnapshotType from framework.properties import global_props from framework.utils_cpu_templates import ( custom_cpu_templates_params, @@ -411,6 +411,12 @@ def io_engine(request): return request.param +@pytest.fixture(params=[SnapshotType.DIFF, SnapshotType.FULL]) +def snapshot_type(request): + """All possible snapshot types""" + return request.param + + @pytest.fixture def results_dir(request, pytestconfig): """ diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py index deb9b2dd6c3..2af89b66a69 100644 --- a/tests/integration_tests/functional/test_snapshot_basic.py +++ b/tests/integration_tests/functional/test_snapshot_basic.py @@ -111,7 +111,6 @@ def test_snapshot_current_version(uvm_nano): # - Rootfs: Ubuntu 18.04 # - Microvm: 2vCPU with 512 MB RAM # TODO: Multiple microvm sizes must be tested in the async pipeline. -@pytest.mark.parametrize("snapshot_type", [SnapshotType.DIFF, SnapshotType.FULL]) @pytest.mark.parametrize("use_snapshot_editor", [False, True]) def test_cycled_snapshot_restore( bin_vsock_path, @@ -500,7 +499,6 @@ def test_snapshot_overwrite_self(guest_kernel, rootfs, microvm_factory): # restored, with a new snapshot of this vm, does not break the VM -@pytest.mark.parametrize("snapshot_type", [SnapshotType.DIFF, SnapshotType.FULL]) def test_vmgenid(guest_kernel_linux_6_1, rootfs, microvm_factory, snapshot_type): """ Test VMGenID device upon snapshot resume From dbcdc7da4d5c4940fc9f472568a7263b62bf32ea Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 17 Jun 2025 12:37:29 +0100 Subject: [PATCH 4/5] test: run snapshot creation performance test with both full and diff Diff snapshots create sparse files, so they should be a lot faster than full snapshots. Signed-off-by: Patrick Roy --- .../performance/test_snapshot.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/integration_tests/performance/test_snapshot.py b/tests/integration_tests/performance/test_snapshot.py index d230decba47..0101cf20ee6 100644 --- a/tests/integration_tests/performance/test_snapshot.py +++ b/tests/integration_tests/performance/test_snapshot.py @@ -11,7 +11,7 @@ import pytest import host_tools.drive as drive_tools -from framework.microvm import HugePagesConfig, Microvm +from framework.microvm import HugePagesConfig, Microvm, SnapshotType USEC_IN_MSEC = 1000 NS_IN_MSEC = 1_000_000 @@ -257,21 +257,30 @@ def test_snapshot_create_latency( guest_kernel_linux_5_10, rootfs, metrics, + snapshot_type, ): """Measure the latency of creating a Full snapshot""" vm = microvm_factory.build(guest_kernel_linux_5_10, rootfs, monitor_memory=False) vm.spawn() - vm.basic_config(vcpu_count=2, mem_size_mib=512) + vm.basic_config( + vcpu_count=2, + mem_size_mib=512, + track_dirty_pages=snapshot_type == SnapshotType.DIFF, + ) vm.start() vm.pin_threads(0) metrics.set_dimensions( - {**vm.dimensions, "performance_test": "test_snapshot_create_latency"} + { + **vm.dimensions, + "performance_test": "test_snapshot_create_latency", + "snapshot_type": snapshot_type.value, + } ) for _ in range(ITERATIONS): - vm.snapshot_full() + vm.make_snapshot(snapshot_type) fc_metrics = vm.flush_metrics() value = fc_metrics["latencies_us"]["full_create_snapshot"] / USEC_IN_MSEC From cf7dfbee65099f42094c836aabc6698a7094ed24 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 18 Jun 2025 07:41:26 +0100 Subject: [PATCH 5/5] fix(test): add __init__.py files We were seeing "import mismatch error" due to files in functional and performance having the same name. According to [1], a fix is to just have __init__.py files. [1]: https://stackoverflow.com/questions/53918088/import-file-mismatch-in-pytest Signed-off-by: Patrick Roy --- tests/integration_tests/build/__init__.py | 2 ++ tests/integration_tests/functional/__init__.py | 2 ++ tests/integration_tests/performance/__init__.py | 2 ++ tests/integration_tests/security/__init__.py | 2 ++ tests/integration_tests/style/__init__.py | 2 ++ 5 files changed, 10 insertions(+) create mode 100644 tests/integration_tests/build/__init__.py create mode 100644 tests/integration_tests/functional/__init__.py create mode 100644 tests/integration_tests/performance/__init__.py create mode 100644 tests/integration_tests/security/__init__.py create mode 100644 tests/integration_tests/style/__init__.py diff --git a/tests/integration_tests/build/__init__.py b/tests/integration_tests/build/__init__.py new file mode 100644 index 00000000000..99d96f979b3 --- /dev/null +++ b/tests/integration_tests/build/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/integration_tests/functional/__init__.py b/tests/integration_tests/functional/__init__.py new file mode 100644 index 00000000000..99d96f979b3 --- /dev/null +++ b/tests/integration_tests/functional/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/integration_tests/performance/__init__.py b/tests/integration_tests/performance/__init__.py new file mode 100644 index 00000000000..99d96f979b3 --- /dev/null +++ b/tests/integration_tests/performance/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/integration_tests/security/__init__.py b/tests/integration_tests/security/__init__.py new file mode 100644 index 00000000000..99d96f979b3 --- /dev/null +++ b/tests/integration_tests/security/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/integration_tests/style/__init__.py b/tests/integration_tests/style/__init__.py new file mode 100644 index 00000000000..99d96f979b3 --- /dev/null +++ b/tests/integration_tests/style/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0