diff --git a/.buildkite/pipeline_cross.py b/.buildkite/pipeline_cross.py index d7cd261a3d0..1bac8c4bc3b 100755 --- a/.buildkite/pipeline_cross.py +++ b/.buildkite/pipeline_cross.py @@ -85,7 +85,7 @@ f"buildkite-agent artifact download snapshots/{src_instance}_{src_kv}.tar .", f"tar xSvf snapshots/{src_instance}_{src_kv}.tar", *pipeline.devtool_test( - pytest_opts=f"-m nonci -n4 {k_val} integration_tests/functional/test_snapshot_restore_cross_kernel.py", + pytest_opts=f"-m nonci -n8 --dist worksteal {k_val} integration_tests/functional/test_snapshot_restore_cross_kernel.py", ), ], "label": f"🎬 {src_instance} {src_kv} ➡️ {dst_instance} {dst_kv}", diff --git a/.buildkite/pipeline_pr.py b/.buildkite/pipeline_pr.py index 618aa17860b..5b4693f51bd 100755 --- a/.buildkite/pipeline_pr.py +++ b/.buildkite/pipeline_pr.py @@ -76,7 +76,7 @@ pipeline.build_group( "⚙ Functional and security 🔒", pipeline.devtool_test( - pytest_opts="-n 8 --dist worksteal integration_tests/{{functional,security}}", + pytest_opts="-n 16 --dist worksteal integration_tests/{{functional,security}}", ), ) diff --git a/tests/conftest.py b/tests/conftest.py index fb0fe4d5752..8c81714f716 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,6 @@ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -# We import some fixtures that are unused. Disable that too. -# pylint:disable=unused-import - """Imported by pytest at the start of every test session. # Fixture Goals @@ -25,12 +22,10 @@ import inspect import json import os -import re import shutil import sys import tempfile from pathlib import Path -from typing import Dict import pytest @@ -44,6 +39,7 @@ static_cpu_templates_params, ) from host_tools.metrics import get_metrics_logger +from host_tools.network import NetNs # This codebase uses Python features available in Python 3.10 or above if sys.version_info < (3, 10): @@ -56,7 +52,7 @@ METRICS = get_metrics_logger() -PHASE_REPORT_KEY = pytest.StashKey[Dict[str, pytest.CollectReport]]() +PHASE_REPORT_KEY = pytest.StashKey[dict[str, pytest.CollectReport]]() def pytest_addoption(parser): @@ -265,15 +261,46 @@ def uffd_handler_paths(): yield handlers -@pytest.fixture() -def microvm_factory(request, record_property, results_dir): - """Fixture to create microvms simply. +@pytest.fixture(scope="session") +def netns_factory(worker_id): + """A network namespace factory - In order to avoid running out of space when instantiating many microvms, - we remove the directory manually when the fixture is destroyed - (that is after every test). - One can comment the removal line, if it helps with debugging. + Network namespaces are created once per test session and re-used in subsequent tests. """ + # pylint:disable=protected-access + + class NetNsFactory: + """A Network namespace factory that reuses namespaces.""" + + def __init__(self, prefix: str): + self._all = [] + self._returned = [] + self.prefix = prefix + + def get(self, _netns_id): + """Get a free network namespace""" + if len(self._returned) > 0: + ns = self._returned.pop(0) + while ns.is_used(): + pass + return ns + ns = NetNs(self.prefix + str(len(self._all))) + # change the cleanup function so it is returned to the pool + ns._cleanup_orig = ns.cleanup + ns.cleanup = lambda: self._returned.append(ns) + self._all.append(ns) + return ns + + netns_fcty = NetNsFactory(f"netns-{worker_id}-") + yield netns_fcty.get + + for netns in netns_fcty._all: + netns._cleanup_orig() + + +@pytest.fixture() +def microvm_factory(request, record_property, results_dir, netns_factory): + """Fixture to create microvms simply.""" if binary_dir := request.config.getoption("--binary-dir"): fc_binary_path = Path(binary_dir) / "firecracker" @@ -298,7 +325,10 @@ def microvm_factory(request, record_property, results_dir): # We could override the chroot base like so # jailer_kwargs={"chroot_base": "/srv/jailo"} uvm_factory = MicroVMFactory( - fc_binary_path, jailer_binary_path, custom_cpu_template=custom_cpu_template + fc_binary_path, + jailer_binary_path, + netns_factory=netns_factory, + custom_cpu_template=custom_cpu_template, ) yield uvm_factory @@ -421,19 +451,13 @@ def rootfs_rw(): @pytest.fixture def uvm_plain(microvm_factory, guest_kernel_linux_5_10, rootfs): - """Create a vanilla VM, non-parametrized - kernel: 5.10 - rootfs: Ubuntu 24.04 - """ + """Create a vanilla VM, non-parametrized""" return microvm_factory.build(guest_kernel_linux_5_10, rootfs) @pytest.fixture def uvm_plain_rw(microvm_factory, guest_kernel_linux_5_10, rootfs_rw): - """Create a vanilla VM, non-parametrized - kernel: 5.10 - rootfs: Ubuntu 24.04 - """ + """Create a vanilla VM, non-parametrized""" return microvm_factory.build(guest_kernel_linux_5_10, rootfs_rw) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 0903b689dfa..278cb9ecd60 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -1074,6 +1074,7 @@ def __init__(self, fc_binary_path: Path, jailer_binary_path: Path, **kwargs): self.vms = [] self.fc_binary_path = Path(fc_binary_path) self.jailer_binary_path = Path(jailer_binary_path) + self.netns_factory = kwargs.pop("netns_factory", net_tools.NetNs) self.kwargs = kwargs def build(self, kernel=None, rootfs=None, **kwargs): @@ -1086,7 +1087,7 @@ def build(self, kernel=None, rootfs=None, **kwargs): jailer_binary_path=kwargs.pop( "jailer_binary_path", self.jailer_binary_path ), - netns=kwargs.pop("netns", net_tools.NetNs(microvm_id)), + netns=kwargs.pop("netns", self.netns_factory(microvm_id)), **kwargs, ) vm.netns.setup() diff --git a/tests/host_tools/network.py b/tests/host_tools/network.py index af0b3766ee0..93cdb323c50 100644 --- a/tests/host_tools/network.py +++ b/tests/host_tools/network.py @@ -251,15 +251,13 @@ def __init__(self, name, netns, ip=None): It also creates a new tap device, brings it up and moves the interface to the specified namespace. """ - # Avoid a conflict if two tests want to create the same tap device tap0 - # in the host before moving it into its own netns - temp_name = "tap" + random_str(k=8) - utils.check_output(f"ip tuntap add mode tap name {temp_name}") - utils.check_output(f"ip link set {temp_name} name {name} netns {netns}") - if ip: - utils.check_output(f"ip netns exec {netns} ifconfig {name} {ip} up") self._name = name self._netns = netns + # Create the tap device tap0 directly in the network namespace to avoid + # conflicts + self.netns.check_output(f"ip tuntap add mode tap name {name}") + if ip: + self.netns.check_output(f"ifconfig {name} {ip} up") @property def name(self): @@ -273,14 +271,10 @@ def netns(self): def set_tx_queue_len(self, tx_queue_len): """Set the length of the tap's TX queue.""" - utils.check_output( - "ip netns exec {} ip link set {} txqueuelen {}".format( - self.netns, self.name, tx_queue_len - ) - ) + self.netns.check_output(f"ip link set {self.name} txqueuelen {tx_queue_len}") def __repr__(self): - return f"" + return f"" @dataclass(frozen=True, repr=True) @@ -315,7 +309,7 @@ def with_id(i, netmask_len=30): ) -@dataclass(frozen=True, repr=True) +@dataclass(repr=True) class NetNs: """Defines a network namespace.""" @@ -334,6 +328,10 @@ def cmd_prefix(self): """Return the jailer context netns file prefix.""" return f"ip netns exec {self.id}" + def check_output(self, cmd: str): + """Run a command inside the netns.""" + return utils.check_output(f"{self.cmd_prefix()} {cmd}") + def setup(self): """Set up this network namespace.""" if not self.path.exists(): @@ -350,6 +348,19 @@ def add_tap(self, name, ip): We assume that a Tap is always configured with the same IP. """ if name not in self.taps: - tap = Tap(name, self.id, ip) + tap = Tap(name, self, ip) self.taps[name] = tap return self.taps[name] + + def is_used(self): + """Are any of the TAPs still in use + + Waits until there's no carrier signal. + Otherwise trying to reuse the TAP may return + `Resource busy (os error 16)` + """ + for tap in self.taps: + _, stdout, _ = self.check_output(f"cat /sys/class/net/{tap}/carrier") + if stdout.strip() != "0": + return True + return False diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index 1e54c7b4fb1..94166374bd3 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -191,15 +191,15 @@ def test_net_api_put_update_pre_boot(uvm_plain): test_microvm = uvm_plain test_microvm.spawn() - first_if_name = "first_tap" - tap1 = net_tools.Tap(first_if_name, test_microvm.netns.id) + tap1name = test_microvm.id[:8] + "tap1" + tap1 = net_tools.Tap(tap1name, test_microvm.netns) test_microvm.api.network.put( iface_id="1", guest_mac="06:00:00:00:00:01", host_dev_name=tap1.name ) # Adding new network interfaces is allowed. - second_if_name = "second_tap" - tap2 = net_tools.Tap(second_if_name, test_microvm.netns.id) + tap2name = test_microvm.id[:8] + "tap2" + tap2 = net_tools.Tap(tap2name, test_microvm.netns) test_microvm.api.network.put( iface_id="2", guest_mac="07:00:00:00:00:01", host_dev_name=tap2.name ) @@ -209,28 +209,26 @@ def test_net_api_put_update_pre_boot(uvm_plain): expected_msg = f"The MAC address is already in use: {guest_mac}" with pytest.raises(RuntimeError, match=expected_msg): test_microvm.api.network.put( - iface_id="2", host_dev_name=second_if_name, guest_mac=guest_mac + iface_id="2", host_dev_name=tap2name, guest_mac=guest_mac ) # Updates to a network interface with an available MAC are allowed. test_microvm.api.network.put( - iface_id="2", host_dev_name=second_if_name, guest_mac="08:00:00:00:00:01" + iface_id="2", host_dev_name=tap2name, guest_mac="08:00:00:00:00:01" ) # Updates to a network interface with an unavailable name are not allowed. expected_msg = "Could not create the network device" with pytest.raises(RuntimeError, match=expected_msg): test_microvm.api.network.put( - iface_id="1", host_dev_name=second_if_name, guest_mac="06:00:00:00:00:01" + iface_id="1", host_dev_name=tap2name, guest_mac="06:00:00:00:00:01" ) # Updates to a network interface with an available name are allowed. - iface_id = "1" - tapname = test_microvm.id[:8] + "tap" + iface_id - - tap3 = net_tools.Tap(tapname, test_microvm.netns.id) + tap3name = test_microvm.id[:8] + "tap3" + tap3 = net_tools.Tap(tap3name, test_microvm.netns) test_microvm.api.network.put( - iface_id=iface_id, host_dev_name=tap3.name, guest_mac="06:00:00:00:00:01" + iface_id="3", host_dev_name=tap3.name, guest_mac="06:00:00:00:00:01" ) @@ -266,7 +264,7 @@ def test_api_mmds_config(uvm_plain): test_microvm.api.mmds_config.put(network_interfaces=["foo"]) # Attach network interface. - tap = net_tools.Tap("tap1", test_microvm.netns.id) + tap = net_tools.Tap(f"tap1-{test_microvm.id[:6]}", test_microvm.netns) test_microvm.api.network.put( iface_id="1", guest_mac="06:00:00:00:00:01", host_dev_name=tap.name ) @@ -487,7 +485,7 @@ def test_api_put_update_post_boot(uvm_plain, io_engine): iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, host_dev_name=tap1.name, guest_mac="06:00:00:00:00:01" @@ -595,7 +593,7 @@ def test_rate_limiters_api_config(uvm_plain, io_engine): # Test network with tx bw rate-limiting. iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, @@ -607,7 +605,7 @@ def test_rate_limiters_api_config(uvm_plain, io_engine): # Test network with rx bw rate-limiting. iface_id = "2" tapname = test_microvm.id[:8] + "tap" + iface_id - tap2 = net_tools.Tap(tapname, test_microvm.netns.id) + tap2 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, guest_mac="06:00:00:00:00:02", @@ -618,7 +616,7 @@ def test_rate_limiters_api_config(uvm_plain, io_engine): # Test network with tx and rx bw and ops rate-limiting. iface_id = "3" tapname = test_microvm.id[:8] + "tap" + iface_id - tap3 = net_tools.Tap(tapname, test_microvm.netns.id) + tap3 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, guest_mac="06:00:00:00:00:03", @@ -665,7 +663,7 @@ def test_api_patch_pre_boot(uvm_plain, io_engine): iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, host_dev_name=tap1.name, guest_mac="06:00:00:00:00:01" ) @@ -714,7 +712,7 @@ def test_negative_api_patch_post_boot(uvm_plain, io_engine): iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, host_dev_name=tap1.name, guest_mac="06:00:00:00:00:01" ) @@ -1245,7 +1243,7 @@ def test_get_full_config(uvm_plain): # Add a net device. iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) guest_mac = "06:00:00:00:00:01" tx_rl = { "bandwidth": {"size": 1000000, "refill_time": 100, "one_time_burst": None}, diff --git a/tests/integration_tests/functional/test_net.py b/tests/integration_tests/functional/test_net.py index 7b784e453c5..a01157dcf76 100644 --- a/tests/integration_tests/functional/test_net.py +++ b/tests/integration_tests/functional/test_net.py @@ -83,6 +83,9 @@ def test_multi_queue_unsupported(uvm_plain): guest_mac="AA:FC:00:00:00:01", ) + # clean TAP device + utils.run_cmd(f"{microvm.netns.cmd_prefix()} ip link del name {tapname}") + @pytest.fixture def uvm_any(microvm_factory, uvm_ctor, guest_kernel, rootfs):