
Commit afc55c5

gibizer authored and SeanMooney committed
Add compute restart capability for libvirt func tests
The existing generic restart_compute_service() call in the nova test base class is not appropriate for the libvirt functional tests that need to reconfigure the libvirt connection, as it is not aware of the libvirt-specific mocking needed when a compute service is started. So this patch adds a specific restart_compute_service() call to nova.tests.functional.libvirt.base.ServersTestBase. This will be used by a later patch testing [pci]device_spec reconfiguration scenarios.

This change showed that some of the existing libvirt functional tests used the incomplete restart_compute_service from the base class. Others used local mocking to inject new PCI config into the restart. I moved all of these to the new function and removed the local mocking.

Conflicts:
    nova/tests/functional/libvirt/test_device_bus_migration.py

Change-Id: Ic717dc42ac6b6cace59d344acaf12f9d1ee35564
(cherry picked from commit 57c253a)
(cherry picked from commit f98858a)
1 parent b36bc92 commit afc55c5
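A minimal usage sketch (not part of the commit): roughly how a libvirt functional test can drive the new helper, following the call patterns visible in the diffs below. The test class name, hostname and device counts are illustrative, and the fakelibvirt import path is an assumption.

    from nova.tests.fixtures import libvirt as fakelibvirt  # assumed import path
    from nova.tests.functional.libvirt import base


    class ExamplePCIReconfigureTest(base.ServersTestBase):  # hypothetical test

        def test_restart_with_new_device_spec(self):
            # boot a compute with one PF and one VF exposed by fakelibvirt
            pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1)
            self.start_compute('compute1', pci_info=pci_info)

            # plain restart: the fake libvirt connection is reused, so the
            # hypervisor state (domains, devices) survives the restart
            self.restart_compute_service('compute1')

            # restart with a different device layout: a new fake connection is
            # built from the supplied parameters instead of reusing the old one
            new_pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=2)
            self.restart_compute_service(
                'compute1', pci_info=new_pci_info,
                keep_hypervisor_state=False)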

File tree

6 files changed: +123 −50 lines changed

nova/tests/functional/libvirt/base.py
nova/tests/functional/libvirt/test_numa_live_migration.py
nova/tests/functional/libvirt/test_numa_servers.py
nova/tests/functional/libvirt/test_pci_sriov_servers.py
nova/tests/functional/libvirt/test_reshape.py
nova/tests/functional/libvirt/test_vgpu.py


nova/tests/functional/libvirt/base.py

Lines changed: 94 additions & 6 deletions
@@ -114,7 +114,7 @@ def _get_connection(
     def start_compute(
         self, hostname='compute1', host_info=None, pci_info=None,
         mdev_info=None, vdpa_info=None, libvirt_version=None,
-        qemu_version=None,
+        qemu_version=None, cell_name=None, connection=None
     ):
         """Start a compute service.
 
@@ -124,16 +124,35 @@ def start_compute(
         :param host_info: A fakelibvirt.HostInfo object for the host. Defaults
             to a HostInfo with 2 NUMA nodes, 2 cores per node, 2 threads per
             core, and 16GB of RAM.
+        :param connection: A fake libvirt connection. You should not provide it
+            directly. However it is used by restart_compute_service to
+            implement restart without loosing the hypervisor state.
         :returns: The hostname of the created service, which can be used to
             lookup the created service and UUID of the assocaited resource
             provider.
         """
+        if connection and (
+            host_info or
+            pci_info or
+            mdev_info or
+            vdpa_info or
+            libvirt_version or
+            qemu_version
+        ):
+            raise ValueError(
+                "Either an existing connection instance can be provided or a "
+                "list of parameters for a new connection"
+            )
 
         def _start_compute(hostname, host_info):
-            fake_connection = self._get_connection(
-                host_info, pci_info, mdev_info, vdpa_info, libvirt_version,
-                qemu_version, hostname,
-            )
+            if connection:
+                fake_connection = connection
+            else:
+                fake_connection = self._get_connection(
+                    host_info, pci_info, mdev_info, vdpa_info, libvirt_version,
+                    qemu_version, hostname,
+                )
+
             # If the compute is configured with PCI devices then we need to
             # make sure that the stubs around sysfs has the MAC address
             # information for the PCI PF devices
@@ -144,7 +163,8 @@ def _start_compute(hostname, host_info):
             # actually start the service.
             orig_con = self.mock_conn.return_value
             self.mock_conn.return_value = fake_connection
-            compute = self.start_service('compute', host=hostname)
+            compute = self.start_service(
+                'compute', host=hostname, cell_name=cell_name)
             # Once that's done, we need to tweak the compute "service" to
             # make sure it returns unique objects.
             compute.driver._host.get_connection = lambda: fake_connection
@@ -165,6 +185,74 @@ def _start_compute(hostname, host_info):
 
         return hostname
 
+    def restart_compute_service(
+        self,
+        hostname,
+        host_info=None,
+        pci_info=None,
+        mdev_info=None,
+        vdpa_info=None,
+        libvirt_version=None,
+        qemu_version=None,
+        keep_hypervisor_state=True,
+    ):
+        """Stops the service and starts a new one to have realistic restart
+
+        :param hostname: the hostname of the nova-compute service to be
+            restarted
+        :param keep_hypervisor_state: If True then we reuse the fake connection
+            from the existing driver. If False a new connection will be created
+            based on the other parameters provided
+        """
+        # We are intentionally not calling super() here. Nova's base test class
+        # defines starting and restarting compute service with a very
+        # different signatures and also those calls are cannot be made aware of
+        # the intricacies of the libvirt fixture. So we simply hide that
+        # implementation.
+
+        if keep_hypervisor_state and (
+            host_info or
+            pci_info or
+            mdev_info or
+            vdpa_info or
+            libvirt_version or
+            qemu_version
+        ):
+            raise ValueError(
+                "Either keep_hypervisor_state=True or a list of libvirt "
+                "parameters can be provided but not both"
+            )
+
+        compute = self.computes.pop(hostname)
+        self.compute_rp_uuids.pop(hostname)
+
+        # NOTE(gibi): The service interface cannot be used to simulate a real
+        # service restart as the manager object will not be recreated after a
+        # service.stop() and service.start() therefore the manager state will
+        # survive. For example the resource tracker will not be recreated after
+        # a stop start. The service.kill() call cannot help as it deletes
+        # the service from the DB which is unrealistic and causes that some
+        # operation that refers to the killed host (e.g. evacuate) fails.
+        # So this helper method will stop the original service and then starts
+        # a brand new compute service for the same host and node. This way
+        # a new ComputeManager instance will be created and initialized during
+        # the service startup.
+        compute.stop()
+
+        # this service was running previously, so we have to make sure that
+        # we restart it in the same cell
+        cell_name = self.host_mappings[compute.host].cell_mapping.name
+
+        old_connection = compute.manager.driver._get_connection()
+
+        self.start_compute(
+            hostname, host_info, pci_info, mdev_info, vdpa_info,
+            libvirt_version, qemu_version, cell_name,
+            old_connection if keep_hypervisor_state else None
+        )
+
+        return self.computes[hostname]
+
 
 class LibvirtMigrationMixin(object):
     """A simple mixin to facilliate successful libvirt live migrations

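A hedged fragment (not from the commit) illustrating the guard added in the new helper above: reusing the existing fake connection (keep_hypervisor_state, the default) and passing new libvirt parameters are mutually exclusive, so a test has to opt out of keep_hypervisor_state when it wants to reconfigure the host. The test method name is made up; it assumes a class deriving from base.ServersTestBase as in the sketch near the top.

    def test_restart_rejects_conflicting_args(self):
        # start a compute with the default fakelibvirt host setup
        self.start_compute('compute1')
        # combining the default keep_hypervisor_state=True with a new
        # pci_info trips the ValueError guard in restart_compute_service
        self.assertRaises(
            ValueError,
            self.restart_compute_service,
            'compute1',
            pci_info=fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1),
        )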
nova/tests/functional/libvirt/test_numa_live_migration.py

Lines changed: 4 additions & 8 deletions
@@ -206,10 +206,8 @@ def _test(self, pin_dest):
         # Increase cpu_dedicated_set to 0-3, expecting the live migrated server
         # to end up on 2,3.
         self.flags(cpu_dedicated_set='0-3', group='compute')
-        self.computes['host_a'] = self.restart_compute_service(
-            self.computes['host_a'])
-        self.computes['host_b'] = self.restart_compute_service(
-            self.computes['host_b'])
+        self.restart_compute_service('host_a')
+        self.restart_compute_service('host_b')
 
         # Live migrate, RPC-pinning the destination host if asked
         if pin_dest:
@@ -333,10 +331,8 @@ def _test(self, pin_dest=False):
         # Increase cpu_dedicated_set to 0-3, expecting the live migrated server
         # to end up on 2,3.
         self.flags(cpu_dedicated_set='0-3', group='compute')
-        self.computes['host_a'] = self.restart_compute_service(
-            self.computes['host_a'])
-        self.computes['host_b'] = self.restart_compute_service(
-            self.computes['host_b'])
+        self.restart_compute_service('host_a')
+        self.restart_compute_service('host_b')
 
         # Live migrate, RPC-pinning the destination host if asked. This is a
         # rollback test, so server_a is expected to remain on host_a.

nova/tests/functional/libvirt/test_numa_servers.py

Lines changed: 2 additions & 4 deletions
@@ -1187,10 +1187,8 @@ def test_vcpu_to_pcpu_reshape(self):
         self.flags(cpu_dedicated_set='0-7', group='compute')
         self.flags(vcpu_pin_set=None)
 
-        computes = {}
-        for host, compute in self.computes.items():
-            computes[host] = self.restart_compute_service(compute)
-        self.computes = computes
+        for host in list(self.computes.keys()):
+            self.restart_compute_service(host)
 
         # verify that the inventory, usages and allocation are correct after
         # the reshape

nova/tests/functional/libvirt/test_pci_sriov_servers.py

Lines changed: 8 additions & 16 deletions
@@ -907,11 +907,8 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self):
         # Disable SRIOV capabilties in PF and delete the VFs
         self._disable_sriov_in_pf(pci_info_no_sriov)
 
-        fake_connection = self._get_connection(pci_info=pci_info_no_sriov,
-                                               hostname='test_compute0')
-        self.mock_conn.return_value = fake_connection
-
-        self.compute = self.start_service('compute', host='test_compute0')
+        self.start_compute('test_compute0', pci_info=pci_info_no_sriov)
+        self.compute = self.computes['test_compute0']
 
         ctxt = context.get_admin_context()
         pci_devices = objects.PciDeviceList.get_by_compute_node(
@@ -923,13 +920,9 @@
         self.assertEqual(1, len(pci_devices))
         self.assertEqual('type-PCI', pci_devices[0].dev_type)
 
-        # Update connection with original pci info with sriov PFs
-        fake_connection = self._get_connection(pci_info=pci_info,
-                                               hostname='test_compute0')
-        self.mock_conn.return_value = fake_connection
-
-        # Restart the compute service
-        self.restart_compute_service(self.compute)
+        # Restart the compute service with sriov PFs
+        self.restart_compute_service(
+            self.compute.host, pci_info=pci_info, keep_hypervisor_state=False)
 
         # Verify if PCI devices are of type type-PF or type-VF
         pci_devices = objects.PciDeviceList.get_by_compute_node(
@@ -1014,10 +1007,9 @@ def _test_detach_attach(self, first_port_id, second_port_id):
         host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1,
                                          cpu_cores=2, cpu_threads=2)
         pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1)
-        fake_connection = self._get_connection(host_info, pci_info)
-        self.mock_conn.return_value = fake_connection
-
-        self.compute = self.start_service('compute', host='test_compute0')
+        self.start_compute(
+            'test_compute0', host_info=host_info, pci_info=pci_info)
+        self.compute = self.computes['test_compute0']
 
         # Create server with a port
         server = self._create_server(networks=[{'port': first_port_id}])

nova/tests/functional/libvirt/test_reshape.py

Lines changed: 3 additions & 3 deletions
@@ -72,11 +72,11 @@ def test_create_servers_with_vgpu(self):
         # ignore the content of the above HostMdevDeviceInfo
         self.flags(enabled_mdev_types='', group='devices')
 
-        hostname = self.start_compute(
+        self.hostname = self.start_compute(
             hostname='compute1',
             mdev_info=fakelibvirt.HostMdevDevicesInfo(devices=mdevs),
         )
-        self.compute = self.computes[hostname]
+        self.compute = self.computes[self.hostname]
 
         # create the VGPU resource in placement manually
         compute_rp_uuid = self.placement.get(
@@ -158,7 +158,7 @@
             allocations[compute_rp_uuid]['resources'])
 
         # restart compute which will trigger a reshape
-        self.compute = self.restart_compute_service(self.compute)
+        self.compute = self.restart_compute_service(self.hostname)
 
         # verify that the inventory, usages and allocation are correct after
         # the reshape

nova/tests/functional/libvirt/test_vgpu.py

Lines changed: 12 additions & 13 deletions
@@ -113,8 +113,8 @@ def _create_mdev(self, physical_device, mdev_type, uuid=None):
                                                    parent=libvirt_parent)})
         return uuid
 
-    def start_compute(self, hostname):
-        hostname = super().start_compute(
+    def start_compute_with_vgpu(self, hostname):
+        hostname = self.start_compute(
             pci_info=fakelibvirt.HostPCIDevicesInfo(
                 num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
             ),
@@ -197,7 +197,7 @@ def setUp(self):
             enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE,
             group='devices')
 
-        self.compute1 = self.start_compute('host1')
+        self.compute1 = self.start_compute_with_vgpu('host1')
 
     def assert_vgpu_usage_for_compute(self, compute, expected):
         self.assert_mdev_usage(compute, expected_amount=expected)
@@ -211,7 +211,7 @@
 
     def test_resize_servers_with_vgpu(self):
         # Add another compute for the sake of resizing
-        self.compute2 = self.start_compute('host2')
+        self.compute2 = self.start_compute_with_vgpu('host2')
         server = self._create_server(
             image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
             flavor_id=self.flavor, host=self.compute1.host,
@@ -337,7 +337,7 @@ def setUp(self):
         # Prepare traits for later on
         self._create_trait('CUSTOM_NVIDIA_11')
         self._create_trait('CUSTOM_NVIDIA_12')
-        self.compute1 = self.start_compute('host1')
+        self.compute1 = self.start_compute_with_vgpu('host1')
 
     def test_create_servers_with_vgpu(self):
         self._create_server(
@@ -369,13 +369,12 @@
 
     def test_create_servers_with_specific_type(self):
         # Regenerate the PCI addresses so both pGPUs now support nvidia-12
-        connection = self.computes[
-            self.compute1.host].driver._host.get_connection()
-        connection.pci_info = fakelibvirt.HostPCIDevicesInfo(
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
             num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
             multiple_gpu_types=True)
         # Make a restart to update the Resource Providers
-        self.compute1 = self.restart_compute_service(self.compute1)
+        self.compute1 = self.restart_compute_service(
+            self.compute1.host, pci_info=pci_info, keep_hypervisor_state=False)
         pgpu1_rp_uuid = self._get_provider_uuid_by_name(
             self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
         pgpu2_rp_uuid = self._get_provider_uuid_by_name(
@@ -451,7 +450,7 @@ def setUp(self):
             group='mdev_nvidia-12')
         self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_mlx5_core')
 
-        self.compute1 = self.start_compute('host1')
+        self.compute1 = self.start_compute_with_vgpu('host1')
         # Regenerate the PCI addresses so they can support both mlx5 and
         # nvidia-12 types
         connection = self.computes[
@@ -460,7 +459,7 @@
             num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
             generic_types=True)
         # Make a restart to update the Resource Providers
-        self.compute1 = self.restart_compute_service(self.compute1)
+        self.compute1 = self.restart_compute_service('host1')
 
     def test_create_servers_with_different_mdev_classes(self):
         physdev1_rp_uuid = self._get_provider_uuid_by_name(
@@ -498,7 +497,7 @@ def test_create_servers_with_different_mdev_classes(self):
 
     def test_resize_servers_with_mlx5(self):
         # Add another compute for the sake of resizing
-        self.compute2 = self.start_compute('host2')
+        self.compute2 = self.start_compute_with_vgpu('host2')
         # Regenerate the PCI addresses so they can support both mlx5 and
         # nvidia-12 types
         connection = self.computes[
@@ -507,7 +506,7 @@ def test_resize_servers_with_mlx5(self):
             num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
             generic_types=True)
         # Make a restart to update the Resource Providers
-        self.compute2 = self.restart_compute_service(self.compute2)
+        self.compute2 = self.restart_compute_service('host2')
 
         # Use the new flavor for booting
         server = self._create_server(
