Merge "Add compute restart capability for libvirt func tests" into stable/xena

Zuul · openstack-gerrit · commit d0d474fb39ed · 2022-12-05T17:04:17.000Z
diff --git a/nova/tests/functional/libvirt/base.py b/nova/tests/functional/libvirt/base.py
@@ -114,7 +114,7 @@ def _get_connection(
     def start_compute(
         self, hostname='compute1', host_info=None, pci_info=None,
         mdev_info=None, vdpa_info=None, libvirt_version=None,
-        qemu_version=None,
+        qemu_version=None, cell_name=None, connection=None
     ):
         """Start a compute service.
 
@@ -124,16 +124,35 @@ def start_compute(
         :param host_info: A fakelibvirt.HostInfo object for the host. Defaults
             to a HostInfo with 2 NUMA nodes, 2 cores per node, 2 threads per
             core, and 16GB of RAM.
+        :param connection: A fake libvirt connection. You should not provide it
+            directly. However it is used by restart_compute_service to
+            implement restart without loosing the hypervisor state.
         :returns: The hostname of the created service, which can be used to
             lookup the created service and UUID of the assocaited resource
             provider.
         """
+        if connection and (
+            host_info or
+            pci_info or
+            mdev_info or
+            vdpa_info or
+            libvirt_version or
+            qemu_version
+        ):
+            raise ValueError(
+                "Either an existing connection instance can be provided or a "
+                "list of parameters for a new connection"
+            )
 
         def _start_compute(hostname, host_info):
-            fake_connection = self._get_connection(
-                host_info, pci_info, mdev_info, vdpa_info, libvirt_version,
-                qemu_version, hostname,
-            )
+            if connection:
+                fake_connection = connection
+            else:
+                fake_connection = self._get_connection(
+                    host_info, pci_info, mdev_info, vdpa_info, libvirt_version,
+                    qemu_version, hostname,
+                )
+
             # If the compute is configured with PCI devices then we need to
             # make sure that the stubs around sysfs has the MAC address
             # information for the PCI PF devices
@@ -144,7 +163,8 @@ def _start_compute(hostname, host_info):
             # actually start the service.
             orig_con = self.mock_conn.return_value
             self.mock_conn.return_value = fake_connection
-            compute = self.start_service('compute', host=hostname)
+            compute = self.start_service(
+                'compute', host=hostname, cell_name=cell_name)
             # Once that's done, we need to tweak the compute "service" to
             # make sure it returns unique objects.
             compute.driver._host.get_connection = lambda: fake_connection
@@ -165,6 +185,74 @@ def _start_compute(hostname, host_info):
 
         return hostname
 
+    def restart_compute_service(
+        self,
+        hostname,
+        host_info=None,
+        pci_info=None,
+        mdev_info=None,
+        vdpa_info=None,
+        libvirt_version=None,
+        qemu_version=None,
+        keep_hypervisor_state=True,
+    ):
+        """Stops the service and starts a new one to have realistic restart
+
+        :param hostname: the hostname of the nova-compute service to be
+            restarted
+        :param keep_hypervisor_state: If True then we reuse the fake connection
+            from the existing driver. If False a new connection will be created
+            based on the other parameters provided
+        """
+        # We are intentionally not calling super() here. Nova's base test class
+        # defines starting and restarting compute service with a very
+        # different signatures and also those calls are cannot be made aware of
+        # the intricacies of the libvirt fixture. So we simply hide that
+        # implementation.
+
+        if keep_hypervisor_state and (
+            host_info or
+            pci_info or
+            mdev_info or
+            vdpa_info or
+            libvirt_version or
+            qemu_version
+        ):
+            raise ValueError(
+                "Either keep_hypervisor_state=True or a list of libvirt "
+                "parameters can be provided but not both"
+            )
+
+        compute = self.computes.pop(hostname)
+        self.compute_rp_uuids.pop(hostname)
+
+        # NOTE(gibi): The service interface cannot be used to simulate a real
+        # service restart as the manager object will not be recreated after a
+        # service.stop() and service.start() therefore the manager state will
+        # survive. For example the resource tracker will not be recreated after
+        # a stop start. The service.kill() call cannot help as it deletes
+        # the service from the DB which is unrealistic and causes that some
+        # operation that refers to the killed host (e.g. evacuate) fails.
+        # So this helper method will stop the original service and then starts
+        # a brand new compute service for the same host and node. This way
+        # a new ComputeManager instance will be created and initialized during
+        # the service startup.
+        compute.stop()
+
+        # this service was running previously, so we have to make sure that
+        # we restart it in the same cell
+        cell_name = self.host_mappings[compute.host].cell_mapping.name
+
+        old_connection = compute.manager.driver._get_connection()
+
+        self.start_compute(
+            hostname, host_info, pci_info, mdev_info, vdpa_info,
+            libvirt_version, qemu_version, cell_name,
+            old_connection if keep_hypervisor_state else None
+        )
+
+        return self.computes[hostname]
+
 
 class LibvirtMigrationMixin(object):
     """A simple mixin to facilliate successful libvirt live migrations
diff --git a/nova/tests/functional/libvirt/test_numa_live_migration.py b/nova/tests/functional/libvirt/test_numa_live_migration.py
@@ -206,10 +206,8 @@ def _test(self, pin_dest):
         # Increase cpu_dedicated_set to 0-3, expecting the live migrated server
         # to end up on 2,3.
         self.flags(cpu_dedicated_set='0-3', group='compute')
-        self.computes['host_a'] = self.restart_compute_service(
-            self.computes['host_a'])
-        self.computes['host_b'] = self.restart_compute_service(
-            self.computes['host_b'])
+        self.restart_compute_service('host_a')
+        self.restart_compute_service('host_b')
 
         # Live migrate, RPC-pinning the destination host if asked
         if pin_dest:
@@ -333,10 +331,8 @@ def _test(self, pin_dest=False):
         # Increase cpu_dedicated_set to 0-3, expecting the live migrated server
         # to end up on 2,3.
         self.flags(cpu_dedicated_set='0-3', group='compute')
-        self.computes['host_a'] = self.restart_compute_service(
-            self.computes['host_a'])
-        self.computes['host_b'] = self.restart_compute_service(
-            self.computes['host_b'])
+        self.restart_compute_service('host_a')
+        self.restart_compute_service('host_b')
 
         # Live migrate, RPC-pinning the destination host if asked. This is a
         # rollback test, so server_a is expected to remain on host_a.
diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py
@@ -1187,10 +1187,8 @@ def test_vcpu_to_pcpu_reshape(self):
         self.flags(cpu_dedicated_set='0-7', group='compute')
         self.flags(vcpu_pin_set=None)
 
-        computes = {}
-        for host, compute in self.computes.items():
-            computes[host] = self.restart_compute_service(compute)
-        self.computes = computes
+        for host in list(self.computes.keys()):
+            self.restart_compute_service(host)
 
         # verify that the inventory, usages and allocation are correct after
         # the reshape
diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py
@@ -907,11 +907,8 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self):
         # Disable SRIOV capabilties in PF and delete the VFs
         self._disable_sriov_in_pf(pci_info_no_sriov)
 
-        fake_connection = self._get_connection(pci_info=pci_info_no_sriov,
-                                               hostname='test_compute0')
-        self.mock_conn.return_value = fake_connection
-
-        self.compute = self.start_service('compute', host='test_compute0')
+        self.start_compute('test_compute0', pci_info=pci_info_no_sriov)
+        self.compute = self.computes['test_compute0']
 
         ctxt = context.get_admin_context()
         pci_devices = objects.PciDeviceList.get_by_compute_node(
@@ -923,13 +920,9 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self):
         self.assertEqual(1, len(pci_devices))
         self.assertEqual('type-PCI', pci_devices[0].dev_type)
 
-        # Update connection with original pci info with sriov PFs
-        fake_connection = self._get_connection(pci_info=pci_info,
-                                               hostname='test_compute0')
-        self.mock_conn.return_value = fake_connection
-
-        # Restart the compute service
-        self.restart_compute_service(self.compute)
+        # Restart the compute service with sriov PFs
+        self.restart_compute_service(
+            self.compute.host, pci_info=pci_info, keep_hypervisor_state=False)
 
         # Verify if PCI devices are of type type-PF or type-VF
         pci_devices = objects.PciDeviceList.get_by_compute_node(
@@ -1014,10 +1007,9 @@ def _test_detach_attach(self, first_port_id, second_port_id):
         host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1,
                                          cpu_cores=2, cpu_threads=2)
         pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1)
-        fake_connection = self._get_connection(host_info, pci_info)
-        self.mock_conn.return_value = fake_connection
-
-        self.compute = self.start_service('compute', host='test_compute0')
+        self.start_compute(
+            'test_compute0', host_info=host_info, pci_info=pci_info)
+        self.compute = self.computes['test_compute0']
 
         # Create server with a port
         server = self._create_server(networks=[{'port': first_port_id}])
diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py
@@ -72,11 +72,11 @@ def test_create_servers_with_vgpu(self):
         # ignore the content of the above HostMdevDeviceInfo
         self.flags(enabled_mdev_types='', group='devices')
 
-        hostname = self.start_compute(
+        self.hostname = self.start_compute(
             hostname='compute1',
             mdev_info=fakelibvirt.HostMdevDevicesInfo(devices=mdevs),
         )
-        self.compute = self.computes[hostname]
+        self.compute = self.computes[self.hostname]
 
         # create the VGPU resource in placement manually
         compute_rp_uuid = self.placement.get(
@@ -158,7 +158,7 @@ def test_create_servers_with_vgpu(self):
                 allocations[compute_rp_uuid]['resources'])
 
         # restart compute which will trigger a reshape
-        self.compute = self.restart_compute_service(self.compute)
+        self.compute = self.restart_compute_service(self.hostname)
 
         # verify that the inventory, usages and allocation are correct after
         # the reshape
diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py
@@ -113,8 +113,8 @@ def _create_mdev(self, physical_device, mdev_type, uuid=None):
                                                    parent=libvirt_parent)})
         return uuid
 
-    def start_compute(self, hostname):
-        hostname = super().start_compute(
+    def start_compute_with_vgpu(self, hostname):
+        hostname = self.start_compute(
             pci_info=fakelibvirt.HostPCIDevicesInfo(
                 num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
             ),
@@ -197,7 +197,7 @@ def setUp(self):
             enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE,
             group='devices')
 
-        self.compute1 = self.start_compute('host1')
+        self.compute1 = self.start_compute_with_vgpu('host1')
 
     def assert_vgpu_usage_for_compute(self, compute, expected):
         self.assert_mdev_usage(compute, expected_amount=expected)
@@ -211,7 +211,7 @@ def test_create_servers_with_vgpu(self):
 
     def test_resize_servers_with_vgpu(self):
         # Add another compute for the sake of resizing
-        self.compute2 = self.start_compute('host2')
+        self.compute2 = self.start_compute_with_vgpu('host2')
         server = self._create_server(
             image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
             flavor_id=self.flavor, host=self.compute1.host,
@@ -337,7 +337,7 @@ def setUp(self):
         # Prepare traits for later on
         self._create_trait('CUSTOM_NVIDIA_11')
         self._create_trait('CUSTOM_NVIDIA_12')
-        self.compute1 = self.start_compute('host1')
+        self.compute1 = self.start_compute_with_vgpu('host1')
 
     def test_create_servers_with_vgpu(self):
         self._create_server(
@@ -369,13 +369,12 @@ def test_create_servers_with_vgpu(self):
 
     def test_create_servers_with_specific_type(self):
         # Regenerate the PCI addresses so both pGPUs now support nvidia-12
-        connection = self.computes[
-            self.compute1.host].driver._host.get_connection()
-        connection.pci_info = fakelibvirt.HostPCIDevicesInfo(
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
             num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
             multiple_gpu_types=True)
         # Make a restart to update the Resource Providers
-        self.compute1 = self.restart_compute_service(self.compute1)
+        self.compute1 = self.restart_compute_service(
+            self.compute1.host, pci_info=pci_info, keep_hypervisor_state=False)
         pgpu1_rp_uuid = self._get_provider_uuid_by_name(
             self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
         pgpu2_rp_uuid = self._get_provider_uuid_by_name(
@@ -451,7 +450,7 @@ def setUp(self):
                    group='mdev_nvidia-12')
         self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_mlx5_core')
 
-        self.compute1 = self.start_compute('host1')
+        self.compute1 = self.start_compute_with_vgpu('host1')
         # Regenerate the PCI addresses so they can support both mlx5 and
         # nvidia-12 types
         connection = self.computes[
@@ -460,7 +459,7 @@ def setUp(self):
             num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
             generic_types=True)
         # Make a restart to update the Resource Providers
-        self.compute1 = self.restart_compute_service(self.compute1)
+        self.compute1 = self.restart_compute_service('host1')
 
     def test_create_servers_with_different_mdev_classes(self):
         physdev1_rp_uuid = self._get_provider_uuid_by_name(
@@ -498,7 +497,7 @@ def test_create_servers_with_different_mdev_classes(self):
 
     def test_resize_servers_with_mlx5(self):
         # Add another compute for the sake of resizing
-        self.compute2 = self.start_compute('host2')
+        self.compute2 = self.start_compute_with_vgpu('host2')
         # Regenerate the PCI addresses so they can support both mlx5 and
         # nvidia-12 types
         connection = self.computes[
@@ -507,7 +506,7 @@ def test_resize_servers_with_mlx5(self):
             num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
             generic_types=True)
         # Make a restart to update the Resource Providers
-        self.compute2 = self.restart_compute_service(self.compute2)
+        self.compute2 = self.restart_compute_service('host2')
 
         # Use the new flavor for booting
         server = self._create_server(