Skip to content

Commit 2925fd1

Browse files
Zuul and openstack-gerrit
authored and committed
Merge "Heal allocation for same host resize"
2 parents 40ca5e1 + ab439da commit 2925fd1

File tree

6 files changed

+227
-9
lines changed

6 files changed

+227
-9
lines changed

nova/compute/pci_placement_translator.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,8 @@ def update_provider_tree(
254254
def update_allocations(
255255
self,
256256
allocations: dict,
257-
provider_tree: provider_tree.ProviderTree
257+
provider_tree: provider_tree.ProviderTree,
258+
same_host_instances: ty.List[str],
258259
) -> bool:
259260
updated = False
260261

@@ -293,6 +294,21 @@ def update_allocations(
293294
# heal_allocation CLI instead.
294295
continue
295296

297+
if consumer in same_host_instances:
298+
# This is a nasty special case. This instance is undergoing
299+
# a same host resize. So in Placement the source host
300+
# allocation is held by the migration UUID *but* the
301+
# PciDevice.instance_uuid is set for the instance UUID both
302+
# on the source and on the destination host. As the source and
303+
# dest are the same for migration we will see PciDevice
304+
# objects assigned to this instance that should not be
305+
# allocated to the instance UUID in placement.
306+
# As noted above we don't want to take care of in-progress
307+
# migrations during healing. So we simply ignore this instance.
308+
# If the instance needs healing then it will be healed
309+
# after the migration is confirmed or reverted.
310+
continue
311+
296312
current_allocs = allocations[consumer]['allocations']
297313
current_rp_allocs = current_allocs.get(rp_uuid)
298314

@@ -326,9 +342,14 @@ def __str__(self) -> str:
326342
class PlacementView:
327343
"""The PCI Placement view"""
328344

329-
def __init__(self, hypervisor_hostname: str) -> None:
345+
def __init__(
346+
self,
347+
hypervisor_hostname: str,
348+
instances_under_same_host_resize: ty.List[str],
349+
) -> None:
330350
self.rps: ty.Dict[str, PciResourceProvider] = {}
331351
self.root_rp_name = hypervisor_hostname
352+
self.same_host_instances = instances_under_same_host_resize
332353

333354
def _get_rp_name_for_address(self, addr: str) -> str:
334355
return f"{self.root_rp_name}_{addr.upper()}"
@@ -459,7 +480,11 @@ def update_allocations(
459480
"""
460481
updated = False
461482
for rp in self.rps.values():
462-
updated |= rp.update_allocations(allocations, provider_tree)
483+
updated |= rp.update_allocations(
484+
allocations,
485+
provider_tree,
486+
self.same_host_instances,
487+
)
463488
return updated
464489

465490

@@ -500,6 +525,7 @@ def update_provider_tree_for_pci(
500525
nodename: str,
501526
pci_tracker: pci_manager.PciDevTracker,
502527
allocations: dict,
528+
instances_under_same_host_resize: ty.List[str],
503529
) -> bool:
504530
"""Based on the PciDevice objects in the pci_tracker it calculates what
505531
inventories and allocations needs to exist in placement and create the
@@ -529,6 +555,8 @@ def update_provider_tree_for_pci(
529555
},
530556
...
531557
}
558+
:param instances_under_same_host_resize: A list of instance UUIDs that
559+
are undergoing same host resize on this host.
532560
"""
533561
if not _is_placement_tracking_enabled():
534562
ensure_tracking_was_not_enabled_before(provider_tree)
@@ -541,7 +569,7 @@ def update_provider_tree_for_pci(
541569
'Collecting PCI inventories and allocations to track them in Placement'
542570
)
543571

544-
pv = PlacementView(nodename)
572+
pv = PlacementView(nodename, instances_under_same_host_resize)
545573
for dev in pci_tracker.pci_devs:
546574
# match the PCI device with the [pci]dev_spec config to access
547575
# the configuration metadata tags

nova/compute/resource_tracker.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1260,6 +1260,11 @@ def _update_to_placement(self, context, compute_node, startup):
12601260
context, nodename, provider_tree=prov_tree)
12611261
prov_tree.update_traits(nodename, traits)
12621262

1263+
instances_under_same_host_resize = [
1264+
migration.instance_uuid
1265+
for migration in self.tracked_migrations.values()
1266+
if migration.is_same_host_resize
1267+
]
12631268
# NOTE(gibi): Tracking PCI in placement is different from other
12641269
# resources.
12651270
#
@@ -1278,7 +1283,12 @@ def _update_to_placement(self, context, compute_node, startup):
12781283
# is enabled. So we need to be ready to heal PCI allocations at
12791284
# every call not just at startup.
12801285
pci_reshaped = pci_placement_translator.update_provider_tree_for_pci(
1281-
prov_tree, nodename, self.pci_tracker, allocs)
1286+
prov_tree,
1287+
nodename,
1288+
self.pci_tracker,
1289+
allocs,
1290+
instances_under_same_host_resize,
1291+
)
12821292

12831293
self.provider_tree = prov_tree
12841294

nova/objects/migration.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,10 @@ def is_live_migration(self):
215215
def is_resize(self):
216216
return self.migration_type == fields.MigrationType.RESIZE
217217

218+
@property
219+
def is_same_host_resize(self):
220+
return self.is_resize and self.source_node == self.dest_node
221+
218222

219223
@base.NovaObjectRegistry.register
220224
class MigrationList(base.ObjectListBase, base.NovaObject):

nova/tests/functional/libvirt/test_pci_in_placement.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,3 +1346,113 @@ def test_heal_partial_allocations_during_resize_change_dev_type(self):
13461346
del compute1_expected_placement_view["allocations"][server["id"]]
13471347
self.assert_placement_pci_view(
13481348
"compute1", **compute1_expected_placement_view)
1349+
1350+
def test_heal_allocation_during_same_host_resize(self):
1351+
self.flags(allow_resize_to_same_host=True)
1352+
# The fake libvirt will emulate on the host:
1353+
# * one type-PFs (slot 0) with 3 type-VFs
1354+
compute1_pci_info = fakelibvirt.HostPCIDevicesInfo(
1355+
num_pci=0, num_pfs=1, num_vfs=3)
1356+
# the config matches just the VFs
1357+
compute1_device_spec = self._to_device_spec_conf(
1358+
[
1359+
{
1360+
"vendor_id": fakelibvirt.PCI_VEND_ID,
1361+
"product_id": fakelibvirt.VF_PROD_ID,
1362+
"address": "0000:81:00.*",
1363+
},
1364+
]
1365+
)
1366+
self.flags(group='pci', device_spec=compute1_device_spec)
1367+
# Start a compute with PCI tracking in placement
1368+
self.mock_pci_report_in_placement.return_value = True
1369+
self.start_compute(hostname="compute1", pci_info=compute1_pci_info)
1370+
self.assertPCIDeviceCounts("compute1", total=3, free=3)
1371+
compute1_expected_placement_view = {
1372+
"inventories": {
1373+
"0000:81:00.0": {self.VF_RC: 3},
1374+
},
1375+
"traits": {
1376+
"0000:81:00.0": [],
1377+
},
1378+
"usages": {
1379+
"0000:81:00.0": {self.VF_RC: 0},
1380+
},
1381+
"allocations": {},
1382+
}
1383+
self.assert_placement_pci_view(
1384+
"compute1", **compute1_expected_placement_view)
1385+
# Create an instance consuming one VFs
1386+
extra_spec = {"pci_passthrough:alias": "a-vf:1"}
1387+
flavor_id = self._create_flavor(extra_spec=extra_spec)
1388+
server = self._create_server(flavor_id=flavor_id, networks=[])
1389+
self.assertPCIDeviceCounts("compute1", total=3, free=2)
1390+
# As scheduling does not support PCI in placement yet no allocation
1391+
# is created for the PCI consumption by the scheduler. BUT the resource
1392+
# tracker in the compute will heal the missing PCI allocation
1393+
compute1_expected_placement_view[
1394+
"usages"]["0000:81:00.0"][self.VF_RC] = 1
1395+
compute1_expected_placement_view["allocations"][server["id"]] = {
1396+
"0000:81:00.0": {self.VF_RC: 1}
1397+
}
1398+
self.assert_placement_pci_view(
1399+
"compute1", **compute1_expected_placement_view)
1400+
self._run_periodics()
1401+
self.assert_placement_pci_view(
1402+
"compute1", **compute1_expected_placement_view)
1403+
1404+
# resize the server to consume 2 VFs on the same host
1405+
extra_spec = {"pci_passthrough:alias": "a-vf:2"}
1406+
flavor_id = self._create_flavor(extra_spec=extra_spec)
1407+
server = self._resize_server(server, flavor_id)
1408+
# during resize both the source and the dest allocation is kept
1409+
# and in same host resize that means both consumed from the same host
1410+
self.assertPCIDeviceCounts("compute1", total=3, free=0)
1411+
# the source side of the allocation held by the migration
1412+
self._move_server_allocation(
1413+
compute1_expected_placement_view["allocations"], server['id'])
1414+
# NOTE(gibi): we intentionally don't heal allocation for the instance
1415+
# while it is being resized. See the comment in the
1416+
# pci_placement_translator about the reasoning.
1417+
self.assert_placement_pci_view(
1418+
"compute1", **compute1_expected_placement_view)
1419+
self._run_periodics()
1420+
self.assert_placement_pci_view(
1421+
"compute1", **compute1_expected_placement_view)
1422+
1423+
# revert the resize
1424+
self._revert_resize(server)
1425+
self.assertPCIDeviceCounts("compute1", total=3, free=2)
1426+
# the original allocations are restored
1427+
self._move_server_allocation(
1428+
compute1_expected_placement_view["allocations"],
1429+
server["id"],
1430+
revert=True,
1431+
)
1432+
compute1_expected_placement_view[
1433+
"usages"]["0000:81:00.0"][self.VF_RC] = 1
1434+
compute1_expected_placement_view["allocations"][server["id"]] = {
1435+
"0000:81:00.0": {self.VF_RC: 1}
1436+
}
1437+
self.assert_placement_pci_view(
1438+
"compute1", **compute1_expected_placement_view)
1439+
self._run_periodics()
1440+
self.assert_placement_pci_view(
1441+
"compute1", **compute1_expected_placement_view)
1442+
1443+
# now resize and then confirm it
1444+
self._resize_server(server, flavor_id)
1445+
self._confirm_resize(server)
1446+
1447+
# we expect that the consumption is according to the new flavor
1448+
self.assertPCIDeviceCounts("compute1", total=3, free=1)
1449+
compute1_expected_placement_view[
1450+
"usages"]["0000:81:00.0"][self.VF_RC] = 2
1451+
compute1_expected_placement_view["allocations"][server["id"]] = {
1452+
"0000:81:00.0": {self.VF_RC: 2}
1453+
}
1454+
self.assert_placement_pci_view(
1455+
"compute1", **compute1_expected_placement_view)
1456+
self._run_periodics()
1457+
self.assert_placement_pci_view(
1458+
"compute1", **compute1_expected_placement_view)

nova/tests/unit/compute/test_pci_placement_translator.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def test_translator_skips_devices_without_matching_spec(self):
6161
provider_tree = mock.NonCallableMock()
6262

6363
ppt.update_provider_tree_for_pci(
64-
provider_tree, "fake-node", pci_tracker, {})
64+
provider_tree, "fake-node", pci_tracker, {}, [])
6565

6666
self.assertIn(
6767
"Device spec is not found for device 0000:81:00.0 in "
@@ -113,7 +113,8 @@ def test_resource_class_normalization(self, pci_dev, rc_name, expected_rc):
113113
)
114114

115115
def test_dependent_device_pf_then_vf(self):
116-
pv = ppt.PlacementView("fake-node")
116+
pv = ppt.PlacementView(
117+
"fake-node", instances_under_same_host_resize=[])
117118
pf = pci_device.PciDevice(
118119
address="0000:81:00.0",
119120
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -140,7 +141,8 @@ def test_dependent_device_pf_then_vf(self):
140141
)
141142

142143
def test_dependent_device_vf_then_pf(self):
143-
pv = ppt.PlacementView("fake-node")
144+
pv = ppt.PlacementView(
145+
"fake-node", instances_under_same_host_resize=[])
144146
pf = pci_device.PciDevice(
145147
address="0000:81:00.0",
146148
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -173,7 +175,8 @@ def test_dependent_device_vf_then_pf(self):
173175
)
174176

175177
def test_mixed_rc_for_sibling_vfs(self):
176-
pv = ppt.PlacementView("fake-node")
178+
pv = ppt.PlacementView(
179+
"fake-node", instances_under_same_host_resize=[])
177180
vf1, vf2, vf3, vf4 = [
178181
pci_device.PciDevice(
179182
address="0000:81:00.%d" % f,

nova/tests/unit/compute/test_resource_tracker.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1808,6 +1808,7 @@ def test_update_pci_reporting(self, mock_update_provider_tree_for_pci):
18081808
compute_obj.hypervisor_hostname,
18091809
self.rt.pci_tracker,
18101810
mock_get_allocs.return_value,
1811+
[],
18111812
)
18121813
upt = self.rt.reportclient.update_from_provider_tree
18131814
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
@@ -1847,6 +1848,7 @@ def test_update_pci_reporting_reshape(
18471848
compute_obj.hypervisor_hostname,
18481849
self.rt.pci_tracker,
18491850
mock_get_allocs.return_value,
1851+
[],
18501852
)
18511853
upt = self.rt.reportclient.update_from_provider_tree
18521854
upt.assert_called_once_with(
@@ -1892,11 +1894,72 @@ def test_update_pci_reporting_driver_reshape(
18921894
compute_obj.hypervisor_hostname,
18931895
self.rt.pci_tracker,
18941896
mock_get_allocs.return_value,
1897+
[],
18951898
)
18961899
upt = self.rt.reportclient.update_from_provider_tree
18971900
upt.assert_called_once_with(
18981901
mock.sentinel.ctx, ptree, allocations=mock_get_allocs.return_value)
18991902

1903+
@mock.patch(
1904+
'nova.compute.resource_tracker.ResourceTracker.'
1905+
'_sync_compute_service_disabled_trait',
1906+
new=mock.Mock()
1907+
)
1908+
@mock.patch(
1909+
'nova.compute.resource_tracker.ResourceTracker._resource_change',
1910+
new=mock.Mock(return_value=False)
1911+
)
1912+
@mock.patch(
1913+
'nova.compute.pci_placement_translator.update_provider_tree_for_pci')
1914+
def test_update_pci_reporting_same_host_resize(
1915+
self, mock_update_provider_tree_for_pci
1916+
):
1917+
"""Assert that resource tracker calls update_provider_tree_for_pci
1918+
with the list of instances that are being resized to the same
1919+
host.
1920+
"""
1921+
compute_obj = _COMPUTE_NODE_FIXTURES[0].obj_clone()
1922+
self._setup_rt()
1923+
ptree = self._setup_ptree(compute_obj)
1924+
# simulate that pci reporting did not touch allocations
1925+
mock_update_provider_tree_for_pci.return_value = False
1926+
self.rt.tracked_migrations = {
1927+
uuids.inst1: objects.Migration(
1928+
migration_type="resize",
1929+
source_node="fake-node",
1930+
dest_node="fake-node",
1931+
instance_uuid=uuids.inst1,
1932+
),
1933+
uuids.inst2: objects.Migration(
1934+
migration_type="evacuation",
1935+
source_node="fake-node",
1936+
dest_node="fake-node",
1937+
instance_uuid=uuids.inst2,
1938+
),
1939+
uuids.inst3: objects.Migration(
1940+
migration_type="resize",
1941+
source_node="fake-node1",
1942+
dest_node="fake-node2",
1943+
instance_uuid=uuids.inst3,
1944+
),
1945+
}
1946+
1947+
self.rt._update(mock.sentinel.ctx, compute_obj)
1948+
1949+
mock_get_allocs = (
1950+
self.report_client_mock.get_allocations_for_provider_tree)
1951+
mock_get_allocs.assert_called_once_with(
1952+
mock.sentinel.ctx, compute_obj.hypervisor_hostname)
1953+
mock_update_provider_tree_for_pci.assert_called_once_with(
1954+
ptree,
1955+
compute_obj.hypervisor_hostname,
1956+
self.rt.pci_tracker,
1957+
mock_get_allocs.return_value,
1958+
[uuids.inst1],
1959+
)
1960+
upt = self.rt.reportclient.update_from_provider_tree
1961+
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
1962+
19001963
@mock.patch('nova.objects.Service.get_by_compute_host',
19011964
return_value=objects.Service(disabled=True))
19021965
def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):

0 commit comments

Comments
 (0)