Skip to content

Commit 9268bc3

Browse files
committed
Handle PCI dev reconf with allocations
PCI devices which are allocated to instances can be removed from the [pci]device_spec configuration or can be removed from the hypervisor directly. The existing PciTracker code handle this cases by keeping the PciDevice in the nova DB exists and allocated and issue a warning in the logs during the compute service startup that nova is in an inconsistent state. Similar behavior is now added to the PCI placement tracking code so the PCI inventories and allocations in placement is kept in such situation. There is one case where we cannot simply accept the PCI device reconfiguration by keeping the existing allocations and applying the new config. It is when a PF that is configured and allocated is removed and VFs from this PF is now configured in the [pci]device_spec. And vice versa when VFs are removed and its parent PF is configured. In this case keeping the existing inventory and allocations and adding the new inventory to placement would result in placement model where a single PCI device would provide both PF and VF inventories. This dependent device configuration is not supported as it could lead to double consumption. In such situation the compute service will refuse to start. blueprint: pci-device-tracking-in-placement Change-Id: Id130893de650cc2d38953cea7cf9f53af71ced93
1 parent ab439da commit 9268bc3

File tree

7 files changed

+281
-42
lines changed

7 files changed

+281
-42
lines changed

doc/source/admin/pci-passthrough.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,4 +404,20 @@ is upgraded. If there is an in-progress migration, then the PCI allocation on
404404
the source host of the migration will not be healed. The placement view will be
405405
consistent after such migration is completed or reverted.
406406

407+
Reconfiguring the PCI devices on the hypervisor or changing the
408+
:oslo.config:option:`pci.device_spec` configuration option and restarting the
409+
nova-compute service is supported in the following cases:
410+
411+
* new devices are added
412+
* devices without allocation is removed
413+
414+
Removing a device that has allocations is not supported. If a device having any
415+
allocation is removed then the nova-compute service will keep the device and
416+
the allocation exists in the nova DB and in placement and logs a warning. If
417+
a device with any allocation is reconfigured in a way that an allocated PF is
418+
removed and VFs from the same PF is configured (or vice versa) then
419+
nova-compute will refuse to start as it would create a situation where both
420+
the PF and its VFs are made available for consumption.
421+
422+
407423
For deeper technical details please read the `nova specification. <https://specs.openstack.org/openstack/nova-specs/specs/zed/approved/pci-device-tracking-in-placement.html>`_

nova/compute/pci_placement_translator.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -443,15 +443,33 @@ def process_dev(
443443
fields.PciDeviceStatus.DELETED,
444444
fields.PciDeviceStatus.REMOVED,
445445
):
446+
# If the PCI tracker marked the device DELETED or REMOVED then
447+
# such device is not allocated, so we are free to drop it from
448+
# placement too.
446449
self._remove_dev(dev)
447450
else:
448451
if not dev_spec:
449-
LOG.warning(
450-
"Device spec is not found for device %s in "
451-
"[pci]device_spec. Ignoring device in Placement resource "
452-
"view. This should not happen. Please file a bug.",
453-
dev.address
454-
)
452+
if dev.instance_uuid:
453+
LOG.warning(
454+
"Device spec is not found for device %s in "
455+
"[pci]device_spec. We are skipping this devices "
456+
"during Placement update. The device is allocated by "
457+
"%s. You should not remove an allocated device from "
458+
"the configuration. Please restore the configuration "
459+
"or cold migrate the instance to resolve the "
460+
"inconsistency.",
461+
dev.address,
462+
dev.instance_uuid
463+
)
464+
else:
465+
LOG.warning(
466+
"Device spec is not found for device %s in "
467+
"[pci]device_spec. Ignoring device in Placement "
468+
"resource view. This should not happen. Please file a "
469+
"bug.",
470+
dev.address
471+
)
472+
455473
return
456474

457475
self._add_dev(dev, dev_spec.get_tags())

nova/compute/resource_tracker.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,14 +1295,22 @@ def _update_to_placement(self, context, compute_node, startup):
12951295
# This merges in changes from the provider config files loaded in init
12961296
self._merge_provider_configs(self.provider_configs, prov_tree)
12971297

1298-
# Flush any changes. If we either processed ReshapeNeeded above or
1299-
# update_provider_tree_for_pci did reshape, then we need to pass allocs
1300-
# to update_from_provider_tree to hit placement's POST /reshaper route.
1301-
self.reportclient.update_from_provider_tree(
1302-
context,
1303-
prov_tree,
1304-
allocations=allocs if driver_reshaped or pci_reshaped else None
1305-
)
1298+
try:
1299+
# Flush any changes. If we either processed ReshapeNeeded above or
1300+
# update_provider_tree_for_pci did reshape, then we need to pass
1301+
# allocs to update_from_provider_tree to hit placement's POST
1302+
# /reshaper route.
1303+
self.reportclient.update_from_provider_tree(
1304+
context,
1305+
prov_tree,
1306+
allocations=allocs if driver_reshaped or pci_reshaped else None
1307+
)
1308+
except exception.InventoryInUse as e:
1309+
# This means an inventory reconfiguration (e.g.: removing a parent
1310+
# PF and adding a VF under that parent) was not possible due to
1311+
# existing allocations. Translate the exception to prevent the
1312+
# compute service to start
1313+
raise exception.PlacementPciException(error=str(e))
13061314

13071315
def _update(self, context, compute_node, startup=False):
13081316
"""Update partial stats locally and populate them to Scheduler."""

nova/scheduler/client/report.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1373,7 +1373,6 @@ def catch_all(rp_uuid):
13731373
# can inherit.
13741374
helper_exceptions = (
13751375
exception.InvalidResourceClass,
1376-
exception.InventoryInUse,
13771376
exception.ResourceProviderAggregateRetrievalFailed,
13781377
exception.ResourceProviderDeletionFailed,
13791378
exception.ResourceProviderInUse,

nova/tests/functional/libvirt/test_pci_in_placement.py

Lines changed: 195 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,28 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
3636
# Just placeholders to satisfy the base class. The real value will be
3737
# redefined by the tests
3838
PCI_DEVICE_SPEC = []
39-
PCI_ALIAS = None
39+
PCI_ALIAS = [
40+
jsonutils.dumps(x)
41+
for x in (
42+
{
43+
"vendor_id": fakelibvirt.PCI_VEND_ID,
44+
"product_id": fakelibvirt.PCI_PROD_ID,
45+
"name": "a-pci-dev",
46+
},
47+
{
48+
"vendor_id": fakelibvirt.PCI_VEND_ID,
49+
"product_id": fakelibvirt.PF_PROD_ID,
50+
"device_type": "type-PF",
51+
"name": "a-pf",
52+
},
53+
{
54+
"vendor_id": fakelibvirt.PCI_VEND_ID,
55+
"product_id": fakelibvirt.VF_PROD_ID,
56+
"device_type": "type-VF",
57+
"name": "a-vf",
58+
},
59+
)
60+
]
4061

4162
def setUp(self):
4263
super().setUp()
@@ -681,6 +702,179 @@ def test_device_reconfiguration(self):
681702
},
682703
)
683704

705+
def _create_one_compute_with_a_pf_consumed_by_an_instance(self):
706+
# The fake libvirt will emulate on the host:
707+
# * two type-PFs in slot 0, with one type-VF
708+
pci_info = fakelibvirt.HostPCIDevicesInfo(
709+
num_pci=0, num_pfs=1, num_vfs=1)
710+
# we match the PF only and ignore the VF
711+
device_spec = self._to_device_spec_conf(
712+
[
713+
{
714+
"vendor_id": fakelibvirt.PCI_VEND_ID,
715+
"product_id": fakelibvirt.PF_PROD_ID,
716+
"address": "0000:81:00.0",
717+
},
718+
]
719+
)
720+
self.flags(group='pci', device_spec=device_spec)
721+
self.mock_pci_report_in_placement.return_value = True
722+
self.start_compute(hostname="compute1", pci_info=pci_info)
723+
724+
self.assertPCIDeviceCounts("compute1", total=1, free=1)
725+
compute1_expected_placement_view = {
726+
"inventories": {
727+
"0000:81:00.0": {self.PF_RC: 1},
728+
},
729+
"traits": {
730+
"0000:81:00.0": [],
731+
},
732+
"usages": {
733+
"0000:81:00.0": {self.PF_RC: 0},
734+
},
735+
"allocations": {},
736+
}
737+
self.assert_placement_pci_view(
738+
"compute1", **compute1_expected_placement_view)
739+
740+
# Create an instance consuming the PF
741+
extra_spec = {"pci_passthrough:alias": "a-pf:1"}
742+
flavor_id = self._create_flavor(extra_spec=extra_spec)
743+
server = self._create_server(flavor_id=flavor_id, networks=[])
744+
745+
self.assertPCIDeviceCounts("compute1", total=1, free=0)
746+
compute1_expected_placement_view["usages"] = {
747+
"0000:81:00.0": {self.PF_RC: 1},
748+
}
749+
compute1_expected_placement_view["allocations"][server["id"]] = {
750+
"0000:81:00.0": {self.PF_RC: 1},
751+
}
752+
self.assert_placement_pci_view(
753+
"compute1", **compute1_expected_placement_view)
754+
self._run_periodics()
755+
self.assert_placement_pci_view(
756+
"compute1", **compute1_expected_placement_view)
757+
758+
return server, compute1_expected_placement_view
759+
760+
def test_device_reconfiguration_with_allocations_config_change_warn(self):
761+
server, compute1_expected_placement_view = (
762+
self._create_one_compute_with_a_pf_consumed_by_an_instance())
763+
764+
# remove 0000:81:00.0 from the device spec and restart the compute
765+
device_spec = self._to_device_spec_conf([])
766+
self.flags(group='pci', device_spec=device_spec)
767+
# The PF is used but removed from the config. The PciTracker warns
768+
# but keeps the device so the placement logic mimic this and only warns
769+
# but keeps the RP and the allocation in placement intact.
770+
self.restart_compute_service(hostname="compute1")
771+
self.assert_placement_pci_view(
772+
"compute1", **compute1_expected_placement_view)
773+
self._run_periodics()
774+
self.assert_placement_pci_view(
775+
"compute1", **compute1_expected_placement_view)
776+
# the warning from the PciTracker
777+
self.assertIn(
778+
"WARNING [nova.pci.manager] Unable to remove device with status "
779+
"'allocated' and ownership %s because of PCI device "
780+
"1:0000:81:00.0 is allocated instead of ['available', "
781+
"'unavailable', 'unclaimable']. Check your [pci]device_spec "
782+
"configuration to make sure this allocated device is whitelisted. "
783+
"If you have removed the device from the whitelist intentionally "
784+
"or the device is no longer available on the host you will need "
785+
"to delete the server or migrate it to another host to silence "
786+
"this warning."
787+
% server['id'],
788+
self.stdlog.logger.output,
789+
)
790+
# the warning from the placement PCI tracking logic
791+
self.assertIn(
792+
"WARNING [nova.compute.pci_placement_translator] Device spec is "
793+
"not found for device 0000:81:00.0 in [pci]device_spec. We are "
794+
"skipping this devices during Placement update. The device is "
795+
"allocated by %s. You should not remove an allocated device from "
796+
"the configuration. Please restore the configuration or cold "
797+
"migrate the instance to resolve the inconsistency."
798+
% server['id'],
799+
self.stdlog.logger.output,
800+
)
801+
802+
def test_device_reconfiguration_with_allocations_config_change_stop(self):
803+
self._create_one_compute_with_a_pf_consumed_by_an_instance()
804+
805+
# switch 0000:81:00.0 PF to 0000:81:00.1 VF
806+
# in the config, then restart the compute service
807+
808+
# only match the VF now
809+
device_spec = self._to_device_spec_conf(
810+
[
811+
{
812+
"vendor_id": fakelibvirt.PCI_VEND_ID,
813+
"product_id": fakelibvirt.VF_PROD_ID,
814+
"address": "0000:81:00.1",
815+
},
816+
]
817+
)
818+
self.flags(group='pci', device_spec=device_spec)
819+
# The compute fails to start as the new config would mean that the PF
820+
# inventory is removed from the 0000:81:00.0 RP and the PF inventory is
821+
# added instead there, but the VF inventory has allocations. Keeping
822+
# the old inventory as in
823+
# test_device_reconfiguration_with_allocations_config_change_warn is
824+
# not an option as it would result in two resource class on the same RP
825+
# one for the PF and one for the VF. That would allow consuming
826+
# the same physical device twice. Such dependent device configuration
827+
# is intentionally not supported so we are stopping the compute
828+
# service.
829+
ex = self.assertRaises(
830+
exception.PlacementPciException,
831+
self.restart_compute_service,
832+
hostname="compute1"
833+
)
834+
self.assertRegex(
835+
str(ex),
836+
"Failed to gather or report PCI resources to Placement: There was "
837+
"a conflict when trying to complete your request.\n\n "
838+
"update conflict: Inventory for 'CUSTOM_PCI_8086_1528' on "
839+
"resource provider '.*' in use.",
840+
)
841+
842+
def test_device_reconfiguration_with_allocations_hyp_change(self):
843+
server, compute1_expected_placement_view = (
844+
self._create_one_compute_with_a_pf_consumed_by_an_instance())
845+
846+
# restart the compute but simulate that the device 0000:81:00.0 is
847+
# removed from the hypervisor while the device spec config left
848+
# intact. The PciTracker will notice this and log a warning. The
849+
# placement tracking logic simply keeps the allocation intact in
850+
# placement as both the PciDevice and the DeviceSpec is available.
851+
pci_info = fakelibvirt.HostPCIDevicesInfo(
852+
num_pci=0, num_pfs=0, num_vfs=0)
853+
self.restart_compute_service(
854+
hostname="compute1",
855+
pci_info=pci_info,
856+
keep_hypervisor_state=False
857+
)
858+
self.assert_placement_pci_view(
859+
"compute1", **compute1_expected_placement_view)
860+
self._run_periodics()
861+
self.assert_placement_pci_view(
862+
"compute1", **compute1_expected_placement_view)
863+
# the warning from the PciTracker
864+
self.assertIn(
865+
"WARNING [nova.pci.manager] Unable to remove device with status "
866+
"'allocated' and ownership %s because of PCI device "
867+
"1:0000:81:00.0 is allocated instead of ['available', "
868+
"'unavailable', 'unclaimable']. Check your [pci]device_spec "
869+
"configuration to make sure this allocated device is whitelisted. "
870+
"If you have removed the device from the whitelist intentionally "
871+
"or the device is no longer available on the host you will need "
872+
"to delete the server or migrate it to another host to silence "
873+
"this warning."
874+
% server['id'],
875+
self.stdlog.logger.output,
876+
)
877+
684878
def test_reporting_disabled_nothing_is_reported(self):
685879
# The fake libvirt will emulate on the host:
686880
# * one type-PCI in slot 0
@@ -765,32 +959,6 @@ def setUp(self):
765959
)
766960
)
767961

768-
# Pre-configure a PCI alias to consume our devs
769-
alias_pci = {
770-
"vendor_id": fakelibvirt.PCI_VEND_ID,
771-
"product_id": fakelibvirt.PCI_PROD_ID,
772-
"name": "a-pci-dev",
773-
}
774-
alias_pf = {
775-
"vendor_id": fakelibvirt.PCI_VEND_ID,
776-
"product_id": fakelibvirt.PF_PROD_ID,
777-
"device_type": "type-PF",
778-
"name": "a-pf",
779-
}
780-
alias_vf = {
781-
"vendor_id": fakelibvirt.PCI_VEND_ID,
782-
"product_id": fakelibvirt.VF_PROD_ID,
783-
"device_type": "type-VF",
784-
"name": "a-vf",
785-
}
786-
self.flags(
787-
group='pci',
788-
alias=self._to_pci_alias_conf([alias_pci, alias_pf, alias_vf]))
789-
790-
@staticmethod
791-
def _to_pci_alias_conf(alias_list):
792-
return [jsonutils.dumps(x) for x in alias_list]
793-
794962
@staticmethod
795963
def _move_allocation(allocations, from_uuid, to_uuid):
796964
allocations[to_uuid] = allocations[from_uuid]

nova/tests/unit/compute/test_pci_placement_translator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def test_translator_skips_devices_without_matching_spec(self):
5050
pci_device.PciDevice(
5151
address="0000:81:00.0",
5252
status=fields.PciDeviceStatus.AVAILABLE,
53+
instance_uuid=None,
5354
)
5455
]
5556
)

nova/tests/unit/compute/test_resource_tracker.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1960,6 +1960,35 @@ def test_update_pci_reporting_same_host_resize(
19601960
upt = self.rt.reportclient.update_from_provider_tree
19611961
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
19621962

1963+
@mock.patch(
1964+
'nova.compute.resource_tracker.ResourceTracker.'
1965+
'_sync_compute_service_disabled_trait',
1966+
new=mock.Mock()
1967+
)
1968+
@mock.patch(
1969+
'nova.compute.resource_tracker.ResourceTracker._resource_change',
1970+
new=mock.Mock(return_value=False)
1971+
)
1972+
def test_update_pci_reporting_allocation_in_use_error_propagated(self):
1973+
"""Assert that if the pci placement reporting code tries to remove
1974+
inventory with allocation from placement due to invalid hypervisor
1975+
or [pci]device_spec reconfiguration then the InventoryInUse error from
1976+
placement is propagated and makes the compute startup to fail.
1977+
"""
1978+
compute_obj = _COMPUTE_NODE_FIXTURES[0].obj_clone()
1979+
self._setup_rt()
1980+
self.rt.reportclient.update_from_provider_tree.side_effect = (
1981+
exc.InventoryInUse(
1982+
resource_class="FOO", resource_provider="bar"))
1983+
1984+
self.assertRaises(
1985+
exc.PlacementPciException,
1986+
self.rt._update,
1987+
mock.sentinel.ctx,
1988+
compute_obj,
1989+
startup=True,
1990+
)
1991+
19631992
@mock.patch('nova.objects.Service.get_by_compute_host',
19641993
return_value=objects.Service(disabled=True))
19651994
def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):

0 commit comments

Comments
 (0)