
Commit d7bca63

Remove unavailable but not reported PCI devices at startup
We saw in the field that the pci_devices table can end up in an inconsistent state after a compute node HW failure and re-deployment. There can be dependent devices where the parent PF is in the 'available' state while the child VFs are in the 'unavailable' state. (Before the HW fault the PF was allocated, hence the VFs were marked unavailable.) In this state the PF is still schedulable, but during the PCI claim the handling of dependent devices in the PCI tracker will fail with the error: "Attempt to consume PCI device XXX from empty pool". The reason for the failure is that when the PF is claimed, all the child VFs are marked unavailable; but if a VF is already unavailable, that step fails.

One way the deployer might try to recover from this state is to remove the VFs from the hypervisor and restart the compute agent. The compute startup already has logic to delete PCI devices that are unused and not reported by the hypervisor. However, this logic only removed devices in the 'available' state and ignored devices in the 'unavailable' state. If a device is unused and the hypervisor no longer reports it, then it is safe to delete that device from the PCI tracker. So this patch extends the logic to allow deleting 'unavailable' devices as well.

There is a small window when a dependent PCI device is in the 'unclaimable' state. From a cleanup perspective this is an analogous state, so it is also added to the cleanup logic.

Related-Bug: #1969496
Change-Id: If9ab424cc7375a1f0d41b03f01c4a823216b3eb8
(cherry picked from commit 284ea72)
1 parent f04cfd4 commit d7bca63
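
The failure mode and the fix are easiest to see as a predicate over device state. Below is a minimal, self-contained sketch of the cleanup rule this commit introduces; the Device class, is_removable() helper and REMOVABLE_STATES set are illustrative stand-ins, not nova code:

```python
# Toy model of the startup cleanup rule; all names here are hypothetical.
from dataclasses import dataclass
from typing import Optional, Set

AVAILABLE = 'available'
UNAVAILABLE = 'unavailable'   # e.g. a VF whose parent PF is allocated
UNCLAIMABLE = 'unclaimable'   # transient dependent-device state
# Before this commit only 'available' records were purged at startup;
# now any unused-and-unreported record may be.
REMOVABLE_STATES = {AVAILABLE, UNAVAILABLE, UNCLAIMABLE}


@dataclass
class Device:
    address: str
    status: str
    instance_uuid: Optional[str] = None


def is_removable(dev: Device, reported: Set[str]) -> bool:
    """Can this stale DB record be safely deleted at compute startup?"""
    return (
        dev.address not in reported         # hypervisor no longer reports it
        and dev.status in REMOVABLE_STATES  # state implies it is unused
        and dev.instance_uuid is None       # extra safety: no recorded owner
    )


# The field scenario: the PF was allocated before the HW fault, so the
# child VF is 'unavailable'; after re-deployment the VF is gone from the
# hypervisor and its record can now be purged.
print(is_removable(Device('0000:81:00.2', UNAVAILABLE), {'0000:81:00.0'}))
# -> True
```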

File tree: 4 files changed, +123 -7 lines changed

nova/objects/pci_device.py

Lines changed: 21 additions & 2 deletions
@@ -447,11 +447,30 @@ def allocate(self, instance):
         instance.pci_devices.objects.append(copy.copy(self))
 
     def remove(self):
-        if self.status != fields.PciDeviceStatus.AVAILABLE:
+        # We allow removal of a device if it is unused. It can be unused
+        # either by being in available state or being in a state that shows
+        # that the parent or child device blocks the consumption of this device
+        expected_states = [
+            fields.PciDeviceStatus.AVAILABLE,
+            fields.PciDeviceStatus.UNAVAILABLE,
+            fields.PciDeviceStatus.UNCLAIMABLE,
+        ]
+        if self.status not in expected_states:
             raise exception.PciDeviceInvalidStatus(
                 compute_node_id=self.compute_node_id,
                 address=self.address, status=self.status,
-                hopestatus=[fields.PciDeviceStatus.AVAILABLE])
+                hopestatus=expected_states)
+        # Just to be on the safe side, do not allow removal of a device that
+        # has an owner even if the state of the device suggests that it is not
+        # owned.
+        if 'instance_uuid' in self and self.instance_uuid is not None:
+            raise exception.PciDeviceInvalidOwner(
+                compute_node_id=self.compute_node_id,
+                address=self.address,
+                owner=self.instance_uuid,
+                hopeowner=None,
+            )
+
         self.status = fields.PciDeviceStatus.REMOVED
         self.instance_uuid = None
         self.request_id = None
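
As a summary of the change above: remove() now applies two guards, a status check and an owner check. A stand-alone sketch (the exception class names mirror the diff, but the implementations here are stand-ins, not nova's objects):

```python
# Illustrative stand-ins for nova's exceptions; not the real classes.
class PciDeviceInvalidStatus(Exception):
    pass


class PciDeviceInvalidOwner(Exception):
    pass


REMOVABLE = {'available', 'unavailable', 'unclaimable'}


def remove(status, instance_uuid):
    # Guard 1: the state must imply the device is unused.
    if status not in REMOVABLE:
        raise PciDeviceInvalidStatus(status)
    # Guard 2: never delete a record that still names an owner, even if
    # its state suggests otherwise.
    if instance_uuid is not None:
        raise PciDeviceInvalidOwner(instance_uuid)
    return 'removed'


print(remove('unavailable', None))      # 'removed' -- newly allowed
try:
    remove('unavailable', 'some-uuid')  # inconsistent record: refused
except PciDeviceInvalidOwner as exc:
    print('refused:', exc)
```

The owner check is what the new test_remove_device_fail_owned_with_unavailable_state unit test below exercises.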

nova/pci/manager.py

Lines changed: 11 additions & 5 deletions
@@ -217,10 +217,13 @@ def _set_hvdevs(self, devices: ty.List[ty.Dict[str, ty.Any]]) -> None:
                 # from the pci whitelist.
                 try:
                     existed.remove()
-                except exception.PciDeviceInvalidStatus as e:
-                    LOG.warning("Unable to remove device with %(status)s "
-                                "ownership %(instance_uuid)s because of "
-                                "%(pci_exception)s. "
+                except (
+                    exception.PciDeviceInvalidStatus,
+                    exception.PciDeviceInvalidOwner,
+                ) as e:
+                    LOG.warning("Unable to remove device with status "
+                                "'%(status)s' and ownership %(instance_uuid)s "
+                                "because of %(pci_exception)s. "
                                 "Check your [pci]passthrough_whitelist "
                                 "configuration to make sure this allocated "
                                 "device is whitelisted. If you have removed "
@@ -250,7 +253,10 @@ def _set_hvdevs(self, devices: ty.List[ty.Dict[str, ty.Any]]) -> None:
                 else:
                     # Note(yjiang5): no need to update stats if an assigned
                     # device is hot removed.
-                    self.stats.remove_device(existed)
+                    # NOTE(gibi): only remove the device from the pools if it
+                    # is not already removed
+                    if existed in self.stats.get_free_devs():
+                        self.stats.remove_device(existed)
             else:
                 # Update tracked devices.
                 new_value: ty.Dict[str, ty.Any]
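
The guard around self.stats.remove_device() matters because the stats pools fail once a device is no longer in any free pool, which is the same "empty pool" error quoted in the commit message. A toy illustration assuming only that behaviour (the Pool class is hypothetical; only the error string is taken from the report):

```python
# Hypothetical pool that fails on double-removal, like nova's PCI stats.
class Pool:
    def __init__(self, devs):
        self._free = list(devs)

    def get_free_devs(self):
        return list(self._free)

    def remove_device(self, dev):
        if dev not in self._free:
            raise ValueError(
                'Attempt to consume PCI device %s from empty pool' % dev)
        self._free.remove(dev)


pool = Pool(['0000:81:00.2'])
pool.remove_device('0000:81:00.2')  # first removal succeeds

dev = '0000:81:00.2'
# Unguarded, a second remove_device(dev) would raise; the guarded pattern
# from the diff simply skips devices that are already gone.
if dev in pool.get_free_devs():
    pool.remove_device(dev)
print(pool.get_free_devs())  # []
```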

nova/tests/unit/objects/test_pci_device.py

Lines changed: 10 additions & 0 deletions
@@ -467,6 +467,16 @@ def test_remove_device_fail(self):
         devobj.claim(self.inst.uuid)
         self.assertRaises(exception.PciDeviceInvalidStatus, devobj.remove)
 
+    def test_remove_device_fail_owned_with_unavailable_state(self):
+        # This test creates a PCI device in an invalid state. This should
+        # not happen in any known scenario, but we want to be safe and not
+        # allow removing a device that has an owner. See bug 1969496.
+        self._create_fake_instance()
+        devobj = pci_device.PciDevice.create(None, dev_dict)
+        devobj.claim(self.inst.uuid)
+        devobj.status = fields.PciDeviceStatus.UNAVAILABLE
+        self.assertRaises(exception.PciDeviceInvalidOwner, devobj.remove)
+
 
 class TestPciDeviceObject(test_objects._LocalTest,
                           _TestPciDeviceObject):
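
For context on the tracker tests below: consuming an SR-IOV PF blocks its child VFs, and consuming a VF blocks its parent PF. A toy sketch of that dependent-device rule, using illustrative classes rather than nova's tracker:

```python
# Hypothetical model of the SR-IOV dependent-device rule.
class Dev:
    def __init__(self, address, dev_type, parent=None):
        self.address = address
        self.dev_type = dev_type
        self.parent = parent
        self.children = []
        self.status = 'available'
        if parent is not None:
            parent.children.append(self)


def claim(dev):
    dev.status = 'claimed'
    # Claiming a PF blocks all of its VFs; claiming a VF blocks its PF.
    blocked = dev.children if dev.dev_type == 'PF' else [dev.parent]
    for other in blocked:
        if other is not None:
            other.status = 'unavailable'


pf = Dev('0000:81:00.0', 'PF')
vf1 = Dev('0000:81:00.1', 'VF', parent=pf)
vf2 = Dev('0000:81:00.2', 'VF', parent=pf)
claim(pf)
print(vf1.status, vf2.status)  # unavailable unavailable -- as in the tests
```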

nova/tests/unit/pci/test_manager.py

Lines changed: 81 additions & 0 deletions
@@ -397,6 +397,87 @@ def test_set_hvdev_changed_stal(self):
         self.assertEqual(len(self.tracker.stale), 1)
         self.assertEqual(self.tracker.stale['0000:00:00.2']['vendor_id'], 'v2')
 
+    def _get_device_by_address(self, address):
+        devs = [dev for dev in self.tracker.pci_devs if dev.address == address]
+        if len(devs) == 1:
+            return devs[0]
+        if devs:
+            raise ValueError('ambiguous address', devs)
+        else:
+            raise ValueError('device not found', address)
+
+    def test_set_hvdevs_unavailable_vf_removed(self):
+        # We start with a PF parent and two VF children
+        self._create_tracker([fake_db_dev_3, fake_db_dev_4, fake_db_dev_5])
+        pci_requests_obj = self._create_pci_requests_object(
+            [
+                {
+                    'count': 1,
+                    'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}]
+                }
+            ],
+            instance_uuid=uuidsentinel.instance1,
+        )
+        # then claim and allocate the PF that makes the VFs unavailable
+        self.tracker.claim_instance(
+            mock.sentinel.context, pci_requests_obj, None)
+        self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
+
+        dev3_pf = self._get_device_by_address(fake_db_dev_3['address'])
+        self.assertEqual('allocated', dev3_pf.status)
+        self.assertEqual(uuidsentinel.instance1, dev3_pf.instance_uuid)
+        dev4_vf = self._get_device_by_address(fake_db_dev_4['address'])
+        self.assertEqual('unavailable', dev4_vf.status)
+        dev5_vf = self._get_device_by_address(fake_db_dev_5['address'])
+        self.assertEqual('unavailable', dev5_vf.status)
+
+        # now simulate that one VF (dev_5) is removed from the hypervisor and
+        # the compute is restarted. As the VF is not claimed or allocated we
+        # are free to remove it from the tracker.
+        self.tracker._set_hvdevs(copy.deepcopy([fake_pci_3, fake_pci_4]))
+
+        dev3_pf = self._get_device_by_address(fake_db_dev_3['address'])
+        self.assertEqual('allocated', dev3_pf.status)
+        self.assertEqual(uuidsentinel.instance1, dev3_pf.instance_uuid)
+        dev4_vf = self._get_device_by_address(fake_db_dev_4['address'])
+        self.assertEqual('unavailable', dev4_vf.status)
+        dev5_vf = self._get_device_by_address(fake_db_dev_5['address'])
+        self.assertEqual('removed', dev5_vf.status)
+
+    def test_set_hvdevs_unavailable_pf_removed(self):
+        # We start with one PF parent and one child VF
+        self._create_tracker([fake_db_dev_3, fake_db_dev_4])
+        pci_requests_obj = self._create_pci_requests_object(
+            [
+                {
+                    'count': 1,
+                    'spec': [{'dev_type': fields.PciDeviceType.SRIOV_VF}]
+                }
+            ],
+            instance_uuid=uuidsentinel.instance1,
+        )
+        # Then we claim and allocate the VF that makes the PF unavailable
+        self.tracker.claim_instance(
+            mock.sentinel.context, pci_requests_obj, None)
+        self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
+
+        dev3_pf = self._get_device_by_address(fake_db_dev_3['address'])
+        self.assertEqual('unavailable', dev3_pf.status)
+        dev4_vf = self._get_device_by_address(fake_db_dev_4['address'])
+        self.assertEqual('allocated', dev4_vf.status)
+        self.assertEqual(uuidsentinel.instance1, dev4_vf.instance_uuid)
+
+        # now simulate that the parent PF is removed from the hypervisor and
+        # the compute is restarted. As the PF is not claimed or allocated we
+        # are free to remove it from the tracker.
+        self.tracker._set_hvdevs(copy.deepcopy([fake_pci_4]))
+
+        dev3_pf = self._get_device_by_address(fake_db_dev_3['address'])
+        self.assertEqual('removed', dev3_pf.status)
+        dev4_vf = self._get_device_by_address(fake_db_dev_4['address'])
+        self.assertEqual('allocated', dev4_vf.status)
+        self.assertEqual(uuidsentinel.instance1, dev4_vf.instance_uuid)
+
     def test_update_pci_for_instance_active(self):
         pci_requests_obj = self._create_pci_requests_object(fake_pci_requests)
         self.tracker.claim_instance(mock.sentinel.context,
