Skip to content

Commit 3e68e19

Browse files
committed
Stop if tracking is disable after it was enabled before
We don't support enabling and then later disabling [pci]report_in_placement. This patch adds a check that kills the nova-compute service is such situation. The implementation rely on an extra trait COMPUTE_MANAGED_PCI_DEVICE that is added to every PCI RP automatically. Note that we cannot use the existing OWNER_NOVA standard trait as that will be exists on other RPs in RP three. Also we cannot use the naming scheme of the PCI RPs as cyborg is free to use the same scheme and also nova might want to use that for VGPUs / MDEVs in the future. Depends-On: https://review.opendev.org/c/openstack/os-traits/+/850538 blueprint: pci-device-tracking-in-placement Change-Id: I6e68d7754cee51525894c14a74a554d82a648d8d
1 parent 5860541 commit 3e68e19

File tree

4 files changed

+115
-14
lines changed

4 files changed

+115
-14
lines changed

nova/compute/pci_placement_translator.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,12 @@ def _get_traits_for_dev(
7373
# traits is a comma separated list of placement trait names
7474
traits_str = dev_spec_tags.get("traits")
7575
if not traits_str:
76-
return set()
76+
return {os_traits.COMPUTE_MANAGED_PCI_DEVICE}
7777

7878
traits = traits_str.split(',')
79-
return set(_normalize_traits(traits))
79+
return set(_normalize_traits(traits)) | {
80+
os_traits.COMPUTE_MANAGED_PCI_DEVICE
81+
}
8082

8183

8284
def _get_rc_for_dev(
@@ -211,7 +213,7 @@ def __str__(self) -> str:
211213
if self.devs:
212214
return (
213215
f"RP({self.name}, {self.resource_class}={len(self.devs)}, "
214-
f"traits={','.join(self.traits or set())})"
216+
f"traits={','.join(sorted(self.traits or set()))})"
215217
)
216218
else:
217219
return f"RP({self.name}, <EMPTY>)"
@@ -357,6 +359,26 @@ def ensure_no_dev_spec_with_devname(dev_specs: ty.List[devspec.PciDeviceSpec]):
357359
raise exception.PlacementPciException(error=msg)
358360

359361

362+
def ensure_tracking_was_not_enabled_before(
363+
provider_tree: provider_tree.ProviderTree
364+
) -> None:
365+
# If placement tracking was enabled before then we do not support
366+
# disabling it later. To check for that we can look for RPs with
367+
# the COMPUTE_MANAGED_PCI_DEVICE trait. If any then we raise to
368+
# kill the service
369+
for rp_uuid in provider_tree.get_provider_uuids():
370+
if (
371+
os_traits.COMPUTE_MANAGED_PCI_DEVICE
372+
in provider_tree.data(rp_uuid).traits
373+
):
374+
msg = _(
375+
"The [pci]report_in_placement is False but it was enabled "
376+
"before on this compute. Nova does not support disabling "
377+
"it after it is enabled."
378+
)
379+
raise exception.PlacementPciException(error=msg)
380+
381+
360382
def update_provider_tree_for_pci(
361383
provider_tree: provider_tree.ProviderTree,
362384
nodename: str,
@@ -393,6 +415,7 @@ def update_provider_tree_for_pci(
393415
}
394416
"""
395417
if not _is_placement_tracking_enabled():
418+
ensure_tracking_was_not_enabled_before(provider_tree)
396419
# If tracking is not enabled we just return without touching anything
397420
return False
398421

nova/tests/functional/libvirt/test_pci_in_placement.py

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def setUp(self):
4646
return_value=True
4747
)
4848
self.addCleanup(patcher.stop)
49-
patcher.start()
49+
self.mock_pci_report_in_placement = patcher.start()
5050

5151
# These tests should not depend on the host's sysfs
5252
self.useFixture(
@@ -286,8 +286,9 @@ def test_sibling_vfs_with_contradicting_traits_rejected(self):
286286
)
287287
self.assertIn(
288288
"VFs from the same PF cannot be configured with different set of "
289-
"'traits' in [pci]device_spec. We got CUSTOM_BAR for 0000:81:00.2 "
290-
"and CUSTOM_FOO for 0000:81:00.1.",
289+
"'traits' in [pci]device_spec. We got "
290+
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_BAR for 0000:81:00.2 and "
291+
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_FOO for 0000:81:00.1.",
291292
str(ex)
292293
)
293294

@@ -676,3 +677,74 @@ def test_device_reconfiguration(self):
676677
],
677678
},
678679
)
680+
681+
def test_reporting_disabled_nothing_is_reported(self):
682+
# The fake libvirt will emulate on the host:
683+
# * one type-PCI in slot 0
684+
pci_info = fakelibvirt.HostPCIDevicesInfo(
685+
num_pci=1, num_pfs=0, num_vfs=0)
686+
# the config matches the PCI dev
687+
device_spec = self._to_device_spec_conf(
688+
[
689+
{
690+
"vendor_id": fakelibvirt.PCI_VEND_ID,
691+
"product_id": fakelibvirt.PCI_PROD_ID,
692+
},
693+
]
694+
)
695+
self.flags(group='pci', device_spec=device_spec)
696+
# Disable placement reporting so even if there are PCI devices on the
697+
# hypervisor matching the [pci]device_spec config they are not reported
698+
# to Placement
699+
self.mock_pci_report_in_placement.return_value = False
700+
self.start_compute(hostname="compute1", pci_info=pci_info)
701+
702+
self.assert_placement_pci_view(
703+
"compute1",
704+
inventories={},
705+
traits={},
706+
)
707+
708+
def test_reporting_cannot_be_disable_once_it_is_enabled(self):
709+
# The fake libvirt will emulate on the host:
710+
# * one type-PCI in slot 0
711+
pci_info = fakelibvirt.HostPCIDevicesInfo(
712+
num_pci=1, num_pfs=0, num_vfs=0)
713+
# the config matches the PCI dev
714+
device_spec = self._to_device_spec_conf(
715+
[
716+
{
717+
"vendor_id": fakelibvirt.PCI_VEND_ID,
718+
"product_id": fakelibvirt.PCI_PROD_ID,
719+
},
720+
]
721+
)
722+
self.flags(group='pci', device_spec=device_spec)
723+
self.start_compute(hostname="compute1", pci_info=pci_info)
724+
725+
self.assert_placement_pci_view(
726+
"compute1",
727+
inventories={
728+
"0000:81:00.0": {self.PCI_RC: 1},
729+
},
730+
traits={
731+
"0000:81:00.0": [],
732+
},
733+
)
734+
735+
# Try to disable placement reporting. The compute will refuse to start
736+
# as there are already PCI device RPs in placement.
737+
self.mock_pci_report_in_placement.return_value = False
738+
ex = self.assertRaises(
739+
exception.PlacementPciException,
740+
self.restart_compute_service,
741+
hostname="compute1",
742+
pci_info=pci_info,
743+
keep_hypervisor_state=False,
744+
)
745+
self.assertIn(
746+
"The [pci]report_in_placement is False but it was enabled before "
747+
"on this compute. Nova does not support disabling it after it is "
748+
"enabled.",
749+
str(ex)
750+
)

nova/tests/functional/libvirt/test_pci_sriov_servers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,9 @@ def assert_placement_pci_view(self, hostname, inventories, traits):
113113

114114
rp_traits = self._get_provider_traits(rp['uuid'])
115115
self.assertEqual(
116-
set(traits[rp_name]),
116+
# COMPUTE_MANAGED_PCI_DEVICE is automatically reported on
117+
# PCI device RPs by nova
118+
set(traits[rp_name]) | {"COMPUTE_MANAGED_PCI_DEVICE"},
117119
set(rp_traits),
118120
f"Traits on RP {real_rp_name} does not match with expectation"
119121
)

nova/tests/unit/compute/test_pci_placement_translator.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,9 @@ def test_translator_skips_devices_without_matching_spec(self):
8787
)
8888
def test_trait_normalization(self, trait_names, expected_traits):
8989
self.assertEqual(
90-
expected_traits, ppt._get_traits_for_dev({"traits": trait_names}))
90+
expected_traits | {"COMPUTE_MANAGED_PCI_DEVICE"},
91+
ppt._get_traits_for_dev({"traits": trait_names})
92+
)
9193

9294
@ddt.unpack
9395
@ddt.data(
@@ -192,9 +194,11 @@ def test_mixed_rc_for_sibling_vfs(self):
192194
)
193195
self.assertEqual(
194196
"VFs from the same PF cannot be configured with different set of "
195-
"'traits' in [pci]device_spec. We got CUSTOM_BAR,CUSTOM_FOO for "
196-
"0000:81:00.2 and CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO for "
197-
"0000:81:00.0,0000:81:00.1.",
197+
"'traits' in [pci]device_spec. We got "
198+
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_BAR,CUSTOM_FOO for "
199+
"0000:81:00.2 and "
200+
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO "
201+
"for 0000:81:00.0,0000:81:00.1.",
198202
str(ex),
199203
)
200204
# as well as additional trait
@@ -207,8 +211,8 @@ def test_mixed_rc_for_sibling_vfs(self):
207211
self.assertEqual(
208212
"VFs from the same PF cannot be configured with different set of "
209213
"'traits' in [pci]device_spec. We got "
210-
"CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_EXTRA,CUSTOM_FOO for 0000:81:00.3 "
211-
"and CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO for "
212-
"0000:81:00.0,0000:81:00.1.",
214+
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_EXTRA,"
215+
"CUSTOM_FOO for 0000:81:00.3 and COMPUTE_MANAGED_PCI_DEVICE,"
216+
"CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO for 0000:81:00.0,0000:81:00.1.",
213217
str(ex),
214218
)

0 commit comments

Comments
 (0)