Skip to content

Commit 5860541

Browse files
committed
Support [pci]device_spec reconfiguration
Device addition, removal and reconfiguration (i.e. changing resource_class or traits) are supported now. Note that, as scheduling support is not yet added, reconfiguration while there are allocations against the reconfigured devices is not tested yet. blueprint: pci-device-tracking-in-placement Change-Id: I31b94598516e97d1653aed6edfd3d19c67782a4a
1 parent 01d7a39 commit 5860541

File tree

3 files changed

+429
-27
lines changed

3 files changed

+429
-27
lines changed

nova/compute/pci_placement_translator.py

Lines changed: 76 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,29 @@ def add_parent(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None:
166166
self.resource_class = _get_rc_for_dev(dev, dev_spec_tags)
167167
self.traits = _get_traits_for_dev(dev_spec_tags)
168168

169+
def remove_child(self, dev: pci_device.PciDevice) -> None:
    """Handle the removal of a child device from this RP.

    Intentionally a no-op: nothing is recorded on the RP here. The
    update_provider_tree call will handle the inventory decrease or the
    full RP removal.

    :param dev: the child PCI device being removed; not used here.
    """
173+
174+
def remove_parent(self, dev: pci_device.PciDevice) -> None:
    """Handle the removal of the parent device from this RP.

    Intentionally a no-op: nothing is recorded on the RP here. The
    update_provider_tree call will handle the full RP removal, which it
    does when the RP has neither a parent device nor child devices.

    :param dev: the parent PCI device being removed; not used here.
    """
177+
169178
def update_provider_tree(
170179
self, provider_tree: provider_tree.ProviderTree
171180
) -> None:
181+
182+
if not self.parent_dev and not self.children_devs:
183+
# This means we need to delete the RP from placement if exists
184+
if provider_tree.exists(self.name):
185+
# NOTE(gibi): If there are allocations on this RP then
186+
# Placement will reject the update when the provider_tree is
187+
# synced up.
188+
provider_tree.remove(self.name)
189+
190+
return
191+
172192
provider_tree.update_inventory(
173193
self.name,
174194
# NOTE(gibi): The rest of the inventory fields (reserved,
@@ -188,10 +208,13 @@ def update_provider_tree(
188208
provider_tree.update_traits(self.name, self.traits)
189209

190210
def __str__(self) -> str:
191-
return (
192-
f"RP({self.name}, {self.resource_class}={len(self.devs)}, "
193-
f"traits={','.join(self.traits or set())})"
194-
)
211+
if self.devs:
212+
return (
213+
f"RP({self.name}, {self.resource_class}={len(self.devs)}, "
214+
f"traits={','.join(self.traits or set())})"
215+
)
216+
else:
217+
return f"RP({self.name}, <EMPTY>)"
195218

196219

197220
class PlacementView:
@@ -207,9 +230,7 @@ def _get_rp_name_for_address(self, addr: str) -> str:
207230
def _ensure_rp(self, rp_name: str) -> PciResourceProvider:
    """Return the RP tracked under ``rp_name``, registering one if needed.

    A candidate PciResourceProvider is always constructed; it is only
    stored in ``self.rps`` when no RP is tracked under that name yet.

    :param rp_name: name of the resource provider to look up or create.
    :returns: the PciResourceProvider stored in ``self.rps`` for the name.
    """
    candidate = PciResourceProvider(rp_name)
    return self.rps.setdefault(rp_name, candidate)
209232

210-
def _add_child(
211-
self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str]
212-
) -> None:
233+
def _get_rp_name_for_child(self, dev: pci_device.PciDevice) -> str:
213234
if not dev.parent_addr:
214235
msg = _(
215236
"Missing parent address for PCI device s(dev)% with "
@@ -220,7 +241,12 @@ def _add_child(
220241
}
221242
raise exception.PlacementPciException(error=msg)
222243

223-
rp_name = self._get_rp_name_for_address(dev.parent_addr)
244+
return self._get_rp_name_for_address(dev.parent_addr)
245+
246+
def _add_child(
    self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str]
) -> None:
    """Register a child device on the RP derived from its parent address.

    The RP name comes from ``_get_rp_name_for_child``, which raises
    PlacementPciException when the device has no parent address. The RP
    is created in this view if it is not tracked yet.

    :param dev: the child PCI device to add.
    :param dev_spec_tags: tags of the matching [pci]device_spec entry.
    """
    child_rp = self._ensure_rp(self._get_rp_name_for_child(dev))
    child_rp.add_child(dev, dev_spec_tags)
225251

226252
def _add_parent(
@@ -229,7 +255,7 @@ def _add_parent(
229255
rp_name = self._get_rp_name_for_address(dev.address)
230256
self._ensure_rp(rp_name).add_parent(dev, dev_spec_tags)
231257

232-
def add_dev(
258+
def _add_dev(
233259
self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str]
234260
) -> None:
235261
if dev_spec_tags.get("physical_network"):
@@ -263,6 +289,46 @@ def add_dev(
263289
# check for running migrations.
264290
pass
265291

292+
def _remove_child(self, dev: pci_device.PciDevice) -> None:
    """Forward the removal of a child device to its resource provider.

    The RP name comes from ``_get_rp_name_for_child``, which raises
    PlacementPciException when the device has no parent address. The RP
    is created in this view if it is not tracked yet.

    :param dev: the child PCI device being removed.
    """
    child_rp = self._ensure_rp(self._get_rp_name_for_child(dev))
    child_rp.remove_child(dev)
295+
296+
def _remove_parent(self, dev: pci_device.PciDevice) -> None:
    """Forward the removal of a parent device to its resource provider.

    The RP name is derived from the device's own address. The RP is
    created in this view if it is not tracked yet.

    :param dev: the parent PCI device being removed.
    """
    parent_rp = self._ensure_rp(self._get_rp_name_for_address(dev.address))
    parent_rp.remove_parent(dev)
299+
300+
def _remove_dev(self, dev: pci_device.PciDevice) -> None:
    """Remove a PCI device from Placement that existed before but has now
    been deleted from the hypervisor or unlisted from [pci]device_spec.

    Devices whose type is in neither PARENT_TYPES nor CHILD_TYPES are
    left untouched.

    :param dev: the PCI device to remove from the Placement view.
    """
    if dev.dev_type in PARENT_TYPES:
        self._remove_parent(dev)
        return

    if dev.dev_type in CHILD_TYPES:
        self._remove_child(dev)
308+
309+
def process_dev(
    self,
    dev: pci_device.PciDevice,
    dev_spec: ty.Optional[devspec.PciDeviceSpec],
) -> None:
    """Apply a single PCI device to the Placement view.

    Devices in DELETED or REMOVED status are removed from the view. Any
    other device is added using the tags of its device spec. A device
    without a matching spec is logged and ignored.

    :param dev: the PCI device to process.
    :param dev_spec: the [pci]device_spec entry matching the device, or
        None if no entry matched.
    """
    gone_statuses = (
        fields.PciDeviceStatus.DELETED,
        fields.PciDeviceStatus.REMOVED,
    )
    if dev.status in gone_statuses:
        self._remove_dev(dev)
        return

    if dev_spec:
        self._add_dev(dev, dev_spec.get_tags())
        return

    # A live device with no matching spec is unexpected; skip it rather
    # than fail the whole view.
    LOG.warning(
        "Device spec is not found for device %s in "
        "[pci]device_spec. Ignoring device in Placement resource "
        "view. This should not happen. Please file a bug.",
        dev.address
    )
331+
266332
def __str__(self) -> str:
267333
return (
268334
f"Placement PCI view on {self.root_rp_name}: "
@@ -341,14 +407,7 @@ def update_provider_tree_for_pci(
341407
# match the PCI device with the [pci]dev_spec config to access
342408
# the configuration metadata tags
343409
dev_spec = pci_tracker.dev_filter.get_devspec(dev)
344-
if not dev_spec:
345-
LOG.warning(
346-
"Device spec is not found for device %s in [pci]device_spec. "
347-
"Ignoring device in Placement resource view. "
348-
"This should not happen. Please file a bug.", dev.address)
349-
continue
350-
351-
pv.add_dev(dev, dev_spec.get_tags())
410+
pv.process_dev(dev, dev_spec)
352411

353412
LOG.info("Placement PCI resource view: %s", pv)
354413

0 commit comments

Comments
 (0)