Skip to content

Commit d8b8a81

Browse files
Update pci stat pools based on PCI device changes
At start up of nova-compute service, the PCI stat pools are populated based on information in pci_devices table in Nova database. The pools are updated only when new device is added or removed but not on any device changes like device type. If an existing device is configured as SRIOV and nova-compute is restarted, the pci_devices table gets updated but the device is still listed under the old pool in pci_tracker.stats.pool (in-memory object). This patch looks for device type updates in existing devices and updates the pools accordingly. Change-Id: Id4ebb06e634a612c8be4be6c678d8265e0b99730 Closes-Bug: #1892361 (cherry picked from commit b8695de)
1 parent 239ffff commit d8b8a81

File tree

6 files changed

+231
-41
lines changed

6 files changed

+231
-41
lines changed

nova/pci/manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ def _set_hvdevs(self, devices):
225225
self.stale[new_value['address']] = new_value
226226
else:
227227
existed.update_device(new_value)
228+
self.stats.update_device(existed)
228229

229230
# Track newly discovered devices.
230231
for dev in [dev for dev in devices if

nova/pci/stats.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,32 @@ def _create_pool_keys_from_dev(self, dev):
104104
pool['parent_ifname'] = dev.extra_info['parent_ifname']
105105
return pool
106106

107+
def _get_pool_with_device_type_mismatch(self, dev):
108+
"""Check for device type mismatch in the pools for a given device.
109+
110+
Return (pool, device) if device type does not match or a single None
111+
if the device type matches.
112+
"""
113+
for pool in self.pools:
114+
for device in pool['devices']:
115+
if device.address == dev.address:
116+
if dev.dev_type != pool["dev_type"]:
117+
return pool, device
118+
return None
119+
120+
return None
121+
122+
def update_device(self, dev):
123+
"""Update a device to its matching pool."""
124+
pool_device_info = self._get_pool_with_device_type_mismatch(dev)
125+
if pool_device_info is None:
126+
return
127+
128+
pool, device = pool_device_info
129+
pool['devices'].remove(device)
130+
self._decrease_pool_count(self.pools, pool)
131+
self.add_device(dev)
132+
107133
def add_device(self, dev):
108134
"""Add a device to its matching pool."""
109135
dev_pool = self._create_pool_keys_from_dev(dev)

nova/tests/functional/libvirt/test_pci_sriov_servers.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ class SRIOVServersTest(_PCIServersTestBase):
7070
{
7171
'vendor_id': fakelibvirt.PCI_VEND_ID,
7272
'product_id': fakelibvirt.PF_PROD_ID,
73+
'physical_network': 'physnet4',
7374
},
7475
{
7576
'vendor_id': fakelibvirt.PCI_VEND_ID,
@@ -103,6 +104,20 @@ def setUp(self):
103104
# fixture already stubbed.
104105
self.neutron = self.useFixture(base.LibvirtNeutronFixture(self))
105106

107+
def _disable_sriov_in_pf(self, pci_info):
108+
# Check for PF and change the capability from virt_functions
109+
# Delete all the VFs
110+
vfs_to_delete = []
111+
112+
for device_name, device in pci_info.devices.items():
113+
if 'virt_functions' in device.pci_device:
114+
device.generate_xml(skip_capability=True)
115+
elif 'phys_function' in device.pci_device:
116+
vfs_to_delete.append(device_name)
117+
118+
for device in vfs_to_delete:
119+
del pci_info.devices[device]
120+
106121
def test_create_server_with_VF(self):
107122
"""Create a server with an SR-IOV VF-type PCI device."""
108123

@@ -266,6 +281,69 @@ def test_get_server_diagnostics_server_with_VF(self):
266281
)
267282
self.assertIsNone(diagnostics['nic_details'][1]['tx_packets'])
268283

284+
def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self):
285+
# Starts a compute with PF not configured with SRIOV capabilities
286+
# Updates the PF with SRIOV capability and restart the compute service
287+
# Then starts a VM with the sriov port. The VM should be in active
288+
# state with sriov port attached.
289+
290+
# To emulate the device type changing, we first create a
291+
# HostPCIDevicesInfo object with PFs and VFs. Then we make a copy
292+
# and remove the VFs and the virt_function capability. This is
293+
# done to ensure the physical function product id is same in both
294+
# the versions.
295+
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1)
296+
pci_info_no_sriov = copy.deepcopy(pci_info)
297+
298+
# Disable SRIOV capabilties in PF and delete the VFs
299+
self._disable_sriov_in_pf(pci_info_no_sriov)
300+
301+
fake_connection = self._get_connection(pci_info=pci_info_no_sriov,
302+
hostname='test_compute0')
303+
self.mock_conn.return_value = fake_connection
304+
305+
self.compute = self.start_service('compute', host='test_compute0')
306+
307+
ctxt = context.get_admin_context()
308+
pci_devices = objects.PciDeviceList.get_by_compute_node(
309+
ctxt,
310+
objects.ComputeNode.get_by_nodename(
311+
ctxt, 'test_compute0',
312+
).id,
313+
)
314+
self.assertEqual(1, len(pci_devices))
315+
self.assertEqual('type-PCI', pci_devices[0].dev_type)
316+
317+
# Update connection with original pci info with sriov PFs
318+
fake_connection = self._get_connection(pci_info=pci_info,
319+
hostname='test_compute0')
320+
self.mock_conn.return_value = fake_connection
321+
322+
# Restart the compute service
323+
self.restart_compute_service(self.compute)
324+
325+
# Verify if PCI devices are of type type-PF or type-VF
326+
pci_devices = objects.PciDeviceList.get_by_compute_node(
327+
ctxt,
328+
objects.ComputeNode.get_by_nodename(
329+
ctxt, 'test_compute0',
330+
).id,
331+
)
332+
for pci_device in pci_devices:
333+
self.assertIn(pci_device.dev_type, ['type-PF', 'type-VF'])
334+
335+
# create the port
336+
self.neutron.create_port({'port': self.neutron.network_4_port_1})
337+
338+
# create a server using the VF via neutron
339+
flavor_id = self._create_flavor()
340+
self._create_server(
341+
flavor_id=flavor_id,
342+
networks=[
343+
{'port': base.LibvirtNeutronFixture.network_4_port_1['id']},
344+
],
345+
)
346+
269347

270348
class SRIOVAttachDetachTest(_PCIServersTestBase):
271349
# no need for aliases as these test will request SRIOV via neutron

nova/tests/unit/pci/test_stats.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,30 @@ def test_remove_device(self):
561561
self.pci_stats.remove_device(dev2)
562562
self._assertPools()
563563

564+
def test_update_device(self):
565+
# Update device type of one of the device from type-PCI to
566+
# type-PF. Verify if the existing pool is updated and a new
567+
# pool is created with dev_type type-PF.
568+
self._create_pci_devices()
569+
dev1 = self.pci_tagged_devices.pop()
570+
dev1.dev_type = 'type-PF'
571+
self.pci_stats.update_device(dev1)
572+
self.assertEqual(3, len(self.pci_stats.pools))
573+
self._assertPoolContent(self.pci_stats.pools[0], '1137', '0072',
574+
len(self.pci_untagged_devices))
575+
self.assertEqual(self.pci_untagged_devices,
576+
self.pci_stats.pools[0]['devices'])
577+
self._assertPoolContent(self.pci_stats.pools[1], '1137', '0071',
578+
len(self.pci_tagged_devices),
579+
physical_network='physnet1')
580+
self.assertEqual(self.pci_tagged_devices,
581+
self.pci_stats.pools[1]['devices'])
582+
self._assertPoolContent(self.pci_stats.pools[2], '1137', '0071',
583+
1,
584+
physical_network='physnet1')
585+
self.assertEqual(dev1,
586+
self.pci_stats.pools[2]['devices'][0])
587+
564588

565589
class PciDeviceVFPFStatsTestCase(test.NoDBTestCase):
566590

nova/tests/unit/virt/libvirt/fakelibvirt.py

Lines changed: 90 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -284,49 +284,62 @@ def __init__(self, dev_type, slot, function, iommu_group, numa_node,
284284
:param multiple_gpu_types: (bool) Supports different vGPU types
285285
"""
286286

287+
self.dev_type = dev_type
288+
self.slot = slot
289+
self.function = function
290+
self.iommu_group = iommu_group
291+
self.numa_node = numa_node
292+
self.vf_ratio = vf_ratio
293+
self.multiple_gpu_types = multiple_gpu_types
294+
self.parent = parent
295+
self.generate_xml()
296+
297+
def generate_xml(self, skip_capability=False):
287298
vend_id = PCI_VEND_ID
288299
vend_name = PCI_VEND_NAME
289-
if dev_type == 'PCI':
290-
if vf_ratio:
300+
capability = ''
301+
if self.dev_type == 'PCI':
302+
if self.vf_ratio:
291303
raise ValueError('vf_ratio does not apply for PCI devices')
292304

293305
prod_id = PCI_PROD_ID
294306
prod_name = PCI_PROD_NAME
295307
driver = PCI_DRIVER_NAME
296-
capability = ''
297-
elif dev_type == 'PF':
308+
elif self.dev_type == 'PF':
298309
prod_id = PF_PROD_ID
299310
prod_name = PF_PROD_NAME
300311
driver = PF_DRIVER_NAME
301-
capability = self.cap_templ % {
302-
'cap_type': PF_CAP_TYPE,
303-
'addresses': '\n'.join([
304-
self.addr_templ % {
305-
# these are the slot, function values of the child VFs
306-
# we can only assign 8 functions to a slot (0-7) so
307-
# bump the slot each time we exceed this
308-
'slot': slot + (x // 8),
309-
# ...and wrap the function value
310-
'function': x % 8,
311-
# the offset is because the PF is occupying function 0
312-
} for x in range(1, vf_ratio + 1)])
313-
}
314-
elif dev_type == 'VF':
312+
if not skip_capability:
313+
capability = self.cap_templ % {
314+
'cap_type': PF_CAP_TYPE,
315+
'addresses': '\n'.join([
316+
self.addr_templ % {
317+
# these are the slot, function values of the child
318+
# VFs, we can only assign 8 functions to a slot
319+
# (0-7) so bump the slot each time we exceed this
320+
'slot': self.slot + (x // 8),
321+
# ...and wrap the function value
322+
'function': x % 8,
323+
# the offset is because the PF is occupying function 0
324+
} for x in range(1, self.vf_ratio + 1)])
325+
}
326+
elif self.dev_type == 'VF':
315327
prod_id = VF_PROD_ID
316328
prod_name = VF_PROD_NAME
317329
driver = VF_DRIVER_NAME
318-
capability = self.cap_templ % {
319-
'cap_type': VF_CAP_TYPE,
320-
'addresses': self.addr_templ % {
321-
# this is the slot, function value of the parent PF
322-
# if we're e.g. device 8, we'll have a different slot
323-
# to our parent so reverse this
324-
'slot': slot - ((vf_ratio + 1) // 8),
325-
# the parent PF is always function 0
326-
'function': 0,
330+
if not skip_capability:
331+
capability = self.cap_templ % {
332+
'cap_type': VF_CAP_TYPE,
333+
'addresses': self.addr_templ % {
334+
# this is the slot, function value of the parent PF
335+
# if we're e.g. device 8, we'll have a different slot
336+
# to our parent so reverse this
337+
'slot': self.slot - ((self.vf_ratio + 1) // 8),
338+
# the parent PF is always function 0
339+
'function': 0,
340+
}
327341
}
328-
}
329-
elif dev_type == 'MDEV_TYPES':
342+
elif self.dev_type == 'MDEV_TYPES':
330343
prod_id = MDEV_CAPABLE_PROD_ID
331344
prod_name = MDEV_CAPABLE_PROD_NAME
332345
driver = MDEV_CAPABLE_DRIVER_NAME
@@ -336,36 +349,37 @@ def __init__(self, dev_type, slot, function, iommu_group, numa_node,
336349
'type_id': NVIDIA_11_VGPU_TYPE,
337350
'instances': 16,
338351
}]
339-
if multiple_gpu_types:
352+
if self.multiple_gpu_types:
340353
types.append(self.mdevtypes_templ % {
341354
'type_id': NVIDIA_12_VGPU_TYPE,
342355
'instances': 8,
343356
})
344-
capability = self.cap_templ % {
345-
'cap_type': MDEV_CAPABLE_CAP_TYPE,
346-
'addresses': '\n'.join(types)
347-
}
357+
if not skip_capability:
358+
capability = self.cap_templ % {
359+
'cap_type': MDEV_CAPABLE_CAP_TYPE,
360+
'addresses': '\n'.join(types)
361+
}
348362
self.is_capable_of_mdevs = True
349363
else:
350364
raise ValueError('Expected one of: PCI, VF, PCI')
351365

352366
self.pci_device = self.pci_device_template % {
353-
'slot': slot,
354-
'function': function,
367+
'slot': self.slot,
368+
'function': self.function,
355369
'vend_id': vend_id,
356370
'vend_name': vend_name,
357371
'prod_id': prod_id,
358372
'prod_name': prod_name,
359373
'driver': driver,
360374
'capability': capability,
361-
'iommu_group': iommu_group,
362-
'numa_node': numa_node,
363-
'parent': parent or self.pci_default_parent
375+
'iommu_group': self.iommu_group,
376+
'numa_node': self.numa_node,
377+
'parent': self.parent or self.pci_default_parent
364378
}
365379
# -1 is the sentinel set in /sys/bus/pci/devices/*/numa_node
366380
# for no NUMA affinity. When the numa_node is set to -1 on a device
367381
# Libvirt omits the NUMA element so we remove it.
368-
if numa_node == -1:
382+
if self.numa_node == -1:
369383
self.pci_device = self.pci_device.replace("<numa node='-1'/>", "")
370384

371385
def XMLDesc(self, flags):
@@ -943,6 +957,20 @@ def _parse_definition(self, xml):
943957
nic_info['source'] = source.get('network')
944958
elif nic_info['type'] == 'bridge':
945959
nic_info['source'] = source.get('bridge')
960+
elif nic_info['type'] == 'hostdev':
961+
# <interface type='hostdev'> is for VF when vnic_type
962+
# is direct. Add sriov vf pci information in nic_info
963+
address = source.find('./address')
964+
pci_type = address.get('type')
965+
pci_domain = address.get('domain').replace('0x', '')
966+
pci_bus = address.get('bus').replace('0x', '')
967+
pci_slot = address.get('slot').replace('0x', '')
968+
pci_function = address.get('function').replace(
969+
'0x', '')
970+
pci_device = "%s_%s_%s_%s_%s" % (pci_type, pci_domain,
971+
pci_bus, pci_slot,
972+
pci_function)
973+
nic_info['source'] = pci_device
946974

947975
nics_info += [nic_info]
948976

@@ -984,11 +1012,32 @@ def _parse_definition(self, xml):
9841012

9851013
return definition
9861014

1015+
def verify_hostdevs_interface_are_vfs(self):
1016+
"""Verify for interface type hostdev if the pci device is VF or not.
1017+
"""
1018+
1019+
error_message = ("Interface type hostdev is currently supported on "
1020+
"SR-IOV Virtual Functions only")
1021+
1022+
nics = self._def['devices'].get('nics', [])
1023+
for nic in nics:
1024+
if nic['type'] == 'hostdev':
1025+
pci_device = nic['source']
1026+
pci_info_from_connection = self._connection.pci_info.devices[
1027+
pci_device]
1028+
if 'phys_function' not in pci_info_from_connection.pci_device:
1029+
raise make_libvirtError(
1030+
libvirtError,
1031+
error_message,
1032+
error_code=VIR_ERR_CONFIG_UNSUPPORTED,
1033+
error_domain=VIR_FROM_DOMAIN)
1034+
9871035
def create(self):
9881036
self.createWithFlags(0)
9891037

9901038
def createWithFlags(self, flags):
9911039
# FIXME: Not handling flags at the moment
1040+
self.verify_hostdevs_interface_are_vfs()
9921041
self._state = VIR_DOMAIN_RUNNING
9931042
self._connection._mark_running(self)
9941043
self._has_saved_state = False
@@ -1112,7 +1161,7 @@ def XMLDesc(self, flags):
11121161

11131162
nics = ''
11141163
for nic in self._def['devices']['nics']:
1115-
if 'source' in nic:
1164+
if 'source' in nic and nic['type'] != 'hostdev':
11161165
nics += '''<interface type='%(type)s'>
11171166
<mac address='%(mac)s'/>
11181167
<source %(type)s='%(source)s'/>
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
fixes:
3+
- |
4+
Fixes `bug 1892361`_ in which the pci stat pools are not updated when an
5+
existing device is enabled with SRIOV capability. Restart of nova-compute
6+
service updates the pci device type from type-PCI to type-PF but the pools
7+
still maintain the device type as type-PCI. And so the PF is considered for
8+
allocation to instance that requests vnic_type=direct. With this fix, the
9+
pci device type updates are detected and the pci stat pools are updated
10+
properly.
11+
12+
.. _bug 1892361: https://bugs.launchpad.net/nova/+bug/1892361

0 commit comments

Comments
 (0)