Skip to content

Commit e13f59b

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "Update SRIOV port pci_slot when unshelving" into stable/wallaby
2 parents 90455cd + bf7254b commit e13f59b

File tree

5 files changed

+127
-53
lines changed

5 files changed

+127
-53
lines changed

nova/compute/manager.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6613,12 +6613,6 @@ def _unshelve_instance(self, context, instance, image, filter_properties,
66136613
context, self.reportclient, instance.pci_requests.requests,
66146614
provider_mappings)
66156615

6616-
self.network_api.setup_instance_network_on_host(
6617-
context, instance, self.host,
6618-
provider_mappings=provider_mappings)
6619-
network_info = self.network_api.get_instance_nw_info(
6620-
context, instance)
6621-
66226616
accel_info = []
66236617
if accel_uuids:
66246618
try:
@@ -6628,11 +6622,17 @@ def _unshelve_instance(self, context, instance, image, filter_properties,
66286622
LOG.exception('Failure getting accelerator requests '
66296623
'with the exception: %s', exc,
66306624
instance=instance)
6631-
self._build_resources_cleanup(instance, network_info)
6625+
self._build_resources_cleanup(instance, None)
66326626
raise
66336627

66346628
with self.rt.instance_claim(context, instance, node, allocations,
66356629
limits):
6630+
self.network_api.setup_instance_network_on_host(
6631+
context, instance, self.host,
6632+
provider_mappings=provider_mappings)
6633+
network_info = self.network_api.get_instance_nw_info(
6634+
context, instance)
6635+
66366636
self.driver.spawn(context, instance, image_meta,
66376637
injected_files=[],
66386638
admin_password=None,

nova/network/neutron.py

Lines changed: 63 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2107,16 +2107,16 @@ def create_resource_requests(
21072107
port_numa_policy = None
21082108

21092109
if request_net.port_id:
2110+
# InstancePCIRequest.requester_id is semantically linked
2111+
# to a port with a resource_request.
2112+
requester_id = request_net.port_id
21102113
(vnic_type, trusted, network_id, resource_request,
21112114
port_numa_policy) = self._get_port_vnic_info(
21122115
context, neutron, request_net.port_id)
21132116
physnet, tunneled_ = self._get_physnet_tunneled_info(
21142117
context, neutron, network_id)
21152118

21162119
if resource_request:
2117-
# InstancePCIRequest.requester_id is semantically linked
2118-
# to a port with a resource_request.
2119-
requester_id = request_net.port_id
21202120
# NOTE(gibi): explicitly orphan the RequestGroup by setting
21212121
# context=None as we never intended to save it to the DB.
21222122
resource_requests.append(
@@ -3364,6 +3364,37 @@ def _get_pci_mapping_for_migration(self, instance, migration):
33643364
migration.get('status') == 'reverted')
33653365
return instance.migration_context.get_pci_mapping_for_migration(revert)
33663366

3367+
def _get_port_pci_slot(self, context, instance, port):
3368+
"""Find the PCI address of the device corresponding to the port.
3369+
Assumes the port is an SRIOV one.
3370+
3371+
:param context: The request context.
3372+
:param instance: The instance to which the port is attached.
3373+
:param port: The Neutron port, as obtained from the Neutron API
3374+
JSON form.
3375+
:return: The PCI address as a string, or None if unable to find.
3376+
"""
3377+
# Find the port's PCIRequest, or return None
3378+
for r in instance.pci_requests.requests:
3379+
if r.requester_id == port['id']:
3380+
request = r
3381+
break
3382+
else:
3383+
LOG.debug('No PCI request found for port %s', port['id'],
3384+
instance=instance)
3385+
return None
3386+
# Find the request's device, or return None
3387+
for d in instance.pci_devices:
3388+
if d.request_id == request.request_id:
3389+
device = d
3390+
break
3391+
else:
3392+
LOG.debug('No PCI device found for request %s',
3393+
request.request_id, instance=instance)
3394+
return None
3395+
# Return the device's PCI address
3396+
return device.address
3397+
33673398
def _update_port_binding_for_instance(
33683399
self, context, instance, host, migration=None,
33693400
provider_mappings=None):
@@ -3372,7 +3403,6 @@ def _update_port_binding_for_instance(
33723403
search_opts = {'device_id': instance.uuid,
33733404
'tenant_id': instance.project_id}
33743405
data = neutron.list_ports(**search_opts)
3375-
pci_mapping = None
33763406
port_updates = []
33773407
ports = data['ports']
33783408
FAILED_VIF_TYPES = (network_model.VIF_TYPE_UNBOUND,
@@ -3405,25 +3435,36 @@ def _update_port_binding_for_instance(
34053435
# that this function is called without a migration object, such
34063436
# as in an unshelve operation.
34073437
vnic_type = p.get('binding:vnic_type')
3408-
if (vnic_type in network_model.VNIC_TYPES_SRIOV and
3409-
migration is not None and
3410-
not migration.is_live_migration):
3411-
# Note(adrianc): for live migration binding profile was already
3412-
# updated in conductor when calling bind_ports_to_host()
3413-
if not pci_mapping:
3414-
pci_mapping = self._get_pci_mapping_for_migration(
3415-
instance, migration)
3416-
3417-
pci_slot = binding_profile.get('pci_slot')
3418-
new_dev = pci_mapping.get(pci_slot)
3419-
if new_dev:
3420-
binding_profile.update(
3421-
self._get_pci_device_profile(new_dev))
3422-
updates[constants.BINDING_PROFILE] = binding_profile
3438+
if vnic_type in network_model.VNIC_TYPES_SRIOV:
3439+
# NOTE(artom) For migrations, update the binding profile from
3440+
# the migration object...
3441+
if migration is not None:
3442+
# NOTE(artom) ... except for live migrations, because the
3443+
# conductor has already done that whe calling
3444+
# bind_ports_to_host().
3445+
if not migration.is_live_migration:
3446+
pci_mapping = self._get_pci_mapping_for_migration(
3447+
instance, migration)
3448+
3449+
pci_slot = binding_profile.get('pci_slot')
3450+
new_dev = pci_mapping.get(pci_slot)
3451+
if new_dev:
3452+
binding_profile.update(
3453+
self._get_pci_device_profile(new_dev))
3454+
updates[
3455+
constants.BINDING_PROFILE] = binding_profile
3456+
else:
3457+
raise exception.PortUpdateFailed(port_id=p['id'],
3458+
reason=_("Unable to correlate PCI slot %s") %
3459+
pci_slot)
3460+
# NOTE(artom) If migration is None, this is an unshevle, and we
3461+
# need to figure out the pci_slot from the InstancePCIRequest
3462+
# and PciDevice objects.
34233463
else:
3424-
raise exception.PortUpdateFailed(port_id=p['id'],
3425-
reason=_("Unable to correlate PCI slot %s") %
3426-
pci_slot)
3464+
pci_slot = self._get_port_pci_slot(context, instance, p)
3465+
if pci_slot:
3466+
binding_profile.update({'pci_slot': pci_slot})
3467+
updates[constants.BINDING_PROFILE] = binding_profile
34273468

34283469
# NOTE(gibi): during live migration the conductor already sets the
34293470
# allocation key in the port binding. However during resize, cold

nova/tests/functional/libvirt/test_pci_sriov_servers.py

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -434,30 +434,17 @@ def _test_move_operation_with_neutron(self, move_operation,
434434
source_port = self.neutron.show_port(source_port['port']['id'])
435435
same_slot_port = self.neutron.show_port(same_slot_port['port']['id'])
436436

437-
# FIXME(artom) Until bug 1851545 is fixed, unshelve will not update the
438-
# pci_slot.
439-
if expect_fail:
440-
self.assertEqual(
441-
source_port['port']['binding:profile']['pci_slot'],
442-
same_slot_port['port']['binding:profile']['pci_slot'])
443-
else:
444-
self.assertNotEqual(
445-
source_port['port']['binding:profile']['pci_slot'],
446-
same_slot_port['port']['binding:profile']['pci_slot'])
437+
self.assertNotEqual(
438+
source_port['port']['binding:profile']['pci_slot'],
439+
same_slot_port['port']['binding:profile']['pci_slot'])
447440

448441
conn = self.computes['dest'].driver._host.get_connection()
449442
vms = [vm._def for vm in conn._vms.values()]
450443
self.assertEqual(2, len(vms))
451444
for vm in vms:
452445
self.assertEqual(1, len(vm['devices']['nics']))
453-
# FIXME(artom) Until bug 1851545 is fixed, unshelve will not update the
454-
# XML.
455-
if expect_fail:
456-
self.assertEqual(vms[0]['devices']['nics'][0]['source'],
457-
vms[1]['devices']['nics'][0]['source'])
458-
else:
459-
self.assertNotEqual(vms[0]['devices']['nics'][0]['source'],
460-
vms[1]['devices']['nics'][0]['source'])
446+
self.assertNotEqual(vms[0]['devices']['nics'][0]['source'],
447+
vms[1]['devices']['nics'][0]['source'])
461448

462449
def test_unshelve_server_with_neutron(self):
463450
def move_operation(source_server):
@@ -466,10 +453,7 @@ def move_operation(source_server):
466453
self.api.put_service(self.computes['source'].service_ref.uuid,
467454
{'status': 'disabled'})
468455
self._unshelve_server(source_server)
469-
# FIXME(artom) Bug 1851545 means we explain failure here: the pci_slot
470-
# and XML will not get updated.
471-
self._test_move_operation_with_neutron(move_operation,
472-
expect_fail=True)
456+
self._test_move_operation_with_neutron(move_operation)
473457

474458
def test_cold_migrate_server_with_neutron(self):
475459
def move_operation(source_server):

nova/tests/unit/network/test_neutron.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5830,9 +5830,11 @@ def test_create_resource_requests(self, getclient,
58305830
else:
58315831
self.assertNotIn(pci_request.PCI_TRUSTED_TAG, spec)
58325832

5833-
# Only the port with a resource_request will have pci_req.requester_id.
5833+
# Only SRIOV ports and those with a resource_request will have
5834+
# pci_req.requester_id.
58345835
self.assertEqual(
5835-
[None, None, None, None, uuids.trusted_port, None],
5836+
[uuids.portid_1, uuids.portid_3, uuids.portid_4, uuids.portid_5,
5837+
uuids.trusted_port, uuids.portid_vdpa],
58365838
[pci_req.requester_id for pci_req in pci_requests.requests])
58375839

58385840
self.assertCountEqual(
@@ -6383,6 +6385,41 @@ def test_get_segment_id_for_subnet_fails(self, mock_client):
63836385
self.api.get_segment_id_for_subnet,
63846386
self.context, uuids.subnet_id)
63856387

6388+
@mock.patch.object(neutronapi.LOG, 'debug')
6389+
def test_get_port_pci_slot(self, mock_debug):
6390+
fake_port = {'id': uuids.fake_port_id}
6391+
request = objects.InstancePCIRequest(requester_id=uuids.fake_port_id,
6392+
request_id=uuids.pci_request_id)
6393+
bad_request = objects.InstancePCIRequest(
6394+
requester_id=uuids.wrong_port_id)
6395+
device = objects.PciDevice(request_id=uuids.pci_request_id,
6396+
address='fake-pci-address')
6397+
bad_device = objects.PciDevice(request_id=uuids.wrong_request_id)
6398+
# Test the happy path
6399+
instance = objects.Instance(
6400+
pci_requests=objects.InstancePCIRequests(requests=[request]),
6401+
pci_devices=objects.PciDeviceList(objects=[device]))
6402+
self.assertEqual(
6403+
'fake-pci-address',
6404+
self.api._get_port_pci_slot(self.context, instance, fake_port))
6405+
# Test not finding the request
6406+
instance = objects.Instance(
6407+
pci_requests=objects.InstancePCIRequests(
6408+
requests=[objects.InstancePCIRequest(bad_request)]))
6409+
self.assertIsNone(
6410+
self.api._get_port_pci_slot(self.context, instance, fake_port))
6411+
mock_debug.assert_called_with('No PCI request found for port %s',
6412+
uuids.fake_port_id, instance=instance)
6413+
mock_debug.reset_mock()
6414+
# Test not finding the device
6415+
instance = objects.Instance(
6416+
pci_requests=objects.InstancePCIRequests(requests=[request]),
6417+
pci_devices=objects.PciDeviceList(objects=[bad_device]))
6418+
self.assertIsNone(
6419+
self.api._get_port_pci_slot(self.context, instance, fake_port))
6420+
mock_debug.assert_called_with('No PCI device found for request %s',
6421+
uuids.pci_request_id, instance=instance)
6422+
63866423

63876424
class TestAPIModuleMethods(test.NoDBTestCase):
63886425

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
fixes:
3+
- |
4+
`Bug 1851545 <https://bugs.launchpad.net/nova/+bug/1851545>`_, wherein
5+
unshelving an instance with SRIOV Neutron ports did not update the port
6+
binding's ``pci_slot`` and could cause libvirt PCI conflicts, has been
7+
fixed.
8+
9+
.. important:: Constraints in the fix's implementation mean that it only
10+
applies to instances booted **after** it has been applied. Existing
11+
instances will still experience bug 1851545 after being shelved and
12+
unshelved, even with the fix applied.

0 commit comments

Comments
 (0)