Skip to content

Commit 3749633

Browse files
authored
Merge pull request #23 from stackhpc/upstream/yoga-2023-01-30
Synchronise yoga with upstream
2 parents a52be94 + c07495d commit 3749633

File tree

11 files changed

+378
-5
lines changed

11 files changed

+378
-5
lines changed

nova/compute/manager.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,6 +1242,20 @@ def _init_instance(self, context, instance):
12421242
'updated.', instance=instance)
12431243
self._set_instance_obj_error_state(instance)
12441244
return
1245+
except exception.PciDeviceNotFoundById:
1246+
# This is bug 1981813 where the bound port vnic_type has changed
1247+
# from direct to macvtap. Nova does not support that and it
1248+
# already printed an ERROR when the change is detected during
1249+
# _heal_instance_info_cache. Now we print an ERROR again and skip
1250+
# plugging the vifs but let the service startup continue to init
1251+
# the other instances
1252+
LOG.exception(
1253+
'Virtual interface plugging failed for instance. Probably the '
1254+
'vnic_type of the bound port has been changed. Nova does not '
1255+
'support such change.',
1256+
instance=instance
1257+
)
1258+
return
12451259

12461260
if instance.task_state == task_states.RESIZE_MIGRATING:
12471261
# We crashed during resize/migration, so roll back for safety

nova/conductor/tasks/live_migrate.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,9 @@ def _check_compatible_with_source_hypervisor(self, destination):
347347

348348
source_version = source_info.hypervisor_version
349349
destination_version = destination_info.hypervisor_version
350-
if source_version > destination_version:
351-
raise exception.DestinationHypervisorTooOld()
350+
if not CONF.workarounds.skip_hypervisor_version_check_on_lm:
351+
if source_version > destination_version:
352+
raise exception.DestinationHypervisorTooOld()
352353
return source_info, destination_info
353354

354355
def _call_livem_checks_on_host(self, destination, provider_mapping):

nova/conf/workarounds.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,13 @@
409409
with the destination host. When using QEMU >= 2.9 and libvirt >=
410410
4.4.0, libvirt will do the correct thing with respect to checking CPU
411411
compatibility on the destination host during live migration.
412+
"""),
413+
cfg.BoolOpt(
414+
'skip_hypervisor_version_check_on_lm',
415+
default=False,
416+
help="""
417+
When this is enabled, it will skip version-checking of hypervisors
418+
during live migration.
412419
"""),
413420
]
414421

nova/network/neutron.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3383,6 +3383,25 @@ def _build_vif_model(self, context, client, current_neutron_port,
33833383
delegate_create=True,
33843384
)
33853385

3386+
def _log_error_if_vnic_type_changed(
3387+
self, port_id, old_vnic_type, new_vnic_type, instance
3388+
):
3389+
if old_vnic_type and old_vnic_type != new_vnic_type:
3390+
LOG.error(
3391+
'The vnic_type of the bound port %s has '
3392+
'been changed in neutron from "%s" to '
3393+
'"%s". Changing vnic_type of a bound port '
3394+
'is not supported by Nova. To avoid '
3395+
'breaking the connectivity of the instance '
3396+
'please change the port vnic_type back to '
3397+
'"%s".',
3398+
port_id,
3399+
old_vnic_type,
3400+
new_vnic_type,
3401+
old_vnic_type,
3402+
instance=instance
3403+
)
3404+
33863405
def _build_network_info_model(self, context, instance, networks=None,
33873406
port_ids=None, admin_client=None,
33883407
preexisting_port_ids=None,
@@ -3456,6 +3475,12 @@ def _build_network_info_model(self, context, instance, networks=None,
34563475
preexisting_port_ids)
34573476
for index, vif in enumerate(nw_info):
34583477
if vif['id'] == refresh_vif_id:
3478+
self._log_error_if_vnic_type_changed(
3479+
vif['id'],
3480+
vif['vnic_type'],
3481+
refreshed_vif['vnic_type'],
3482+
instance,
3483+
)
34593484
# Update the existing entry.
34603485
nw_info[index] = refreshed_vif
34613486
LOG.debug('Updated VIF entry in instance network '
@@ -3505,13 +3530,22 @@ def _build_network_info_model(self, context, instance, networks=None,
35053530
networks, port_ids = self._gather_port_ids_and_networks(
35063531
context, instance, networks, port_ids, client)
35073532

3533+
old_nw_info = instance.get_network_info()
35083534
nw_info = network_model.NetworkInfo()
35093535
for port_id in port_ids:
35103536
current_neutron_port = current_neutron_port_map.get(port_id)
35113537
if current_neutron_port:
35123538
vif = self._build_vif_model(
35133539
context, client, current_neutron_port, networks,
35143540
preexisting_port_ids)
3541+
for old_vif in old_nw_info:
3542+
if old_vif['id'] == port_id:
3543+
self._log_error_if_vnic_type_changed(
3544+
port_id,
3545+
old_vif['vnic_type'],
3546+
vif['vnic_type'],
3547+
instance,
3548+
)
35153549
nw_info.append(vif)
35163550
elif nw_info_refresh:
35173551
LOG.info('Port %s from network info_cache is no '

nova/tests/fixtures/libvirt.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2225,9 +2225,12 @@ def setUp(self):
22252225

22262226
# libvirt driver needs to call out to the filesystem to get the
22272227
# parent_ifname for the SRIOV VFs.
2228-
self.useFixture(fixtures.MockPatch(
2229-
'nova.pci.utils.get_ifname_by_pci_address',
2230-
return_value='fake_pf_interface_name'))
2228+
self.mock_get_ifname_by_pci_address = self.useFixture(
2229+
fixtures.MockPatch(
2230+
"nova.pci.utils.get_ifname_by_pci_address",
2231+
return_value="fake_pf_interface_name",
2232+
)
2233+
).mock
22312234

22322235
self.useFixture(fixtures.MockPatch(
22332236
'nova.pci.utils.get_mac_by_pci_address',

nova/tests/functional/libvirt/test_pci_sriov_servers.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import nova
3030
from nova import context
31+
from nova import exception
3132
from nova.network import constants
3233
from nova import objects
3334
from nova.objects import fields
@@ -951,6 +952,88 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self):
951952
],
952953
)
953954

955+
def test_change_bound_port_vnic_type_kills_compute_at_restart(self):
956+
"""Create a server with a direct port and change the vnic_type of the
957+
bound port to macvtap. Then restart the compute service.
958+
959+
As the vnic_type is changed on the port but the vif_type is hwveb
960+
instead of macvtap the vif plug logic will try to look up the netdev
961+
of the parent VF. However, that VF is consumed by the instance so the
962+
netdev does not exist. This causes the compute service to fail
963+
with an exception during startup
964+
"""
965+
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=2)
966+
self.start_compute(pci_info=pci_info)
967+
968+
# create a direct port
969+
port = self.neutron.network_4_port_1
970+
self.neutron.create_port({'port': port})
971+
972+
# create a server using the VF via neutron
973+
server = self._create_server(networks=[{'port': port['id']}])
974+
975+
# update the vnic_type of the port in neutron
976+
port = copy.deepcopy(port)
977+
port['binding:vnic_type'] = 'macvtap'
978+
self.neutron.update_port(port['id'], {"port": port})
979+
980+
compute = self.computes['compute1']
981+
982+
# Force an update on the instance info cache to ensure nova gets the
983+
# information about the updated port
984+
with context.target_cell(
985+
context.get_admin_context(),
986+
self.host_mappings['compute1'].cell_mapping
987+
) as cctxt:
988+
compute.manager._heal_instance_info_cache(cctxt)
989+
self.assertIn(
990+
'The vnic_type of the bound port %s has been changed in '
991+
'neutron from "direct" to "macvtap". Changing vnic_type of a '
992+
'bound port is not supported by Nova. To avoid breaking the '
993+
'connectivity of the instance please change the port '
994+
'vnic_type back to "direct".' % port['id'],
995+
self.stdlog.logger.output,
996+
)
997+
998+
def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
999+
# we want to fail the netdev lookup only if the pci_address is
1000+
# already consumed by our instance. So we look into the instance
1001+
# definition to see if the device is attached to the instance as VF
1002+
conn = compute.manager.driver._host.get_connection()
1003+
dom = conn.lookupByUUIDString(server['id'])
1004+
dev = dom._def['devices']['nics'][0]
1005+
lookup_addr = pci_addr.replace(':', '_').replace('.', '_')
1006+
if (
1007+
dev['type'] == 'hostdev' and
1008+
dev['source'] == 'pci_' + lookup_addr
1009+
):
1010+
# nova tried to look up the netdev of an already consumed VF.
1011+
# So we have to fail
1012+
raise exception.PciDeviceNotFoundById(id=pci_addr)
1013+
1014+
# We need to simulate the actual failure manually as in our functional
1015+
# environment all PCI lookups are mocked. In reality nova tries to
1016+
# look up the netdev of the pci device on the host used by the port as
1017+
# the parent of the macvtap. However, as the originally direct port is
1018+
# bound to the instance, the VF pci device is already consumed by the
1019+
# instance and therefore there is no netdev for the VF.
1020+
with mock.patch(
1021+
'nova.pci.utils.get_ifname_by_pci_address',
1022+
side_effect=fake_get_ifname_by_pci_address,
1023+
):
1024+
# Nova cannot prevent the vnic_type change on a bound port. Neutron
1025+
# should prevent that instead. But the nova-compute should still
1026+
# be able to start up and only log an ERROR for this instance in
1027+
# inconsistent state.
1028+
self.restart_compute_service('compute1')
1029+
1030+
self.assertIn(
1031+
'Virtual interface plugging failed for instance. Probably the '
1032+
'vnic_type of the bound port has been changed. Nova does not '
1033+
'support such change.',
1034+
self.stdlog.logger.output,
1035+
)
1036+
9541037

9551038
class SRIOVAttachDetachTest(_PCIServersTestBase):
9561039
# no need for aliases as these tests will request SRIOV via neutron

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,6 +1306,36 @@ def test_init_instance_with_binding_failed_vif_type(self):
13061306
self.compute._init_instance(self.context, instance)
13071307
set_error_state.assert_called_once_with(instance)
13081308

1309+
def test_init_instance_vif_plug_fails_missing_pci(self):
1310+
instance = fake_instance.fake_instance_obj(
1311+
self.context,
1312+
uuid=uuids.instance,
1313+
info_cache=None,
1314+
power_state=power_state.RUNNING,
1315+
vm_state=vm_states.ACTIVE,
1316+
task_state=None,
1317+
host=self.compute.host,
1318+
expected_attrs=['info_cache'])
1319+
1320+
with test.nested(
1321+
mock.patch.object(context, 'get_admin_context',
1322+
return_value=self.context),
1323+
mock.patch.object(objects.Instance, 'get_network_info',
1324+
return_value=network_model.NetworkInfo()),
1325+
mock.patch.object(self.compute.driver, 'plug_vifs',
1326+
side_effect=exception.PciDeviceNotFoundById("pci-addr")),
1327+
mock.patch("nova.compute.manager.LOG.exception"),
1328+
) as (get_admin_context, get_nw_info, plug_vifs, log_exception):
1329+
# as this does not raise, we are sure that the compute service
1330+
# continues initializing the rest of the instances
1331+
self.compute._init_instance(self.context, instance)
1332+
log_exception.assert_called_once_with(
1333+
"Virtual interface plugging failed for instance. Probably the "
1334+
"vnic_type of the bound port has been changed. Nova does not "
1335+
"support such change.",
1336+
instance=instance
1337+
)
1338+
13091339
def _test__validate_pinning_configuration(self, supports_pcpus=True):
13101340
instance_1 = fake_instance.fake_instance_obj(
13111341
self.context, uuid=uuids.instance_1)

nova/tests/unit/conductor/tasks/test_live_migrate.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,36 @@ def test_check_compatible_fails_with_hypervisor_too_old(
345345
mock.call(self.destination)],
346346
mock_get_info.call_args_list)
347347

348+
@mock.patch.object(live_migrate.LiveMigrationTask, '_get_compute_info')
349+
def test_skip_hypervisor_version_check_on_lm_raise_ex(self, mock_get_info):
350+
host1 = {'hypervisor_type': 'a', 'hypervisor_version': 7}
351+
host2 = {'hypervisor_type': 'a', 'hypervisor_version': 6}
352+
self.flags(group='workarounds',
353+
skip_hypervisor_version_check_on_lm=False)
354+
mock_get_info.side_effect = [objects.ComputeNode(**host1),
355+
objects.ComputeNode(**host2)]
356+
self.assertRaises(exception.DestinationHypervisorTooOld,
357+
self.task._check_compatible_with_source_hypervisor,
358+
self.destination)
359+
self.assertEqual([mock.call(self.instance_host),
360+
mock.call(self.destination)],
361+
mock_get_info.call_args_list)
362+
363+
@mock.patch.object(live_migrate.LiveMigrationTask, '_get_compute_info')
364+
def test_skip_hypervisor_version_check_on_lm_do_not_raise_ex(
365+
self, mock_get_info
366+
):
367+
host1 = {'hypervisor_type': 'a', 'hypervisor_version': 7}
368+
host2 = {'hypervisor_type': 'a', 'hypervisor_version': 6}
369+
self.flags(group='workarounds',
370+
skip_hypervisor_version_check_on_lm=True)
371+
mock_get_info.side_effect = [objects.ComputeNode(**host1),
372+
objects.ComputeNode(**host2)]
373+
self.task._check_compatible_with_source_hypervisor(self.destination)
374+
self.assertEqual([mock.call(self.instance_host),
375+
mock.call(self.destination)],
376+
mock_get_info.call_args_list)
377+
348378
@mock.patch.object(compute_rpcapi.ComputeAPI,
349379
'check_can_live_migrate_destination')
350380
def test_check_requested_destination(self, mock_check):

0 commit comments

Comments
 (0)