Skip to content

Commit a28c827

Browse files
committed
Gracefully ERROR in _init_instance if vnic_type changed
If the vnic_type of a bound port changes from "direct" to "macvtap" and then the compute service is restarted then during _init_instance nova tries to plug the vif of the changed port. However as it now has macvtap vnic_type nova tries to look up the netdev of the parent VF. Still that VF is consumed by the instance so there is no such netdev on the host OS. This error killed the compute service at startup due to unhandled exception. This patch adds the exception handler, logs an ERROR and continue initializing other instances on the host. Also this patch adds a detailed ERROR log when nova detects that the vnic_type changed during _heal_instance_info_cache periodic. Closes-Bug: #1981813 Change-Id: I1719f8eda04e8d15a3b01f0612977164c4e55e85 (cherry picked from commit e43bf90)
1 parent 4954f99 commit a28c827

File tree

6 files changed

+252
-7
lines changed

6 files changed

+252
-7
lines changed

nova/compute/manager.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,6 +1242,20 @@ def _init_instance(self, context, instance):
12421242
'updated.', instance=instance)
12431243
self._set_instance_obj_error_state(instance)
12441244
return
1245+
except exception.PciDeviceNotFoundById:
1246+
# This is bug 1981813 where the bound port vnic_type has changed
1247+
# from direct to macvtap. Nova does not support that and it
1248+
# already printed an ERROR when the change is detected during
1249+
# _heal_instance_info_cache. Now we print an ERROR again and skip
1250+
# plugging the vifs but let the service startup continue to init
1251+
# the other instances
1252+
LOG.exception(
1253+
'Virtual interface plugging failed for instance. Probably the '
1254+
'vnic_type of the bound port has been changed. Nova does not '
1255+
'support such change.',
1256+
instance=instance
1257+
)
1258+
return
12451259

12461260
if instance.task_state == task_states.RESIZE_MIGRATING:
12471261
# We crashed during resize/migration, so roll back for safety

nova/network/neutron.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3383,6 +3383,25 @@ def _build_vif_model(self, context, client, current_neutron_port,
33833383
delegate_create=True,
33843384
)
33853385

3386+
def _log_error_if_vnic_type_changed(
3387+
self, port_id, old_vnic_type, new_vnic_type, instance
3388+
):
3389+
if old_vnic_type and old_vnic_type != new_vnic_type:
3390+
LOG.error(
3391+
'The vnic_type of the bound port %s has '
3392+
'been changed in neutron from "%s" to '
3393+
'"%s". Changing vnic_type of a bound port '
3394+
'is not supported by Nova. To avoid '
3395+
'breaking the connectivity of the instance '
3396+
'please change the port vnic_type back to '
3397+
'"%s".',
3398+
port_id,
3399+
old_vnic_type,
3400+
new_vnic_type,
3401+
old_vnic_type,
3402+
instance=instance
3403+
)
3404+
33863405
def _build_network_info_model(self, context, instance, networks=None,
33873406
port_ids=None, admin_client=None,
33883407
preexisting_port_ids=None,
@@ -3456,6 +3475,12 @@ def _build_network_info_model(self, context, instance, networks=None,
34563475
preexisting_port_ids)
34573476
for index, vif in enumerate(nw_info):
34583477
if vif['id'] == refresh_vif_id:
3478+
self._log_error_if_vnic_type_changed(
3479+
vif['id'],
3480+
vif['vnic_type'],
3481+
refreshed_vif['vnic_type'],
3482+
instance,
3483+
)
34593484
# Update the existing entry.
34603485
nw_info[index] = refreshed_vif
34613486
LOG.debug('Updated VIF entry in instance network '
@@ -3505,13 +3530,22 @@ def _build_network_info_model(self, context, instance, networks=None,
35053530
networks, port_ids = self._gather_port_ids_and_networks(
35063531
context, instance, networks, port_ids, client)
35073532

3533+
old_nw_info = instance.get_network_info()
35083534
nw_info = network_model.NetworkInfo()
35093535
for port_id in port_ids:
35103536
current_neutron_port = current_neutron_port_map.get(port_id)
35113537
if current_neutron_port:
35123538
vif = self._build_vif_model(
35133539
context, client, current_neutron_port, networks,
35143540
preexisting_port_ids)
3541+
for old_vif in old_nw_info:
3542+
if old_vif['id'] == port_id:
3543+
self._log_error_if_vnic_type_changed(
3544+
port_id,
3545+
old_vif['vnic_type'],
3546+
vif['vnic_type'],
3547+
instance,
3548+
)
35153549
nw_info.append(vif)
35163550
elif nw_info_refresh:
35173551
LOG.info('Port %s from network info_cache is no '

nova/tests/functional/libvirt/test_pci_sriov_servers.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -986,6 +986,14 @@ def test_change_bound_port_vnic_type_kills_compute_at_restart(self):
986986
self.host_mappings['compute1'].cell_mapping
987987
) as cctxt:
988988
compute.manager._heal_instance_info_cache(cctxt)
989+
self.assertIn(
990+
'The vnic_type of the bound port %s has been changed in '
991+
'neutron from "direct" to "macvtap". Changing vnic_type of a '
992+
'bound port is not supported by Nova. To avoid breaking the '
993+
'connectivity of the instance please change the port '
994+
'vnic_type back to "direct".' % port['id'],
995+
self.stdlog.logger.output,
996+
)
989997

990998
def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
991999
# we want to fail the netdev lookup only if the pci_address is
@@ -1013,17 +1021,18 @@ def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
10131021
'nova.pci.utils.get_ifname_by_pci_address',
10141022
side_effect=fake_get_ifname_by_pci_address,
10151023
):
1016-
# This is bug 1981813 as the compute service fails to start with an
1017-
# exception.
10181024
# Nova cannot prevent the vnic_type change on a bound port. Neutron
10191025
# should prevent that instead. But the nova-compute should still
10201026
# be able to start up and only log an ERROR for this instance in
10211027
# inconsistent state.
1022-
self.assertRaises(
1023-
exception.PciDeviceNotFoundById,
1024-
self.restart_compute_service,
1025-
'compute1',
1026-
)
1028+
self.restart_compute_service('compute1')
1029+
1030+
self.assertIn(
1031+
'Virtual interface plugging failed for instance. Probably the '
1032+
'vnic_type of the bound port has been changed. Nova does not '
1033+
'support such change.',
1034+
self.stdlog.logger.output,
1035+
)
10271036

10281037

10291038
class SRIOVAttachDetachTest(_PCIServersTestBase):

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,6 +1306,36 @@ def test_init_instance_with_binding_failed_vif_type(self):
13061306
self.compute._init_instance(self.context, instance)
13071307
set_error_state.assert_called_once_with(instance)
13081308

1309+
def test_init_instance_vif_plug_fails_missing_pci(self):
1310+
instance = fake_instance.fake_instance_obj(
1311+
self.context,
1312+
uuid=uuids.instance,
1313+
info_cache=None,
1314+
power_state=power_state.RUNNING,
1315+
vm_state=vm_states.ACTIVE,
1316+
task_state=None,
1317+
host=self.compute.host,
1318+
expected_attrs=['info_cache'])
1319+
1320+
with test.nested(
1321+
mock.patch.object(context, 'get_admin_context',
1322+
return_value=self.context),
1323+
mock.patch.object(objects.Instance, 'get_network_info',
1324+
return_value=network_model.NetworkInfo()),
1325+
mock.patch.object(self.compute.driver, 'plug_vifs',
1326+
side_effect=exception.PciDeviceNotFoundById("pci-addr")),
1327+
mock.patch("nova.compute.manager.LOG.exception"),
1328+
) as (get_admin_context, get_nw_info, plug_vifs, log_exception):
1329+
# as this does not raise, we are sure that the compute service
1330+
# continues initializing the rest of the instances
1331+
self.compute._init_instance(self.context, instance)
1332+
log_exception.assert_called_once_with(
1333+
"Virtual interface plugging failed for instance. Probably the "
1334+
"vnic_type of the bound port has been changed. Nova does not "
1335+
"support such change.",
1336+
instance=instance
1337+
)
1338+
13091339
def _test__validate_pinning_configuration(self, supports_pcpus=True):
13101340
instance_1 = fake_instance.fake_instance_obj(
13111341
self.context, uuid=uuids.instance_1)

nova/tests/unit/network/test_neutron.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3383,6 +3383,155 @@ def test_build_network_info_model_empty(
33833383
mocked_client.list_ports.assert_called_once_with(
33843384
tenant_id=uuids.fake, device_id=uuids.instance)
33853385

3386+
@mock.patch.object(
3387+
neutronapi.API,
3388+
'_get_physnet_tunneled_info',
3389+
new=mock.Mock(return_value=(None, False)))
3390+
@mock.patch.object(
3391+
neutronapi.API,
3392+
'_get_preexisting_port_ids',
3393+
new=mock.Mock(return_value=[]))
3394+
@mock.patch.object(
3395+
neutronapi.API,
3396+
'_get_subnets_from_port',
3397+
new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
3398+
@mock.patch.object(
3399+
neutronapi.API,
3400+
'_get_floating_ips_by_fixed_and_port',
3401+
new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
3402+
@mock.patch.object(neutronapi, 'get_client')
3403+
def test_build_network_info_model_full_vnic_type_change(
3404+
self, mock_get_client
3405+
):
3406+
mocked_client = mock.create_autospec(client.Client)
3407+
mock_get_client.return_value = mocked_client
3408+
fake_inst = objects.Instance()
3409+
fake_inst.project_id = uuids.fake
3410+
fake_inst.uuid = uuids.instance
3411+
fake_ports = [
3412+
{
3413+
"id": "port1",
3414+
"network_id": "net-id",
3415+
"tenant_id": uuids.fake,
3416+
"admin_state_up": True,
3417+
"status": "ACTIVE",
3418+
"fixed_ips": [{"ip_address": "1.1.1.1"}],
3419+
"mac_address": "de:ad:be:ef:00:01",
3420+
"binding:vif_type": model.VIF_TYPE_BRIDGE,
3421+
"binding:vnic_type": model.VNIC_TYPE_DIRECT,
3422+
"binding:vif_details": {},
3423+
},
3424+
]
3425+
mocked_client.list_ports.return_value = {'ports': fake_ports}
3426+
fake_inst.info_cache = objects.InstanceInfoCache.new(
3427+
self.context, uuids.instance)
3428+
fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])
3429+
3430+
# build the network info first
3431+
nw_infos = self.api._build_network_info_model(
3432+
self.context,
3433+
fake_inst,
3434+
force_refresh=True,
3435+
)
3436+
3437+
self.assertEqual(1, len(nw_infos))
3438+
fake_inst.info_cache.network_info = nw_infos
3439+
3440+
# change the vnic_type of the port and rebuild the network info
3441+
fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
3442+
with mock.patch(
3443+
"nova.network.neutron.API._log_error_if_vnic_type_changed"
3444+
) as mock_log:
3445+
nw_infos = self.api._build_network_info_model(
3446+
self.context,
3447+
fake_inst,
3448+
force_refresh=True,
3449+
)
3450+
3451+
mock_log.assert_called_once_with(
3452+
fake_ports[0]["id"], "direct", "macvtap", fake_inst)
3453+
self.assertEqual(1, len(nw_infos))
3454+
3455+
@mock.patch.object(
3456+
neutronapi.API,
3457+
'_get_physnet_tunneled_info',
3458+
new=mock.Mock(return_value=(None, False)))
3459+
@mock.patch.object(
3460+
neutronapi.API,
3461+
'_get_preexisting_port_ids',
3462+
new=mock.Mock(return_value=[]))
3463+
@mock.patch.object(
3464+
neutronapi.API,
3465+
'_get_subnets_from_port',
3466+
new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
3467+
@mock.patch.object(
3468+
neutronapi.API,
3469+
'_get_floating_ips_by_fixed_and_port',
3470+
new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
3471+
@mock.patch.object(neutronapi, 'get_client')
3472+
def test_build_network_info_model_single_vnic_type_change(
3473+
self, mock_get_client
3474+
):
3475+
mocked_client = mock.create_autospec(client.Client)
3476+
mock_get_client.return_value = mocked_client
3477+
fake_inst = objects.Instance()
3478+
fake_inst.project_id = uuids.fake
3479+
fake_inst.uuid = uuids.instance
3480+
fake_ports = [
3481+
{
3482+
"id": "port1",
3483+
"network_id": "net-id",
3484+
"tenant_id": uuids.fake,
3485+
"admin_state_up": True,
3486+
"status": "ACTIVE",
3487+
"fixed_ips": [{"ip_address": "1.1.1.1"}],
3488+
"mac_address": "de:ad:be:ef:00:01",
3489+
"binding:vif_type": model.VIF_TYPE_BRIDGE,
3490+
"binding:vnic_type": model.VNIC_TYPE_DIRECT,
3491+
"binding:vif_details": {},
3492+
},
3493+
]
3494+
fake_nets = [
3495+
{
3496+
"id": "net-id",
3497+
"name": "foo",
3498+
"tenant_id": uuids.fake,
3499+
}
3500+
]
3501+
mocked_client.list_ports.return_value = {'ports': fake_ports}
3502+
fake_inst.info_cache = objects.InstanceInfoCache.new(
3503+
self.context, uuids.instance)
3504+
fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])
3505+
3506+
# build the network info first
3507+
nw_infos = self.api._build_network_info_model(
3508+
self.context,
3509+
fake_inst,
3510+
fake_nets,
3511+
[fake_ports[0]["id"]],
3512+
refresh_vif_id=fake_ports[0]["id"],
3513+
)
3514+
3515+
self.assertEqual(1, len(nw_infos))
3516+
fake_inst.info_cache.network_info = nw_infos
3517+
3518+
# change the vnic_type of the port and rebuild the network info
3519+
fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
3520+
with mock.patch(
3521+
"nova.network.neutron.API._log_error_if_vnic_type_changed"
3522+
) as mock_log:
3523+
nw_infos = self.api._build_network_info_model(
3524+
self.context,
3525+
fake_inst,
3526+
fake_nets,
3527+
[fake_ports[0]["id"]],
3528+
refresh_vif_id=fake_ports[0]["id"],
3529+
)
3530+
3531+
mock_log.assert_called_once_with(
3532+
fake_ports[0]["id"], "direct", "macvtap", fake_inst)
3533+
self.assertEqual(1, len(nw_infos))
3534+
33863535
@mock.patch.object(neutronapi, 'get_client')
33873536
def test_get_subnets_from_port(self, mock_get_client):
33883537
mocked_client = mock.create_autospec(client.Client)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
fixes:
3+
- |
4+
`Bug #1981813 <https://bugs.launchpad.net/nova/+bug/1981813>`_: Now nova
5+
detects if the ``vnic_type`` of a bound port has been changed in neutron
6+
and leaves an ERROR message in the compute service log as such change on a
7+
bound port is not supported. Also the restart of the nova-compute service
8+
will not crash any more after such port change. Nova will log an ERROR and
9+
skip the initialization of the instance with such port during the startup.

0 commit comments

Comments
 (0)