Skip to content

Commit e43bf90

Browse files
committed
Gracefully ERROR in _init_instance if vnic_type changed
If the vnic_type of a bound port changes from "direct" to "macvtap" and then the compute service is restarted then during _init_instance nova tries to plug the vif of the changed port. However as it now has macvtap vnic_type nova tries to look up the netdev of the parent VF. Still that VF is consumed by the instance so there is no such netdev on the host OS. This error killed the compute service at startup due to unhandled exception. This patch adds the exception handler, logs an ERROR and continue initializing other instances on the host. Also this patch adds a detailed ERROR log when nova detects that the vnic_type changed during _heal_instance_info_cache periodic. Closes-Bug: #1981813 Change-Id: I1719f8eda04e8d15a3b01f0612977164c4e55e85
1 parent f8c91eb commit e43bf90

File tree

6 files changed

+250
-5
lines changed

6 files changed

+250
-5
lines changed

nova/compute/manager.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,20 @@ def _init_instance(self, context, instance):
12471247
'updated.', instance=instance)
12481248
self._set_instance_obj_error_state(instance)
12491249
return
1250+
except exception.PciDeviceNotFoundById:
1251+
# This is bug 1981813 where the bound port vnic_type has changed
1252+
# from direct to macvtap. Nova does not support that and it
1253+
# already printed an ERROR when the change is detected during
1254+
# _heal_instance_info_cache. Now we print an ERROR again and skip
1255+
# plugging the vifs but let the service startup continue to init
1256+
# the other instances
1257+
LOG.exception(
1258+
'Virtual interface plugging failed for instance. Probably the '
1259+
'vnic_type of the bound port has been changed. Nova does not '
1260+
'support such change.',
1261+
instance=instance
1262+
)
1263+
return
12501264

12511265
if instance.task_state == task_states.RESIZE_MIGRATING:
12521266
# We crashed during resize/migration, so roll back for safety

nova/network/neutron.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3356,6 +3356,25 @@ def _build_vif_model(self, context, client, current_neutron_port,
33563356
delegate_create=True,
33573357
)
33583358

3359+
def _log_error_if_vnic_type_changed(
3360+
self, port_id, old_vnic_type, new_vnic_type, instance
3361+
):
3362+
if old_vnic_type and old_vnic_type != new_vnic_type:
3363+
LOG.error(
3364+
'The vnic_type of the bound port %s has '
3365+
'been changed in neutron from "%s" to '
3366+
'"%s". Changing vnic_type of a bound port '
3367+
'is not supported by Nova. To avoid '
3368+
'breaking the connectivity of the instance '
3369+
'please change the port vnic_type back to '
3370+
'"%s".',
3371+
port_id,
3372+
old_vnic_type,
3373+
new_vnic_type,
3374+
old_vnic_type,
3375+
instance=instance
3376+
)
3377+
33593378
def _build_network_info_model(self, context, instance, networks=None,
33603379
port_ids=None, admin_client=None,
33613380
preexisting_port_ids=None,
@@ -3429,6 +3448,12 @@ def _build_network_info_model(self, context, instance, networks=None,
34293448
preexisting_port_ids)
34303449
for index, vif in enumerate(nw_info):
34313450
if vif['id'] == refresh_vif_id:
3451+
self._log_error_if_vnic_type_changed(
3452+
vif['id'],
3453+
vif['vnic_type'],
3454+
refreshed_vif['vnic_type'],
3455+
instance,
3456+
)
34323457
# Update the existing entry.
34333458
nw_info[index] = refreshed_vif
34343459
LOG.debug('Updated VIF entry in instance network '
@@ -3478,13 +3503,22 @@ def _build_network_info_model(self, context, instance, networks=None,
34783503
networks, port_ids = self._gather_port_ids_and_networks(
34793504
context, instance, networks, port_ids, client)
34803505

3506+
old_nw_info = instance.get_network_info()
34813507
nw_info = network_model.NetworkInfo()
34823508
for port_id in port_ids:
34833509
current_neutron_port = current_neutron_port_map.get(port_id)
34843510
if current_neutron_port:
34853511
vif = self._build_vif_model(
34863512
context, client, current_neutron_port, networks,
34873513
preexisting_port_ids)
3514+
for old_vif in old_nw_info:
3515+
if old_vif['id'] == port_id:
3516+
self._log_error_if_vnic_type_changed(
3517+
port_id,
3518+
old_vif['vnic_type'],
3519+
vif['vnic_type'],
3520+
instance,
3521+
)
34883522
nw_info.append(vif)
34893523
elif nw_info_refresh:
34903524
LOG.info('Port %s from network info_cache is no '

nova/tests/functional/libvirt/test_pci_sriov_servers.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,14 @@ def test_change_bound_port_vnic_type_kills_compute_at_restart(self):
10771077
self.host_mappings['compute1'].cell_mapping
10781078
) as cctxt:
10791079
compute.manager._heal_instance_info_cache(cctxt)
1080+
self.assertIn(
1081+
'The vnic_type of the bound port %s has been changed in '
1082+
'neutron from "direct" to "macvtap". Changing vnic_type of a '
1083+
'bound port is not supported by Nova. To avoid breaking the '
1084+
'connectivity of the instance please change the port '
1085+
'vnic_type back to "direct".' % port['id'],
1086+
self.stdlog.logger.output,
1087+
)
10801088

10811089
def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
10821090
# we want to fail the netdev lookup only if the pci_address is
@@ -1103,15 +1111,16 @@ def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
11031111
self.libvirt.mock_get_ifname_by_pci_address.side_effect = (
11041112
fake_get_ifname_by_pci_address
11051113
)
1106-
# This is bug 1981813 as the compute service fails to start with an
1107-
# exception.
11081114
# Nova cannot prevent the vnic_type change on a bound port. Neutron
11091115
# should prevent that instead. But the nova-compute should still
11101116
# be able to start up and only log an ERROR for this instance in
11111117
# inconsistent state.
1112-
self.assertRaises(
1113-
exception.PciDeviceNotFoundById,
1114-
self.restart_compute_service, 'compute1'
1118+
self.restart_compute_service('compute1')
1119+
self.assertIn(
1120+
'Virtual interface plugging failed for instance. Probably the '
1121+
'vnic_type of the bound port has been changed. Nova does not '
1122+
'support such change.',
1123+
self.stdlog.logger.output,
11151124
)
11161125

11171126

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,6 +1350,36 @@ def test_init_instance_with_binding_failed_vif_type(self):
13501350
self.compute._init_instance(self.context, instance)
13511351
set_error_state.assert_called_once_with(instance)
13521352

1353+
def test_init_instance_vif_plug_fails_missing_pci(self):
1354+
instance = fake_instance.fake_instance_obj(
1355+
self.context,
1356+
uuid=uuids.instance,
1357+
info_cache=None,
1358+
power_state=power_state.RUNNING,
1359+
vm_state=vm_states.ACTIVE,
1360+
task_state=None,
1361+
host=self.compute.host,
1362+
expected_attrs=['info_cache'])
1363+
1364+
with test.nested(
1365+
mock.patch.object(context, 'get_admin_context',
1366+
return_value=self.context),
1367+
mock.patch.object(objects.Instance, 'get_network_info',
1368+
return_value=network_model.NetworkInfo()),
1369+
mock.patch.object(self.compute.driver, 'plug_vifs',
1370+
side_effect=exception.PciDeviceNotFoundById("pci-addr")),
1371+
mock.patch("nova.compute.manager.LOG.exception"),
1372+
) as (get_admin_context, get_nw_info, plug_vifs, log_exception):
1373+
# as this does not raise, we are sure that the compute service
1374+
# continues initializing the rest of the instances
1375+
self.compute._init_instance(self.context, instance)
1376+
log_exception.assert_called_once_with(
1377+
"Virtual interface plugging failed for instance. Probably the "
1378+
"vnic_type of the bound port has been changed. Nova does not "
1379+
"support such change.",
1380+
instance=instance
1381+
)
1382+
13531383
def _test__validate_pinning_configuration(self, supports_pcpus=True):
13541384
instance_1 = fake_instance.fake_instance_obj(
13551385
self.context, uuid=uuids.instance_1)

nova/tests/unit/network/test_neutron.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3382,6 +3382,155 @@ def test_build_network_info_model_empty(
33823382
mocked_client.list_ports.assert_called_once_with(
33833383
tenant_id=uuids.fake, device_id=uuids.instance)
33843384

3385+
@mock.patch.object(
3386+
neutronapi.API,
3387+
'_get_physnet_tunneled_info',
3388+
new=mock.Mock(return_value=(None, False)))
3389+
@mock.patch.object(
3390+
neutronapi.API,
3391+
'_get_preexisting_port_ids',
3392+
new=mock.Mock(return_value=[]))
3393+
@mock.patch.object(
3394+
neutronapi.API,
3395+
'_get_subnets_from_port',
3396+
new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
3397+
@mock.patch.object(
3398+
neutronapi.API,
3399+
'_get_floating_ips_by_fixed_and_port',
3400+
new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
3401+
@mock.patch.object(neutronapi, 'get_client')
3402+
def test_build_network_info_model_full_vnic_type_change(
3403+
self, mock_get_client
3404+
):
3405+
mocked_client = mock.create_autospec(client.Client)
3406+
mock_get_client.return_value = mocked_client
3407+
fake_inst = objects.Instance()
3408+
fake_inst.project_id = uuids.fake
3409+
fake_inst.uuid = uuids.instance
3410+
fake_ports = [
3411+
{
3412+
"id": "port1",
3413+
"network_id": "net-id",
3414+
"tenant_id": uuids.fake,
3415+
"admin_state_up": True,
3416+
"status": "ACTIVE",
3417+
"fixed_ips": [{"ip_address": "1.1.1.1"}],
3418+
"mac_address": "de:ad:be:ef:00:01",
3419+
"binding:vif_type": model.VIF_TYPE_BRIDGE,
3420+
"binding:vnic_type": model.VNIC_TYPE_DIRECT,
3421+
"binding:vif_details": {},
3422+
},
3423+
]
3424+
mocked_client.list_ports.return_value = {'ports': fake_ports}
3425+
fake_inst.info_cache = objects.InstanceInfoCache.new(
3426+
self.context, uuids.instance)
3427+
fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])
3428+
3429+
# build the network info first
3430+
nw_infos = self.api._build_network_info_model(
3431+
self.context,
3432+
fake_inst,
3433+
force_refresh=True,
3434+
)
3435+
3436+
self.assertEqual(1, len(nw_infos))
3437+
fake_inst.info_cache.network_info = nw_infos
3438+
3439+
# change the vnic_type of the port and rebuild the network info
3440+
fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
3441+
with mock.patch(
3442+
"nova.network.neutron.API._log_error_if_vnic_type_changed"
3443+
) as mock_log:
3444+
nw_infos = self.api._build_network_info_model(
3445+
self.context,
3446+
fake_inst,
3447+
force_refresh=True,
3448+
)
3449+
3450+
mock_log.assert_called_once_with(
3451+
fake_ports[0]["id"], "direct", "macvtap", fake_inst)
3452+
self.assertEqual(1, len(nw_infos))
3453+
3454+
@mock.patch.object(
3455+
neutronapi.API,
3456+
'_get_physnet_tunneled_info',
3457+
new=mock.Mock(return_value=(None, False)))
3458+
@mock.patch.object(
3459+
neutronapi.API,
3460+
'_get_preexisting_port_ids',
3461+
new=mock.Mock(return_value=[]))
3462+
@mock.patch.object(
3463+
neutronapi.API,
3464+
'_get_subnets_from_port',
3465+
new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
3466+
@mock.patch.object(
3467+
neutronapi.API,
3468+
'_get_floating_ips_by_fixed_and_port',
3469+
new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
3470+
@mock.patch.object(neutronapi, 'get_client')
3471+
def test_build_network_info_model_single_vnic_type_change(
3472+
self, mock_get_client
3473+
):
3474+
mocked_client = mock.create_autospec(client.Client)
3475+
mock_get_client.return_value = mocked_client
3476+
fake_inst = objects.Instance()
3477+
fake_inst.project_id = uuids.fake
3478+
fake_inst.uuid = uuids.instance
3479+
fake_ports = [
3480+
{
3481+
"id": "port1",
3482+
"network_id": "net-id",
3483+
"tenant_id": uuids.fake,
3484+
"admin_state_up": True,
3485+
"status": "ACTIVE",
3486+
"fixed_ips": [{"ip_address": "1.1.1.1"}],
3487+
"mac_address": "de:ad:be:ef:00:01",
3488+
"binding:vif_type": model.VIF_TYPE_BRIDGE,
3489+
"binding:vnic_type": model.VNIC_TYPE_DIRECT,
3490+
"binding:vif_details": {},
3491+
},
3492+
]
3493+
fake_nets = [
3494+
{
3495+
"id": "net-id",
3496+
"name": "foo",
3497+
"tenant_id": uuids.fake,
3498+
}
3499+
]
3500+
mocked_client.list_ports.return_value = {'ports': fake_ports}
3501+
fake_inst.info_cache = objects.InstanceInfoCache.new(
3502+
self.context, uuids.instance)
3503+
fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])
3504+
3505+
# build the network info first
3506+
nw_infos = self.api._build_network_info_model(
3507+
self.context,
3508+
fake_inst,
3509+
fake_nets,
3510+
[fake_ports[0]["id"]],
3511+
refresh_vif_id=fake_ports[0]["id"],
3512+
)
3513+
3514+
self.assertEqual(1, len(nw_infos))
3515+
fake_inst.info_cache.network_info = nw_infos
3516+
3517+
# change the vnic_type of the port and rebuild the network info
3518+
fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
3519+
with mock.patch(
3520+
"nova.network.neutron.API._log_error_if_vnic_type_changed"
3521+
) as mock_log:
3522+
nw_infos = self.api._build_network_info_model(
3523+
self.context,
3524+
fake_inst,
3525+
fake_nets,
3526+
[fake_ports[0]["id"]],
3527+
refresh_vif_id=fake_ports[0]["id"],
3528+
)
3529+
3530+
mock_log.assert_called_once_with(
3531+
fake_ports[0]["id"], "direct", "macvtap", fake_inst)
3532+
self.assertEqual(1, len(nw_infos))
3533+
33853534
@mock.patch.object(neutronapi, 'get_client')
33863535
def test_get_subnets_from_port(self, mock_get_client):
33873536
mocked_client = mock.create_autospec(client.Client)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
fixes:
3+
- |
4+
`Bug #1981813 <https://bugs.launchpad.net/nova/+bug/1981813>`_: Now nova
5+
detects if the ``vnic_type`` of a bound port has been changed in neutron
6+
and leaves an ERROR message in the compute service log as such change on a
7+
bound port is not supported. Also the restart of the nova-compute service
8+
will not crash any more after such port change. Nova will log an ERROR and
9+
skip the initialization of the instance with such port during the startup.

0 commit comments

Comments
 (0)