
Commit 9b3d69c

Zuul authored and openstack-gerrit committed
Merge "Add a WA flag waiting for vif-plugged event during reboot" into stable/victoria
2 parents 69fafb9 + c531fdc commit 9b3d69c

File tree: 5 files changed, +141 −2 lines

.zuul.yaml

Lines changed: 6 additions & 0 deletions
@@ -191,6 +191,12 @@
             # reduce the number of placement calls in steady state. Added in
             # Stein.
             resource_provider_association_refresh: 0
+          workarounds:
+            # This wa is an improvement on hard reboot that cannot be turned
+            # on unconditionally. But we know that ml2/ovs sends plug time
+            # events so we can enable this in this ovs job for vnic_type
+            # normal
+            wait_for_vif_plugged_event_during_hard_reboot: normal
         $NOVA_CONF:
           quota:
             # Added in Train.

nova/conf/workarounds.py

Lines changed: 53 additions & 0 deletions
@@ -345,6 +345,59 @@
 * :oslo.config:option:`DEFAULT.instances_path`
 * :oslo.config:option:`image_cache.subdirectory_name`
 * :oslo.config:option:`update_resources_interval`
+"""),
+    cfg.ListOpt('wait_for_vif_plugged_event_during_hard_reboot',
+        item_type=cfg.types.String(
+            choices=[
+                "normal",
+                "direct",
+                "macvtap",
+                "baremetal",
+                "direct-physical",
+                "virtio-forwarder",
+                "smart-nic",
+            ]),
+        default=[],
+        help="""
+The libvirt virt driver implements power on and hard reboot by tearing down
+every vif of the instance being rebooted then plug them again. By default nova
+does not wait for network-vif-plugged event from neutron before it lets the
+instance run. This can cause the instance to requests the IP via DHCP before
+the neutron backend has a chance to set up the networking backend after the vif
+plug.
+
+This flag defines which vifs nova expects network-vif-plugged events from
+during hard reboot. The possible values are neutron port vnic types:
+
+* normal
+* direct
+* macvtap
+* baremetal
+* direct-physical
+* virtio-forwarder
+* smart-nic
+
+Adding a ``vnic_type`` to this configuration makes Nova wait for a
+network-vif-plugged event for each of the instance's vifs having the specific
+``vnic_type`` before unpausing the instance, similarly to how new instance
+creation works.
+
+Please note that not all neutron networking backends send plug time events, for
+certain ``vnic_type`` therefore this config is empty by default.
+
+The ml2/ovs and the networking-odl backends are known to send plug time events
+for ports with ``normal`` ``vnic_type`` so it is safe to add ``normal`` to this
+config if you are using only those backends in the compute host.
+
+The neutron in-tree SRIOV backend does not reliably send network-vif-plugged
+event during plug time for ports with ``direct`` vnic_type and never sends
+that event for port with ``direct-physical`` vnic_type during plug time. For
+other ``vnic_type`` and backend pairs, please consult the developers of the
+backend.
+
+Related options:
+
+* :oslo.config:option:`DEFAULT.vif_plugging_timeout`
 """),
 ]
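
To illustrate how this kind of oslo.config list option behaves, here is a minimal standalone sketch, not nova code; the shortened choices list and the explicit group registration are assumptions made for brevity:

# Minimal sketch (assumption: standalone script, not nova code) showing how a
# ListOpt with string choices, like the one added above, is registered and read.
from oslo_config import cfg

opts = [
    cfg.ListOpt('wait_for_vif_plugged_event_during_hard_reboot',
                item_type=cfg.types.String(choices=["normal", "direct"]),
                default=[]),
]

conf = cfg.ConfigOpts()
conf.register_opts(opts, group='workarounds')
conf(args=[])  # parse with no config files so the default applies

# With no value configured, the empty default effectively disables the wait.
print(conf.workarounds.wait_for_vif_plugged_event_during_hard_reboot)  # []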

nova/tests/unit/virt/libvirt/test_driver.py

Lines changed: 42 additions & 1 deletion
@@ -16288,7 +16288,48 @@ def test_hard_reboot(self, mock_get_mdev, mock_destroy, mock_get_disk_info,
             accel_info=accel_info)
         mock_create_guest_with_network.assert_called_once_with(self.context,
             dummyxml, instance, network_info, block_device_info,
-            vifs_already_plugged=True)
+            vifs_already_plugged=True, external_events=[])
+
+    @mock.patch('oslo_utils.fileutils.ensure_tree', new=mock.Mock())
+    @mock.patch('nova.virt.libvirt.LibvirtDriver.get_info')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver._create_guest_with_network')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver._get_guest_xml')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver.destroy', new=mock.Mock())
+    @mock.patch(
+        'nova.virt.libvirt.LibvirtDriver._get_all_assigned_mediated_devices',
+        new=mock.Mock(return_value={}))
+    def test_hard_reboot_wait_for_plug(
+        self, mock_get_guest_xml, mock_create_guest_with_network, mock_get_info
+    ):
+        self.flags(
+            group="workarounds",
+            wait_for_vif_plugged_event_during_hard_reboot=["normal"])
+        self.context.auth_token = None
+        instance = objects.Instance(**self.test_instance)
+        network_info = _fake_network_info(self, num_networks=4)
+        network_info[0]["vnic_type"] = "normal"
+        network_info[1]["vnic_type"] = "direct"
+        network_info[2]["vnic_type"] = "normal"
+        network_info[3]["vnic_type"] = "direct-physical"
+        block_device_info = None
+        return_values = [hardware.InstanceInfo(state=power_state.SHUTDOWN),
+                         hardware.InstanceInfo(state=power_state.RUNNING)]
+        mock_get_info.side_effect = return_values
+        mock_get_guest_xml.return_value = mock.sentinel.xml
+
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        drvr._hard_reboot(
+            self.context, instance, network_info, block_device_info)
+
+        mock_create_guest_with_network.assert_called_once_with(
+            self.context, mock.sentinel.xml, instance, network_info,
+            block_device_info,
+            vifs_already_plugged=False,
+            external_events=[
+                ('network-vif-plugged', uuids.vif1),
+                ('network-vif-plugged', uuids.vif3),
+            ]
+        )

     @mock.patch('oslo_utils.fileutils.ensure_tree')
     @mock.patch('oslo_service.loopingcall.FixedIntervalLoopingCall')

nova/virt/libvirt/driver.py

Lines changed: 22 additions & 1 deletion
@@ -3383,11 +3383,32 @@ def _hard_reboot(self, context, instance, network_info,
         # on which vif type we're using and we are working with a stale network
         # info cache here, so won't rely on waiting for neutron plug events.
         # vifs_already_plugged=True means "do not wait for neutron plug events"
+        external_events = []
+        vifs_already_plugged = True
+        event_expected_for_vnic_types = (
+            CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot)
+        if event_expected_for_vnic_types:
+            # NOTE(gibi): We unplugged every vif during destroy above and we
+            # will replug them with _create_guest_with_network. As the
+            # workaround config has some vnic_types configured we expect
+            # vif-plugged events for every vif with those vnic_types.
+            # TODO(gibi): only wait for events if we know that the networking
+            # backend sends plug time events. For that we need to finish
+            # https://bugs.launchpad.net/neutron/+bug/1821058 first in Neutron
+            # then create a driver -> plug-time event mapping in nova.
+            external_events = [
+                ('network-vif-plugged', vif['id'])
+                for vif in network_info
+                if vif['vnic_type'] in event_expected_for_vnic_types
+            ]
+            vifs_already_plugged = False
+
         # NOTE(efried): The instance should already have a vtpm_secret_uuid
         # registered if appropriate.
         self._create_guest_with_network(
             context, xml, instance, network_info, block_device_info,
-            vifs_already_plugged=True)
+            vifs_already_plugged=vifs_already_plugged,
+            external_events=external_events)
         self._prepare_pci_devices_for_use(
             pci_manager.get_instance_pci_devs(instance, 'all'))
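
The core of the change is the filtering step above: only vifs whose vnic_type is listed in the workaround option contribute an expected network-vif-plugged event. A minimal standalone sketch of that logic follows; the plain dicts and sample vif ids stand in for nova's network_info objects and are assumptions made for illustration only:

# Minimal sketch, not nova code: derive the expected plug events from a
# network_info-like list, mirroring the driver change above.
def expected_plug_events(network_info, wait_vnic_types):
    return [
        ('network-vif-plugged', vif['id'])
        for vif in network_info
        if vif['vnic_type'] in wait_vnic_types
    ]

vifs = [
    {'id': 'vif-1', 'vnic_type': 'normal'},
    {'id': 'vif-2', 'vnic_type': 'direct'},
    {'id': 'vif-3', 'vnic_type': 'normal'},
    {'id': 'vif-4', 'vnic_type': 'direct-physical'},
]

# Only the two "normal" vifs produce events, matching the unit test above.
print(expected_plug_events(vifs, ['normal']))
# [('network-vif-plugged', 'vif-1'), ('network-vif-plugged', 'vif-3')]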

New release note file — Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+---
+issues:
+  - |
+    The libvirt virt driver in Nova implements power on and hard reboot by
+    destroying the domain first and unpluging the vifs then recreating the
+    domain and replugging the vifs. However nova does not wait for the
+    network-vif-plugged event before unpause the domain. This can cause
+    the domain to start running and requesting IP via DHCP before the
+    networking backend has finished plugging the vifs. The config option
+    [workarounds]wait_for_vif_plugged_event_during_hard_reboot has been added,
+    defaulting to an empty list, that can be used to ensure that the libvirt
+    driver waits for the network-vif-plugged event for vifs with specific
+    ``vnic_type`` before it unpauses the domain during hard reboot. This should
+    only be used if the deployment uses a networking backend that sends such
+    event for the given ``vif_type`` at vif plug time. The ml2/ovs and the
+    networking-odl Neutron backend is known to send plug time events for ports
+    with ``normal`` ``vnic_type``. For more information see
+    https://bugs.launchpad.net/nova/+bug/1946729
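
As a rough operator-facing sketch, not part of this change, enabling the workaround on a compute host that only uses ml2/ovs, together with the related timeout option referenced in the help text, might look like this in nova.conf; the timeout value shown is illustrative only:

[DEFAULT]
# Related option listed in the help text above; bounds how long nova waits
# for the expected network-vif-plugged events (value here is illustrative).
vif_plugging_timeout = 300

[workarounds]
# Wait for plug-time events for "normal" vnic_type ports, which ml2/ovs sends.
wait_for_vif_plugged_event_during_hard_reboot = normal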
