
Commit 1c2213d

Merge pull request #13 from stackhpc/upstream/wallaby-2023-01-25
Synchronise wallaby with upstream
2 parents 60f5480 + 4f9a306 commit 1c2213d

File tree: 19 files changed (+642, -368 lines)


.zuul.yaml

Lines changed: 61 additions & 0 deletions
@@ -124,6 +124,66 @@
       # Verify the evacuation of instances with ceph ephemeral disks
       # from down compute hosts.
 
+- job:
+    name: nova-ovs-hybrid-plug
+    parent: tempest-multinode-full-py3
+    description: |
+      Run move operations, reboot, and evacuation (via the same post-run hook
+      as the nova-live-migration job) tests with the OVS network backend and
+      the "iptables_hybrid" securitygroup firewall driver, aka "hybrid plug".
+      The external event interactions between Nova and Neutron in these
+      situations have historically been fragile. This job exercises them.
+    irrelevant-files: *dsvm-irrelevant-files
+    vars:
+      tox_envlist: all
+      tempest_test_regex: (^tempest\..*compute\..*(migration|resize|reboot).*)
+      devstack_localrc:
+        Q_AGENT: openvswitch
+        Q_ML2_TENANT_NETWORK_TYPE: vxlan
+        Q_ML2_PLUGIN_MECHANISM_DRIVERS: openvswitch,linuxbridge
+        ML2_L3_PLUGIN: router
+      devstack_services:
+        # Disable OVN services
+        br-ex-tcpdump: false
+        br-int-flows: false
+        ovn-controller: false
+        ovn-northd: false
+        q-ovn-metadata-agent: false
+        # Neutron services
+        q-agt: true
+        q-dhcp: true
+        q-l3: true
+        q-meta: true
+      devstack_local_conf:
+        post-config:
+          "/$NEUTRON_CORE_PLUGIN_CONF":
+            securitygroup:
+              firewall_driver: iptables_hybrid
+    group-vars:
+      subnode:
+        devstack_localrc:
+          Q_AGENT: openvswitch
+          Q_ML2_TENANT_NETWORK_TYPE: vxlan
+          Q_ML2_PLUGIN_MECHANISM_DRIVERS: openvswitch,linuxbridge
+          ML2_L3_PLUGIN: router
+        devstack_services:
+          # Disable OVN services
+          br-ex-tcpdump: false
+          br-int-flows: false
+          ovn-controller: false
+          ovn-northd: false
+          ovs-vswitchd: false
+          ovsdb-server: false
+          q-ovn-metadata-agent: false
+          # Neutron services
+          q-agt: true
+        devstack_local_conf:
+          post-config:
+            "/$NEUTRON_CORE_PLUGIN_CONF":
+              securitygroup:
+                firewall_driver: iptables_hybrid
+    post-run: playbooks/nova-live-migration/post-run.yaml
+
 - job:
     name: nova-lvm
     parent: devstack-tempest

@@ -477,6 +537,7 @@
         - nova-lvm
         - nova-multi-cell
         - nova-next
+        - nova-ovs-hybrid-plug
        - nova-tox-validate-backport:
            voting: false
        - nova-tox-functional-py38
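For reference, the job's tempest_test_regex narrows the run to compute tests that exercise migration, resize, or reboot. A minimal sketch of how the pattern selects test names (the test names below are illustrative, not the exact list the job runs):

import re

# Pattern copied verbatim from the job definition above.
pattern = re.compile(r'(^tempest\..*compute\..*(migration|resize|reboot).*)')

candidates = [
    'tempest.api.compute.servers.test_server_actions.test_resize_server_confirm',
    'tempest.api.compute.admin.test_live_migration.test_live_block_migration',
    'tempest.api.compute.servers.test_create_server.test_verify_created_server',
    'tempest.api.network.test_ports.test_create_port',
]

for name in candidates:
    # Only tempest.*compute.* tests mentioning one of the keywords match.
    status = 'selected' if pattern.search(name) else 'skipped'
    print(status, name)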

nova/compute/manager.py

Lines changed: 103 additions & 65 deletions
@@ -4779,8 +4779,18 @@ def _finish_revert_snapshot_based_resize_at_source(
                   self.host, instance=instance)
         # TODO(mriedem): Calculate provider mappings when we support
         # cross-cell resize/migrate with ports having resource requests.
-        self._finish_revert_resize_network_migrate_finish(
-            ctxt, instance, migration, provider_mappings=None)
+        # NOTE(hanrong): we need to change migration.dest_compute to
+        # the source host temporarily.
+        # "network_api.migrate_instance_finish" will set up the network
+        # for the instance on the destination host. For revert resize,
+        # the instance will go back to the source host, so the network
+        # setup for the instance should happen on the source host.
+        # So set migration.dest_compute to the source host here.
+        with utils.temporary_mutation(
+            migration, dest_compute=migration.source_compute
+        ):
+            self.network_api.migrate_instance_finish(
+                ctxt, instance, migration, provider_mappings=None)
         network_info = self.network_api.get_instance_nw_info(ctxt, instance)
 
         # Remember that prep_snapshot_based_resize_at_source destroyed the
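Both revert paths in this change rely on nova.utils.temporary_mutation to point migration.dest_compute at the source host for the duration of the network call. A minimal sketch of that context-manager pattern, simplified to attribute access only (not nova's actual implementation):

import contextlib

@contextlib.contextmanager
def temporary_mutation(obj, **kwargs):
    # Save the current values, apply the temporary ones, and restore
    # the originals even if the wrapped block raises.
    saved = {attr: getattr(obj, attr) for attr in kwargs}
    for attr, value in kwargs.items():
        setattr(obj, attr, value)
    try:
        yield
    finally:
        for attr, value in saved.items():
            setattr(obj, attr, value)

class FakeMigration:
    source_compute = 'src-host'
    dest_compute = 'dest-host'

migration = FakeMigration()
with temporary_mutation(migration, dest_compute=migration.source_compute):
    print(migration.dest_compute)  # src-host: the network call sees the source
print(migration.dest_compute)      # dest-host: restored afterwards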
@@ -4872,50 +4882,6 @@ def revert_resize(self, context, instance, migration, request_spec):
         self.compute_rpcapi.finish_revert_resize(context, instance,
                 migration, migration.source_compute, request_spec)
 
-    def _finish_revert_resize_network_migrate_finish(
-            self, context, instance, migration, provider_mappings):
-        """Causes port binding to be updated. In some Neutron or port
-        configurations - see NetworkModel.get_bind_time_events() - we
-        expect the vif-plugged event from Neutron immediately and wait for it.
-        The rest of the time, the event is expected further along in the
-        virt driver, so we don't wait here.
-
-        :param context: The request context.
-        :param instance: The instance undergoing the revert resize.
-        :param migration: The Migration object of the resize being reverted.
-        :param provider_mappings: a dict of list of resource provider uuids
-            keyed by port uuid
-        :raises: eventlet.timeout.Timeout or
-            exception.VirtualInterfacePlugException.
-        """
-        network_info = instance.get_network_info()
-        events = []
-        deadline = CONF.vif_plugging_timeout
-        if deadline and network_info:
-            events = network_info.get_bind_time_events(migration)
-            if events:
-                LOG.debug('Will wait for bind-time events: %s', events)
-        error_cb = self._neutron_failed_migration_callback
-        try:
-            with self.virtapi.wait_for_instance_event(instance, events,
-                                                      deadline=deadline,
-                                                      error_callback=error_cb):
-                # NOTE(hanrong): we need to change migration.dest_compute to
-                # source host temporarily.
-                # "network_api.migrate_instance_finish" will setup the network
-                # for the instance on the destination host. For revert resize,
-                # the instance will back to the source host, the setup of the
-                # network for instance should be on the source host. So set
-                # the migration.dest_compute to source host at here.
-                with utils.temporary_mutation(
-                        migration, dest_compute=migration.source_compute):
-                    self.network_api.migrate_instance_finish(
-                        context, instance, migration, provider_mappings)
-        except eventlet.timeout.Timeout:
-            with excutils.save_and_reraise_exception():
-                LOG.error('Timeout waiting for Neutron events: %s', events,
-                          instance=instance)
-
     @wrap_exception()
     @reverts_task_state
     @wrap_instance_event(prefix='compute')
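The deleted helper armed virtapi.wait_for_instance_event before updating the port binding, so that a bind-time network-vif-plugged event from Neutron could be awaited under a deadline. A rough stdlib-threading analogy of that arm-then-wait pattern (the real code uses eventlet and nova's virtapi, so this is an illustration only):

import threading

class WaitForEvent:
    """Arm before triggering the action; block on exit until the event
    arrives or the deadline expires."""

    def __init__(self, deadline):
        self.deadline = deadline
        self._event = threading.Event()

    def notify(self):
        # Called when the external event (e.g. network-vif-plugged) lands.
        self._event.set()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        if exc_type is None and not self._event.wait(self.deadline):
            raise TimeoutError('timed out waiting for network-vif-plugged')
        return False

waiter = WaitForEvent(deadline=1.0)
with waiter:
    # Update the port binding here; the event listener would then call:
    waiter.notify()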
@@ -4973,8 +4939,18 @@ def _finish_revert_resize(
 
         self.network_api.setup_networks_on_host(context, instance,
                                                 migration.source_compute)
-        self._finish_revert_resize_network_migrate_finish(
-            context, instance, migration, provider_mappings)
+        # NOTE(hanrong): we need to change migration.dest_compute to
+        # the source host temporarily. "network_api.migrate_instance_finish"
+        # will set up the network for the instance on the destination host.
+        # For revert resize, the instance will go back to the source host,
+        # so the network setup for the instance should happen on the source
+        # host. So set migration.dest_compute to the source host here.
+        with utils.temporary_mutation(
+                migration, dest_compute=migration.source_compute):
+            self.network_api.migrate_instance_finish(context,
+                                                     instance,
+                                                     migration,
+                                                     provider_mappings)
         network_info = self.network_api.get_instance_nw_info(context,
                                                              instance)
 
@@ -5051,8 +5027,7 @@ def _fill_provider_mapping_based_on_allocs(
             # the provider mappings. If the instance has ports with
             # resource request then the port update will fail in
             # _update_port_binding_for_instance() called via
-            # _finish_revert_resize_network_migrate_finish() in
-            # finish_revert_resize.
+            # migrate_instance_finish() in finish_revert_resize.
             provider_mappings = None
         return provider_mappings
 
@@ -8255,8 +8230,8 @@ def pre_live_migration(self, context, instance, disk, migrate_data):
         return migrate_data
 
     @staticmethod
-    def _neutron_failed_migration_callback(event_name, instance):
-        msg = ('Neutron reported failure during migration '
+    def _neutron_failed_live_migration_callback(event_name, instance):
+        msg = ('Neutron reported failure during live migration '
               'with %(event)s for instance %(uuid)s')
        msg_args = {'event': event_name, 'uuid': instance.uuid}
        if CONF.vif_plugging_is_fatal:
@@ -8352,7 +8327,7 @@ class _BreakWaitForInstanceEvent(Exception):
            disk = None
 
        deadline = CONF.vif_plugging_timeout
-        error_cb = self._neutron_failed_migration_callback
+        error_cb = self._neutron_failed_live_migration_callback
        # In order to avoid a race with the vif plugging that the virt
        # driver does on the destination host, we register our events
        # to wait for before calling pre_live_migration. Then if the
@@ -8460,8 +8435,9 @@ def _do_live_migration(self, context, dest, instance, block_migration,
         # host attachment. We fetch BDMs before that to retain connection_info
         # and attachment_id relating to the source host for post migration
         # cleanup.
-        post_live_migration = functools.partial(self._post_live_migration,
-                                                source_bdms=source_bdms)
+        post_live_migration = functools.partial(
+            self._post_live_migration_update_host, source_bdms=source_bdms
+        )
         rollback_live_migration = functools.partial(
             self._rollback_live_migration, source_bdms=source_bdms)
 
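Here functools.partial pre-binds source_bdms so that the post-migration and rollback callbacks can later be invoked with the same positional arguments. A small standalone illustration of the idiom (function and argument values are made up):

import functools

def post_migration(ctxt, instance, dest, source_bdms=None):
    # Hypothetical stand-in for _post_live_migration_update_host.
    print('finishing on', dest, 'with', len(source_bdms), 'source BDMs')

# Pre-bind the keyword argument now; the caller later supplies the rest.
callback = functools.partial(post_migration, source_bdms=['vda', 'vdb'])
callback('ctxt', 'instance-uuid', 'dest-host')
# -> finishing on dest-host with 2 source BDMs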
@@ -8588,15 +8564,41 @@ def live_migration_abort(self, context, instance, migration_id):
             migration, future = (
                 self._waiting_live_migrations.pop(instance.uuid))
             if future and future.cancel():
-                # If we got here, we've successfully aborted the queued
-                # migration and _do_live_migration won't run so we need
-                # to set the migration status to cancelled and send the
-                # notification. If Future.cancel() fails, it means
-                # _do_live_migration is running and the migration status
-                # is preparing, and _do_live_migration() itself will attempt
-                # to pop the queued migration, hit a KeyError, and rollback,
-                # set the migration to cancelled and send the
-                # live.migration.abort.end notification.
+                # If we got here, we've successfully dropped a queued
+                # migration from the queue, so _do_live_migration won't run
+                # and we only need to revert the minor changes introduced by
+                # the Nova control plane (port bindings, resource allocations
+                # and the instance's PCI devices), restore the VM's state,
+                # set the migration's status to cancelled and send the
+                # notification. If Future.cancel() fails, it means
+                # _do_live_migration is running and the migration status is
+                # preparing; _do_live_migration() itself will then attempt to
+                # pop the queued migration, hit a KeyError, roll back, set
+                # the migration to cancelled and send the
+                # live.migration.abort.end notification.
+                self._revert_allocation(context, instance, migration)
+                try:
+                    # This call will delete any inactive destination host
+                    # port bindings.
+                    self.network_api.setup_networks_on_host(
+                        context, instance, host=migration.dest_compute,
+                        teardown=True)
+                except exception.PortBindingDeletionFailed as e:
+                    # Removing the inactive port bindings from the
+                    # destination host is not critical, so just log an
+                    # error but don't fail.
+                    LOG.error(
+                        'Network cleanup failed for destination host %s '
+                        'during live migration rollback. You may need to '
+                        'manually clean up resources in the network service. '
+                        'Error: %s', migration.dest_compute, str(e))
+                except Exception:
+                    with excutils.save_and_reraise_exception():
+                        LOG.exception(
+                            'An error occurred while cleaning up networking '
+                            'during live migration rollback.',
+                            instance=instance)
+                instance.task_state = None
+                instance.save(expected_task_state=[task_states.MIGRATING])
                 self._set_migration_status(migration, 'cancelled')
         except KeyError:
             migration = objects.Migration.get_by_id(context, migration_id)
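The branch above hinges on Future.cancel() returning True only for work that has not started executing. A quick demonstration with stdlib concurrent.futures (nova schedules these migrations through its own executor, so this is an analogy, not the actual machinery):

import time
from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=1) as pool:
    running = pool.submit(time.sleep, 1)  # picked up by the only worker
    queued = pool.submit(time.sleep, 1)   # still waiting in the queue
    time.sleep(0.1)                       # give the first task time to start
    print(running.cancel())  # False: already running, cannot be cancelled
    print(queued.cancel())   # True: dropped from the queue, never runs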
@@ -8707,6 +8709,42 @@ def _post_live_migration_remove_source_vol_connections(
                     bdm.attachment_id, self.host,
                     str(e), instance=instance)
 
+    # TODO(sean-k-mooney): add typing
+    def _post_live_migration_update_host(
+        self, ctxt, instance, dest, block_migration=False,
+        migrate_data=None, source_bdms=None
+    ):
+        try:
+            self._post_live_migration(
+                ctxt, instance, dest, block_migration, migrate_data,
+                source_bdms)
+        except Exception:
+            # Restore the instance object
+            node_name = None
+            try:
+                # Get the node name of the compute host where the instance
+                # will be running after the migration, i.e. the destination.
+                compute_node = self._get_compute_info(ctxt, dest)
+                node_name = compute_node.hypervisor_hostname
+            except exception.ComputeHostNotFound:
+                LOG.exception('Failed to get compute_info for %s', dest)
+
+            # We can never roll back from post live migration, and we can
+            # only get here if the instance is running on the dest, so
+            # ensure instance.host is set correctly and re-raise the
+            # original exception unmodified.
+            if instance.host != dest:
+                # apply saves the new fields while drop actually removes the
+                # migration context from the instance, so migration persists.
+                instance.apply_migration_context()
+                instance.drop_migration_context()
+                instance.host = dest
+                instance.task_state = None
+                instance.node = node_name
+                instance.progress = 0
+                instance.save()
+            raise
+
     @wrap_exception()
     @wrap_instance_fault
     def _post_live_migration(self, ctxt, instance, dest,
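The bare raise at the end of the except block re-raises the original exception with its traceback intact, after the instance record has been repaired. A minimal illustration of that repair-then-reraise pattern (all names hypothetical):

def post_live_migration(instance):
    raise RuntimeError('cleanup failed after the VM already moved')

instance = {'host': 'src-host'}
try:
    try:
        post_live_migration(instance)
    except Exception:
        # The VM is running on the destination regardless of the error,
        # so fix the record, then re-raise the original exception
        # unmodified (a bare `raise` preserves the traceback).
        instance['host'] = 'dest-host'
        raise
except RuntimeError as exc:
    print(instance['host'], '-', exc)  # dest-host - cleanup failed ...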
@@ -8718,7 +8756,7 @@ def _post_live_migration(self, ctxt, instance, dest,
        and mainly updating database record.
 
        :param ctxt: security context
-        :param instance: instance dict
+        :param instance: instance object
        :param dest: destination host
        :param block_migration: if true, prepare for block migration
        :param migrate_data: if not None, it is a dict which has data

nova/network/model.py

Lines changed: 0 additions & 25 deletions
@@ -459,17 +459,6 @@ def labeled_ips(self):
                    'ips': ips}
        return []
 
-    def has_bind_time_event(self, migration):
-        """Returns whether this VIF's network-vif-plugged external event will
-        be sent by Neutron at "bind-time" - in other words, as soon as the port
-        binding is updated. This is in the context of updating the port binding
-        to a host that already has the instance in a shutoff state - in
-        practice, this means reverting either a cold migration or a
-        non-same-host resize.
-        """
-        return (self.is_hybrid_plug_enabled() and not
-                migration.is_same_host())
-
     @property
     def has_live_migration_plug_time_event(self):
        """Returns whether this VIF's network-vif-plugged external event will
@@ -538,27 +527,13 @@ def wait(self, do_raise=True):
     def json(self):
        return jsonutils.dumps(self)
 
-    def get_bind_time_events(self, migration):
-        """Returns a list of external events for any VIFs that have
-        "bind-time" events during cold migration.
-        """
-        return [('network-vif-plugged', vif['id'])
-                for vif in self if vif.has_bind_time_event(migration)]
-
     def get_live_migration_plug_time_events(self):
        """Returns a list of external events for any VIFs that have
        "plug-time" events during live migration.
        """
        return [('network-vif-plugged', vif['id'])
                for vif in self if vif.has_live_migration_plug_time_event]
 
-    def get_plug_time_events(self, migration):
-        """Returns a list of external events for any VIFs that have
-        "plug-time" events during cold migration.
-        """
-        return [('network-vif-plugged', vif['id'])
-                for vif in self if not vif.has_bind_time_event(migration)]
-
     def has_port_with_allocation(self):
        return any(vif.has_allocation() for vif in self)

nova/objects/migration.py

Lines changed: 0 additions & 3 deletions
@@ -200,9 +200,6 @@ def instance(self):
     def instance(self, instance):
        self._cached_instance = instance
 
-    def is_same_host(self):
-        return self.source_compute == self.dest_compute
-
     @property
     def is_live_migration(self):
        return self.migration_type == fields.MigrationType.LIVE_MIGRATION

nova/tests/fixtures.py

Lines changed: 5 additions & 2 deletions
@@ -1844,13 +1844,16 @@ def delete_port_binding(self, context, client, port_id, host):
 
         return fake_requests.FakeResponse(204)
 
-    def _activate_port_binding(self, port_id, host):
+    def _activate_port_binding(self, port_id, host, modify_port=False):
        # It makes sure that only one binding is active for a port
        for h, binding in self._port_bindings[port_id].items():
            if h == host:
                # NOTE(gibi): neutron returns 409 if this binding is already
                # active but nova does not depend on this behaviour yet.
                binding['status'] = 'ACTIVE'
+                if modify_port:
+                    # We need to ensure that port's binding:host_id is valid
+                    self._merge_in_active_binding(self._ports[port_id])
            else:
                binding['status'] = 'INACTIVE'
@@ -1860,7 +1863,7 @@ def activate_port_binding(self, context, client, port_id, host):
        if failure is not None:
            return failure
 
-        self._activate_port_binding(port_id, host)
+        self._activate_port_binding(port_id, host, modify_port=True)
 
        return fake_requests.FakeResponse(200)
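The fixture keeps exactly one binding ACTIVE per port, flipping every other host's binding to INACTIVE. A stripped-down sketch of that invariant on a plain dict (names hypothetical, no fixture plumbing):

def activate_port_binding(port_bindings, port_id, host):
    # Mark the binding for `host` ACTIVE and all others INACTIVE, so
    # exactly one binding stays active per port.
    for h, binding in port_bindings[port_id].items():
        binding['status'] = 'ACTIVE' if h == host else 'INACTIVE'

bindings = {'port-1': {'src-host': {'status': 'ACTIVE'},
                       'dest-host': {'status': 'INACTIVE'}}}
activate_port_binding(bindings, 'port-1', 'dest-host')
print(bindings['port-1'])
# {'src-host': {'status': 'INACTIVE'}, 'dest-host': {'status': 'ACTIVE'}}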