Skip to content

Commit 750b364

Browse files
committed
Clean up when queued live migration aborted
This patch solves bug #1949808 and bug #1960412 by tuning live_migration_abort() function and adding calls to: - remove placement allocations for live migration; - remove INACTIVE port bindings against destination compute node; - restore instance's state. Related unit test was adjusted and related functional tests were fixed. Closes-bug: #1949808 Closes-bug: #1960412 Change-Id: Ic97eff86f580bff67b1f02c8eeb60c4cf4181e6a (cherry picked from commit 219520d) (cherry picked from commit 8670ca8)
1 parent 3d69804 commit 750b364

File tree

3 files changed

+69
-39
lines changed

3 files changed

+69
-39
lines changed

nova/compute/manager.py

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8580,15 +8580,41 @@ def live_migration_abort(self, context, instance, migration_id):
85808580
migration, future = (
85818581
self._waiting_live_migrations.pop(instance.uuid))
85828582
if future and future.cancel():
8583-
# If we got here, we've successfully aborted the queued
8584-
# migration and _do_live_migration won't run so we need
8585-
# to set the migration status to cancelled and send the
8586-
# notification. If Future.cancel() fails, it means
8587-
# _do_live_migration is running and the migration status
8588-
# is preparing, and _do_live_migration() itself will attempt
8589-
# to pop the queued migration, hit a KeyError, and rollback,
8590-
# set the migration to cancelled and send the
8591-
# live.migration.abort.end notification.
8583+
# If we got here, we've successfully dropped a queued
8584+
# migration from the queue, so _do_live_migration won't run
8585+
# and we only need to revert minor changes introduced by Nova
8586+
# control plane (port bindings, resource allocations and
8587+
# instance's PCI devices), restore VM's state, set the
8588+
# migration's status to cancelled and send the notification.
8589+
# If Future.cancel() fails, it means _do_live_migration is
8590+
# running and the migration status is preparing, and
8591+
# _do_live_migration() itself will attempt to pop the queued
8592+
# migration, hit a KeyError, and rollback, set the migration
8593+
# to cancelled and send the live.migration.abort.end
8594+
# notification.
8595+
self._revert_allocation(context, instance, migration)
8596+
try:
8597+
# This call will delete any inactive destination host
8598+
# port bindings.
8599+
self.network_api.setup_networks_on_host(
8600+
context, instance, host=migration.dest_compute,
8601+
teardown=True)
8602+
except exception.PortBindingDeletionFailed as e:
8603+
# Removing the inactive port bindings from the destination
8604+
# host is not critical so just log an error but don't fail.
8605+
LOG.error(
8606+
'Network cleanup failed for destination host %s '
8607+
'during live migration rollback. You may need to '
8608+
'manually clean up resources in the network service. '
8609+
'Error: %s', migration.dest_compute, str(e))
8610+
except Exception:
8611+
with excutils.save_and_reraise_exception():
8612+
LOG.exception(
8613+
'An error occurred while cleaning up networking '
8614+
'during live migration rollback.',
8615+
instance=instance)
8616+
instance.task_state = None
8617+
instance.save(expected_task_state=[task_states.MIGRATING])
85928618
self._set_migration_status(migration, 'cancelled')
85938619
except KeyError:
85948620
migration = objects.Migration.get_by_id(context, migration_id)

nova/tests/functional/libvirt/test_live_migration.py

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -117,16 +117,12 @@ def test_queued_live_migration_abort_vm_status(self):
117117
'/servers/%s/migrations/%s' % (self.server_b['id'],
118118
serverb_migration['id']))
119119
self._wait_for_migration_status(self.server_b, ['cancelled'])
120-
# Unlock live migrations and confirm that server_a becomes
121-
# active again after successful live migration
120+
# Unlock live migrations and confirm that both servers become
121+
# active again after successful (server_a) and aborted
122+
# (server_b) live migrations
122123
self.lock_live_migration.release()
123124
self._wait_for_state_change(self.server_a, 'ACTIVE')
124-
125-
# FIXME(artom) Assert the server_b never comes out of 'MIGRATING'
126-
self.assertRaises(
127-
AssertionError,
128-
self._wait_for_state_change, self.server_b, 'ACTIVE')
129-
self._wait_for_state_change(self.server_b, 'MIGRATING')
125+
self._wait_for_state_change(self.server_b, 'ACTIVE')
130126

131127

132128
class LiveMigrationQueuedAbortTestLeftoversRemoved(LiveMigrationWithLockBase):
@@ -182,36 +178,28 @@ def test_queued_live_migration_abort_leftovers_removed(self):
182178
'/servers/%s/migrations/%s' % (self.server_b['id'],
183179
migration_server_b['id']))
184180
self._wait_for_migration_status(self.server_b, ['cancelled'])
185-
# Unlock live migrations and confirm that server_a becomes
186-
# active again after successful live migration
181+
# Unlock live migrations and confirm that both servers become
182+
# active again after successful (server_a) and aborted
183+
# (server_b) live migrations
187184
self.lock_live_migration.release()
188185
self._wait_for_state_change(self.server_a, 'ACTIVE')
189186
self._wait_for_migration_status(self.server_a, ['completed'])
190-
# FIXME(astupnikov) Assert the server_b never comes out of 'MIGRATING'
191-
# This should be fixed after bug #1949808 is addressed
192-
self._wait_for_state_change(self.server_b, 'MIGRATING')
187+
self._wait_for_state_change(self.server_b, 'ACTIVE')
193188

194-
# FIXME(astupnikov) Because of bug #1960412 allocations for aborted
195-
# queued live migration (server_b) would not be removed. Allocations
196-
# for completed live migration (server_a) should be empty.
189+
# Allocations for both successful (server_a) and aborted queued live
190+
# migration (server_b) should be removed.
197191
allocations_server_a_migration = self.placement.get(
198192
'/allocations/%s' % migration_server_a['uuid']
199193
).body['allocations']
200194
self.assertEqual({}, allocations_server_a_migration)
201195
allocations_server_b_migration = self.placement.get(
202196
'/allocations/%s' % migration_server_b['uuid']
203197
).body['allocations']
204-
src_uuid = self.api.api_get(
205-
'os-hypervisors?hypervisor_hostname_pattern=%s' %
206-
self.src_hostname).body['hypervisors'][0]['id']
207-
self.assertIn(src_uuid, allocations_server_b_migration)
208-
209-
# FIXME(astupnikov) Because of bug #1960412 INACTIVE port binding
210-
# on destination host would not be removed when queued live migration
211-
# is aborted, so 2 port bindings would exist for server_b port from
212-
# Neutron's perspective.
213-
# server_a should be migrated to dest compute, server_b should still
214-
# be hosted by src compute.
198+
self.assertEqual({}, allocations_server_b_migration)
199+
200+
# INACTIVE port binding on destination host should be removed when
201+
# queued live migration is aborted, so only 1 port binding would
202+
# exist for ports attached to both servers.
215203
port_binding_server_a = copy.deepcopy(
216204
self.neutron._port_bindings[self.neutron.port_1['id']]
217205
)
@@ -220,4 +208,5 @@ def test_queued_live_migration_abort_leftovers_removed(self):
220208
port_binding_server_b = copy.deepcopy(
221209
self.neutron._port_bindings[self.neutron.port_2['id']]
222210
)
223-
self.assertEqual(2, len(port_binding_server_b))
211+
self.assertEqual(1, len(port_binding_server_b))
212+
self.assertNotIn('dest', port_binding_server_b)

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10379,19 +10379,34 @@ def test_live_migration_abort(self, mock_notify_action, mock_driver,
1037910379
action='live_migration_abort', phase='end')]
1038010380
)
1038110381

10382+
@mock.patch.object(objects.Instance, 'save')
10383+
@mock.patch.object(manager.ComputeManager, '_revert_allocation')
1038210384
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
1038310385
@mock.patch.object(objects.Migration, 'get_by_id')
1038410386
@mock.patch('nova.compute.utils.notify_about_instance_action')
1038510387
def test_live_migration_abort_queued(self, mock_notify_action,
10386-
mock_get_migration, mock_notify):
10388+
mock_get_migration, mock_notify,
10389+
mock_revert_allocations,
10390+
mock_instance_save):
1038710391
instance = objects.Instance(id=123, uuid=uuids.instance)
1038810392
migration = self._get_migration(10, 'queued', 'live-migration')
10393+
migration.dest_compute = uuids.dest
10394+
migration.dest_node = uuids.dest
1038910395
migration.save = mock.MagicMock()
1039010396
mock_get_migration.return_value = migration
1039110397
fake_future = mock.MagicMock()
1039210398
self.compute._waiting_live_migrations[instance.uuid] = (
1039310399
migration, fake_future)
10394-
self.compute.live_migration_abort(self.context, instance, migration.id)
10400+
with mock.patch.object(
10401+
self.compute.network_api,
10402+
'setup_networks_on_host') as mock_setup_net:
10403+
self.compute.live_migration_abort(
10404+
self.context, instance, migration.id)
10405+
mock_setup_net.assert_called_once_with(
10406+
self.context, instance, host=migration.dest_compute,
10407+
teardown=True)
10408+
mock_revert_allocations.assert_called_once_with(
10409+
self.context, instance, migration)
1039510410
mock_notify.assert_has_calls(
1039610411
[mock.call(self.context, instance,
1039710412
'live.migration.abort.start'),

0 commit comments

Comments
 (0)