Skip to content

Commit 90c5190

Browse files
committed
Fix pre_live_migration rollback
During the pre live migration process, Nova performs most of the tasks related to the creation and operation of the VM in the destination host. That is done without interrupting any of the hardware in the source host. If the pre_live_migration fails, those same operations should be rolled back. Currently nova is sharing the _rollback_live_migration for both live and pre_live migration rollbacks, and that is causing the source host to try to re-attach network interfaces that were never actually detached from it. This patch fixes that by adding a conditional so that nova can follow different paths for live migration and pre_live_migration rollbacks. Closes-bug: #1944619 Change-Id: I784190ac356695dd508e0ad8ec31d8eaa3ebee56 (cherry picked from commit 63ffba7) (cherry picked from commit 29b94aa)
1 parent 1059921 commit 90c5190

File tree

4 files changed

+27
-13
lines changed

4 files changed

+27
-13
lines changed

nova/compute/manager.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8309,7 +8309,8 @@ def _cleanup_pre_live_migration(self, context, dest, instance,
83098309
migrate_data.migration = migration
83108310
self._rollback_live_migration(context, instance, dest,
83118311
migrate_data=migrate_data,
8312-
source_bdms=source_bdms)
8312+
source_bdms=source_bdms,
8313+
pre_live_migration=True)
83138314

83148315
def _do_pre_live_migration_from_source(self, context, dest, instance,
83158316
block_migration, migration,
@@ -9063,7 +9064,8 @@ def _rollback_volume_bdms(self, context, bdms, original_bdms, instance):
90639064
def _rollback_live_migration(self, context, instance,
90649065
dest, migrate_data=None,
90659066
migration_status='failed',
9066-
source_bdms=None):
9067+
source_bdms=None,
9068+
pre_live_migration=False):
90679069
"""Recovers Instance/volume state from migrating -> running.
90689070

90699071
:param context: security context
@@ -9113,8 +9115,14 @@ def _rollback_live_migration(self, context, instance,
91139115
# for nova-network)
91149116
# NOTE(mriedem): This is a no-op for neutron.
91159117
self.network_api.setup_networks_on_host(context, instance, self.host)
9116-
self.driver.rollback_live_migration_at_source(context, instance,
9117-
migrate_data)
9118+
9119+
# NOTE(erlon): We should make sure that rollback_live_migration_at_src
9120+
# is not called in the pre_live_migration rollback as that will trigger
9121+
# the src host to re-attach interfaces which were not detached
9122+
# previously.
9123+
if not pre_live_migration:
9124+
self.driver.rollback_live_migration_at_source(context, instance,
9125+
migrate_data)
91189126

91199127
# NOTE(lyarwood): Fetch the current list of BDMs, disconnect any
91209128
# connected volumes from the dest and delete any volume attachments

nova/tests/functional/regressions/test_bug_1944619.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,5 @@ def test_rollback_pre_live_migration(self):
7272
self._live_migrate(self.server,
7373
migration_expected_state='failed',
7474
server_expected_state='MIGRATING')
75-
# FIXME(erlon): In the current behavior,
76-
# rollback_live_migration_at_source is called if an error happens
77-
# during the pre_live_migration phase on the destination and therefore
78-
# triggers the observed bug. rollback_live_migration_at_source should
79-
# *not* be called for when errors happen during pre_live_migration
80-
# phase.
81-
mlpr.assert_called_once()
75+
mlpr.assert_not_called()
8276
mlpp.assert_called_once()

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9533,7 +9533,8 @@ def test_live_migration_wait_vif_plugged_vif_plug_error(
95339533
self.assertEqual('error', self.migration.status)
95349534
mock_rollback_live_mig.assert_called_once_with(
95359535
self.context, self.instance, 'dest-host',
9536-
migrate_data=migrate_data, source_bdms=source_bdms)
9536+
migrate_data=migrate_data, source_bdms=source_bdms,
9537+
pre_live_migration=True)
95379538

95389539
@mock.patch('nova.compute.rpcapi.ComputeAPI.pre_live_migration')
95399540
@mock.patch('nova.compute.manager.ComputeManager._rollback_live_migration')
@@ -9568,7 +9569,8 @@ def test_live_migration_wait_vif_plugged_timeout_error(
95689569
self.assertEqual('error', self.migration.status)
95699570
mock_rollback_live_mig.assert_called_once_with(
95709571
self.context, self.instance, 'dest-host',
9571-
migrate_data=migrate_data, source_bdms=source_bdms)
9572+
migrate_data=migrate_data, source_bdms=source_bdms,
9573+
pre_live_migration=True)
95729574

95739575
@mock.patch('nova.compute.rpcapi.ComputeAPI.pre_live_migration')
95749576
@mock.patch('nova.compute.manager.ComputeManager._rollback_live_migration')
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
fixes:
3+
- |
4+
Instances with hardware offloaded ovs ports no longer lose connectivity
5+
after failed live migrations. The driver.rollback_live_migration_at_source
6+
function is no longer called during pre_live_migration rollback
7+
which previously resulted in connectivity loss following a failed live
8+
migration. See `Bug 1944619`_ for more details.
9+
10+
.. _Bug 1944619: https://bugs.launchpad.net/nova/+bug/1944619

0 commit comments

Comments
 (0)