Skip to content

Commit eda4588

Browse files
committed
compute: Don't detach volumes when RescheduledException raised without retry
I8b1c05317734e14ea73dc868941351bb31210bf0 introduced a crude call to _cleanup_volumes within _do_build_and_run_instance when handling a RescheduledException exception raised from _build_and_run_instance without any retry information provided from the scheduler. This situation can arise when using the 'availability_zone' parameter to skip the scheduler by providing both a target availability_zone and host in the format of `$availability_zone:$host`. If the instance is unable to build on the compute the failure will eventually lead to _cleanup_volumes calling DriverVolumeBlockDevice.detach that will either detach (cinderv2) or delete the associated volume attachments (cinderv3) moving the volume to an `available` state, assuming it isn't multi-attached etc. The issue with this is that this behaviour is in stark contrast to that of volumes associated with instances that have failed to schedule. In that case the volumes remain marked as reserved and associated with the ERROR'd out instance until the instance itself is deleted. This change aims to align both cases by removing the call to _cleanup_volumes and in doing so keeping any volumes in a `reserved` state until the underlying instance is deleted. Note that leaving these volumes associated with ERROR'd out instances is now safe after I4dc6c8bd3bb6c135f8a698af41f5d0e026c39117 landed and now ensures that ports and volumes associated with such an instance are correctly cleaned up. Closes-Bug: #1899649 Change-Id: I5dda9e8bca5fbaae77ece12b67176945ca4d9a4c (cherry picked from commit 26c46a4)
1 parent 95fc161 commit eda4588

File tree

3 files changed

+4
-20
lines changed

3 files changed

+4
-20
lines changed

nova/compute/manager.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2230,8 +2230,6 @@ def _do_build_and_run_instance(self, context, instance, image,
22302230
instance=instance)
22312231
self._cleanup_allocated_networks(context, instance,
22322232
requested_networks)
2233-
self._cleanup_volumes(context, instance,
2234-
block_device_mapping, raise_exc=False)
22352233
compute_utils.add_instance_fault_from_exc(context,
22362234
instance, e, sys.exc_info(),
22372235
fault_message=e.kwargs['reason'])

nova/tests/functional/regressions/test_bug_1899649.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -92,19 +92,9 @@ def test_failure_to_schedule_with_host(self):
9292
self._assert_failure_and_volume_attachments(server)
9393

9494
def test_failure_to_build_with_az_and_host(self):
95-
# Assert that a volume attachments does not remain after a failure to
95+
# Assert that a volume attachments remain after a failure to
9696
# build and reschedule by providing an availability_zone *and* host,
9797
# skipping the scheduler. This is bug #1899649.
9898
self.server['availability_zone'] = 'nova:compute1'
9999
server = self.admin_api.post_server({'server': self.server})
100-
101-
# Assert the server ends up in an ERROR state
102-
self._wait_for_state_change(server, 'ERROR')
103-
104-
# FIXME(lyarwood): A single volume attachment should be present for the
105-
# instance at this stage as the volume *can* otherwise be marked as
106-
# available within Cinder if it isn't multi-attached.
107-
attachments = self.cinder.volume_to_attachment.get(self.volume_id)
108-
self.assertEqual(0, len(attachments))
109-
self.assertNotIn(
110-
self.volume_id, self.cinder.volume_ids_for_instance(server['id']))
100+
self._assert_failure_and_volume_attachments(server)

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6875,14 +6875,13 @@ def test_rescheduled_exception_with_sriov_network_allocated(self,
68756875
@mock.patch.object(objects.Instance, 'save')
68766876
@mock.patch.object(manager.ComputeManager,
68776877
'_nil_out_instance_obj_host_and_node')
6878-
@mock.patch.object(manager.ComputeManager, '_cleanup_volumes')
68796878
@mock.patch.object(manager.ComputeManager, '_cleanup_allocated_networks')
68806879
@mock.patch.object(manager.ComputeManager, '_set_instance_obj_error_state')
68816880
@mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
68826881
@mock.patch.object(manager.ComputeManager, '_build_and_run_instance')
68836882
def test_rescheduled_exception_without_retry(self,
6884-
mock_build_run, mock_add, mock_set, mock_clean_net, mock_clean_vol,
6885-
mock_nil, mock_save, mock_start, mock_finish):
6883+
mock_build_run, mock_add, mock_set, mock_clean_net, mock_nil,
6884+
mock_save, mock_start, mock_finish):
68866885
self._do_build_instance_update(mock_save)
68876886
mock_build_run.side_effect = exception.RescheduledException(reason='',
68886887
instance_uuid=self.instance.uuid)
@@ -6918,9 +6917,6 @@ def _wrapped_do_build_and_run_instance(*args, **kwargs):
69186917
self.accel_uuids)
69196918
mock_clean_net.assert_called_once_with(self.context, self.instance,
69206919
self.requested_networks)
6921-
mock_clean_vol.assert_called_once_with(self.context,
6922-
self.instance, self.block_device_mapping,
6923-
raise_exc=False)
69246920
mock_add.assert_called_once_with(self.context, self.instance,
69256921
mock.ANY, mock.ANY, fault_message=mock.ANY)
69266922
mock_nil.assert_called_once_with(self.instance)

0 commit comments

Comments
 (0)