Skip to content

Commit 35112d7

Browse files
committed
Handle instance = None in _local_delete_cleanup
Change I4d3193d8401614311010ed0e055fcb3aaeeebaed added some additional local delete cleanup to prevent leaking of placement allocations. The change introduced a regression in our "delete while booting" handling as the _local_delete_cleanup required a valid instance object to do its work and in two cases, we could have instance = None from _lookup_instance if we are racing with a create request and the conductor has deleted the instance record while we are in the middle of processing the delete request. This handles those scenarios by doing two things: (1) Changing the _local_delete_cleanup and _update_queued_for_deletion methods to take an instance UUID instead of a full instance object as they really only need the UUID to do their work (2) Saving a copy of the instance UUID before doing another instance lookup which might return None and passing that UUID to the _local_delete_cleanup and _update_queued_for_deletion methods Closes-Bug: #1914777 Change-Id: I03cf285ad83e09d88cdb702a88dfed53c01610f8 (cherry picked from commit 123f626)
1 parent 4f17ea2 commit 35112d7

File tree

3 files changed

+29
-36
lines changed

3 files changed

+29
-36
lines changed

nova/compute/api.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2179,22 +2179,22 @@ def _delete_while_booting(self, context, instance):
21792179
return True
21802180
return False
21812181

2182-
def _local_delete_cleanup(self, context, instance):
2182+
def _local_delete_cleanup(self, context, instance_uuid):
21832183
# NOTE(aarents) Ensure instance allocation is cleared and instance
21842184
# mapping queued as deleted before _delete() return
21852185
try:
21862186
self.placementclient.delete_allocation_for_instance(
2187-
context, instance.uuid)
2187+
context, instance_uuid)
21882188
except exception.AllocationDeleteFailed:
21892189
LOG.info("Allocation delete failed during local delete cleanup.",
2190-
instance=instance)
2190+
instance_uuid=instance_uuid)
21912191

21922192
try:
2193-
self._update_queued_for_deletion(context, instance, True)
2193+
self._update_queued_for_deletion(context, instance_uuid, True)
21942194
except exception.InstanceMappingNotFound:
21952195
LOG.info("Instance Mapping does not exist while attempting "
21962196
"local delete cleanup.",
2197-
instance=instance)
2197+
instance_uuid=instance_uuid)
21982198

21992199
def _attempt_delete_of_buildrequest(self, context, instance):
22002200
# If there is a BuildRequest then the instance may not have been
@@ -2231,7 +2231,7 @@ def _delete(self, context, instance, delete_type, cb, **instance_attrs):
22312231
if not instance.host and not may_have_ports_or_volumes:
22322232
try:
22332233
if self._delete_while_booting(context, instance):
2234-
self._local_delete_cleanup(context, instance)
2234+
self._local_delete_cleanup(context, instance.uuid)
22352235
return
22362236
# If instance.host was not set it's possible that the Instance
22372237
# object here was pulled from a BuildRequest object and is not
@@ -2240,6 +2240,11 @@ def _delete(self, context, instance, delete_type, cb, **instance_attrs):
22402240
# properly. A lookup is attempted which will either return a
22412241
# full Instance or None if not found. If not found then it's
22422242
# acceptable to skip the rest of the delete processing.
2243+
2244+
# Save a copy of the instance UUID early, in case
2245+
# _lookup_instance returns instance = None, to pass to
2246+
# _local_delete_cleanup if needed.
2247+
instance_uuid = instance.uuid
22432248
cell, instance = self._lookup_instance(context, instance.uuid)
22442249
if cell and instance:
22452250
try:
@@ -2250,11 +2255,11 @@ def _delete(self, context, instance, delete_type, cb, **instance_attrs):
22502255
except exception.InstanceNotFound:
22512256
pass
22522257
# The instance was deleted or is already gone.
2253-
self._local_delete_cleanup(context, instance)
2258+
self._local_delete_cleanup(context, instance.uuid)
22542259
return
22552260
if not instance:
22562261
# Instance is already deleted.
2257-
self._local_delete_cleanup(context, instance)
2262+
self._local_delete_cleanup(context, instance_uuid)
22582263
return
22592264
except exception.ObjectActionError:
22602265
# NOTE(melwitt): This means the instance.host changed
@@ -2267,7 +2272,7 @@ def _delete(self, context, instance, delete_type, cb, **instance_attrs):
22672272
cell, instance = self._lookup_instance(context, instance.uuid)
22682273
if not instance:
22692274
# Instance is already deleted
2270-
self._local_delete_cleanup(context, instance)
2275+
self._local_delete_cleanup(context, instance_uuid)
22712276
return
22722277

22732278
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
@@ -2311,7 +2316,7 @@ def _delete(self, context, instance, delete_type, cb, **instance_attrs):
23112316
'field, its vm_state is %(state)s.',
23122317
{'state': instance.vm_state},
23132318
instance=instance)
2314-
self._local_delete_cleanup(context, instance)
2319+
self._local_delete_cleanup(context, instance.uuid)
23152320
return
23162321
except exception.ObjectActionError as ex:
23172322
# The instance's host likely changed under us as
@@ -2496,7 +2501,7 @@ def _local_delete(self, context, instance, bdms, delete_type, cb):
24962501
instance.destroy()
24972502

24982503
@staticmethod
2499-
def _update_queued_for_deletion(context, instance, qfd):
2504+
def _update_queued_for_deletion(context, instance_uuid, qfd):
25002505
# NOTE(tssurya): We query the instance_mapping record of this instance
25012506
# and update the queued_for_delete flag to True (or False according to
25022507
# the state of the instance). This just means that the instance is
@@ -2505,7 +2510,7 @@ def _update_queued_for_deletion(context, instance, qfd):
25052510
# value could be stale which is fine, considering its use is only
25062511
# during down cell (desperate) situation.
25072512
im = objects.InstanceMapping.get_by_instance_uuid(context,
2508-
instance.uuid)
2513+
instance_uuid)
25092514
im.queued_for_delete = qfd
25102515
im.save()
25112516

@@ -2517,7 +2522,7 @@ def _do_delete(self, context, instance, bdms, local=False):
25172522
instance.save()
25182523
else:
25192524
self.compute_rpcapi.terminate_instance(context, instance, bdms)
2520-
self._update_queued_for_deletion(context, instance, True)
2525+
self._update_queued_for_deletion(context, instance.uuid, True)
25212526

25222527
def _do_soft_delete(self, context, instance, bdms, local=False):
25232528
if local:
@@ -2527,7 +2532,7 @@ def _do_soft_delete(self, context, instance, bdms, local=False):
25272532
instance.save()
25282533
else:
25292534
self.compute_rpcapi.soft_delete_instance(context, instance)
2530-
self._update_queued_for_deletion(context, instance, True)
2535+
self._update_queued_for_deletion(context, instance.uuid, True)
25312536

25322537
# NOTE(maoy): we allow delete to be called no matter what vm_state says.
25332538
@check_instance_lock
@@ -2580,7 +2585,7 @@ def restore(self, context, instance):
25802585
instance.task_state = None
25812586
instance.deleted_at = None
25822587
instance.save(expected_task_state=[None])
2583-
self._update_queued_for_deletion(context, instance, False)
2588+
self._update_queued_for_deletion(context, instance.uuid, False)
25842589

25852590
@check_instance_lock
25862591
@check_instance_state(task_state=None,

nova/tests/functional/regressions/test_bug_1914777.py

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from nova import objects
1818
from nova import test
1919
from nova.tests import fixtures as nova_fixtures
20-
from nova.tests.functional.api import client
2120
from nova.tests.functional import integrated_helpers
2221
from nova.tests.unit import policy_fixture
2322

@@ -79,14 +78,8 @@ def test_build_request_and_instance_not_found(self, mock_get_br):
7978
# while booting" in compute/api fails after a different request has
8079
# deleted it.
8180
br_not_found = exception.BuildRequestNotFound(uuid=self.server['id'])
82-
mock_get_br.side_effect = [self.br, br_not_found]
83-
# FIXME(melwitt): Delete request fails due to the AttributeError.
84-
ex = self.assertRaises(
85-
client.OpenStackApiException, self._delete_server, self.server)
86-
self.assertEqual(500, ex.response.status_code)
87-
self.assertIn('AttributeError', str(ex))
88-
# FIXME(melwitt): Uncomment when the bug is fixed.
89-
# self._delete_server(self.server)
81+
mock_get_br.side_effect = [self.br, br_not_found, br_not_found]
82+
self._delete_server(self.server)
9083

9184
@mock.patch('nova.objects.build_request.BuildRequest.get_by_instance_uuid')
9285
@mock.patch('nova.objects.InstanceMapping.get_by_instance_uuid')
@@ -119,15 +112,15 @@ def test_deleting_instance_at_the_same_time(self, mock_get_i, mock_get_im,
119112
context=self.ctxt, instance_uuid=self.server['id'],
120113
cell_mapping=self.cell_mappings['cell1'])
121114
mock_get_im.side_effect = [
122-
no_cell_im, has_cell_im, has_cell_im, has_cell_im]
115+
no_cell_im, has_cell_im, has_cell_im, has_cell_im, has_cell_im]
123116
# Simulate that the instance object has been created by the conductor
124117
# in the create path while the delete request is being processed.
125118
# First lookups are before the instance has been deleted and the last
126119
# lookup is after the conductor has deleted the instance. Use the build
127120
# request to make an instance object for testing.
128121
i = self.br.get_new_instance(self.ctxt)
129122
i_not_found = exception.InstanceNotFound(instance_id=self.server['id'])
130-
mock_get_i.side_effect = [i, i, i, i_not_found]
123+
mock_get_i.side_effect = [i, i, i, i_not_found, i_not_found]
131124

132125
# Simulate that the conductor is running instance_destroy at the same
133126
# time as we are.
@@ -142,10 +135,4 @@ def fake_instance_destroy(*args, **kwargs):
142135

143136
self.stub_out(
144137
'nova.objects.instance.Instance.destroy', fake_instance_destroy)
145-
# FIXME(melwitt): Delete request fails due to the AttributeError.
146-
ex = self.assertRaises(
147-
client.OpenStackApiException, self._delete_server, self.server)
148-
self.assertEqual(500, ex.response.status_code)
149-
self.assertIn('AttributeError', str(ex))
150-
# FIXME(melwitt): Uncomment when the bug is fixed.
151-
# self._delete_server(self.server)
138+
self._delete_server(self.server)

nova/tests/unit/compute/test_api.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4211,7 +4211,7 @@ def test_restore_by_admin(self, update_qfd, action_start, instance_save,
42114211
'cores': 1 + instance.flavor.vcpus,
42124212
'ram': 512 + instance.flavor.memory_mb},
42134213
project_id=instance.project_id)
4214-
update_qfd.assert_called_once_with(admin_context, instance, False)
4214+
update_qfd.assert_called_once_with(admin_context, instance.uuid, False)
42154215

42164216
@mock.patch('nova.objects.Quotas.get_all_by_project_and_user',
42174217
new=mock.MagicMock())
@@ -4252,7 +4252,7 @@ def test_restore_by_instance_owner(self, update_qfd, action_start,
42524252
'cores': 1 + instance.flavor.vcpus,
42534253
'ram': 512 + instance.flavor.memory_mb},
42544254
project_id=instance.project_id)
4255-
update_qfd.assert_called_once_with(self.context, instance, False)
4255+
update_qfd.assert_called_once_with(self.context, instance.uuid, False)
42564256

42574257
@mock.patch.object(objects.InstanceAction, 'action_start')
42584258
def test_external_instance_event(self, mock_action_start):
@@ -5971,7 +5971,8 @@ def test_update_queued_for_deletion(self, mock_get, mock_save):
59715971
inst = objects.Instance(uuid=uuid)
59725972
im = objects.InstanceMapping(instance_uuid=uuid)
59735973
mock_get.return_value = im
5974-
self.compute_api._update_queued_for_deletion(self.context, inst, True)
5974+
self.compute_api._update_queued_for_deletion(
5975+
self.context, inst.uuid, True)
59755976
self.assertTrue(im.queued_for_delete)
59765977
mock_get.assert_called_once_with(self.context, inst.uuid)
59775978
mock_save.assert_called_once_with()

0 commit comments

Comments
 (0)