Skip to content

Commit e41962f

Browse files
committed
Do not untrack resources of a server being unshelved
This patch concerns the time when a VM is being unshelved and the compute manager set the task_state to spawning, claimed resources of the VM and then called driver.spawn(). So the instance is in vm_state SHELVED_OFFLOADED, task_state spawning. If at this point a new update_available_resource periodic job is started that collects all the instances assigned to the node to calculate resource usage. However the calculation assumed that a VM in SHELVED_OFFLOADED state does not need resource allocation on the node (probably being removed from the node as it is offloaded) and deleted the resource claim. Given all this we ended up with the VM spawned successfully but having lost the resource claim on the node. This patch changes what we do in vm_state SHELVED_OFFLOADED, task_state spawning. We no longer delete the resource claim in this state and keep tracking the resource in stats. Conflicts: nova/compute/vm_states.py conflict with 8c2e765 Change-Id: I8c9944810c09d501a6d3f60f095d9817b756872d Closes-Bug: #2025480 (cherry picked from commit f1dc4ec) (cherry picked from commit 4239d1f) (cherry picked from commit 683ecc0)
1 parent 7422642 commit e41962f

File tree

6 files changed

+37
-11
lines changed

6 files changed

+37
-11
lines changed

nova/compute/manager.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6809,9 +6809,9 @@ def _shelve_offload_instance(self, context, instance, clean_shutdown,
68096809

68106810
instance.power_state = current_power_state
68116811
# NOTE(mriedem): The vm_state has to be set before updating the
6812-
# resource tracker, see vm_states.ALLOW_RESOURCE_REMOVAL. The host/node
6813-
# values cannot be nulled out until after updating the resource tracker
6814-
# though.
6812+
# resource tracker, see vm_states.allow_resource_removal(). The
6813+
# host/node values cannot be nulled out until after updating the
6814+
# resource tracker though.
68156815
instance.vm_state = vm_states.SHELVED_OFFLOADED
68166816
instance.task_state = None
68176817
instance.save(expected_task_state=[task_states.SHELVING,

nova/compute/resource_tracker.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1546,7 +1546,8 @@ def _update_usage_from_instance(self, context, instance, nodename,
15461546
# NOTE(sfinucan): Both brand new instances as well as instances that
15471547
# are being unshelved will have is_new_instance == True
15481548
is_removed_instance = not is_new_instance and (is_removed or
1549-
instance['vm_state'] in vm_states.ALLOW_RESOURCE_REMOVAL)
1549+
vm_states.allow_resource_removal(
1550+
vm_state=instance['vm_state'], task_state=instance.task_state))
15501551

15511552
if is_new_instance:
15521553
self.tracked_instances.add(uuid)
@@ -1605,7 +1606,9 @@ def _update_usage_from_instances(self, context, instances, nodename):
16051606

16061607
instance_by_uuid = {}
16071608
for instance in instances:
1608-
if instance.vm_state not in vm_states.ALLOW_RESOURCE_REMOVAL:
1609+
if not vm_states.allow_resource_removal(
1610+
vm_state=instance['vm_state'],
1611+
task_state=instance.task_state):
16091612
self._update_usage_from_instance(context, instance, nodename)
16101613
instance_by_uuid[instance.uuid] = instance
16111614
return instance_by_uuid

nova/compute/stats.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ def update_stats_for_instance(self, instance, is_removed=False):
105105
(vm_state, task_state, os_type, project_id) = \
106106
self._extract_state_from_instance(instance)
107107

108-
if is_removed or vm_state in vm_states.ALLOW_RESOURCE_REMOVAL:
108+
if is_removed or vm_states.allow_resource_removal(
109+
vm_state=vm_state, task_state=task_state):
109110
self._decrement("num_instances")
110111
self.states.pop(uuid)
111112
else:

nova/compute/vm_states.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
See http://wiki.openstack.org/VMState
2828
"""
2929

30+
from nova.compute import task_states
3031
from nova.objects import fields
3132

3233

@@ -74,5 +75,11 @@
7475
# states we allow to trigger crash dump
7576
ALLOW_TRIGGER_CRASH_DUMP = [ACTIVE, PAUSED, RESCUED, RESIZED, ERROR]
7677

77-
# states we allow resources to be freed in
78-
ALLOW_RESOURCE_REMOVAL = [DELETED, SHELVED_OFFLOADED]
78+
79+
def allow_resource_removal(vm_state, task_state=None):
80+
"""(vm_state, task_state) combinations we allow resources to be freed in"""
81+
82+
return (
83+
vm_state == DELETED or
84+
vm_state == SHELVED_OFFLOADED and task_state != task_states.SPAWNING
85+
)

nova/tests/functional/regressions/test_bug_2025480.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,5 @@ def fake_spawn(*args, **kwargs):
8282

8383
node = compute_node.ComputeNode.get_by_nodename(
8484
context.get_admin_context(), 'compute1')
85-
# This is the bug, the instance should have resources claimed
86-
# self.assertEqual(1, node.vcpus_used)
87-
self.assertEqual(0, node.vcpus_used)
85+
# After the fix, the instance should have resources claimed
86+
self.assertEqual(1, node.vcpus_used)

nova/tests/unit/compute/test_stats.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,22 @@ def test_update_stats_for_instance_offloaded(self):
208208
self.assertEqual(0, self.stats.num_os_type("Linux"))
209209
self.assertEqual(0, self.stats["num_vm_" + vm_states.BUILDING])
210210

211+
def test_update_stats_for_instance_being_unshelved(self):
212+
instance = self._create_instance()
213+
self.stats.update_stats_for_instance(instance)
214+
self.assertEqual(1, self.stats.num_instances_for_project("1234"))
215+
216+
instance["vm_state"] = vm_states.SHELVED_OFFLOADED
217+
instance["task_state"] = task_states.SPAWNING
218+
self.stats.update_stats_for_instance(instance)
219+
220+
self.assertEqual(1, self.stats.num_instances)
221+
self.assertEqual(1, self.stats.num_instances_for_project(1234))
222+
self.assertEqual(1, self.stats["num_os_type_Linux"])
223+
self.assertEqual(1, self.stats["num_vm_%s" %
224+
vm_states.SHELVED_OFFLOADED])
225+
self.assertEqual(1, self.stats["num_task_%s" % task_states.SPAWNING])
226+
211227
def test_io_workload(self):
212228
vms = [vm_states.ACTIVE, vm_states.BUILDING, vm_states.PAUSED]
213229
tasks = [task_states.RESIZE_MIGRATING, task_states.REBUILDING,

0 commit comments

Comments
 (0)