Skip to content

Commit 683ecc0

Browse files
committed
Do not untrack resources of a server being unshelved
This patch concerns the time when a VM is being unshelved and the compute manager set the task_state to spawning, claimed resources of the VM and then called driver.spawn(). So the instance is in vm_state SHELVED_OFFLOADED, task_state spawning. If at this point a new update_available_resource periodic job is started that collects all the instances assigned to the node to calculate resource usage. However the calculation assumed that a VM in SHELVED_OFFLOADED state does not need resource allocation on the node (probably being removed from the node as it is offloaded) and deleted the resource claim. Given all this we ended up with the VM spawned successfully but having lost the resource claim on the node. This patch changes what we do in vm_state SHELVED_OFFLOADED, task_state spawning. We no longer delete the resource claim in this state and keep tracking the resource in stats. Change-Id: I8c9944810c09d501a6d3f60f095d9817b756872d Closes-Bug: #2025480 (cherry picked from commit f1dc4ec) (cherry picked from commit 4239d1f)
1 parent 23c190a commit 683ecc0

File tree

6 files changed

+38
-12
lines changed

6 files changed

+38
-12
lines changed

nova/compute/manager.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6918,9 +6918,9 @@ def _shelve_offload_instance(self, context, instance, clean_shutdown,
69186918

69196919
instance.power_state = current_power_state
69206920
# NOTE(mriedem): The vm_state has to be set before updating the
6921-
# resource tracker, see vm_states.ALLOW_RESOURCE_REMOVAL. The host/node
6922-
# values cannot be nulled out until after updating the resource tracker
6923-
# though.
6921+
# resource tracker, see vm_states.allow_resource_removal(). The
6922+
# host/node values cannot be nulled out until after updating the
6923+
# resource tracker though.
69246924
instance.vm_state = vm_states.SHELVED_OFFLOADED
69256925
instance.task_state = None
69266926
instance.save(expected_task_state=[task_states.SHELVING,

nova/compute/resource_tracker.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1539,7 +1539,8 @@ def _update_usage_from_instance(self, context, instance, nodename,
15391539
# NOTE(sfinucan): Both brand new instances as well as instances that
15401540
# are being unshelved will have is_new_instance == True
15411541
is_removed_instance = not is_new_instance and (is_removed or
1542-
instance['vm_state'] in vm_states.ALLOW_RESOURCE_REMOVAL)
1542+
vm_states.allow_resource_removal(
1543+
vm_state=instance['vm_state'], task_state=instance.task_state))
15431544

15441545
if is_new_instance:
15451546
self.tracked_instances.add(uuid)
@@ -1598,7 +1599,9 @@ def _update_usage_from_instances(self, context, instances, nodename):
15981599

15991600
instance_by_uuid = {}
16001601
for instance in instances:
1601-
if instance.vm_state not in vm_states.ALLOW_RESOURCE_REMOVAL:
1602+
if not vm_states.allow_resource_removal(
1603+
vm_state=instance['vm_state'],
1604+
task_state=instance.task_state):
16021605
self._update_usage_from_instance(context, instance, nodename)
16031606
instance_by_uuid[instance.uuid] = instance
16041607
return instance_by_uuid

nova/compute/stats.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ def update_stats_for_instance(self, instance, is_removed=False):
105105
(vm_state, task_state, os_type, project_id) = \
106106
self._extract_state_from_instance(instance)
107107

108-
if is_removed or vm_state in vm_states.ALLOW_RESOURCE_REMOVAL:
108+
if is_removed or vm_states.allow_resource_removal(
109+
vm_state=vm_state, task_state=task_state):
109110
self._decrement("num_instances")
110111
self.states.pop(uuid)
111112
else:

nova/compute/vm_states.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
See http://wiki.openstack.org/VMState
2828
"""
2929

30+
from nova.compute import task_states
3031
from nova.objects import fields
3132

3233

@@ -74,8 +75,14 @@
7475
# states we allow to trigger crash dump
7576
ALLOW_TRIGGER_CRASH_DUMP = [ACTIVE, PAUSED, RESCUED, RESIZED, ERROR]
7677

77-
# states we allow resources to be freed in
78-
ALLOW_RESOURCE_REMOVAL = [DELETED, SHELVED_OFFLOADED]
79-
8078
# states we allow for evacuate instance
8179
ALLOW_TARGET_STATES = [STOPPED]
80+
81+
82+
def allow_resource_removal(vm_state, task_state=None):
83+
"""(vm_state, task_state) combinations we allow resources to be freed in"""
84+
85+
return (
86+
vm_state == DELETED or
87+
vm_state == SHELVED_OFFLOADED and task_state != task_states.SPAWNING
88+
)

nova/tests/functional/regressions/test_bug_2025480.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,5 @@ def fake_spawn(*args, **kwargs):
8282

8383
node = compute_node.ComputeNode.get_by_nodename(
8484
context.get_admin_context(), 'compute1')
85-
# This is the bug, the instance should have resources claimed
86-
# self.assertEqual(1, node.vcpus_used)
87-
self.assertEqual(0, node.vcpus_used)
85+
# After the fix, the instance should have resources claimed
86+
self.assertEqual(1, node.vcpus_used)

nova/tests/unit/compute/test_stats.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,22 @@ def test_update_stats_for_instance_offloaded(self):
208208
self.assertEqual(0, self.stats.num_os_type("Linux"))
209209
self.assertEqual(0, self.stats["num_vm_" + vm_states.BUILDING])
210210

211+
def test_update_stats_for_instance_being_unshelved(self):
212+
instance = self._create_instance()
213+
self.stats.update_stats_for_instance(instance)
214+
self.assertEqual(1, self.stats.num_instances_for_project("1234"))
215+
216+
instance["vm_state"] = vm_states.SHELVED_OFFLOADED
217+
instance["task_state"] = task_states.SPAWNING
218+
self.stats.update_stats_for_instance(instance)
219+
220+
self.assertEqual(1, self.stats.num_instances)
221+
self.assertEqual(1, self.stats.num_instances_for_project(1234))
222+
self.assertEqual(1, self.stats["num_os_type_Linux"])
223+
self.assertEqual(1, self.stats["num_vm_%s" %
224+
vm_states.SHELVED_OFFLOADED])
225+
self.assertEqual(1, self.stats["num_task_%s" % task_states.SPAWNING])
226+
211227
def test_io_workload(self):
212228
vms = [vm_states.ACTIVE, vm_states.BUILDING, vm_states.PAUSED]
213229
tasks = [task_states.RESIZE_MIGRATING, task_states.REBUILDING,

0 commit comments

Comments
 (0)