Skip to content

Commit ae915a9

Browse files
rubasovElod Illes
authored andcommitted
Do not untrack resources of a server being unshelved
This patch concerns the time when a VM is being unshelved and the compute manager set the task_state to spawning, claimed resources of the VM and then called driver.spawn(). So the instance is in vm_state SHELVED_OFFLOADED, task_state spawning. If at this point a new update_available_resource periodic job is started that collects all the instances assigned to the node to calculate resource usage. However the calculation assumed that a VM in SHELVED_OFFLOADED state does not need resource allocation on the node (probably being removed from the node as it is offloaded) and deleted the resource claim. Given all this we ended up with the VM spawned successfully but having lost the resource claim on the node. This patch changes what we do in vm_state SHELVED_OFFLOADED, task_state spawning. We no longer delete the resource claim in this state and keep tracking the resource in stats. Change-Id: I8c9944810c09d501a6d3f60f095d9817b756872d Closes-Bug: #2025480 (cherry picked from commit f1dc4ec) (cherry picked from commit 4239d1f) (cherry picked from commit 683ecc0) (cherry picked from commit e41962f) (cherry picked from commit 22dfd1f) (cherry picked from commit 26a9cae)
1 parent 4023d3f commit ae915a9

File tree

6 files changed

+37
-11
lines changed

6 files changed

+37
-11
lines changed

nova/compute/manager.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6488,9 +6488,9 @@ def _shelve_offload_instance(self, context, instance, clean_shutdown,
64886488

64896489
instance.power_state = current_power_state
64906490
# NOTE(mriedem): The vm_state has to be set before updating the
6491-
# resource tracker, see vm_states.ALLOW_RESOURCE_REMOVAL. The host/node
6492-
# values cannot be nulled out until after updating the resource tracker
6493-
# though.
6491+
# resource tracker, see vm_states.allow_resource_removal(). The
6492+
# host/node values cannot be nulled out until after updating the
6493+
# resource tracker though.
64946494
instance.vm_state = vm_states.SHELVED_OFFLOADED
64956495
instance.task_state = None
64966496
instance.save(expected_task_state=[task_states.SHELVING,

nova/compute/resource_tracker.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,7 +1465,8 @@ def _update_usage_from_instance(self, context, instance, nodename,
14651465
# NOTE(sfinucan): Both brand new instances as well as instances that
14661466
# are being unshelved will have is_new_instance == True
14671467
is_removed_instance = not is_new_instance and (is_removed or
1468-
instance['vm_state'] in vm_states.ALLOW_RESOURCE_REMOVAL)
1468+
vm_states.allow_resource_removal(
1469+
vm_state=instance['vm_state'], task_state=instance.task_state))
14691470

14701471
if is_new_instance:
14711472
self.tracked_instances.add(uuid)
@@ -1524,7 +1525,9 @@ def _update_usage_from_instances(self, context, instances, nodename):
15241525

15251526
instance_by_uuid = {}
15261527
for instance in instances:
1527-
if instance.vm_state not in vm_states.ALLOW_RESOURCE_REMOVAL:
1528+
if not vm_states.allow_resource_removal(
1529+
vm_state=instance['vm_state'],
1530+
task_state=instance.task_state):
15281531
self._update_usage_from_instance(context, instance, nodename)
15291532
instance_by_uuid[instance.uuid] = instance
15301533
return instance_by_uuid

nova/compute/stats.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ def update_stats_for_instance(self, instance, is_removed=False):
105105
(vm_state, task_state, os_type, project_id) = \
106106
self._extract_state_from_instance(instance)
107107

108-
if is_removed or vm_state in vm_states.ALLOW_RESOURCE_REMOVAL:
108+
if is_removed or vm_states.allow_resource_removal(
109+
vm_state=vm_state, task_state=task_state):
109110
self._decrement("num_instances")
110111
self.states.pop(uuid)
111112
else:

nova/compute/vm_states.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
See http://wiki.openstack.org/VMState
2828
"""
2929

30+
from nova.compute import task_states
3031
from nova.objects import fields
3132

3233

@@ -74,5 +75,11 @@
7475
# states we allow to trigger crash dump
7576
ALLOW_TRIGGER_CRASH_DUMP = [ACTIVE, PAUSED, RESCUED, RESIZED, ERROR]
7677

77-
# states we allow resources to be freed in
78-
ALLOW_RESOURCE_REMOVAL = [DELETED, SHELVED_OFFLOADED]
78+
79+
def allow_resource_removal(vm_state, task_state=None):
80+
"""(vm_state, task_state) combinations we allow resources to be freed in"""
81+
82+
return (
83+
vm_state == DELETED or
84+
vm_state == SHELVED_OFFLOADED and task_state != task_states.SPAWNING
85+
)

nova/tests/functional/regressions/test_bug_2025480.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,5 @@ def fake_spawn(*args, **kwargs):
7777

7878
node = compute_node.ComputeNode.get_by_nodename(
7979
context.get_admin_context(), 'compute1')
80-
# This is the bug, the instance should have resources claimed
81-
# self.assertEqual(1, node.vcpus_used)
82-
self.assertEqual(0, node.vcpus_used)
80+
# After the fix, the instance should have resources claimed
81+
self.assertEqual(1, node.vcpus_used)

nova/tests/unit/compute/test_stats.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,22 @@ def test_update_stats_for_instance_offloaded(self):
208208
self.assertEqual(0, self.stats.num_os_type("Linux"))
209209
self.assertEqual(0, self.stats["num_vm_" + vm_states.BUILDING])
210210

211+
def test_update_stats_for_instance_being_unshelved(self):
212+
instance = self._create_instance()
213+
self.stats.update_stats_for_instance(instance)
214+
self.assertEqual(1, self.stats.num_instances_for_project("1234"))
215+
216+
instance["vm_state"] = vm_states.SHELVED_OFFLOADED
217+
instance["task_state"] = task_states.SPAWNING
218+
self.stats.update_stats_for_instance(instance)
219+
220+
self.assertEqual(1, self.stats.num_instances)
221+
self.assertEqual(1, self.stats.num_instances_for_project(1234))
222+
self.assertEqual(1, self.stats["num_os_type_Linux"])
223+
self.assertEqual(1, self.stats["num_vm_%s" %
224+
vm_states.SHELVED_OFFLOADED])
225+
self.assertEqual(1, self.stats["num_task_%s" % task_states.SPAWNING])
226+
211227
def test_io_workload(self):
212228
vms = [vm_states.ACTIVE, vm_states.BUILDING, vm_states.PAUSED]
213229
tasks = [task_states.RESIZE_MIGRATING, task_states.REBUILDING,

0 commit comments

Comments
 (0)