
Commit 400d25f

Zuul authored and openstack-gerrit committed
Merge "Store old_flavor already on source host during resize" into stable/wallaby
2 parents cf39996 + c8b04d1

2 files changed: +25 -27 lines changed

nova/compute/manager.py

Lines changed: 12 additions & 0 deletions
@@ -5608,6 +5608,14 @@ def _resize_instance(self, context, instance, image,

         instance.host = migration.dest_compute
         instance.node = migration.dest_node
+        # NOTE(gibi): as the instance is now tracked on the destination we
+        # have to make sure that the source compute resource tracker can
+        # track this instance as a migration. For that the resource tracker
+        # needs to see the old_flavor set on the instance. The old_flavor
+        # setting used to be done on the destination host in finish_resize
+        # but that is racy with a source host update_available_resource
+        # periodic run.
+        instance.old_flavor = instance.flavor
         instance.task_state = task_states.RESIZE_MIGRATED
         instance.save(expected_task_state=task_states.RESIZE_MIGRATING)

@@ -5721,6 +5729,10 @@ def _finish_resize(self, context, instance, migration, disk_info,
         # to ACTIVE for backwards compatibility
         old_vm_state = instance.system_metadata.get('old_vm_state',
                                                     vm_states.ACTIVE)
+        # NOTE(gibi): this is already set by resize_instance on the source
+        # node before calling finish_resize on the destination, but during an
+        # upgrade the source node may not yet have the fix for bug 1944759.
+        # This assignment can be removed in the Z release.
         instance.old_flavor = old_flavor

         if old_instance_type_id != new_instance_type_id:
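The NOTE in _resize_instance above is the heart of the fix. As a rough, illustrative sketch (hypothetical helper and names, not the actual ResourceTracker code): once instance.host points at the destination, the old flavor is the only thing the source host's update_available_resource periodic can still account the outbound migration against.

def migration_vcpu_usage_on_host(instance, migration, hostname):
    # Hypothetical helper: how many vCPUs a host must keep reserved for an
    # in-progress resize it is involved in.
    if migration.source_compute == hostname:
        # Outbound resize: the source keeps the old resources reserved
        # until the resize is confirmed or reverted.
        flavor = instance.old_flavor
    else:
        # Inbound resize: the destination tracks the new flavor.
        flavor = instance.new_flavor
    if flavor is None:
        # Before this change old_flavor was only set in finish_resize on the
        # destination, so a source-side periodic running in that window saw
        # nothing to track and freed the pinned CPUs prematurely.
        return 0
    return flavor.vcpus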

nova/tests/functional/libvirt/test_numa_servers.py

Lines changed: 13 additions & 27 deletions
@@ -878,10 +878,10 @@ def inject_periodic_to_finish_resize(*args, **kwargs):

         dst_host = server['OS-EXT-SRV-ATTR:host']

-        # This is a resource accounting bug, we should have 2 cpus pinned on
-        # both computes. The source should have it due to the outbound
-        # migration and the destination due to the instance running there
-        self._assert_pinned_cpus(src_host, 0)
+        # we have 2 cpus pinned on both computes. The source should have it
+        # due to the outbound migration and the destination due to the
+        # instance running there
+        self._assert_pinned_cpus(src_host, 2)
         self._assert_pinned_cpus(dst_host, 2)

         return server, src_host, dst_host
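The assertions above use the _assert_pinned_cpus helper defined elsewhere in this test module. As an assumed sketch of its shape (the real helper may differ in detail), it boils down to counting the pinned pCPUs in the compute node's tracked NUMA topology:

from nova import objects


def count_pinned_cpus(context, hostname):
    # Load the compute node record and its serialized NUMA topology.
    compute_node = objects.ComputeNode.get_by_nodename(context, hostname)
    numa_topology = objects.NUMATopology.obj_from_db_obj(
        compute_node.numa_topology)
    # Sum the pinned pCPUs across all NUMA cells of the host.
    return sum(len(cell.pinned_cpus) for cell in numa_topology.cells)

So self._assert_pinned_cpus(src_host, 2) effectively asserts that this count is 2 for the source host.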
@@ -893,30 +893,17 @@ def test_resize_confirm_bug_1944759(self):
         # Now confirm the resize
         post = {'confirmResize': None}

-        # FIXME(gibi): This is bug 1944759 where during resize, on the source
-        # node the resize_instance() call at the point of calling finish_resize
-        # overlaps with a update_available_resources() periodic job. This
-        # causes that the periodic job will not track the migration nor the
-        # instance and therefore freeing the resource allocation. Then when
-        # later the resize is confirmed the confirm_resize on the source
-        # compute also wants to free up the resources, the pinned CPUs, and it
-        # fails as they are already freed.
-        exc = self.assertRaises(
-            client.OpenStackApiException,
-            self.api.post_server_action, server['id'], post
-        )
-        self.assertEqual(500, exc.response.status_code)
-        self.assertIn('CPUUnpinningInvalid', str(exc))
+        self.api.post_server_action(server['id'], post)
+        self._wait_for_state_change(server, 'ACTIVE')

-        # confirm failed above but the resource allocation reflects that the
-        # VM is running on the dest node
+        # the resource allocation reflects that the VM is running on the dest
+        # node
         self._assert_pinned_cpus(src_host, 0)
         self._assert_pinned_cpus(dst_host, 2)

+        # and running periodics does not break it either
         self._run_periodics()

-        # and such allocation situation is stable so as a recovery the VM
-        # can be reset-state to ACTIVE without problem.
         self._assert_pinned_cpus(src_host, 0)
         self._assert_pinned_cpus(dst_host, 2)
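Before this fix the confirm call above failed with a 500 carrying CPUUnpinningInvalid, as the removed FIXME describes. A toy, self-contained illustration of that failure mode (not Nova code): confirm_resize tries to unpin the old flavor's CPUs on the source after the racy periodic has already released them.

class PinnedCpuPool:
    # Stand-in for the per-host pinned CPU bookkeeping.
    def __init__(self, pinned):
        self.pinned = set(pinned)

    def unpin(self, cpus):
        if not set(cpus) <= self.pinned:
            # analogous to nova.exception.CPUUnpinningInvalid
            raise RuntimeError('trying to unpin CPUs that are not pinned')
        self.pinned -= set(cpus)


source = PinnedCpuPool(pinned=set())  # periodic already freed CPUs 0 and 1
try:
    source.unpin({0, 1})              # what confirm_resize did before the fix
except RuntimeError as exc:
    print(exc)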

@@ -932,15 +919,14 @@ def test_resize_revert_bug_1944759(self):
         self.api.post_server_action(server['id'], post)
         self._wait_for_state_change(server, 'ACTIVE')

-        # This is a resource accounting bug. After the revert the source host
-        # should have 2 cpus pinned due to the instance.
-        self._assert_pinned_cpus(src_host, 0)
+        # After the revert the source host should have 2 cpus pinned due to
+        # the instance.
+        self._assert_pinned_cpus(src_host, 2)
         self._assert_pinned_cpus(dst_host, 0)

-        # running the periodic job will fix the resource accounting
+        # running the periodic job will not break it either
         self._run_periodics()

-        # this is now correct
         self._assert_pinned_cpus(src_host, 2)
         self._assert_pinned_cpus(dst_host, 0)
