Skip to content

Commit cf39996

Browse files
Zuulopenstack-gerrit
authored and committed
Merge "Reproduce bug 1944759" into stable/wallaby
2 parents 0cd4ef2 + 140ae45 commit cf39996

File tree

1 file changed

+121
-0
lines changed

1 file changed

+121
-0
lines changed

nova/tests/functional/libvirt/test_numa_servers.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,127 @@ def fake_confirm_migration(*args, **kwargs):
823823

824824
server = self._wait_for_state_change(server, 'ACTIVE')
825825

826+
def _assert_pinned_cpus(self, hostname, expected_number_of_pinned):
    """Assert how many CPUs are pinned in the first NUMA cell of a host.

    :param hostname: node name used to look up the compute node record
    :param expected_number_of_pinned: expected number of entries in
        ``pinned_cpus`` of the first cell of the node's NUMA topology
    """
    compute_node = objects.ComputeNode.get_by_nodename(self.ctxt, hostname)
    topology = objects.NUMATopology.obj_from_db_obj(
        compute_node.numa_topology)
    # Only the first cell of the topology is inspected.
    first_cell = topology.cells[0]
    self.assertEqual(
        expected_number_of_pinned, len(first_cell.pinned_cpus))
834+
835+
def _create_server_and_resize_bug_1944759(self):
    """Boot a pinned server and resize it with a periodic job injected.

    Two computes are started, a server using a dedicated CPU policy flavor
    is created and then resized. The ``finish_resize`` RPC call is stubbed
    so that the periodic jobs run right before the real call is made,
    which reproduces the resource accounting problem of bug 1944759.

    :returns: a ``(server, src_host, dst_host)`` tuple, where ``server``
        is the server in VERIFY_RESIZE state
    """
    self.flags(
        cpu_dedicated_set='0-3', cpu_shared_set='4-7', group='compute')
    self.flags(vcpu_pin_set=None)

    # start services
    for compute_name in ('test_compute0', 'test_compute1'):
        self.start_compute(hostname=compute_name)

    old_flavor_id = self._create_flavor(
        vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'})
    server = self._create_server(flavor_id=old_flavor_id)

    src_host = server['OS-EXT-SRV-ATTR:host']
    self._assert_pinned_cpus(src_host, 2)

    # The new flavor does not really matter as long as the old flavor
    # uses pinning, so a similar flavor is used for simplicity.
    new_flavor_id = self._create_flavor(
        vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'})

    # Keep a reference to the real RPC method before it gets stubbed out.
    real_finish_resize = nova.compute.rpcapi.ComputeAPI.finish_resize

    def finish_resize_after_periodics(*args, **kwargs):
        # Simulate that the finish_resize call overlaps with an
        # update_available_resource periodic job.
        self._run_periodics()
        return real_finish_resize(*args, **kwargs)

    self.stub_out(
        'nova.compute.rpcapi.ComputeAPI.finish_resize',
        finish_resize_after_periodics)

    resize_request = {'resize': {'flavorRef': new_flavor_id}}
    # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should
    # probably be less...dumb
    migrate_disk_target = (
        'nova.virt.libvirt.driver.LibvirtDriver'
        '.migrate_disk_and_power_off')
    with mock.patch(migrate_disk_target, return_value='{}'):
        self.api.post_server_action(server['id'], resize_request)
        server = self._wait_for_state_change(server, 'VERIFY_RESIZE')

    dst_host = server['OS-EXT-SRV-ATTR:host']

    # This is a resource accounting bug: 2 CPUs should be pinned on both
    # computes, on the source because of the outbound migration and on
    # the destination because the instance is running there.
    self._assert_pinned_cpus(src_host, 0)
    self._assert_pinned_cpus(dst_host, 2)

    return server, src_host, dst_host
888+
889+
def test_resize_confirm_bug_1944759(self):
    """Confirming the resize fails with CPUUnpinningInvalid (bug 1944759).

    After the failed confirm the pinned CPU accounting keeps showing the
    instance on the destination host only, also after the periodics ran.
    """
    server, src_host, dst_host = (
        self._create_server_and_resize_bug_1944759())

    # Now confirm the resize
    confirm_request = {'confirmResize': None}

    # FIXME(gibi): This is bug 1944759. During the resize, on the source
    # node, the resize_instance() call overlaps with an
    # update_available_resource() periodic job at the point where
    # finish_resize is called. Because of that the periodic job tracks
    # neither the migration nor the instance and therefore frees the
    # resource allocation. When the resize is confirmed later, the
    # confirm_resize on the source compute also wants to free the
    # resources (the pinned CPUs) and it fails as they are already freed.
    exc = self.assertRaises(
        client.OpenStackApiException,
        self.api.post_server_action, server['id'], confirm_request)
    self.assertEqual(500, exc.response.status_code)
    self.assertIn('CPUUnpinningInvalid', str(exc))

    expected_pinning = ((src_host, 0), (dst_host, 2))

    # The confirm failed above, but the resource allocation reflects that
    # the VM is running on the dest node.
    for host, pinned in expected_pinning:
        self._assert_pinned_cpus(host, pinned)

    self._run_periodics()

    # This allocation state is stable, so as a recovery the VM can be
    # reset-state to ACTIVE without problem.
    for host, pinned in expected_pinning:
        self._assert_pinned_cpus(host, pinned)
922+
923+
def test_resize_revert_bug_1944759(self):
    """Reverting the resize succeeds but leaves wrong CPU accounting.

    The pinned CPU accounting on the source host is only corrected once
    the periodic jobs have run (bug 1944759).
    """
    server, src_host, dst_host = (
        self._create_server_and_resize_bug_1944759())

    # Now revert the resize. Unlike the confirm, the revert actually
    # succeeds, but the resource allocation is still flaky.
    revert_request = {'revertResize': None}
    self.api.post_server_action(server['id'], revert_request)
    self._wait_for_state_change(server, 'ACTIVE')

    # This is a resource accounting bug. After the revert the source host
    # should have 2 CPUs pinned due to the instance.
    for host in (src_host, dst_host):
        self._assert_pinned_cpus(host, 0)

    # Running the periodic job fixes the resource accounting...
    self._run_periodics()

    # ...so this is now correct.
    self._assert_pinned_cpus(src_host, 2)
    self._assert_pinned_cpus(dst_host, 0)
946+
826947

827948
class NUMAServerTestWithCountingQuotaFromPlacement(NUMAServersTest):
828949

0 commit comments

Comments
 (0)