@@ -823,6 +823,127 @@ def fake_confirm_migration(*args, **kwargs):
823823
824824 server = self ._wait_for_state_change (server , 'ACTIVE' )
825825
826+ def _assert_pinned_cpus (self , hostname , expected_number_of_pinned ):
827+ numa_topology = objects .NUMATopology .obj_from_db_obj (
828+ objects .ComputeNode .get_by_nodename (
829+ self .ctxt , hostname ,
830+ ).numa_topology ,
831+ )
832+ self .assertEqual (
833+ expected_number_of_pinned , len (numa_topology .cells [0 ].pinned_cpus ))
834+
    def _create_server_and_resize_bug_1944759(self):
        """Boot a CPU-pinned server and resize it while a periodic overlaps.

        Reproduces bug 1944759: the ``finish_resize`` RPC call is wrapped so
        that an ``update_available_resource`` periodic job runs just before
        it, which makes the periodic drop the source host's pinned CPU
        accounting for the in-progress migration.

        :returns: a ``(server, src_host, dst_host)`` tuple; the server is
            left in VERIFY_RESIZE state with the buggy accounting in place
            (0 pinned CPUs tracked on the source, 2 on the destination).
        """
        # Dedicated CPUs 0-3 and shared CPUs 4-7 on both computes; the
        # legacy vcpu_pin_set option must be unset for the new-style options
        # to take effect.
        self.flags(
            cpu_dedicated_set='0-3', cpu_shared_set='4-7', group='compute')
        self.flags(vcpu_pin_set=None)

        # start services
        self.start_compute(hostname='test_compute0')
        self.start_compute(hostname='test_compute1')

        flavor_a_id = self._create_flavor(
            vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'})
        server = self._create_server(flavor_id=flavor_a_id)

        src_host = server['OS-EXT-SRV-ATTR:host']
        self._assert_pinned_cpus(src_host, 2)

        # we don't really care what the new flavor is, so long as the old
        # flavor is using pinning. We use a similar flavor for simplicity.
        flavor_b_id = self._create_flavor(
            vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'})

        # Capture the real RPC method before stubbing it out below.
        orig_rpc_finish_resize = nova.compute.rpcapi.ComputeAPI.finish_resize

        # Simulate that the finish_resize call overlaps with an
        # update_available_resource periodic job
        def inject_periodic_to_finish_resize(*args, **kwargs):
            self._run_periodics()
            return orig_rpc_finish_resize(*args, **kwargs)

        self.stub_out(
            'nova.compute.rpcapi.ComputeAPI.finish_resize',
            inject_periodic_to_finish_resize,
        )

        # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should
        # probably be less...dumb
        with mock.patch(
            'nova.virt.libvirt.driver.LibvirtDriver'
            '.migrate_disk_and_power_off', return_value='{}',
        ):
            post = {'resize': {'flavorRef': flavor_b_id}}
            self.api.post_server_action(server['id'], post)
            server = self._wait_for_state_change(server, 'VERIFY_RESIZE')

        dst_host = server['OS-EXT-SRV-ATTR:host']

        # This is a resource accounting bug, we should have 2 cpus pinned on
        # both computes. The source should have it due to the outbound
        # migration and the destination due to the instance running there
        self._assert_pinned_cpus(src_host, 0)
        self._assert_pinned_cpus(dst_host, 2)

        return server, src_host, dst_host
889+ def test_resize_confirm_bug_1944759 (self ):
890+ server , src_host , dst_host = (
891+ self ._create_server_and_resize_bug_1944759 ())
892+
893+ # Now confirm the resize
894+ post = {'confirmResize' : None }
895+
896+ # FIXME(gibi): This is bug 1944759 where during resize, on the source
897+ # node the resize_instance() call at the point of calling finish_resize
898+ # overlaps with a update_available_resources() periodic job. This
899+ # causes that the periodic job will not track the migration nor the
900+ # instance and therefore freeing the resource allocation. Then when
901+ # later the resize is confirmed the confirm_resize on the source
902+ # compute also wants to free up the resources, the pinned CPUs, and it
903+ # fails as they are already freed.
904+ exc = self .assertRaises (
905+ client .OpenStackApiException ,
906+ self .api .post_server_action , server ['id' ], post
907+ )
908+ self .assertEqual (500 , exc .response .status_code )
909+ self .assertIn ('CPUUnpinningInvalid' , str (exc ))
910+
911+ # confirm failed above but the resource allocation reflects that the
912+ # VM is running on the dest node
913+ self ._assert_pinned_cpus (src_host , 0 )
914+ self ._assert_pinned_cpus (dst_host , 2 )
915+
916+ self ._run_periodics ()
917+
918+ # and such allocation situation is stable so as a recovery the VM
919+ # can be reset-state to ACTIVE without problem.
920+ self ._assert_pinned_cpus (src_host , 0 )
921+ self ._assert_pinned_cpus (dst_host , 2 )
922+
923+ def test_resize_revert_bug_1944759 (self ):
924+ server , src_host , dst_host = (
925+ self ._create_server_and_resize_bug_1944759 ())
926+
927+ # Now revert the resize
928+ post = {'revertResize' : None }
929+
930+ # reverts actually succeeds (not like confirm) but the resource
931+ # allocation is still flaky
932+ self .api .post_server_action (server ['id' ], post )
933+ self ._wait_for_state_change (server , 'ACTIVE' )
934+
935+ # This is a resource accounting bug. After the revert the source host
936+ # should have 2 cpus pinned due to the instance.
937+ self ._assert_pinned_cpus (src_host , 0 )
938+ self ._assert_pinned_cpus (dst_host , 0 )
939+
940+ # running the periodic job will fix the resource accounting
941+ self ._run_periodics ()
942+
943+ # this is now correct
944+ self ._assert_pinned_cpus (src_host , 2 )
945+ self ._assert_pinned_cpus (dst_host , 0 )
946+
826947
827948class NUMAServerTestWithCountingQuotaFromPlacement (NUMAServersTest ):
828949
0 commit comments