@@ -822,6 +822,127 @@ def fake_confirm_migration(*args, **kwargs):

         server = self._wait_for_state_change(server, 'ACTIVE')

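+    # Helper: load the compute node's NUMA topology from the DB and assert
+    # how many host CPUs are pinned in its first NUMA cell.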
+    def _assert_pinned_cpus(self, hostname, expected_number_of_pinned):
+        numa_topology = objects.NUMATopology.obj_from_db_obj(
+            objects.ComputeNode.get_by_nodename(
+                self.ctxt, hostname,
+            ).numa_topology,
+        )
+        self.assertEqual(
+            expected_number_of_pinned, len(numa_topology.cells[0].pinned_cpus))
+
+    def _create_server_and_resize_bug_1944759(self):
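+        # Dedicate host CPUs 0-3 to pinned guests and leave 4-7 for shared
+        # guests on both computes; the legacy vcpu_pin_set option must stay
+        # unset as it cannot be combined with cpu_dedicated_set.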
+        self.flags(
+            cpu_dedicated_set='0-3', cpu_shared_set='4-7', group='compute')
+        self.flags(vcpu_pin_set=None)
+
+        # start services
+        self.start_compute(hostname='test_compute0')
+        self.start_compute(hostname='test_compute1')
+
+        flavor_a_id = self._create_flavor(
+            vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'})
+        server = self._create_server(flavor_id=flavor_a_id)
+
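+        # The dedicated flavor should have pinned 2 host CPUs on the source.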
+        src_host = server['OS-EXT-SRV-ATTR:host']
+        self._assert_pinned_cpus(src_host, 2)
+
+        # We don't really care what the new flavor is, as long as the old
+        # flavor uses pinning. We use an identical flavor for simplicity.
+        flavor_b_id = self._create_flavor(
+            vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'})
+
+        orig_rpc_finish_resize = nova.compute.rpcapi.ComputeAPI.finish_resize
+
+        # Simulate that the finish_resize call overlaps with an
+        # update_available_resource periodic job.
+        def inject_periodic_to_finish_resize(*args, **kwargs):
+            self._run_periodics()
+            return orig_rpc_finish_resize(*args, **kwargs)
+
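+        # Stubbing the RPC client method makes the periodic run in the
+        # window where the source's resize_instance() is about to cast
+        # finish_resize to the destination compute.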
+        self.stub_out(
+            'nova.compute.rpcapi.ComputeAPI.finish_resize',
+            inject_periodic_to_finish_resize,
+        )
+
+        # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should
+        # probably be less...dumb
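+        # (The driver method normally returns the guest's disk info as a
+        # JSON string, so an empty JSON object is presumably enough here.)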
+        with mock.patch(
+            'nova.virt.libvirt.driver.LibvirtDriver'
+            '.migrate_disk_and_power_off', return_value='{}',
+        ):
+            post = {'resize': {'flavorRef': flavor_b_id}}
+            self.api.post_server_action(server['id'], post)
+            server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
+
+        dst_host = server['OS-EXT-SRV-ATTR:host']
+
+        # This is a resource accounting bug: we should have 2 CPUs pinned on
+        # both computes, on the source due to the outbound migration and on
+        # the destination due to the instance running there. Instead, the
+        # source has already lost its pinning.
+        self._assert_pinned_cpus(src_host, 0)
+        self._assert_pinned_cpus(dst_host, 2)
+
+        return server, src_host, dst_host
+
+    def test_resize_confirm_bug_1944759(self):
+        server, src_host, dst_host = (
+            self._create_server_and_resize_bug_1944759())
+
+        # Now confirm the resize
+        post = {'confirmResize': None}
+
+        # FIXME(gibi): This is bug 1944759: during the resize, the source
+        # node's resize_instance() call, at the point where it casts
+        # finish_resize, overlaps with an update_available_resource()
+        # periodic job. As a result, the periodic job tracks neither the
+        # migration nor the instance, and therefore frees their resource
+        # allocation. When the resize is later confirmed, confirm_resize()
+        # on the source compute also tries to free up the resources, the
+        # pinned CPUs, and fails as they have already been freed.
+        exc = self.assertRaises(
+            client.OpenStackApiException,
+            self.api.post_server_action, server['id'], post
+        )
+        self.assertEqual(500, exc.response.status_code)
+        self.assertIn('CPUUnpinningInvalid', str(exc))
+
+        # The confirm failed above, but the resource allocation still
+        # correctly reflects that the VM is running on the dest node.
+        self._assert_pinned_cpus(src_host, 0)
+        self._assert_pinned_cpus(dst_host, 2)
+
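+        # Run the periodic again to show that the allocation does not
+        # change, i.e. the state below is stable.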
+        self._run_periodics()
+
+        # As the allocation situation is stable, the VM can simply be
+        # reset-state'd to ACTIVE as a recovery.
+        self._assert_pinned_cpus(src_host, 0)
+        self._assert_pinned_cpus(dst_host, 2)
+
+    def test_resize_revert_bug_1944759(self):
+        server, src_host, dst_host = (
+            self._create_server_and_resize_bug_1944759())
+
+        # Now revert the resize
+        post = {'revertResize': None}
+
+        # The revert actually succeeds (unlike confirm), but the resource
+        # allocation is still wrong.
+        self.api.post_server_action(server['id'], post)
+        self._wait_for_state_change(server, 'ACTIVE')
+
+        # This is a resource accounting bug. After the revert the source
+        # host should have 2 CPUs pinned due to the instance, but instead
+        # neither host has any pinned.
+        self._assert_pinned_cpus(src_host, 0)
+        self._assert_pinned_cpus(dst_host, 0)
+
+        # Running the periodic job fixes the resource accounting.
+        self._run_periodics()
+
+        # This is now correct.
+        self._assert_pinned_cpus(src_host, 2)
+        self._assert_pinned_cpus(dst_host, 0)
+

 class NUMAServerTestWithCountingQuotaFromPlacement(NUMAServersTest):
