@@ -25,8 +25,10 @@ import (
2525 "testing"
2626 "time"
2727
28+ corev1 "k8s.io/api/core/v1"
2829 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2930 v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
31+ "k8s.io/apimachinery/pkg/util/wait"
3032 "k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
3133 "k8s.io/utils/pointer"
3234)
@@ -819,16 +821,29 @@ func TestNodeGroupMachineSetDeleteNodesWithMismatchedNodes(t *testing.T) {
819821}
820822
821823func TestNodeGroupDeleteNodesTwice (t * testing.T ) {
822- addDeletionTimestamp := func (t * testing.T , controller * machineController , machine * Machine ) error {
824+ addDeletionTimestampToMachine := func (t * testing.T , controller * machineController , node * corev1.Node ) error {
825+ m , err := controller .findMachineByProviderID (normalizedProviderString (node .Spec .ProviderID ))
826+ if err != nil {
827+ return err
828+ }
829+
823830 // Simulate delete that would have happened if the
824831 // Machine API controllers were running. Don't actually
825832 // delete since the fake client does not support
826833 // finalizers.
827834 now := v1 .Now ()
828- machine .DeletionTimestamp = & now
829- return controller .machineInformer .Informer ().GetStore ().Update (newUnstructuredFromMachine (machine ))
835+ m .DeletionTimestamp = & now
836+ if _ , err := controller .dynamicclient .Resource (* controller .machineResource ).Namespace (m .GetNamespace ()).Update (context .Background (), newUnstructuredFromMachine (m ), v1.UpdateOptions {}); err != nil {
837+ return err
838+ }
839+
840+ return nil
830841 }
831842
843+ // This is the size we expect the NodeGroup to be after we have called DeleteNodes.
844+ // We need at least 8 nodes for this test to be valid.
845+ expectedSize := 7
846+
832847 test := func (t * testing.T , testConfig * testConfig ) {
833848 controller , stop := mustCreateTestController (t , testConfig )
834849 defer stop ()
@@ -848,6 +863,17 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
848863 t .Fatalf ("unexpected error: %v" , err )
849864 }
850865
866+ // Check that the test case is valid before executing DeleteNodes
867+ // 1. We must have at least 1 more node than the expected size otherwise DeleteNodes is a no-op
868+ // 2. MinSize must be less than the expected size otherwise a second call to DeleteNodes may
869+ // not make the nodegroup size less than the expected size.
870+ if len (nodeNames ) <= expectedSize {
871+ t .Fatalf ("expected more nodes than the expected size: %d <= %d" , len (nodeNames ), expectedSize )
872+ }
873+ if ng .MinSize () >= expectedSize {
874+ t .Fatalf ("expected min size to be less than expected size: %d >= %d" , ng .MinSize (), expectedSize )
875+ }
876+
851877 if len (nodeNames ) != len (testConfig .nodes ) {
852878 t .Fatalf ("expected len=%v, got len=%v" , len (testConfig .nodes ), len (nodeNames ))
853879 }
@@ -862,55 +888,41 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
862888 }
863889 }
864890
891+ // These are the nodes which are over the final expectedSize
892+ nodesToBeDeleted := testConfig .nodes [expectedSize :]
893+
865894 // Assert that we have no DeletionTimestamp
866- for i := 7 ; i < len (testConfig .machines ); i ++ {
895+ for i := expectedSize ; i < len (testConfig .machines ); i ++ {
867896 if ! testConfig .machines [i ].ObjectMeta .DeletionTimestamp .IsZero () {
868897 t .Fatalf ("unexpected DeletionTimestamp" )
869898 }
870899 }
871900
872- if err := ng .DeleteNodes (testConfig .nodes [7 :]); err != nil {
901+ // Delete all nodes over the expectedSize
902+ if err := ng .DeleteNodes (nodesToBeDeleted ); err != nil {
873903 t .Fatalf ("unexpected error: %v" , err )
874904 }
875905
876- for i := 7 ; i < len ( testConfig . machines ); i ++ {
877- if err := addDeletionTimestamp (t , controller , testConfig . machines [ i ] ); err != nil {
906+ for _ , node := range nodesToBeDeleted {
907+ if err := addDeletionTimestampToMachine (t , controller , node ); err != nil {
878908 t .Fatalf ("unexpected err: %v" , err )
879909 }
880- if testConfig .machines [i ].ObjectMeta .DeletionTimestamp .IsZero () {
881- t .Fatalf ("expected a DeletionTimestamp" )
882- }
883910 }
884911
885- // TODO(frobware) We have a flaky test here because we
886- // just called Delete and Update and the next call to
887- // controller.nodeGroups() will sometimes get stale
888- // objects from the (fakeclient) store. To fix this we
889- // should update the test machinery so that individual
890- // tests can have callbacks on Add/Update/Delete on
891- // each of the respective informers. We should then
892- // override those callbacks here in this test to add
893- // rendezvous points so that we wait until all objects
894- // have been updated before we go and get them again.
895- //
896- // Running this test with a 500ms duration I see:
897- //
898- // $ ./stress ./openshiftmachineapi.test -test.run TestNodeGroupDeleteNodesTwice -test.count 5 | ts | ts -i
899- // 00:00:05 Feb 27 14:29:36 0 runs so far, 0 failures
900- // 00:00:05 Feb 27 14:29:41 8 runs so far, 0 failures
901- // 00:00:05 Feb 27 14:29:46 16 runs so far, 0 failures
902- // 00:00:05 Feb 27 14:29:51 24 runs so far, 0 failures
903- // 00:00:05 Feb 27 14:29:56 32 runs so far, 0 failures
904- // ...
905- // 00:00:05 Feb 27 14:31:01 112 runs so far, 0 failures
906- // 00:00:05 Feb 27 14:31:06 120 runs so far, 0 failures
907- // 00:00:05 Feb 27 14:31:11 128 runs so far, 0 failures
908- // 00:00:05 Feb 27 14:31:16 136 runs so far, 0 failures
909- // 00:00:05 Feb 27 14:31:21 144 runs so far, 0 failures
910- //
911- // To make sure we don't run into any flakes in CI
912- // I've chosen to make this sleep duration 3s.
913- time .Sleep (3 * time .Second )
912+ // Wait for the machineset to have been updated
913+ if err := wait .PollImmediate (100 * time .Millisecond , 5 * time .Second , func () (bool , error ) {
914+ nodegroups , err = controller .nodeGroups ()
915+ if err != nil {
916+ return false , err
917+ }
918+ targetSize , err := nodegroups [0 ].TargetSize ()
919+ if err != nil {
920+ return false , err
921+ }
922+ return targetSize == expectedSize , nil
923+ }); err != nil {
924+ t .Fatalf ("unexpected error waiting for nodegroup to be expected size: %v" , err )
925+ }
914926
915927 nodegroups , err = controller .nodeGroups ()
916928 if err != nil {
@@ -919,22 +931,58 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
919931
920932 ng = nodegroups [0 ]
921933
922- // Attempt to delete the nodes again which verifies
923- // that nodegroup.DeleteNodes() skips over nodes that
924- // have a non-nil DeletionTimestamp value.
925- if err := ng .DeleteNodes (testConfig .nodes [7 :]); err != nil {
926- t .Fatalf ("unexpected error: %v" , err )
927- }
928-
934+ // Check the nodegroup is at the expected size
929935 actualSize , err := ng .TargetSize ()
930936 if err != nil {
931937 t .Fatalf ("unexpected error: %v" , err )
932938 }
933939
934- expectedSize := len (testConfig .machines ) - len (testConfig .machines [7 :])
935940 if actualSize != expectedSize {
936941 t .Fatalf ("expected %d nodes, got %d" , expectedSize , actualSize )
937942 }
943+
944+ // Check that the machines deleted in the last run have DeletionTimestamps set
945+ // when fetched from the API
946+ for _ , node := range nodesToBeDeleted {
947+ // Ensure the update has propagated
948+ if err := wait .PollImmediate (100 * time .Millisecond , 5 * time .Second , func () (bool , error ) {
949+ m , err := controller .findMachineByProviderID (normalizedProviderString (node .Spec .ProviderID ))
950+ if err != nil {
951+ return false , err
952+ }
953+ return ! m .GetDeletionTimestamp ().IsZero (), nil
954+ }); err != nil {
955+ t .Fatalf ("unexpected error waiting for machine to have deletion timestamp: %v" , err )
956+ }
957+ }
958+
959+ // Attempt to delete the nodes again which verifies
960+ // that nodegroup.DeleteNodes() skips over nodes that
961+ // have a non-nil DeletionTimestamp value.
962+ if err := ng .DeleteNodes (nodesToBeDeleted ); err != nil {
963+ t .Fatalf ("unexpected error: %v" , err )
964+ }
965+
966+ switch v := (ng .scalableResource ).(type ) {
967+ case * machineSetScalableResource :
968+ updatedMachineSet , err := controller .getMachineSet (testConfig .machineSet .Namespace , testConfig .machineSet .Name , metav1.GetOptions {})
969+ if err != nil {
970+ t .Fatalf ("unexpected error: %v" , err )
971+ }
972+ if actual := pointer .Int32PtrDerefOr (updatedMachineSet .Spec .Replicas , 0 ); int (actual ) != expectedSize {
973+ t .Fatalf ("expected %v nodes, got %v" , expectedSize , actual )
974+ }
975+ case * machineDeploymentScalableResource :
976+ updatedMachineDeployment , err := controller .getMachineDeployment (testConfig .machineDeployment .Namespace , testConfig .machineDeployment .Name , metav1.GetOptions {})
977+ if err != nil {
978+ t .Fatalf ("unexpected error: %v" , err )
979+ }
980+ if actual := pointer .Int32PtrDerefOr (updatedMachineDeployment .Spec .Replicas , 0 ); int (actual ) != expectedSize {
981+ t .Fatalf ("expected %v nodes, got %v" , expectedSize , actual )
982+ }
983+ default :
984+ t .Errorf ("unexpected type: %T" , v )
985+ }
938986 }
939987
940988 // Note: 10 is an upper bound for the number of nodes/replicas
0 commit comments