Skip to content

Commit be6edb4

Browse files
JoelSpeed authored and elmiko committed
Rewrite DeleteNodesTwice test to check API not TargetSize for
cluster-autoscaler CAPI provider
1 parent dac1f7d commit be6edb4

File tree

1 file changed

+95
-47
lines changed

1 file changed

+95
-47
lines changed

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go

Lines changed: 95 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,10 @@ import (
2525
"testing"
2626
"time"
2727

28+
corev1 "k8s.io/api/core/v1"
2829
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2930
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
31+
"k8s.io/apimachinery/pkg/util/wait"
3032
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
3133
"k8s.io/utils/pointer"
3234
)
@@ -819,16 +821,29 @@ func TestNodeGroupMachineSetDeleteNodesWithMismatchedNodes(t *testing.T) {
819821
}
820822

821823
func TestNodeGroupDeleteNodesTwice(t *testing.T) {
822-
addDeletionTimestamp := func(t *testing.T, controller *machineController, machine *Machine) error {
824+
addDeletionTimestampToMachine := func(t *testing.T, controller *machineController, node *corev1.Node) error {
825+
m, err := controller.findMachineByProviderID(normalizedProviderString(node.Spec.ProviderID))
826+
if err != nil {
827+
return err
828+
}
829+
823830
// Simulate delete that would have happened if the
824831
// Machine API controllers were running. Don't actually
825832
// delete since the fake client does not support
826833
// finalizers.
827834
now := v1.Now()
828-
machine.DeletionTimestamp = &now
829-
return controller.machineInformer.Informer().GetStore().Update(newUnstructuredFromMachine(machine))
835+
m.DeletionTimestamp = &now
836+
if _, err := controller.dynamicclient.Resource(*controller.machineResource).Namespace(m.GetNamespace()).Update(context.Background(), newUnstructuredFromMachine(m), v1.UpdateOptions{}); err != nil {
837+
return err
838+
}
839+
840+
return nil
830841
}
831842

843+
// This is the size we expect the NodeGroup to be after we have called DeleteNodes.
844+
// We need at least 8 nodes for this test to be valid.
845+
expectedSize := 7
846+
832847
test := func(t *testing.T, testConfig *testConfig) {
833848
controller, stop := mustCreateTestController(t, testConfig)
834849
defer stop()
@@ -848,6 +863,17 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
848863
t.Fatalf("unexpected error: %v", err)
849864
}
850865

866+
// Check that the test case is valid before executing DeleteNodes
867+
// 1. We must have at least 1 more node than the expected size otherwise DeleteNodes is a no-op
868+
// 2. MinSize must be less than the expected size otherwise a second call to DeleteNodes may
869+
// not make the nodegroup size less than the expected size.
870+
if len(nodeNames) <= expectedSize {
871+
t.Fatalf("expected more nodes than the expected size: %d <= %d", len(nodeNames), expectedSize)
872+
}
873+
if ng.MinSize() >= expectedSize {
874+
t.Fatalf("expected min size to be less than expected size: %d >= %d", ng.MinSize(), expectedSize)
875+
}
876+
851877
if len(nodeNames) != len(testConfig.nodes) {
852878
t.Fatalf("expected len=%v, got len=%v", len(testConfig.nodes), len(nodeNames))
853879
}
@@ -862,55 +888,41 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
862888
}
863889
}
864890

891+
// These are the nodes which are over the final expectedSize
892+
nodesToBeDeleted := testConfig.nodes[expectedSize:]
893+
865894
// Assert that we have no DeletionTimestamp
866-
for i := 7; i < len(testConfig.machines); i++ {
895+
for i := expectedSize; i < len(testConfig.machines); i++ {
867896
if !testConfig.machines[i].ObjectMeta.DeletionTimestamp.IsZero() {
868897
t.Fatalf("unexpected DeletionTimestamp")
869898
}
870899
}
871900

872-
if err := ng.DeleteNodes(testConfig.nodes[7:]); err != nil {
901+
// Delete all nodes over the expectedSize
902+
if err := ng.DeleteNodes(nodesToBeDeleted); err != nil {
873903
t.Fatalf("unexpected error: %v", err)
874904
}
875905

876-
for i := 7; i < len(testConfig.machines); i++ {
877-
if err := addDeletionTimestamp(t, controller, testConfig.machines[i]); err != nil {
906+
for _, node := range nodesToBeDeleted {
907+
if err := addDeletionTimestampToMachine(t, controller, node); err != nil {
878908
t.Fatalf("unexpected err: %v", err)
879909
}
880-
if testConfig.machines[i].ObjectMeta.DeletionTimestamp.IsZero() {
881-
t.Fatalf("expected a DeletionTimestamp")
882-
}
883910
}
884911

885-
// TODO(frobware) We have a flaky test here because we
886-
// just called Delete and Update and the next call to
887-
// controller.nodeGroups() will sometimes get stale
888-
// objects from the (fakeclient) store. To fix this we
889-
// should update the test machinery so that individual
890-
// tests can have callbacks on Add/Update/Delete on
891-
// each of the respective informers. We should then
892-
// override those callbacks here in this test to add
893-
// rendezvous points so that we wait until all objects
894-
// have been updated before we go and get them again.
895-
//
896-
// Running this test with a 500ms duration I see:
897-
//
898-
// $ ./stress ./openshiftmachineapi.test -test.run TestNodeGroupDeleteNodesTwice -test.count 5 | ts | ts -i
899-
// 00:00:05 Feb 27 14:29:36 0 runs so far, 0 failures
900-
// 00:00:05 Feb 27 14:29:41 8 runs so far, 0 failures
901-
// 00:00:05 Feb 27 14:29:46 16 runs so far, 0 failures
902-
// 00:00:05 Feb 27 14:29:51 24 runs so far, 0 failures
903-
// 00:00:05 Feb 27 14:29:56 32 runs so far, 0 failures
904-
// ...
905-
// 00:00:05 Feb 27 14:31:01 112 runs so far, 0 failures
906-
// 00:00:05 Feb 27 14:31:06 120 runs so far, 0 failures
907-
// 00:00:05 Feb 27 14:31:11 128 runs so far, 0 failures
908-
// 00:00:05 Feb 27 14:31:16 136 runs so far, 0 failures
909-
// 00:00:05 Feb 27 14:31:21 144 runs so far, 0 failures
910-
//
911-
// To make sure we don't run into any flakes in CI
912-
// I've chosen to make this sleep duration 3s.
913-
time.Sleep(3 * time.Second)
912+
// Wait for the machineset to have been updated
913+
if err := wait.PollImmediate(100*time.Millisecond, 5*time.Second, func() (bool, error) {
914+
nodegroups, err = controller.nodeGroups()
915+
if err != nil {
916+
return false, err
917+
}
918+
targetSize, err := nodegroups[0].TargetSize()
919+
if err != nil {
920+
return false, err
921+
}
922+
return targetSize == expectedSize, nil
923+
}); err != nil {
924+
t.Fatalf("unexpected error waiting for nodegroup to be expected size: %v", err)
925+
}
914926

915927
nodegroups, err = controller.nodeGroups()
916928
if err != nil {
@@ -919,22 +931,58 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
919931

920932
ng = nodegroups[0]
921933

922-
// Attempt to delete the nodes again which verifies
923-
// that nodegroup.DeleteNodes() skips over nodes that
924-
// have a non-nil DeletionTimestamp value.
925-
if err := ng.DeleteNodes(testConfig.nodes[7:]); err != nil {
926-
t.Fatalf("unexpected error: %v", err)
927-
}
928-
934+
// Check the nodegroup is at the expected size
929935
actualSize, err := ng.TargetSize()
930936
if err != nil {
931937
t.Fatalf("unexpected error: %v", err)
932938
}
933939

934-
expectedSize := len(testConfig.machines) - len(testConfig.machines[7:])
935940
if actualSize != expectedSize {
936941
t.Fatalf("expected %d nodes, got %d", expectedSize, actualSize)
937942
}
943+
944+
// Check that the machines deleted in the last run have DeletionTimestamp's
945+
// when fetched from the API
946+
for _, node := range nodesToBeDeleted {
947+
// Ensure the update has propagated
948+
if err := wait.PollImmediate(100*time.Millisecond, 5*time.Second, func() (bool, error) {
949+
m, err := controller.findMachineByProviderID(normalizedProviderString(node.Spec.ProviderID))
950+
if err != nil {
951+
return false, err
952+
}
953+
return !m.GetDeletionTimestamp().IsZero(), nil
954+
}); err != nil {
955+
t.Fatalf("unexpected error waiting for machine to have deletion timestamp: %v", err)
956+
}
957+
}
958+
959+
// Attempt to delete the nodes again which verifies
960+
// that nodegroup.DeleteNodes() skips over nodes that
961+
// have a non-nil DeletionTimestamp value.
962+
if err := ng.DeleteNodes(nodesToBeDeleted); err != nil {
963+
t.Fatalf("unexpected error: %v", err)
964+
}
965+
966+
switch v := (ng.scalableResource).(type) {
967+
case *machineSetScalableResource:
968+
updatedMachineSet, err := controller.getMachineSet(testConfig.machineSet.Namespace, testConfig.machineSet.Name, metav1.GetOptions{})
969+
if err != nil {
970+
t.Fatalf("unexpected error: %v", err)
971+
}
972+
if actual := pointer.Int32PtrDerefOr(updatedMachineSet.Spec.Replicas, 0); int(actual) != expectedSize {
973+
t.Fatalf("expected %v nodes, got %v", expectedSize, actual)
974+
}
975+
case *machineDeploymentScalableResource:
976+
updatedMachineDeployment, err := controller.getMachineDeployment(testConfig.machineDeployment.Namespace, testConfig.machineDeployment.Name, metav1.GetOptions{})
977+
if err != nil {
978+
t.Fatalf("unexpected error: %v", err)
979+
}
980+
if actual := pointer.Int32PtrDerefOr(updatedMachineDeployment.Spec.Replicas, 0); int(actual) != expectedSize {
981+
t.Fatalf("expected %v nodes, got %v", expectedSize, actual)
982+
}
983+
default:
984+
t.Errorf("unexpected type: %T", v)
985+
}
938986
}
939987

940988
// Note: 10 is an upper bound for the number of nodes/replicas

0 commit comments

Comments
 (0)