Skip to content

Commit bbfcb94

Browse files
committed
OCPBUGS-53432: deflake TestIngressControllerCustomEndpoints by proper waiting for CCM to be ready
1 parent b609eb2 commit bbfcb94

File tree

2 files changed

+63
-5
lines changed

2 files changed

+63
-5
lines changed

test/e2e/operator_test.go

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2630,6 +2630,17 @@ func TestIngressControllerCustomEndpoints(t *testing.T) {
26302630
Name: "elasticloadbalancing",
26312631
URL: fmt.Sprintf("https://elasticloadbalancing.%s.amazonaws.com", platform.AWS.Region),
26322632
}
2633+
2634+
ccmDeploymentName := types.NamespacedName{Namespace: "openshift-cloud-controller-manager", Name: "aws-cloud-controller-manager"}
2635+
oldCCMDeployment, err := retrieveDeployment(ccmDeploymentName)
2636+
if err != nil {
2637+
t.Fatalf("error getting CCM deployment: %v\n", err)
2638+
}
2639+
oldConfigHash := retrieveCCMConfigurationHash(oldCCMDeployment)
2640+
if oldConfigHash == "" {
2641+
t.Fatalf("CCM does not have a config hash, this should never happen: %v\n", oldCCMDeployment)
2642+
}
2643+
26332644
if err := updateInfrastructureConfigSpecWithRetryOnConflict(t, types.NamespacedName{Name: "cluster"}, timeout, func(spec *configv1.InfrastructureSpec) {
26342645
spec.PlatformSpec.AWS = &configv1.AWSPlatformSpec{
26352646
ServiceEndpoints: []configv1.AWSServiceEndpoint{
@@ -2649,16 +2660,40 @@ func TestIngressControllerCustomEndpoints(t *testing.T) {
26492660
t.Fatalf("failed to update infrastructure config: %v", err)
26502661
}
26512662
}()
2652-
// Wait for infrastructure status to update with custom endpoints.
2653-
if err := wait.PollImmediate(1*time.Second, 15*time.Second, func() (bool, error) {
2654-
if err := kclient.Get(context.TODO(), types.NamespacedName{Name: "cluster"}, &infraConfig); err != nil {
2663+
2664+
// Wait for infrastructure status to update with custom endpoints,
2665+
// for CCM deploy to get a new configuration hash and for pods to be scheduled
2666+
// and available
2667+
// This configuration change and replicas available may take up to 5 minutes, depending on
2668+
// the environment load
2669+
if err := wait.PollUntilContextTimeout(context.TODO(), 1*time.Second, 5*time.Minute, false, func(ctx context.Context) (done bool, err error) {
2670+
if err := kclient.Get(ctx, types.NamespacedName{Name: "cluster"}, &infraConfig); err != nil {
26552671
t.Logf("failed to get infrastructure config: %v\n", err)
26562672
return false, err
26572673
}
26582674
if len(infraConfig.Status.PlatformStatus.AWS.ServiceEndpoints) == 0 {
26592675
return false, nil
26602676
}
2661-
return true, nil
2677+
2678+
currentCCMDeployment, err := retrieveDeployment(ccmDeploymentName)
2679+
if err != nil {
2680+
t.Logf("error getting CCM deployment: %v\n", err)
2681+
return false, err
2682+
}
2683+
currentConfigHash := retrieveCCMConfigurationHash(currentCCMDeployment)
2684+
if oldConfigHash == currentConfigHash {
2685+
t.Log("new config hash not yet applied to CCM deployment")
2686+
return false, nil
2687+
}
2688+
2689+
if currentCCMDeployment.Generation == currentCCMDeployment.Status.ObservedGeneration &&
2690+
currentCCMDeployment.Status.Replicas == currentCCMDeployment.Status.ReadyReplicas &&
2691+
currentCCMDeployment.Status.Replicas == currentCCMDeployment.Status.UpdatedReplicas &&
2692+
currentCCMDeployment.Status.UnavailableReplicas == 0 {
2693+
return true, nil
2694+
}
2695+
return false, nil
2696+
26622697
}); err != nil {
26632698
t.Fatalf("failed to observe status update for infrastructure config %s", infraConfig.Name)
26642699
}
@@ -2677,7 +2712,10 @@ func TestIngressControllerCustomEndpoints(t *testing.T) {
26772712
}
26782713
defer assertIngressControllerDeleted(t, kclient, ic)
26792714
// Ensure the ingress controller is reporting expected status conditions.
2680-
if err := waitForIngressControllerCondition(t, kclient, 5*time.Minute, name, availableConditionsForIngressControllerWithLoadBalancer...); err != nil {
2715+
// The readiness of the resource depends on the load of the environment, and because changing InfrastructureConfig
2716+
// causes the Cloud controller manager to rollout, at least 3 minutes are required for leader
2717+
// election of CCM to happen + the required time to properly schedule the Pods once they are killed/restarted
2718+
if err := waitForIngressControllerCondition(t, kclient, 7*time.Minute, name, availableConditionsForIngressControllerWithLoadBalancer...); err != nil {
26812719
t.Errorf("failed to observe expected conditions: %v", err)
26822720
}
26832721
}

test/e2e/util_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,26 @@ func updateInfrastructureConfigSpecWithRetryOnConflict(t *testing.T, name types.
647647
})
648648
}
649649

650+
// retrieveDeployment is used to retrieve a full deployment resource
651+
func retrieveDeployment(name types.NamespacedName) (appsv1.Deployment, error) {
652+
ccmDeployment := appsv1.Deployment{}
653+
err := kclient.Get(context.TODO(), name, &ccmDeployment)
654+
return ccmDeployment, err
655+
}
656+
657+
// retrieveCCMConfigurationHash fetches the config-hash used on CCM deployment
658+
// and present on the annotation operator.openshift.io/config-hash
659+
// this information can be used to signal that a required infrastructure change
660+
// was successfully applied on CCM and forces CCM to do a rollout update.
661+
func retrieveCCMConfigurationHash(ccmDeployment appsv1.Deployment) string {
662+
ccmConfigAnnotation := "operator.openshift.io/config-hash"
663+
annotations := ccmDeployment.Spec.Template.GetAnnotations()
664+
if cfghash, ok := annotations[ccmConfigAnnotation]; ok {
665+
return cfghash
666+
}
667+
return ""
668+
}
669+
650670
// updateInfrastructureStatus updates the Infrastructure status by applying
651671
// the given update function to the current Infrastructure object.
652672
// If there is a conflict error on update then the complete operation

0 commit comments

Comments
 (0)