@@ -12,6 +12,7 @@ import (
1212 configv1 "github.com/openshift/api/config/v1"
1313 operatorv1 "github.com/openshift/api/operator/v1"
1414 routev1 "github.com/openshift/api/route/v1"
15+ operatorv1client "github.com/openshift/client-go/operator/clientset/versioned/typed/operator/v1"
1516 exutil "github.com/openshift/origin/test/extended/util"
1617 "github.com/openshift/origin/test/extended/util/operator"
1718 authnv1 "k8s.io/api/authentication/v1"
@@ -555,24 +556,48 @@ func waitForRollout(ctx context.Context, client *exutil.CLI) {
555556 // This means that the KAS-O has successfully started being configured
556557 // with our auth resource changes.
557558 o .Eventually (func (gomega o.Gomega ) {
558- kas , err := kasCli .Get (ctx , "cluster" , metav1.GetOptions {})
559- gomega .Expect (err ).NotTo (o .HaveOccurred (), "should not encounter an error fetching the KAS" )
560-
561- found := false
562- nipCond := operatorv1.OperatorCondition {}
563- for _ , cond := range kas .Status .Conditions {
564- if cond .Type == condition .NodeInstallerProgressingConditionType {
565- found = true
566- nipCond = cond
567- break
568- }
569- }
570-
571- gomega .Expect (found ).To (o .BeTrue (), "should have found the NodeInstallerProgressing condition" )
572- gomega .Expect (nipCond .Status ).To (o .Equal (operatorv1 .ConditionTrue ), "NodeInstallerProgressing condition should be True" , nipCond )
559+ err := checkKubeAPIServerCondition (ctx , kasCli , condition .NodeInstallerProgressingConditionType , operatorv1 .ConditionTrue )
560+ gomega .Expect (err ).NotTo (o .HaveOccurred ())
573561 }).WithTimeout (10 * time .Minute ).WithPolling (20 * time .Second ).Should (o .Succeed (), "should eventually begin rolling out a new revision" )
574562
575- // waitTime is in minutes - set to 30 minute wait for cluster operators to settle
576- err := operator .WaitForOperatorsToSettle (ctx , client .AdminConfigClient (), 30 )
563+ // waitTime is in minutes - set to 50 minute wait for cluster operators to settle
564+ // Usually, it doesn't take nearly an hour for cluster operators to settle,
565+ // but the disruptive nature of how we are testing here means we _may_
566+ // encounter scenarios where the KAS is undergoing multiple revision rollouts
567+ // in succession. The worst case we've seen is 2 back-to-back revision rollouts
568+ // which led to the cluster-authentication-operator being unavailable for ~35-45
569+ // minutes as it waited for the KAS to finish rolling out so it could begin
570+ // applying whatever configuration it needs to.
571+ err := operator .WaitForOperatorsToSettle (ctx , client .AdminConfigClient (), 50 )
577572 o .Expect (err ).NotTo (o .HaveOccurred (), "should not encounter an error waiting for the cluster operators to settle" )
578573}
574+
575+ // checkKubeAPIServerCondition is a utility function to check that the KubeAPIServer
576+ // resource on the cluster has a status condition type set with the expected
577+ // condition status. If it does not, it returns an error. If it does, it returns <nil>.
578+ func checkKubeAPIServerCondition (ctx context.Context , kasCli operatorv1client.KubeAPIServerInterface , conditionType string , conditionStatus operatorv1.ConditionStatus ) error {
579+ kas , err := kasCli .Get (ctx , "cluster" , metav1.GetOptions {})
580+ if err != nil {
581+ return fmt .Errorf ("getting KAS: %w" , err )
582+ }
583+
584+ found := false
585+ nipCond := operatorv1.OperatorCondition {}
586+ for _ , cond := range kas .Status .Conditions {
587+ if cond .Type == condition .NodeInstallerProgressingConditionType {
588+ found = true
589+ nipCond = cond
590+ break
591+ }
592+ }
593+
594+ if ! found {
595+ return fmt .Errorf ("no condition %q found in KAS status conditions" , conditionType )
596+ }
597+
598+ if nipCond .Status != conditionStatus {
599+ return fmt .Errorf ("condition %q expected to have status %q, but has status %q instead. Full condition: %v" , conditionType , conditionStatus , nipCond .Status , nipCond )
600+ }
601+
602+ return nil
603+ }
0 commit comments