Skip to content

Commit 1527393

Browse files
committed
OTA-1643: Each CO must go Progressing during upgrade
1 parent f0041b6 commit 1527393

File tree

2 files changed

+116
-88
lines changed

2 files changed

+116
-88
lines changed

pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func (w *legacyMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.C
4545
isUpgrade := platformidentification.DidUpgradeHappenDuringCollection(finalIntervals, time.Time{}, time.Time{})
4646
if isUpgrade {
4747
junits = append(junits, testUpgradeOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...)
48-
junits = append(junits, clusterOperatorIsNotProgressingWhenMachineConfigIs(finalIntervals)...)
48+
junits = append(junits, testUpgradeOperatorProgressingStateTransitions(finalIntervals)...)
4949
} else {
5050
junits = append(junits, testStableSystemOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...)
5151
}

pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go

Lines changed: 115 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,7 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes []
610610
return ret
611611
}
612612

613-
func clusterOperatorIsNotProgressingWhenMachineConfigIs(events monitorapi.Intervals) []*junitapi.JUnitTestCase {
613+
func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals) []*junitapi.JUnitTestCase {
614614
var ret []*junitapi.JUnitTestCase
615615
upgradeWindows := getUpgradeWindows(events)
616616

@@ -633,31 +633,127 @@ func clusterOperatorIsNotProgressingWhenMachineConfigIs(events monitorapi.Interv
633633
duration := stop.Sub(start).Seconds()
634634

635635
eventsByOperator := getEventsByOperator(eventsInUpgradeWindows)
636-
for _, mcEvent := range eventsByOperator["machine-config"] {
637-
condition := monitorapi.GetOperatorConditionStatus(mcEvent)
638-
if condition == nil {
639-
continue // ignore non-condition intervals
640-
}
641-
if condition.Type == configv1.OperatorProgressing && condition.Status == configv1.ConditionTrue {
642-
machineConfigProgressingStart = mcEvent.To
643-
break
636+
coProgressingStart := map[string]time.Time{}
637+
for _, operatorName := range platformidentification.KnownOperators.List() {
638+
for _, mcEvent := range eventsByOperator[operatorName] {
639+
condition := monitorapi.GetOperatorConditionStatus(mcEvent)
640+
if condition == nil {
641+
continue // ignore non-condition intervals
642+
}
643+
if condition.Type == configv1.OperatorProgressing && condition.Status == configv1.ConditionTrue {
644+
coProgressingStart[operatorName] = mcEvent.To
645+
if operatorName == "machine-config" {
646+
machineConfigProgressingStart = mcEvent.To
647+
}
648+
break
649+
}
644650
}
645651
}
646652

647-
mcTestCase := &junitapi.JUnitTestCase{
648-
Name: fmt.Sprintf("[bz-Machine Config Operator] clusteroperator/machine-config must go Progressing=True during an upgrade test"),
649-
Duration: duration,
653+
except := func(co string, _ string) string {
654+
return ""
650655
}
651-
if machineConfigProgressingStart.IsZero() {
652-
mcTestCase.FailureOutput = &junitapi.FailureOutput{
653-
Output: fmt.Sprintf("machine-config was never Progressing=True during the upgrade window from %s to %s", start.Format(time.RFC3339), stop.Format(time.RFC3339)),
656+
657+
// Each cluster operator must report Progressing=True during cluster upgrade
658+
for _, operatorName := range platformidentification.KnownOperators.List() {
659+
bzComponent := platformidentification.GetBugzillaComponentForOperator(operatorName)
660+
name := fmt.Sprintf("[bz-%s] clusteroperator/%s must go Progressing=True during an upgrade test", bzComponent, operatorName)
661+
mcTestCase := &junitapi.JUnitTestCase{
662+
Name: name,
663+
Duration: duration,
664+
}
665+
var exception string
666+
if t, ok := coProgressingStart[operatorName]; !ok || t.IsZero() {
667+
output := fmt.Sprintf("clusteroperator/%s was never Progressing=True during the upgrade window from %s to %s", operatorName, start.Format(time.RFC3339), stop.Format(time.RFC3339))
668+
exception = except(operatorName, "")
669+
if exception != "" {
670+
output = fmt.Sprintf("%s which is expected up to %s", output, exception)
671+
}
672+
mcTestCase.FailureOutput = &junitapi.FailureOutput{
673+
Output: output,
674+
}
675+
} else {
676+
mcTestCase.SystemOut = fmt.Sprintf("clusteroperator/%s became Progressing=True at %s during the upgrade window from %s to %s", operatorName, t.Format(time.RFC3339), start.Format(time.RFC3339), stop.Format(time.RFC3339))
677+
}
678+
ret = append(ret, mcTestCase)
679+
// add a success so we flake (or pass) and don't fail
680+
if exception != "" {
681+
ret = append(ret, &junitapi.JUnitTestCase{
682+
Name: name,
683+
SystemOut: "Passing the case to make the overall test case flake as the previous failure is expected",
684+
})
654685
}
655-
return []*junitapi.JUnitTestCase{mcTestCase}
656-
} else {
657-
mcTestCase.SystemOut = fmt.Sprintf("machine-config became Progressing=True at %s during the upgrade window from %s to %s", machineConfigProgressingStart.Format(time.RFC3339), start.Format(time.RFC3339), stop.Format(time.RFC3339))
658686
}
659-
ret = append(ret, mcTestCase)
660687

688+
except = func(co string, reason string) string {
689+
switch co {
690+
case "console":
691+
if reason == "SyncLoopRefresh_InProgress" {
692+
return "https://issues.redhat.com/browse/OCPBUGS-64688"
693+
}
694+
case "csi-snapshot-controller":
695+
if reason == "CSISnapshotController_Deploying" {
696+
return "https://issues.redhat.com/browse/OCPBUGS-62624"
697+
}
698+
case "dns":
699+
if reason == "DNSReportsProgressingIsTrue" {
700+
return "https://issues.redhat.com/browse/OCPBUGS-62623"
701+
}
702+
case "image-registry":
703+
if reason == "NodeCADaemonUnavailable::Ready" || reason == "DeploymentNotCompleted" {
704+
return "https://issues.redhat.com/browse/OCPBUGS-62626"
705+
}
706+
case "ingress":
707+
if reason == "Reconciling" {
708+
return "https://issues.redhat.com/browse/OCPBUGS-62627"
709+
}
710+
case "kube-storage-version-migrator":
711+
if reason == "KubeStorageVersionMigrator_Deploying" {
712+
return "https://issues.redhat.com/browse/OCPBUGS-62629"
713+
}
714+
case "network":
715+
if reason == "Deploying" {
716+
return "https://issues.redhat.com/browse/OCPBUGS-62630"
717+
}
718+
case "node-tuning":
719+
if reason == "Reconciling" || reason == "ProfileProgressing" {
720+
return "https://issues.redhat.com/browse/OCPBUGS-62632"
721+
}
722+
case "openshift-controller-manager":
723+
// _DesiredStateNotYetAchieved
724+
// RouteControllerManager_DesiredStateNotYetAchieved
725+
if strings.HasSuffix(reason, "_DesiredStateNotYetAchieved") {
726+
return "https://issues.redhat.com/browse/OCPBUGS-63116"
727+
}
728+
case "service-ca":
729+
if reason == "_ManagedDeploymentsAvailable" {
730+
return "https://issues.redhat.com/browse/OCPBUGS-62633"
731+
}
732+
case "storage":
733+
// GCPPDCSIDriverOperatorCR_GCPPDDriverControllerServiceController_Deploying
734+
// GCPPDCSIDriverOperatorCR_GCPPDDriverNodeServiceController_Deploying
735+
// AWSEBSCSIDriverOperatorCR_AWSEBSDriverNodeServiceController_Deploying
736+
// VolumeDataSourceValidatorDeploymentController_Deploying
737+
// GCPPD_Deploying
738+
// AWSEBS_Deploying
739+
if strings.HasSuffix(reason, "_Deploying") {
740+
return "https://issues.redhat.com/browse/OCPBUGS-62634"
741+
}
742+
case "olm":
743+
// CatalogdDeploymentCatalogdControllerManager_Deploying
744+
// OperatorcontrollerDeploymentOperatorControllerControllerManager_Deploying
745+
if strings.HasSuffix(reason, "ControllerManager_Deploying") {
746+
return "https://issues.redhat.com/browse/OCPBUGS-62635"
747+
}
748+
case "operator-lifecycle-manager-packageserver":
749+
if reason == "" {
750+
return "https://issues.redhat.com/browse/OCPBUGS-63672"
751+
}
752+
}
753+
return ""
754+
}
755+
756+
// No cluster operator should report Progressing=True after machine-config does
661757
for _, operatorName := range platformidentification.KnownOperators.Difference(sets.NewString("machine-config")).List() {
662758
bzComponent := platformidentification.GetBugzillaComponentForOperator(operatorName)
663759
testName := fmt.Sprintf("[bz-%v] clusteroperator/%v should stay Progressing=False while MCO is Progressing=True", bzComponent, operatorName)
@@ -670,74 +766,6 @@ func clusterOperatorIsNotProgressingWhenMachineConfigIs(events monitorapi.Interv
670766
continue
671767
}
672768

673-
except := func(co string, reason string) string {
674-
switch co {
675-
case "console":
676-
if reason == "SyncLoopRefresh_InProgress" {
677-
return "https://issues.redhat.com/browse/OCPBUGS-64688"
678-
}
679-
case "csi-snapshot-controller":
680-
if reason == "CSISnapshotController_Deploying" {
681-
return "https://issues.redhat.com/browse/OCPBUGS-62624"
682-
}
683-
case "dns":
684-
if reason == "DNSReportsProgressingIsTrue" {
685-
return "https://issues.redhat.com/browse/OCPBUGS-62623"
686-
}
687-
case "image-registry":
688-
if reason == "NodeCADaemonUnavailable::Ready" || reason == "DeploymentNotCompleted" {
689-
return "https://issues.redhat.com/browse/OCPBUGS-62626"
690-
}
691-
case "ingress":
692-
if reason == "Reconciling" {
693-
return "https://issues.redhat.com/browse/OCPBUGS-62627"
694-
}
695-
case "kube-storage-version-migrator":
696-
if reason == "KubeStorageVersionMigrator_Deploying" {
697-
return "https://issues.redhat.com/browse/OCPBUGS-62629"
698-
}
699-
case "network":
700-
if reason == "Deploying" {
701-
return "https://issues.redhat.com/browse/OCPBUGS-62630"
702-
}
703-
case "node-tuning":
704-
if reason == "Reconciling" || reason == "ProfileProgressing" {
705-
return "https://issues.redhat.com/browse/OCPBUGS-62632"
706-
}
707-
case "openshift-controller-manager":
708-
// _DesiredStateNotYetAchieved
709-
// RouteControllerManager_DesiredStateNotYetAchieved
710-
if strings.HasSuffix(reason, "_DesiredStateNotYetAchieved") {
711-
return "https://issues.redhat.com/browse/OCPBUGS-63116"
712-
}
713-
case "service-ca":
714-
if reason == "_ManagedDeploymentsAvailable" {
715-
return "https://issues.redhat.com/browse/OCPBUGS-62633"
716-
}
717-
case "storage":
718-
// GCPPDCSIDriverOperatorCR_GCPPDDriverControllerServiceController_Deploying
719-
// GCPPDCSIDriverOperatorCR_GCPPDDriverNodeServiceController_Deploying
720-
// AWSEBSCSIDriverOperatorCR_AWSEBSDriverNodeServiceController_Deploying
721-
// VolumeDataSourceValidatorDeploymentController_Deploying
722-
// GCPPD_Deploying
723-
// AWSEBS_Deploying
724-
if strings.HasSuffix(reason, "_Deploying") {
725-
return "https://issues.redhat.com/browse/OCPBUGS-62634"
726-
}
727-
case "olm":
728-
// CatalogdDeploymentCatalogdControllerManager_Deploying
729-
// OperatorcontrollerDeploymentOperatorControllerControllerManager_Deploying
730-
if strings.HasSuffix(reason, "ControllerManager_Deploying") {
731-
return "https://issues.redhat.com/browse/OCPBUGS-62635"
732-
}
733-
case "operator-lifecycle-manager-packageserver":
734-
if reason == "" {
735-
return "https://issues.redhat.com/browse/OCPBUGS-63672"
736-
}
737-
}
738-
return ""
739-
}
740-
741769
var excepted, fatal []string
742770
for _, operatorEvent := range operatorEvents {
743771
if operatorEvent.From.Before(machineConfigProgressingStart) {

0 commit comments

Comments
 (0)