Skip to content

Commit 721fd85

Browse files
committed
Improve transitionCSVState error logs
Problem: OLM has received complaints that a while transitioning a CSV from state to state, OLM suppresses a number of helpful error logs. Currently, OLM surfaces errors in the reconciler that can be addressed with additional syncs. Errors that cannot be addressed with additional syncs should be logged. Solution: Log errors that are not surfaced in OLM logs via the reconciler.
1 parent 7e476bb commit 721fd85

File tree

1 file changed

+20
-7
lines changed

1 file changed

+20
-7
lines changed

pkg/controller/operators/olm/operator.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1278,6 +1278,7 @@ func (a *Operator) operatorGroupForCSV(csv *v1alpha1.ClusterServiceVersion, logg
12781278
}
12791279

12801280
// transitionCSVState moves the CSV status state machine along based on the current value and the current cluster state.
1281+
// SyncError should be returned when an additional reconcile of the CSV might fix the issue.
12811282
func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v1alpha1.ClusterServiceVersion, syncError error) {
12821283
logger := a.logger.WithFields(logrus.Fields{
12831284
"id": queueinformer.NewLoopID(),
@@ -1297,8 +1298,8 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
12971298

12981299
operatorSurface, err := resolver.NewOperatorFromV1Alpha1CSV(out)
12991300
if err != nil {
1300-
// TODO: Add failure status to CSV
1301-
syncError = err
1301+
// If the resolver is unable to retrieve the operator info from the CSV the CSV requires changes, a syncError should not be returned.
1302+
logger.WithError(err).Warn("Unable to retrieve operator information from CSV")
13021303
return
13031304
}
13041305

@@ -1327,9 +1328,8 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
13271328

13281329
modeSet, err := v1alpha1.NewInstallModeSet(out.Spec.InstallModes)
13291330
if err != nil {
1330-
syncError = err
13311331
logger.WithError(err).Warn("csv has invalid installmodes")
1332-
out.SetPhaseWithEventIfChanged(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonInvalidInstallModes, syncError.Error(), now, a.recorder)
1332+
out.SetPhaseWithEventIfChanged(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonInvalidInstallModes, err.Error(), now, a.recorder)
13331333
return
13341334
}
13351335

@@ -1454,15 +1454,18 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
14541454
// Create a map to track unique names
14551455
webhookNames := map[string]struct{}{}
14561456
// Check if Webhooks have valid rules and unique names
1457+
// TODO: Move this to validating library
14571458
for _, desc := range out.Spec.WebhookDefinitions {
14581459
_, present := webhookNames[desc.GenerateName]
14591460
if present {
1461+
logger.WithError(fmt.Errorf("Repeated WebhookDescription name %s", desc.GenerateName)).Warn("CSV is invalid")
14601462
out.SetPhaseWithEventIfChanged(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonInvalidWebhookDescription, "CSV contains repeated WebhookDescription name", now, a.recorder)
14611463
return
14621464
}
14631465
webhookNames[desc.GenerateName] = struct{}{}
1464-
if syncError = install.ValidWebhookRules(desc.Rules); syncError != nil {
1465-
out.SetPhaseWithEventIfChanged(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonInvalidWebhookDescription, syncError.Error(), now, a.recorder)
1466+
if err = install.ValidWebhookRules(desc.Rules); err != nil {
1467+
logger.WithError(err).Warnf("WebhookDescription %s includes invalid rules", desc.GenerateName)
1468+
out.SetPhaseWithEventIfChanged(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonInvalidWebhookDescription, err.Error(), now, a.recorder)
14661469
return
14671470
}
14681471
}
@@ -1486,6 +1489,7 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
14861489
// Check if we're not ready to install part of the replacement chain yet
14871490
if prev := a.isReplacing(out); prev != nil {
14881491
if prev.Status.Phase != v1alpha1.CSVPhaseReplacing {
1492+
logger.WithError(fmt.Errorf("CSV being replaced is in phase %s instead of %s", prev.Status.Phase, v1alpha1.CSVPhaseReplacing)).Debug("Unable to replace previous CSV")
14891493
return
14901494
}
14911495
}
@@ -1532,6 +1536,7 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
15321536

15331537
strategy, err := a.updateDeploymentSpecsWithApiServiceData(out, strategy)
15341538
if err != nil {
1539+
logger.WithError(err).Debug("Unable to calculate expected deployment")
15351540
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsReinstall, "calculated deployment install is bad", now, a.recorder)
15361541
return
15371542
}
@@ -1559,13 +1564,15 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
15591564

15601565
// Check if any generated resources are missing
15611566
if err := a.checkAPIServiceResources(out, certs.PEMSHA256); err != nil {
1567+
logger.WithError(err).Debug("API Resources are unavailable")
15621568
out.SetPhaseWithEvent(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonAPIServiceResourceIssue, err.Error(), now, a.recorder)
15631569
return
15641570
}
15651571

15661572
// Check if it's time to refresh owned APIService certs
15671573
if install.ShouldRotateCerts(out) {
1568-
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsCertRotation, "owned APIServices need cert refresh", now, a.recorder)
1574+
logger.Debug("CSV owns resources that require a cert refresh")
1575+
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsCertRotation, "CSV owns resources that require a cert refresh", now, a.recorder)
15691576
return
15701577
}
15711578

@@ -1576,6 +1583,7 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
15761583
out.SetPhaseWithEvent(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonInvalidStrategy, fmt.Sprintf("install strategy invalid: %s", err.Error()), now, a.recorder)
15771584
return
15781585
} else if !met {
1586+
logger.Debug("CSV Requirements are no longer met")
15791587
out.SetRequirementStatus(statuses)
15801588
out.SetPhaseWithEvent(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonRequirementsNotMet, fmt.Sprintf("requirements no longer met"), now, a.recorder)
15811589
return
@@ -1584,6 +1592,7 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
15841592
// Check install status
15851593
strategy, err = a.updateDeploymentSpecsWithApiServiceData(out, strategy)
15861594
if err != nil {
1595+
logger.WithError(err).Debug("Unable to calculate expected deployment")
15871596
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsReinstall, "calculated deployment install is bad", now, a.recorder)
15881597
return
15891598
}
@@ -1639,6 +1648,7 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
16391648
out.SetPhaseWithEvent(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonInvalidStrategy, fmt.Sprintf("install strategy invalid: %s", err.Error()), now, a.recorder)
16401649
return
16411650
} else if !met {
1651+
logger.Debug("CSV Requirements are not met")
16421652
out.SetRequirementStatus(statuses)
16431653
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonRequirementsNotMet, fmt.Sprintf("requirements not met"), now, a.recorder)
16441654
return
@@ -1647,6 +1657,7 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
16471657
// Check if any generated resources are missing and that OLM can action on them
16481658
if err := a.checkAPIServiceResources(out, certs.PEMSHA256); err != nil {
16491659
if a.apiServiceResourceErrorActionable(err) {
1660+
logger.WithError(err).Debug("API Resources are unavailable")
16501661
// Check if API services are adoptable. If not, keep CSV as Failed state
16511662
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonAPIServiceResourcesNeedReinstall, err.Error(), now, a.recorder)
16521663
}
@@ -1655,13 +1666,15 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
16551666

16561667
// Check if it's time to refresh owned APIService certs
16571668
if install.ShouldRotateCerts(out) {
1669+
logger.Debug("CSV owns resources that require a cert refresh")
16581670
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsCertRotation, "owned APIServices need cert refresh", now, a.recorder)
16591671
return
16601672
}
16611673

16621674
// Check install status
16631675
strategy, err = a.updateDeploymentSpecsWithApiServiceData(out, strategy)
16641676
if err != nil {
1677+
logger.WithError(err).Debug("Unable to calculate expected deployment")
16651678
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsReinstall, "calculated deployment install is bad", now, a.recorder)
16661679
return
16671680
}

0 commit comments

Comments
 (0)