Skip to content

Commit 2165590

Browse files
author
Jeff Peeler
committed
fix(olm): handle CSV install failure gracefully
Specifically, don't continuously retry CSV installation when a forbidden error is returned. A new deployment error and a new CSV reason have been created for this scenario. The CSV sync loop will not process a CSV that has the no-retry reason set.
1 parent 201c8aa commit 2165590

File tree

4 files changed

+21
-3
lines changed

4 files changed

+21
-3
lines changed

pkg/api/apis/operators/v1alpha1/clusterserviceversion_types.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ const (
231231
CSVReasonRequirementsMet ConditionReason = "AllRequirementsMet"
232232
CSVReasonOwnerConflict ConditionReason = "OwnerConflict"
233233
CSVReasonComponentFailed ConditionReason = "InstallComponentFailed"
234+
CSVReasonComponentFailedNoRetry ConditionReason = "InstallComponentFailedNoRetry"
234235
CSVReasonInvalidStrategy ConditionReason = "InvalidInstallStrategy"
235236
CSVReasonWaiting ConditionReason = "InstallWaiting"
236237
CSVReasonInstallSuccessful ConditionReason = "InstallSucceeded"

pkg/controller/install/deployment.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
appsv1 "k8s.io/api/apps/v1"
88
rbac "k8s.io/api/rbac/v1"
99
"k8s.io/apimachinery/pkg/api/equality"
10+
k8serrors "k8s.io/apimachinery/pkg/api/errors"
1011
"k8s.io/apimachinery/pkg/util/diff"
1112

1213
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1"
@@ -160,6 +161,9 @@ func (i *StrategyDeploymentInstaller) Install(s Strategy) error {
160161
}
161162

162163
if err := i.installDeployments(strategy.DeploymentSpecs); err != nil {
164+
if k8serrors.IsForbidden(err) {
165+
return StrategyError{Reason: StrategyErrInsufficientPermissions, Message: fmt.Sprintf("install strategy failed: %s", err)}
166+
}
163167
return err
164168
}
165169

pkg/controller/install/errors.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@ const (
1111
StrategyErrReasonUnknown = "Unknown"
1212
StrategyErrBadPatch = "PatchUnsuccessful"
1313
StrategyErrDeploymentUpdated = "DeploymentUpdated"
14+
StrategyErrInsufficientPermissions = "InsufficentPermissions"
1415
)
1516

1617
// unrecoverableErrors are the set of errors that mean we can't recover an install strategy
1718
var unrecoverableErrors = map[string]struct{}{
18-
StrategyErrReasonInvalidStrategy: {},
19-
StrategyErrReasonTimeout: {},
20-
StrategyErrBadPatch: {},
19+
StrategyErrReasonInvalidStrategy: {},
20+
StrategyErrReasonTimeout: {},
21+
StrategyErrBadPatch: {},
22+
StrategyErrInsufficientPermissions: {},
2123
}
2224

2325
// StrategyError is used to represent error types for install strategies

pkg/controller/operators/olm/operator.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,6 +1119,12 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
11191119
"phase": in.Status.Phase,
11201120
})
11211121

1122+
if in.Status.Reason == v1alpha1.CSVReasonComponentFailedNoRetry {
1123+
// will change phase out of failed-no-retry in the event of an intentional requeue
1124+
logger.Debugf("skipping sync for CSV in failed-no-retry state")
1125+
return
1126+
}
1127+
11221128
out = in.DeepCopy()
11231129
now := a.now()
11241130

@@ -1308,6 +1314,11 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
13081314
}
13091315

13101316
if syncError = installer.Install(strategy); syncError != nil {
1317+
if install.IsErrorUnrecoverable(syncError) {
1318+
logger.Infof("Setting CSV reason to failed without retry: %v", syncError)
1319+
out.SetPhaseWithEvent(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonComponentFailedNoRetry, fmt.Sprintf("install strategy failed: %s", syncError), now, a.recorder)
1320+
return
1321+
}
13111322
out.SetPhaseWithEvent(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonComponentFailed, fmt.Sprintf("install strategy failed: %s", syncError), now, a.recorder)
13121323
return
13131324
}

0 commit comments

Comments
 (0)