Skip to content

Commit 7f91c16

Browse files
committed
Indicate invalid OperatorGroup on InstallPlan status
The InstallPlan reconciler/sync will now update the InstallPlan phase as failed if it sees an invalid Operator Group.
1 parent 51cdd74 commit 7f91c16

File tree

5 files changed

+177
-46
lines changed

5 files changed

+177
-46
lines changed

pkg/controller/operators/catalog/operator.go

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,10 +1331,44 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
13311331
return
13321332
}
13331333

1334+
if plan.Status.Phase == v1alpha1.InstallPlanPhaseFailed {
1335+
return
1336+
}
1337+
13341338
querier := o.serviceAccountQuerier.NamespaceQuerier(plan.GetNamespace())
13351339
ref, err := querier()
13361340
if err != nil {
1337-
syncError = fmt.Errorf("attenuated service account query failed - %v", err)
1341+
1342+
// Retry sync if non-fatal error
1343+
if !scoped.IsOperatorGroupError(err) {
1344+
syncError = fmt.Errorf("attenuated service account query failed - %v", err)
1345+
return
1346+
}
1347+
1348+
// Mark the InstallPlan as failed for a fatal Operator Group related error
1349+
ipFailError := fmt.Errorf("attenuated service account query failed - %v", err)
1350+
logger.Infof("InstallPlan failed: %v", ipFailError)
1351+
1352+
now := o.now()
1353+
out := plan.DeepCopy()
1354+
out.Status.SetCondition(v1alpha1.ConditionFailed(v1alpha1.InstallPlanInstalled,
1355+
v1alpha1.InstallPlanReasonInstallCheckFailed, ipFailError.Error(), &now))
1356+
out.Status.Phase = v1alpha1.InstallPlanPhaseFailed
1357+
1358+
logger.Info("Transitioning InstallPlan to failed")
1359+
if _, err := o.client.OperatorsV1alpha1().InstallPlans(plan.GetNamespace()).UpdateStatus(context.TODO(), out, metav1.UpdateOptions{}); err != nil {
1360+
updateErr := errors.New("error updating InstallPlan status: " + err.Error())
1361+
logger = logger.WithField("updateError", updateErr)
1362+
logger.Info("error transitioning InstallPlan to failed")
1363+
1364+
// retry sync with error to update InstallPlan status
1365+
syncError = fmt.Errorf("InstallPlan failed: %s and error updating InstallPlan status as failed: %s", ipFailError, updateErr)
1366+
return
1367+
}
1368+
1369+
// Requeue subscription to propagate SubscriptionInstallPlanFailed condtion to subscription
1370+
o.requeueSubscriptionForInstallPlan(plan, logger)
1371+
13381372
return
13391373
}
13401374

@@ -1394,19 +1428,7 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
13941428
defer o.ipQueueSet.RequeueAfter(outInstallPlan.GetNamespace(), outInstallPlan.GetName(), time.Second*5)
13951429
}
13961430

1397-
defer func() {
1398-
// Notify subscription loop of installplan changes
1399-
if owners := ownerutil.GetOwnersByKind(plan, v1alpha1.SubscriptionKind); len(owners) > 0 {
1400-
for _, owner := range owners {
1401-
logger.WithField("owner", owner).Debug("requeueing installplan owner")
1402-
if err := o.subQueueSet.Requeue(plan.GetNamespace(), owner.Name); err != nil {
1403-
logger.WithError(err).Warn("error requeuing installplan owner")
1404-
}
1405-
}
1406-
return
1407-
}
1408-
logger.Trace("no installplan owner subscriptions found to requeue")
1409-
}()
1431+
defer o.requeueSubscriptionForInstallPlan(plan, logger)
14101432

14111433
// Update InstallPlan with status of transition. Log errors if we can't write them to the status.
14121434
if _, err := o.client.OperatorsV1alpha1().InstallPlans(plan.GetNamespace()).UpdateStatus(context.TODO(), outInstallPlan, metav1.UpdateOptions{}); err != nil {
@@ -1423,6 +1445,20 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
14231445
return
14241446
}
14251447

1448+
func (o *Operator) requeueSubscriptionForInstallPlan(plan *v1alpha1.InstallPlan, logger *logrus.Entry) {
1449+
// Notify subscription loop of installplan changes
1450+
if owners := ownerutil.GetOwnersByKind(plan, v1alpha1.SubscriptionKind); len(owners) > 0 {
1451+
for _, owner := range owners {
1452+
logger.WithField("owner", owner).Debug("requeueing installplan owner")
1453+
if err := o.subQueueSet.Requeue(plan.GetNamespace(), owner.Name); err != nil {
1454+
logger.WithError(err).Warn("error requeuing installplan owner")
1455+
}
1456+
}
1457+
return
1458+
}
1459+
logger.Trace("no installplan owner subscriptions found to requeue")
1460+
}
1461+
14261462
type installPlanTransitioner interface {
14271463
ResolvePlan(*v1alpha1.InstallPlan) error
14281464
ExecutePlan(*v1alpha1.InstallPlan) error

pkg/controller/operators/catalog/operator_test.go

Lines changed: 92 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
4242
apiregistrationfake "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/fake"
4343

44+
operatorsv1 "github.com/operator-framework/api/pkg/operators/v1"
4445
"github.com/operator-framework/api/pkg/operators/v1alpha1"
4546
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned/fake"
4647
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/informers/externalversions"
@@ -142,38 +143,75 @@ func TestTransitionInstallPlan(t *testing.T) {
142143

143144
func TestSyncInstallPlanUnhappy(t *testing.T) {
144145
namespace := "ns"
146+
ipWithSteps := withSteps(installPlan("p", namespace, v1alpha1.InstallPlanPhaseInstalling, "csv"),
147+
[]*v1alpha1.Step{
148+
{
149+
Resource: v1alpha1.StepResource{
150+
CatalogSource: "catalog",
151+
CatalogSourceNamespace: namespace,
152+
Group: "",
153+
Version: "v1",
154+
Kind: "ServiceAccount",
155+
Name: "sa",
156+
Manifest: toManifest(t, serviceAccount("sa", namespace, "",
157+
objectReference("init secret"))),
158+
},
159+
Status: v1alpha1.StepStatusUnknown,
160+
},
161+
},
162+
)
145163

146164
tests := []struct {
147-
testName string
148-
in *v1alpha1.InstallPlan
149-
err error
165+
testName string
166+
in *v1alpha1.InstallPlan
167+
err error
168+
expectedPhase v1alpha1.InstallPlanPhase
169+
operatorGroups []*operatorsv1.OperatorGroup
170+
clientObjs []runtime.Object
150171
}{
151172
{
152-
testName: "NoStatus",
153-
in: installPlan("p", namespace, v1alpha1.InstallPlanPhaseNone),
154-
err: nil,
173+
testName: "NoStatus",
174+
in: installPlan("p", namespace, v1alpha1.InstallPlanPhaseNone),
175+
err: nil,
176+
expectedPhase: v1alpha1.InstallPlanPhaseNone,
155177
},
156178
{
157-
// This checks that installplans are not applied when no operatorgroup is present
158-
testName: "HasSteps/NoOperatorGroup",
159-
in: withSteps(installPlan("p", namespace, v1alpha1.InstallPlanPhaseInstalling, "csv"),
160-
[]*v1alpha1.Step{
161-
{
162-
Resource: v1alpha1.StepResource{
163-
CatalogSource: "catalog",
164-
CatalogSourceNamespace: namespace,
165-
Group: "",
166-
Version: "v1",
167-
Kind: "ServiceAccount",
168-
Name: "sa",
169-
Manifest: toManifest(t, serviceAccount("sa", namespace, "",
170-
objectReference("init secret"))),
171-
},
172-
Status: v1alpha1.StepStatusUnknown,
173-
},
174-
},
175-
),
176-
err: fmt.Errorf("attenuated service account query failed - no operator group found that is managing this namespace"),
179+
// This checks that an installplan is marked as failed when no operatorgroup is present
180+
testName: "HasSteps/NoOperatorGroup",
181+
in: ipWithSteps,
182+
err: nil,
183+
expectedPhase: v1alpha1.InstallPlanPhaseFailed,
184+
},
185+
{
186+
// This checks that an installplan is marked as failed when multiple operator groups are present for the same namespace
187+
testName: "HasSteps/TooManyOperatorGroups",
188+
in: ipWithSteps,
189+
err: nil,
190+
expectedPhase: v1alpha1.InstallPlanPhaseFailed,
191+
operatorGroups: []*operatorsv1.OperatorGroup{
192+
operatorGroup("og1", "sa", namespace,
193+
&corev1.ObjectReference{
194+
Kind: "ServiceAccount",
195+
Namespace: namespace,
196+
Name: "sa",
197+
}),
198+
operatorGroup("og2", "sa", namespace,
199+
&corev1.ObjectReference{
200+
Kind: "ServiceAccount",
201+
Namespace: namespace,
202+
Name: "sa",
203+
}),
204+
},
205+
},
206+
{
207+
// This checks that an installplan is marked as failed when no service account is synced for the operator group, i.e the service account ref doesn't exist
208+
testName: "HasSteps/NonExistentServiceAccount",
209+
in: ipWithSteps,
210+
err: nil,
211+
expectedPhase: v1alpha1.InstallPlanPhaseFailed,
212+
operatorGroups: []*operatorsv1.OperatorGroup{
213+
operatorGroup("og", "sa1", namespace, nil),
214+
},
177215
},
178216
}
179217

@@ -182,11 +220,21 @@ func TestSyncInstallPlanUnhappy(t *testing.T) {
182220
ctx, cancel := context.WithCancel(context.TODO())
183221
defer cancel()
184222

185-
op, err := NewFakeOperator(ctx, namespace, []string{namespace}, withClientObjs(tt.in))
223+
tt.clientObjs = append(tt.clientObjs, tt.in)
224+
for _, og := range tt.operatorGroups {
225+
tt.clientObjs = append(tt.clientObjs, og)
226+
}
227+
228+
op, err := NewFakeOperator(ctx, namespace, []string{namespace}, withClientObjs(tt.clientObjs...))
186229
require.NoError(t, err)
187230

188231
err = op.syncInstallPlans(tt.in)
189232
require.Equal(t, tt.err, err)
233+
234+
ip, err := op.client.OperatorsV1alpha1().InstallPlans(namespace).Get(ctx, tt.in.Name, metav1.GetOptions{})
235+
require.NoError(t, err)
236+
237+
require.Equal(t, tt.expectedPhase, ip.Status.Phase)
190238
})
191239
}
192240
}
@@ -1535,3 +1583,20 @@ func apiResourcesForObjects(objs []runtime.Object) []*metav1.APIResourceList {
15351583
}
15361584
return apis
15371585
}
1586+
1587+
func operatorGroup(ogName, saName, namespace string, saRef *corev1.ObjectReference) *operatorsv1.OperatorGroup {
1588+
return &operatorsv1.OperatorGroup{
1589+
ObjectMeta: metav1.ObjectMeta{
1590+
Name: ogName,
1591+
Namespace: namespace,
1592+
},
1593+
Spec: operatorsv1.OperatorGroupSpec{
1594+
TargetNamespaces: []string{namespace},
1595+
ServiceAccountName: saName,
1596+
},
1597+
Status: operatorsv1.OperatorGroupStatus{
1598+
Namespaces: []string{namespace},
1599+
ServiceAccountRef: saRef,
1600+
},
1601+
}
1602+
}

pkg/lib/scoped/error.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package scoped
2+
3+
// operatorGroupError is an error type returned by the service account querier when
4+
// there is an invalid operatorGroup (zero groups, multiple groups, non-existent service account)
5+
type operatorGroupError struct {
6+
s string
7+
}
8+
9+
func NewOperatorGroupError(s string) error {
10+
return operatorGroupError{s: s}
11+
}
12+
13+
func (e operatorGroupError) Error() string {
14+
return e.s
15+
}
16+
17+
func (e operatorGroupError) IsOperatorGroupError() bool {
18+
return true
19+
}
20+
21+
// IsOperatorGroupError checks if an error is an operator group error
22+
// This lets us classify multiple errors as operatorGroupError without
23+
// defining and checking all the specific error value types
24+
func IsOperatorGroupError(err error) bool {
25+
type operatorGroupError interface {
26+
IsOperatorGroupError() bool
27+
}
28+
ogErr, ok := err.(operatorGroupError)
29+
return ok && ogErr.IsOperatorGroupError()
30+
}

pkg/lib/scoped/querier.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func queryServiceAccountFromNamespace(logger *logrus.Entry, crclient versioned.I
5555
}
5656

5757
if len(list.Items) == 0 {
58-
err = fmt.Errorf("no operator group found that is managing this namespace")
58+
err = NewOperatorGroupError("no operator group found that is managing this namespace")
5959
return
6060
}
6161

@@ -70,12 +70,12 @@ func queryServiceAccountFromNamespace(logger *logrus.Entry, crclient versioned.I
7070
}
7171

7272
if len(groups) == 0 {
73-
err = fmt.Errorf("no operator group found that is managing this namespace")
73+
err = NewOperatorGroupError("no operator group found that is managing this namespace")
7474
return
7575
}
7676

7777
if len(groups) > 1 {
78-
err = fmt.Errorf("more than one operator group(s) are managing this namespace count=%d", len(groups))
78+
err = NewOperatorGroupError(fmt.Sprintf("more than one operator group(s) are managing this namespace count=%d", len(groups)))
7979
return
8080
}
8181

@@ -86,7 +86,7 @@ func queryServiceAccountFromNamespace(logger *logrus.Entry, crclient versioned.I
8686
}
8787

8888
if !group.HasServiceAccountSynced() {
89-
err = fmt.Errorf("please make sure the service account exists. sa=%s operatorgroup=%s/%s", group.Spec.ServiceAccountName, group.GetNamespace(), group.GetName())
89+
err = NewOperatorGroupError(fmt.Sprintf("please make sure the service account exists. sa=%s operatorgroup=%s/%s", group.Spec.ServiceAccountName, group.GetNamespace(), group.GetName()))
9090
return
9191
}
9292

pkg/lib/scoped/querier_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ func TestUserDefinedServiceAccountQuerier(t *testing.T) {
149149
}
150150
gotReference, err := f.NamespaceQuerier(tt.namespace)()
151151
if tt.wantErr {
152-
require.Equal(t, tt.err, err)
152+
require.Equal(t, tt.err.Error(), err.Error())
153153
} else {
154154
require.Nil(t, tt.err)
155155
}

0 commit comments

Comments
 (0)