Skip to content

Commit 3fe25c4

Browse files
Merge pull request #1006 from tkashem/bz-1740332
Bug 1740332: OLM should resume operator install
2 parents a96b2b8 + ff19541 commit 3fe25c4

File tree

9 files changed

+228
-28
lines changed

9 files changed

+228
-28
lines changed

pkg/api/apis/operators/installplan_types.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,11 @@ var ErrInvalidInstallPlan = errors.New("the InstallPlan contains invalid data")
7878
//
7979
// Status may trail the actual state of a system.
8080
type InstallPlanStatus struct {
81-
Phase InstallPlanPhase
82-
Conditions []InstallPlanCondition
83-
CatalogSources []string
84-
Plan []*Step
81+
Phase InstallPlanPhase
82+
Conditions []InstallPlanCondition
83+
CatalogSources []string
84+
Plan []*Step
85+
AttenuatedServiceAccountRef *corev1.ObjectReference
8586
}
8687

8788
// InstallPlanCondition represents the overall status of the execution of

pkg/api/apis/operators/v1alpha1/installplan_types.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ type InstallPlanStatus struct {
8484
Conditions []InstallPlanCondition `json:"conditions,omitempty"`
8585
CatalogSources []string `json:"catalogSources"`
8686
Plan []*Step `json:"plan,omitempty"`
87+
88+
// AttenuatedServiceAccountRef references the service account that is used
89+
// to do scoped operator install.
90+
AttenuatedServiceAccountRef *corev1.ObjectReference `json:"attenuatedServiceAccountRef,omitempty"`
8791
}
8892

8993
// InstallPlanCondition represents the overall status of the execution of
@@ -131,23 +135,22 @@ func (s *InstallPlanStatus) SetCondition(cond InstallPlanCondition) InstallPlanC
131135
return cond
132136
}
133137

134-
135138
func ConditionFailed(cond InstallPlanConditionType, reason InstallPlanConditionReason, message string, now *metav1.Time) InstallPlanCondition {
136139
return InstallPlanCondition{
137-
Type: cond,
138-
Status: corev1.ConditionFalse,
139-
Reason: reason,
140-
Message: message,
141-
LastUpdateTime: *now,
140+
Type: cond,
141+
Status: corev1.ConditionFalse,
142+
Reason: reason,
143+
Message: message,
144+
LastUpdateTime: *now,
142145
LastTransitionTime: *now,
143146
}
144147
}
145148

146149
func ConditionMet(cond InstallPlanConditionType, now *metav1.Time) InstallPlanCondition {
147150
return InstallPlanCondition{
148-
Type: cond,
149-
Status: corev1.ConditionTrue,
150-
LastUpdateTime: *now,
151+
Type: cond,
152+
Status: corev1.ConditionTrue,
153+
LastUpdateTime: *now,
151154
LastTransitionTime: *now,
152155
}
153156
}

pkg/api/apis/operators/v1alpha1/zz_generated.conversion.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/api/apis/operators/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/api/apis/operators/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package catalog
2+
3+
import (
4+
"errors"
5+
6+
"github.com/sirupsen/logrus"
7+
8+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
"k8s.io/apimachinery/pkg/labels"
10+
"k8s.io/apimachinery/pkg/runtime"
11+
utilerrors "k8s.io/apimachinery/pkg/util/errors"
12+
13+
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1"
14+
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/ownerutil"
15+
)
16+
17+
// When a user adds permission to a ServiceAccount by creating or updating
18+
// Role/RoleBinding then we expect the InstallPlan that refers to the
19+
// ServiceAccount to be retried if it has failed to install before due to
20+
// permission issue(s).
21+
func (o *Operator) triggerInstallPlanRetry(obj interface{}) (syncError error) {
22+
metaObj, ok := obj.(metav1.Object)
23+
if !ok {
24+
syncError = errors.New("casting to metav1 object failed")
25+
o.logger.Warn(syncError.Error())
26+
return
27+
}
28+
29+
related, _ := isObjectRBACRelated(obj)
30+
if !related {
31+
return
32+
}
33+
34+
ips, err := o.lister.OperatorsV1alpha1().InstallPlanLister().InstallPlans(metaObj.GetNamespace()).List(labels.Everything())
35+
if err != nil {
36+
syncError = err
37+
return
38+
}
39+
40+
isTarget := func(ip *v1alpha1.InstallPlan) bool {
41+
// Only an InstallPlan that has failed to install before and only if it
42+
// has a reference to a ServiceAccount then
43+
return ip.Status.Phase == v1alpha1.InstallPlanPhaseFailed && ip.Status.AttenuatedServiceAccountRef != nil
44+
}
45+
46+
update := func(ip *v1alpha1.InstallPlan) error {
47+
out := ip.DeepCopy()
48+
out.Status.Phase = v1alpha1.InstallPlanPhaseInstalling
49+
_, err := o.client.OperatorsV1alpha1().InstallPlans(ip.GetNamespace()).UpdateStatus(out)
50+
51+
return err
52+
}
53+
54+
var errs []error
55+
for _, ip := range ips {
56+
if !isTarget(ip) {
57+
continue
58+
}
59+
60+
logger := o.logger.WithFields(logrus.Fields{
61+
"ip": ip.GetName(),
62+
"namespace": ip.GetNamespace(),
63+
"phase": ip.Status.Phase,
64+
})
65+
66+
if updateErr := update(ip); updateErr != nil {
67+
errs = append(errs, updateErr)
68+
logger.WithError(updateErr).Warn("failed to kick off InstallPlan retry")
69+
continue
70+
}
71+
72+
logger.Info("InstallPlan status set to 'Installing' for retry")
73+
}
74+
75+
syncError = utilerrors.NewAggregate(errs)
76+
return
77+
}
78+
79+
func isObjectRBACRelated(obj interface{}) (related bool, object runtime.Object) {
80+
object, ok := obj.(runtime.Object)
81+
if !ok {
82+
return
83+
}
84+
85+
if err := ownerutil.InferGroupVersionKind(object); err != nil {
86+
return
87+
}
88+
89+
kind := object.GetObjectKind().GroupVersionKind().Kind
90+
switch kind {
91+
case roleKind:
92+
fallthrough
93+
case roleBindingKind:
94+
fallthrough
95+
case serviceAccountKind:
96+
related = true
97+
}
98+
99+
return
100+
}

pkg/controller/operators/catalog/operator.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ func (o *Operator) syncObject(obj interface{}) (syncError error) {
388388

389389
o.requeueOwners(metaObj)
390390

391-
return
391+
return o.triggerInstallPlanRetry(obj)
392392
}
393393

394394
func (o *Operator) handleDeletion(obj interface{}) {
@@ -1027,6 +1027,28 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
10271027
return
10281028
}
10291029

1030+
querier := o.serviceAccountQuerier.NamespaceQuerier(plan.GetNamespace())
1031+
reference, err := querier()
1032+
if err != nil {
1033+
syncError = fmt.Errorf("attenuated service account query failed - %v", err)
1034+
return
1035+
}
1036+
1037+
if reference != nil {
1038+
out := plan.DeepCopy()
1039+
out.Status.AttenuatedServiceAccountRef = reference
1040+
1041+
if !reflect.DeepEqual(plan, out) {
1042+
if _, updateErr := o.client.OperatorsV1alpha1().InstallPlans(out.GetNamespace()).UpdateStatus(out); err != nil {
1043+
syncError = fmt.Errorf("failed to attach attenuated ServiceAccount to status - %v", updateErr)
1044+
return
1045+
}
1046+
1047+
logger.WithField("attenuated-sa", reference.Name).Info("successfully attached attenuated ServiceAccount to status")
1048+
return
1049+
}
1050+
}
1051+
10301052
outInstallPlan, syncError := transitionInstallPlanState(logger.Logger, o, *plan, o.now())
10311053

10321054
if syncError != nil {
@@ -1204,8 +1226,7 @@ func (o *Operator) ExecutePlan(plan *v1alpha1.InstallPlan) error {
12041226
// Does the namespace have an operator group that specifies a user defined
12051227
// service account? If so, then we should use a scoped client for plan
12061228
// execution.
1207-
getter := o.serviceAccountQuerier.NamespaceQuerier(namespace)
1208-
kubeclient, crclient, err := o.clientAttenuator.AttenuateClient(getter)
1229+
kubeclient, crclient, err := o.clientAttenuator.AttenuateClientWithServiceAccount(plan.Status.AttenuatedServiceAccountRef)
12091230
if err != nil {
12101231
o.logger.Errorf("failed to get a client for plan execution- %v", err)
12111232
return err

pkg/lib/scoped/attenuator.go

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,12 @@ type ClientAttenuator struct {
5050
logger *logrus.Logger
5151
}
5252

53-
// AttenuateClient returns appropriately scoped client(s) to the caller.
53+
// AttenuateClientWithServiceAccount returns appropriately scoped client(s) to the caller.
5454
//
5555
// client(s) that are bound to OLM cluster-admin role are returned if the querier
5656
// returns no error and reference to a service account is nil.
5757
// Otherwise an attempt is made to return attenuated client instance(s).
58-
func (s *ClientAttenuator) AttenuateClient(querier ServiceAccountQuerierFunc) (kubeclient operatorclient.ClientInterface, crclient versioned.Interface, err error) {
59-
if querier == nil {
60-
err = errQuerierNotSpecified
61-
return
62-
}
63-
64-
reference, err := querier()
65-
if err != nil {
66-
return
67-
}
68-
58+
func (s *ClientAttenuator) AttenuateClientWithServiceAccount(reference *corev1.ObjectReference) (kubeclient operatorclient.ClientInterface, crclient versioned.Interface, err error) {
6959
if reference == nil {
7060
// No service account/token has been provided. Return the default client(s).
7161
kubeclient = s.kubeclient
@@ -92,6 +82,25 @@ func (s *ClientAttenuator) AttenuateClient(querier ServiceAccountQuerierFunc) (k
9282
return
9383
}
9484

85+
// AttenuateClient returns appropriately scoped client(s) to the caller.
86+
//
87+
// client(s) that are bound to OLM cluster-admin role are returned if the querier
88+
// returns no error and reference to a service account is nil.
89+
// Otherwise an attempt is made to return attenuated client instance(s).
90+
func (s *ClientAttenuator) AttenuateClient(querier ServiceAccountQuerierFunc) (kubeclient operatorclient.ClientInterface, crclient versioned.Interface, err error) {
91+
if querier == nil {
92+
err = errQuerierNotSpecified
93+
return
94+
}
95+
96+
reference, err := querier()
97+
if err != nil {
98+
return
99+
}
100+
101+
return s.AttenuateClientWithServiceAccount(reference)
102+
}
103+
95104
// AttenuateOperatorClient returns a scoped operator client instance based on the
96105
// service account returned by the querier specified.
97106
func (s *ClientAttenuator) AttenuateOperatorClient(querier ServiceAccountQuerierFunc) (kubeclient operatorclient.ClientInterface, err error) {

test/e2e/user_defined_sa_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,60 @@ func TestUserDefinedServiceAccountWithPermission(t *testing.T) {
131131
}
132132
}
133133

134+
func TestUserDefinedServiceAccountWithRetry(t *testing.T) {
135+
defer cleaner.NotifyTestComplete(t, false)
136+
137+
kubeclient := newKubeClient(t)
138+
crclient := newCRClient(t)
139+
140+
namespace := genName("scoped-ns-")
141+
_, cleanupNS := newNamespace(t, kubeclient, namespace)
142+
defer cleanupNS()
143+
144+
// Create a service account, but add no permission to it.
145+
saName := genName("scoped-sa-")
146+
_, cleanupSA := newServiceAccount(t, kubeclient, namespace, saName)
147+
defer cleanupSA()
148+
149+
// Add an OperatorGroup and specify the service account.
150+
ogName := genName("scoped-og-")
151+
_, cleanupOG := newOperatorGroupWithServiceAccount(t, crclient, namespace, ogName, saName)
152+
defer cleanupOG()
153+
154+
permissions := deploymentPermissions(t)
155+
catsrc, subSpec, catsrcCleanup := newCatalogSource(t, kubeclient, crclient, "scoped", namespace, permissions)
156+
defer catsrcCleanup()
157+
158+
// Ensure that the catalog source is resolved before we create a subscription.
159+
_, err := fetchCatalogSource(t, crclient, catsrc.GetName(), namespace, catalogSourceRegistryPodSynced)
160+
require.NoError(t, err)
161+
162+
subscriptionName := genName("scoped-sub-")
163+
cleanupSubscription := createSubscriptionForCatalog(t, crclient, namespace, subscriptionName, catsrc.GetName(), subSpec.Package, subSpec.Channel, subSpec.StartingCSV, subSpec.InstallPlanApproval)
164+
defer cleanupSubscription()
165+
166+
// Wait until an install plan is created.
167+
subscription, err := fetchSubscription(t, crclient, namespace, subscriptionName, subscriptionHasInstallPlanChecker)
168+
require.NoError(t, err)
169+
require.NotNil(t, subscription)
170+
171+
// We expect the InstallPlan to be in status: Failed.
172+
ipNameOld := subscription.Status.Install.Name
173+
ipPhaseCheckerFunc := buildInstallPlanPhaseCheckFunc(v1alpha1.InstallPlanPhaseFailed)
174+
ipGotOld, err := fetchInstallPlanWithNamespace(t, crclient, ipNameOld, namespace, ipPhaseCheckerFunc)
175+
require.NoError(t, err)
176+
require.Equal(t, v1alpha1.InstallPlanPhaseFailed, ipGotOld.Status.Phase)
177+
178+
// Grant permission now and this should trigger an retry of InstallPlan.
179+
cleanupPerm := grantPermission(t, kubeclient, namespace, saName)
180+
defer cleanupPerm()
181+
182+
ipPhaseCheckerFunc = buildInstallPlanPhaseCheckFunc(v1alpha1.InstallPlanPhaseComplete)
183+
ipGotNew, err := fetchInstallPlanWithNamespace(t, crclient, ipNameOld, namespace, ipPhaseCheckerFunc)
184+
require.NoError(t, err)
185+
require.Equal(t, v1alpha1.InstallPlanPhaseComplete, ipGotNew.Status.Phase)
186+
}
187+
134188
func newNamespace(t *testing.T, client operatorclient.ClientInterface, name string) (ns *corev1.Namespace, cleanup cleanupFunc) {
135189
request := &corev1.Namespace{
136190
ObjectMeta: metav1.ObjectMeta{

0 commit comments

Comments
 (0)