@@ -2,13 +2,17 @@ package clusterapi
22
33import (
44 "context"
5+ "errors"
56 "fmt"
7+ "net/http"
68 "time"
79
810 "github.com/sirupsen/logrus"
911 resourcemanager "google.golang.org/api/cloudresourcemanager/v1"
12+ "google.golang.org/api/googleapi"
1013 iam "google.golang.org/api/iam/v1"
1114 "google.golang.org/api/option"
15+ "k8s.io/apimachinery/pkg/util/wait"
1216
1317 gcp "github.com/openshift/installer/pkg/asset/installconfig/gcp"
1418)
@@ -102,48 +106,66 @@ func AddServiceAccountRoles(ctx context.Context, projectID, serviceAccountID str
102106 return fmt .Errorf ("failed to create resourcemanager service: %w" , err )
103107 }
104108
105- policy , err := getPolicy (ctx , service , projectID )
106- if err != nil {
107- return err
108- }
109+ backoff := wait.Backoff {
110+ Duration : 2 * time .Second ,
111+ Jitter : 1.0 ,
112+ Steps : 5 ,
113+ }
114+ // Get and set the policy in a backoff loop.
115+ // If the policy set fails, the policy must be retrieved again via the get before retrying the set.
116+ var lastErr error
117+ if waitErr := wait .ExponentialBackoffWithContext (ctx , backoff , func (ctx context.Context ) (bool , error ) {
118+ policy , err := getPolicy (ctx , service , projectID )
119+ if isQuotaExceededError (err ) {
120+ lastErr = err
121+ logrus .Debugf ("Failed to get IAM policy, retrying after backoff" )
122+ return false , nil
123+ } else if err != nil {
124+ return false , fmt .Errorf ("failed to get IAM policy, unexpected error: %w" , err )
125+ }
109126
110- member := fmt .Sprintf ("serviceAccount:%s" , serviceAccountID )
111- for _ , role := range roles {
112- err = addMemberToRole (policy , role , member )
113- if err != nil {
114- return fmt .Errorf ("failed to add role %s to %s: %w" , role , member , err )
127+ member := fmt .Sprintf ("serviceAccount:%s" , serviceAccountID )
128+ for _ , role := range roles {
129+ err = addMemberToRole (policy , role , member )
130+ if err != nil {
131+ return false , fmt .Errorf ("failed to add role %s to %s: %w" , role , member , err )
132+ }
115133 }
116- }
117134
118- err = setPolicy (ctx , service , projectID , policy )
119- if err != nil {
120- return err
135+ err = setPolicy (ctx , service , projectID , policy )
136+ if err != nil {
137+ if isConflictError (err ) {
138+ lastErr = err
139+ logrus .Debugf ("Concurrent IAM policy changes, restarting read/modify/write" )
140+ return false , nil
141+ }
142+ return false , fmt .Errorf ("failed to set IAM policy, unexpected error: %w" , err )
143+ }
144+ logrus .Debugf ("Successfully set IAM policy" )
145+ return true , nil
146+ }); waitErr != nil {
147+ if wait .Interrupted (waitErr ) {
148+ return fmt .Errorf ("failed to set IAM policy: %w" , lastErr )
149+ }
150+ return waitErr
121151 }
122-
123152 return nil
124153}
125154
126155// getPolicy gets the project's IAM policy.
127156func getPolicy (ctx context.Context , crmService * resourcemanager.Service , projectID string ) (* resourcemanager.Policy , error ) {
157+ logrus .Debugf ("Getting policy for %s" , projectID )
128158 request := & resourcemanager.GetIamPolicyRequest {}
129159 policy , err := crmService .Projects .GetIamPolicy (projectID , request ).Context (ctx ).Do ()
130- if err != nil {
131- return nil , fmt .Errorf ("failed to fetch project IAM policy: %w" , err )
132- }
133-
134- return policy , nil
160+ return policy , err
135161}
136162
137163// setPolicy sets the project's IAM policy.
138164func setPolicy (ctx context.Context , crmService * resourcemanager.Service , projectID string , policy * resourcemanager.Policy ) error {
139165 request := & resourcemanager.SetIamPolicyRequest {}
140166 request .Policy = policy
141167 _ , err := crmService .Projects .SetIamPolicy (projectID , request ).Context (ctx ).Do ()
142- if err != nil {
143- return fmt .Errorf ("failed to set project IAM policy: %w" , err )
144- }
145-
146- return nil
168+ return err
147169}
148170
149171// addMemberToRole adds a member to a role binding.
@@ -175,3 +197,21 @@ func addMemberToRole(policy *resourcemanager.Policy, role, member string) error
175197 logrus .Debugf ("adding %s role, added %s member" , role , member )
176198 return nil
177199}
200+
201+ // isConflictError returns true if error matches conflict on concurrent policy sets.
202+ func isConflictError (err error ) bool {
203+ var ae * googleapi.Error
204+ if errors .As (err , & ae ) && (ae .Code == http .StatusConflict || ae .Code == http .StatusPreconditionFailed ) {
205+ return true
206+ }
207+ return false
208+ }
209+
210+ // isQuotaExceededError returns true if the error matches quota exceeded.
211+ func isQuotaExceededError (err error ) bool {
212+ var ae * googleapi.Error
213+ if errors .As (err , & ae ) && (ae .Code == http .StatusTooManyRequests ) {
214+ return true
215+ }
216+ return false
217+ }
0 commit comments