@@ -20,6 +20,7 @@ package azure
20
20
21
21
import (
22
22
"net/http"
23
+ "regexp"
23
24
"strings"
24
25
25
26
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute"
@@ -44,6 +45,12 @@ const (
44
45
operationCanceledErrorMessage = "canceledandsupersededduetoanotheroperation"
45
46
46
47
cannotDeletePublicIPErrorMessageCode = "PublicIPAddressCannotBeDeleted"
48
+
49
+ referencedResourceNotProvisionedMessageCode = "ReferencedResourceNotProvisioned"
50
+ )
51
+
52
var (
	// pipErrorMessageRE extracts the resource group (submatch 1) and the name
	// (submatch 2) of a public IP address from an error message that embeds
	// its resource ID:
	//
	//	/subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.Network/publicIPAddresses/<name>
	//
	// The subscription and resource group segments are matched with
	// "no slash, no whitespace" classes instead of a greedy ".*", so a capture
	// can never span several path segments. The name capture accepts word
	// characters, '.' and '-' and must end in a word character, so punctuation
	// that follows the ID in the message (closing quote, comma, the period
	// ending a sentence) is not returned as part of the name. The dot in
	// "Microsoft.Network" is escaped so it only matches a literal dot.
	//
	// The leading and trailing ".*" are kept so that the overall match and the
	// choice of occurrence on a line are unchanged, and the pattern still has
	// exactly two capture groups (FindStringSubmatch returns 3 elements).
	pipErrorMessageRE = regexp.MustCompile(`(?:.*)/subscriptions/(?:[^/\s]+)/resourceGroups/([^/\s]+)/providers/Microsoft\.Network/publicIPAddresses/([\w.-]*\w)(?:.*)`)
)
48
55
49
56
// RequestBackoff if backoff is disabled in cloud provider it
@@ -182,7 +189,7 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer)
182
189
defer cancel ()
183
190
184
191
rgName := az .getLoadBalancerResourceGroup ()
185
- rerr := az .LoadBalancerClient .CreateOrUpdate (ctx , rgName , * lb .Name , lb , to .String (lb .Etag ))
192
+ rerr := az .LoadBalancerClient .CreateOrUpdate (ctx , rgName , to . String ( lb .Name ) , lb , to .String (lb .Etag ))
186
193
klog .V (10 ).Infof ("LoadBalancerClient.CreateOrUpdate(%s): end" , * lb .Name )
187
194
if rerr == nil {
188
195
// Invalidate the cache right after updating
@@ -192,12 +199,39 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer)
192
199
193
200
// Invalidate the cache because ETAG precondition mismatch.
194
201
if rerr .HTTPStatusCode == http .StatusPreconditionFailed {
195
- klog .V (3 ).Infof ("LoadBalancer cache for %s is cleanup because of http.StatusPreconditionFailed" , * lb .Name )
202
+ klog .V (3 ).Infof ("LoadBalancer cache for %s is cleanup because of http.StatusPreconditionFailed" , to . String ( lb .Name ) )
196
203
az .lbCache .Delete (* lb .Name )
197
204
}
205
+
206
+ retryErrorMessage := rerr .Error ().Error ()
198
207
// Invalidate the cache because another new operation has canceled the current request.
199
- if strings .Contains (strings .ToLower (rerr .Error ().Error ()), operationCanceledErrorMessage ) {
200
- klog .V (3 ).Infof ("LoadBalancer cache for %s is cleanup because CreateOrUpdate is canceled by another operation" , * lb .Name )
208
+ if strings .Contains (strings .ToLower (retryErrorMessage ), operationCanceledErrorMessage ) {
209
+ klog .V (3 ).Infof ("LoadBalancer cache for %s is cleanup because CreateOrUpdate is canceled by another operation" , to .String (lb .Name ))
210
+ az .lbCache .Delete (* lb .Name )
211
+ }
212
+
213
+ // The LB update may fail because the referenced PIP is not in the Succeeded provisioning state
214
+ if strings .Contains (strings .ToLower (retryErrorMessage ), strings .ToLower (referencedResourceNotProvisionedMessageCode )) {
215
+ matches := pipErrorMessageRE .FindStringSubmatch (retryErrorMessage )
216
+ if len (matches ) != 3 {
217
+ klog .Warningf ("Failed to parse the retry error message %s" , retryErrorMessage )
218
+ return rerr .Error ()
219
+ }
220
+ pipRG , pipName := matches [1 ], matches [2 ]
221
+ klog .V (3 ).Infof ("The public IP %s referenced by load balancer %s is not in Succeeded provisioning state, will try to update it" , pipName , to .String (lb .Name ))
222
+ pip , _ , err := az .getPublicIPAddress (pipRG , pipName )
223
+ if err != nil {
224
+ klog .Warningf ("Failed to get the public IP %s in resource group %s: %v" , pipName , pipRG , err )
225
+ return rerr .Error ()
226
+ }
227
+ // Perform a dummy update to fix the provisioning state
228
+ err = az .CreateOrUpdatePIP (service , pipRG , pip )
229
+ if err != nil {
230
+ klog .Warningf ("Failed to update the public IP %s in resource group %s: %v" , pipName , pipRG , err )
231
+ return rerr .Error ()
232
+ }
233
+ // Invalidate the LB cache, return the error, and the controller manager
234
+ // would retry the LB update in the next reconcile loop
201
235
az .lbCache .Delete (* lb .Name )
202
236
}
203
237
@@ -241,10 +275,10 @@ func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string,
241
275
ctx , cancel := getContextWithCancel ()
242
276
defer cancel ()
243
277
244
- rerr := az .PublicIPAddressesClient .CreateOrUpdate (ctx , pipResourceGroup , * pip .Name , pip )
245
- klog .V (10 ).Infof ("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end" , pipResourceGroup , * pip .Name )
278
+ rerr := az .PublicIPAddressesClient .CreateOrUpdate (ctx , pipResourceGroup , to . String ( pip .Name ) , pip )
279
+ klog .V (10 ).Infof ("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end" , pipResourceGroup , to . String ( pip .Name ) )
246
280
if rerr != nil {
247
- klog .Errorf ("PublicIPAddressesClient.CreateOrUpdate(%s, %s) failed: %s" , pipResourceGroup , * pip .Name , rerr .Error ().Error ())
281
+ klog .Errorf ("PublicIPAddressesClient.CreateOrUpdate(%s, %s) failed: %s" , pipResourceGroup , to . String ( pip .Name ) , rerr .Error ().Error ())
248
282
az .Event (service , v1 .EventTypeWarning , "CreateOrUpdatePublicIPAddress" , rerr .Error ().Error ())
249
283
return rerr .Error ()
250
284
}
0 commit comments