Skip to content

Commit 7ca1c83

Browse files
committed
Set ETAG when updating Azure loadbalancer, route and route table
1 parent a019f17 commit 7ca1c83

File tree

4 files changed

+73
-24
lines changed

4 files changed

+73
-24
lines changed

staging/src/k8s.io/legacy-cloud-providers/azure/azure_backoff.go

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer)
197197
ctx, cancel := getContextWithCancel()
198198
defer cancel()
199199

200-
resp, err := az.LoadBalancerClient.CreateOrUpdate(ctx, az.ResourceGroup, *lb.Name, lb)
200+
resp, err := az.LoadBalancerClient.CreateOrUpdate(ctx, az.ResourceGroup, *lb.Name, lb, to.String(lb.Etag))
201201
klog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): end", *lb.Name)
202202
if err == nil {
203203
if isSuccessHTTPResponse(resp) {
@@ -207,6 +207,11 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer)
207207
return fmt.Errorf("HTTP response %q", resp.Status)
208208
}
209209
}
210+
211+
// Invalidate the cache because ETAG precondition mismatch.
212+
if resp != nil && resp.StatusCode == http.StatusPreconditionFailed {
213+
az.lbCache.Delete(*lb.Name)
214+
}
210215
return err
211216
}
212217

@@ -219,14 +224,20 @@ func (az *Cloud) createOrUpdateLBWithRetry(service *v1.Service, lb network.LoadB
219224
ctx, cancel := getContextWithCancel()
220225
defer cancel()
221226

222-
resp, err := az.LoadBalancerClient.CreateOrUpdate(ctx, az.ResourceGroup, *lb.Name, lb)
227+
resp, err := az.LoadBalancerClient.CreateOrUpdate(ctx, az.ResourceGroup, *lb.Name, lb, to.String(lb.Etag))
223228
klog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): end", *lb.Name)
224-
done, err := az.processHTTPRetryResponse(service, "CreateOrUpdateLoadBalancer", resp, err)
229+
done, retryError := az.processHTTPRetryResponse(service, "CreateOrUpdateLoadBalancer", resp, err)
225230
if done && err == nil {
226231
// Invalidate the cache right after updating
227232
az.lbCache.Delete(*lb.Name)
228233
}
229-
return done, err
234+
235+
// Invalidate the cache and abort backoff because ETAG precondition mismatch.
236+
if resp != nil && resp.StatusCode == http.StatusPreconditionFailed {
237+
az.nsgCache.Delete(*lb.Name)
238+
return true, err
239+
}
240+
return done, retryError
230241
})
231242
}
232243

@@ -441,7 +452,10 @@ func (az *Cloud) CreateOrUpdateRouteTable(routeTable network.RouteTable) error {
441452
ctx, cancel := getContextWithCancel()
442453
defer cancel()
443454

444-
resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, routeTable)
455+
resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, routeTable, to.String(routeTable.Etag))
456+
if resp != nil && resp.StatusCode == http.StatusPreconditionFailed {
457+
az.rtCache.Delete(*routeTable.Name)
458+
}
445459
return az.processHTTPResponse(nil, "", resp, err)
446460
}
447461

@@ -454,8 +468,19 @@ func (az *Cloud) createOrUpdateRouteTableWithRetry(routeTable network.RouteTable
454468
ctx, cancel := getContextWithCancel()
455469
defer cancel()
456470

457-
resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, routeTable)
458-
return az.processHTTPRetryResponse(nil, "", resp, err)
471+
resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, routeTable, to.String(routeTable.Etag))
472+
done, retryError := az.processHTTPRetryResponse(nil, "", resp, err)
473+
if done && err == nil {
474+
az.rtCache.Delete(*routeTable.Name)
475+
return done, nil
476+
}
477+
478+
// Invalidate the cache and abort backoff because ETAG precondition mismatch.
479+
if resp != nil && resp.StatusCode == http.StatusPreconditionFailed {
480+
az.rtCache.Delete(*routeTable.Name)
481+
return true, err
482+
}
483+
return done, retryError
459484
})
460485
}
461486

@@ -465,8 +490,11 @@ func (az *Cloud) CreateOrUpdateRoute(route network.Route) error {
465490
ctx, cancel := getContextWithCancel()
466491
defer cancel()
467492

468-
resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, *route.Name, route)
493+
resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, *route.Name, route, to.String(route.Etag))
469494
klog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): end", *route.Name)
495+
if resp != nil && resp.StatusCode == http.StatusPreconditionFailed {
496+
az.rtCache.Delete(az.RouteTableName)
497+
}
470498
return az.processHTTPResponse(nil, "", resp, err)
471499
}
472500

@@ -479,9 +507,20 @@ func (az *Cloud) createOrUpdateRouteWithRetry(route network.Route) error {
479507
ctx, cancel := getContextWithCancel()
480508
defer cancel()
481509

482-
resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, *route.Name, route)
510+
resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.RouteTableResourceGroup, az.RouteTableName, *route.Name, route, to.String(route.Etag))
483511
klog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): end", *route.Name)
484-
return az.processHTTPRetryResponse(nil, "", resp, err)
512+
done, retryError := az.processHTTPRetryResponse(nil, "", resp, err)
513+
if done && err == nil {
514+
az.rtCache.Delete(az.RouteTableName)
515+
return done, nil
516+
}
517+
518+
// Invalidate the cache and abort backoff because ETAG precondition mismatch.
519+
if resp != nil && resp.StatusCode == http.StatusPreconditionFailed {
520+
az.rtCache.Delete(az.RouteTableName)
521+
return true, err
522+
}
523+
return done, retryError
485524
})
486525
}
487526

staging/src/k8s.io/legacy-cloud-providers/azure/azure_fakes.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func newFakeAzureLBClient() *fakeAzureLBClient {
5252
return fLBC
5353
}
5454

55-
func (fLBC *fakeAzureLBClient) CreateOrUpdate(ctx context.Context, resourceGroupName string, loadBalancerName string, parameters network.LoadBalancer) (resp *http.Response, err error) {
55+
func (fLBC *fakeAzureLBClient) CreateOrUpdate(ctx context.Context, resourceGroupName string, loadBalancerName string, parameters network.LoadBalancer, etag string) (resp *http.Response, err error) {
5656
fLBC.mutex.Lock()
5757
defer fLBC.mutex.Unlock()
5858

@@ -642,7 +642,7 @@ func newFakeRoutesClient() *fakeRoutesClient {
642642
return fRC
643643
}
644644

645-
func (fRC *fakeRoutesClient) CreateOrUpdate(ctx context.Context, resourceGroupName string, routeTableName string, routeName string, routeParameters network.Route) (resp *http.Response, err error) {
645+
func (fRC *fakeRoutesClient) CreateOrUpdate(ctx context.Context, resourceGroupName string, routeTableName string, routeName string, routeParameters network.Route, etag string) (resp *http.Response, err error) {
646646
fRC.mutex.Lock()
647647
defer fRC.mutex.Unlock()
648648

@@ -683,7 +683,7 @@ func newFakeRouteTablesClient() *fakeRouteTablesClient {
683683
return fRTC
684684
}
685685

686-
func (fRTC *fakeRouteTablesClient) CreateOrUpdate(ctx context.Context, resourceGroupName string, routeTableName string, parameters network.RouteTable) (resp *http.Response, err error) {
686+
func (fRTC *fakeRouteTablesClient) CreateOrUpdate(ctx context.Context, resourceGroupName string, routeTableName string, parameters network.RouteTable, etag string) (resp *http.Response, err error) {
687687
fRTC.mutex.Lock()
688688
defer fRTC.mutex.Unlock()
689689

staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ func (az *Cloud) EnsureLoadBalancer(ctx context.Context, clusterName string, ser
147147
return nil, err
148148
}
149149

150-
if _, err := az.reconcilePublicIP(clusterName, updateService, lb, true /* wantLb */); err != nil {
150+
// lb is not reused here because the ETAG may be changed in above operations, hence reconcilePublicIP() would get lb again from cache.
151+
if _, err := az.reconcilePublicIP(clusterName, updateService, to.String(lb.Name), true /* wantLb */); err != nil {
151152
return nil, err
152153
}
153154

@@ -203,7 +204,7 @@ func (az *Cloud) EnsureLoadBalancerDeleted(ctx context.Context, clusterName stri
203204
}
204205
}
205206

206-
if _, err := az.reconcilePublicIP(clusterName, service, nil, false /* wantLb */); err != nil {
207+
if _, err := az.reconcilePublicIP(clusterName, service, "", false /* wantLb */); err != nil {
207208
if ignoreErrors(err) != nil {
208209
return err
209210
}
@@ -1323,9 +1324,10 @@ func deduplicate(collection *[]string) *[]string {
13231324
}
13241325

13251326
// This reconciles the PublicIP resources similar to how the LB is reconciled.
1326-
func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, lb *network.LoadBalancer, wantLb bool) (*network.PublicIPAddress, error) {
1327+
func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, lbName string, wantLb bool) (*network.PublicIPAddress, error) {
13271328
isInternal := requiresInternalLoadBalancer(service)
13281329
serviceName := getServiceName(service)
1330+
var lb *network.LoadBalancer
13291331
var desiredPipName string
13301332
var err error
13311333
if !isInternal && wantLb {
@@ -1335,6 +1337,14 @@ func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, lb *
13351337
}
13361338
}
13371339

1340+
if lbName != "" {
1341+
loadBalancer, _, err := az.getAzureLoadBalancer(lbName)
1342+
if err != nil {
1343+
return nil, err
1344+
}
1345+
lb = &loadBalancer
1346+
}
1347+
13381348
pipResourceGroup := az.getPublicIPAddressResourceGroup(service)
13391349

13401350
pips, err := az.ListPIP(service, pipResourceGroup)

staging/src/k8s.io/legacy-cloud-providers/azure/azure_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -879,13 +879,13 @@ func TestReconcilePublicIPWithNewService(t *testing.T) {
879879
az := getTestCloud()
880880
svc := getTestService("servicea", v1.ProtocolTCP, 80, 443)
881881

882-
pip, err := az.reconcilePublicIP(testClusterName, &svc, nil, true /* wantLb*/)
882+
pip, err := az.reconcilePublicIP(testClusterName, &svc, "", true /* wantLb*/)
883883
if err != nil {
884884
t.Errorf("Unexpected error: %q", err)
885885
}
886886
validatePublicIP(t, pip, &svc, true)
887887

888-
pip2, err := az.reconcilePublicIP(testClusterName, &svc, nil, true /* wantLb */)
888+
pip2, err := az.reconcilePublicIP(testClusterName, &svc, "", true /* wantLb */)
889889
if err != nil {
890890
t.Errorf("Unexpected error: %q", err)
891891
}
@@ -900,15 +900,15 @@ func TestReconcilePublicIPRemoveService(t *testing.T) {
900900
az := getTestCloud()
901901
svc := getTestService("servicea", v1.ProtocolTCP, 80, 443)
902902

903-
pip, err := az.reconcilePublicIP(testClusterName, &svc, nil, true /* wantLb*/)
903+
pip, err := az.reconcilePublicIP(testClusterName, &svc, "", true /* wantLb*/)
904904
if err != nil {
905905
t.Errorf("Unexpected error: %q", err)
906906
}
907907

908908
validatePublicIP(t, pip, &svc, true)
909909

910910
// Remove the service
911-
pip, err = az.reconcilePublicIP(testClusterName, &svc, nil, false /* wantLb */)
911+
pip, err = az.reconcilePublicIP(testClusterName, &svc, "", false /* wantLb */)
912912
if err != nil {
913913
t.Errorf("Unexpected error: %q", err)
914914
}
@@ -920,7 +920,7 @@ func TestReconcilePublicIPWithInternalService(t *testing.T) {
920920
az := getTestCloud()
921921
svc := getInternalTestService("servicea", 80, 443)
922922

923-
pip, err := az.reconcilePublicIP(testClusterName, &svc, nil, true /* wantLb*/)
923+
pip, err := az.reconcilePublicIP(testClusterName, &svc, "", true /* wantLb*/)
924924
if err != nil {
925925
t.Errorf("Unexpected error: %q", err)
926926
}
@@ -932,22 +932,22 @@ func TestReconcilePublicIPWithExternalAndInternalSwitch(t *testing.T) {
932932
az := getTestCloud()
933933
svc := getInternalTestService("servicea", 80, 443)
934934

935-
pip, err := az.reconcilePublicIP(testClusterName, &svc, nil, true /* wantLb*/)
935+
pip, err := az.reconcilePublicIP(testClusterName, &svc, "", true /* wantLb*/)
936936
if err != nil {
937937
t.Errorf("Unexpected error: %q", err)
938938
}
939939
validatePublicIP(t, pip, &svc, true)
940940

941941
// Update to external service
942942
svcUpdated := getTestService("servicea", v1.ProtocolTCP, 80)
943-
pip, err = az.reconcilePublicIP(testClusterName, &svcUpdated, nil, true /* wantLb*/)
943+
pip, err = az.reconcilePublicIP(testClusterName, &svcUpdated, "", true /* wantLb*/)
944944
if err != nil {
945945
t.Errorf("Unexpected error: %q", err)
946946
}
947947
validatePublicIP(t, pip, &svcUpdated, true)
948948

949949
// Update to internal service again
950-
pip, err = az.reconcilePublicIP(testClusterName, &svc, nil, true /* wantLb*/)
950+
pip, err = az.reconcilePublicIP(testClusterName, &svc, "", true /* wantLb*/)
951951
if err != nil {
952952
t.Errorf("Unexpected error: %q", err)
953953
}

0 commit comments

Comments
 (0)