Skip to content

Commit 34e898a

Browse files
committed
Add ETag for NSG updates to fix the NSG race condition
1 parent 69718b9 commit 34e898a

File tree

2 files changed

+63
-12
lines changed

2 files changed

+63
-12
lines changed

staging/src/k8s.io/legacy-cloud-providers/azure/azure_backoff.go

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-03-01/compute"
2424
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
25+
"github.com/Azure/go-autorest/autorest/to"
2526

2627
"k8s.io/api/core/v1"
2728
"k8s.io/apimachinery/pkg/runtime"
@@ -146,7 +147,7 @@ func (az *Cloud) CreateOrUpdateSecurityGroup(service *v1.Service, sg network.Sec
146147
ctx, cancel := getContextWithCancel()
147148
defer cancel()
148149

149-
resp, err := az.SecurityGroupsClient.CreateOrUpdate(ctx, az.ResourceGroup, *sg.Name, sg)
150+
resp, err := az.SecurityGroupsClient.CreateOrUpdate(ctx, az.ResourceGroup, *sg.Name, sg, to.String(sg.Etag))
150151
klog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): end", *sg.Name)
151152
if err == nil {
152153
if isSuccessHTTPResponse(resp) {
@@ -156,6 +157,11 @@ func (az *Cloud) CreateOrUpdateSecurityGroup(service *v1.Service, sg network.Sec
156157
return fmt.Errorf("HTTP response %q", resp.Status)
157158
}
158159
}
160+
161+
// Invalidate the cache because ETAG precondition mismatch.
162+
if resp != nil && resp.StatusCode == http.StatusPreconditionFailed {
163+
az.nsgCache.Delete(*sg.Name)
164+
}
159165
return err
160166
}
161167

@@ -168,14 +174,20 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(service *v1.Service, sg network.Secur
168174
ctx, cancel := getContextWithCancel()
169175
defer cancel()
170176

171-
resp, err := az.SecurityGroupsClient.CreateOrUpdate(ctx, az.ResourceGroup, *sg.Name, sg)
177+
resp, err := az.SecurityGroupsClient.CreateOrUpdate(ctx, az.ResourceGroup, *sg.Name, sg, to.String(sg.Etag))
172178
klog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): end", *sg.Name)
173-
done, err := az.processHTTPRetryResponse(service, "CreateOrUpdateSecurityGroup", resp, err)
179+
done, retryError := az.processHTTPRetryResponse(service, "CreateOrUpdateSecurityGroup", resp, err)
174180
if done && err == nil {
175181
// Invalidate the cache right after updating
176182
az.nsgCache.Delete(*sg.Name)
177183
}
178-
return done, err
184+
185+
// Invalidate the cache and abort backoff because ETAG precondition mismatch.
186+
if resp != nil && resp.StatusCode == http.StatusPreconditionFailed {
187+
az.nsgCache.Delete(*sg.Name)
188+
return true, err
189+
}
190+
return done, retryError
179191
})
180192
}
181193

@@ -538,17 +550,22 @@ func isSuccessHTTPResponse(resp *http.Response) bool {
538550
}
539551

540552
// shouldRetryHTTPRequest reports whether a request that produced resp and err
// should be retried.
//
// The response status is examined before err:
//   - HTTP 412 (Precondition Failed) means the ETag did not match, so the
//     request is never retried, even when err is non-nil.
//   - Any other HTTP 4xx or 5xx status is retried.
//
// When resp is nil or carries any other status, the request is retried only if
// err is non-nil.
func shouldRetryHTTPRequest(resp *http.Response, err error) bool {
	if resp != nil {
		switch code := resp.StatusCode; {
		case code == http.StatusPreconditionFailed:
			// Checked first so that an accompanying error cannot force a retry.
			return false
		case code >= http.StatusBadRequest && code < 600:
			return true
		}
	}

	return err != nil
}
554571

staging/src/k8s.io/legacy-cloud-providers/azure/azure_client.go

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ type SubnetsClient interface {
8181

8282
// SecurityGroupsClient defines needed functions for azure network.SecurityGroupsClient
8383
type SecurityGroupsClient interface {
84-
CreateOrUpdate(ctx context.Context, resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup) (resp *http.Response, err error)
84+
CreateOrUpdate(ctx context.Context, resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup, etag string) (resp *http.Response, err error)
8585
Delete(ctx context.Context, resourceGroupName string, networkSecurityGroupName string) (resp *http.Response, err error)
8686
Get(ctx context.Context, resourceGroupName string, networkSecurityGroupName string, expand string) (result network.SecurityGroup, err error)
8787
List(ctx context.Context, resourceGroupName string) (result []network.SecurityGroup, err error)
@@ -714,7 +714,7 @@ func newAzSecurityGroupsClient(config *azClientConfig) *azSecurityGroupsClient {
714714
}
715715
}
716716

717-
func (az *azSecurityGroupsClient) CreateOrUpdate(ctx context.Context, resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup) (resp *http.Response, err error) {
717+
func (az *azSecurityGroupsClient) CreateOrUpdate(ctx context.Context, resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup, etag string) (resp *http.Response, err error) {
718718
/* Write rate limiting */
719719
if !az.rateLimiterWriter.TryAccept() {
720720
err = createRateLimitErr(true, "NSGCreateOrUpdate")
@@ -727,7 +727,13 @@ func (az *azSecurityGroupsClient) CreateOrUpdate(ctx context.Context, resourceGr
727727
}()
728728

729729
mc := newMetricContext("security_groups", "create_or_update", resourceGroupName, az.client.SubscriptionID)
730-
future, err := az.client.CreateOrUpdate(ctx, resourceGroupName, networkSecurityGroupName, parameters)
730+
req, err := az.createOrUpdatePreparer(ctx, resourceGroupName, networkSecurityGroupName, parameters, etag)
731+
if err != nil {
732+
mc.Observe(err)
733+
return nil, err
734+
}
735+
736+
future, err := az.client.CreateOrUpdateSender(req)
731737
if err != nil {
732738
mc.Observe(err)
733739
return future.Response(), err
@@ -738,6 +744,34 @@ func (az *azSecurityGroupsClient) CreateOrUpdate(ctx context.Context, resourceGr
738744
return future.Response(), err
739745
}
740746

747+
// createOrUpdatePreparer prepares the CreateOrUpdate request.
748+
func (az *azSecurityGroupsClient) createOrUpdatePreparer(ctx context.Context, resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup, etag string) (*http.Request, error) {
749+
pathParameters := map[string]interface{}{
750+
"networkSecurityGroupName": autorest.Encode("path", networkSecurityGroupName),
751+
"resourceGroupName": autorest.Encode("path", resourceGroupName),
752+
"subscriptionId": autorest.Encode("path", az.client.SubscriptionID),
753+
}
754+
755+
const APIVersion = "2017-09-01"
756+
queryParameters := map[string]interface{}{
757+
"api-version": APIVersion,
758+
}
759+
760+
preparerDecorators := []autorest.PrepareDecorator{
761+
autorest.AsContentType("application/json; charset=utf-8"),
762+
autorest.AsPut(),
763+
autorest.WithBaseURL(az.client.BaseURI),
764+
autorest.WithPathParameters("/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkSecurityGroups/{networkSecurityGroupName}", pathParameters),
765+
autorest.WithJSON(parameters),
766+
autorest.WithQueryParameters(queryParameters),
767+
}
768+
if etag != "" {
769+
preparerDecorators = append(preparerDecorators, autorest.WithHeader("If-Match", autorest.String(etag)))
770+
}
771+
preparer := autorest.CreatePreparer(preparerDecorators...)
772+
return preparer.Prepare((&http.Request{}).WithContext(ctx))
773+
}
774+
741775
func (az *azSecurityGroupsClient) Delete(ctx context.Context, resourceGroupName string, networkSecurityGroupName string) (resp *http.Response, err error) {
742776
/* Write rate limiting */
743777
if !az.rateLimiterWriter.TryAccept() {

0 commit comments

Comments
 (0)