Skip to content

Commit fece0e3

Browse files
retry: keep retrying on AppGW until access is granted (#891)
* retry: keep retrying on AppGW until access is granted * add e2e tests * change authorizer * change to settings.GetAuthorizer; change wait 5 seconds * change environment variable names * moved role assignment creaton to a seaprate step in E2E * correct function name * update summary; role cmd; function name * add resource id func
1 parent 63cf902 commit fece0e3

File tree

18 files changed

+469
-63
lines changed

18 files changed

+469
-63
lines changed

cmd/appgw-ingress/main.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ func main() {
108108
glog.Fatal(errorLine)
109109
}
110110

111-
azClient := azure.NewAzClient(azure.SubscriptionID(env.SubscriptionID), azure.ResourceGroup(env.ResourceGroupName), azure.ResourceName(env.AppGwName))
111+
azClient := azure.NewAzClient(azure.SubscriptionID(env.SubscriptionID), azure.ResourceGroup(env.ResourceGroupName), azure.ResourceName(env.AppGwName), env.ClientID)
112112
appGwIdentifier := appgw.Identifier{
113113
SubscriptionID: env.SubscriptionID,
114114
ResourceGroup: env.ResourceGroupName,
@@ -125,7 +125,7 @@ func main() {
125125
env.HTTPServicePort)
126126
httpServer.Start()
127127

128-
glog.V(3).Infof("App Gateway Details: Subscription: %s, Resource Group: %s, Name: %s", env.SubscriptionID, env.ResourceGroupName, env.AppGwName)
128+
glog.V(3).Infof("Appication Gateway Details: Subscription=\"%s\" Resource Group=\"%s\" Name=\"%s\"", env.SubscriptionID, env.ResourceGroupName, env.AppGwName)
129129

130130
var authorizer autorest.Authorizer
131131
if authorizer, err = azure.GetAuthorizerWithRetry(env.AuthLocation, env.UseManagedIdentityForPod, cpConfig, maxRetryCount, retryPause); err != nil {
@@ -138,24 +138,28 @@ func main() {
138138
azClient.SetAuthorizer(authorizer)
139139
}
140140

141-
if _, err = azClient.GetGateway(); err != nil {
142-
if controllererrors.IsErrorCode(err, controllererrors.ErrorApplicationGatewayNotFound) &&
143-
env.EnableDeployAppGateway {
141+
// Check if Application Gateway exists/have get access
142+
// If AGIC's service principal or managed identity doesn't have read access to the Application Gateway's resource group,
143+
// then AGIC can't read it's role assignments to look for the needed permission.
144+
// Instead we perform a simple GET request to check both that the Application Gateway exists as well as implicitly make sure that AGIC has read access to it.
145+
err = azClient.WaitForGetAccessOnGateway()
146+
if err != nil {
147+
if controllererrors.IsErrorCode(err, controllererrors.ErrorApplicationGatewayNotFound) && env.EnableDeployAppGateway {
144148
if env.AppGwSubnetID != "" {
145149
err = azClient.DeployGatewayWithSubnet(env.AppGwSubnetID)
146150
} else if cpConfig != nil {
147151
err = azClient.DeployGatewayWithVnet(azure.ResourceGroup(cpConfig.VNetResourceGroup), azure.ResourceName(cpConfig.VNetName), azure.ResourceName(env.AppGwSubnetName), env.AppGwSubnetPrefix)
148152
}
149153

150154
if err != nil {
151-
errorLine := fmt.Sprint("Failed in deploying App gateway", err)
155+
errorLine := fmt.Sprint("Failed in deploying Application Gateway", err)
152156
if agicPod != nil {
153157
recorder.Event(agicPod, v1.EventTypeWarning, events.ReasonFailedDeployingAppGw, errorLine)
154158
}
155159
glog.Fatal(errorLine)
156160
}
157161
} else {
158-
errorLine := fmt.Sprint("Failed authenticating with Azure Resource Manager: ", err)
162+
errorLine := fmt.Sprint("Failed getting Application Gateway: ", err)
159163
if agicPod != nil {
160164
recorder.Event(agicPod, v1.EventTypeWarning, events.ReasonARMAuthFailure, errorLine)
161165
}

helm/ingress-azure/templates/configmap.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ data:
7070

7171
{{- if .Values.armAuth -}}
7272
{{- if eq .Values.armAuth.type "aadPodIdentity"}}
73+
AZURE_CLIENT_ID: "{{ .Values.armAuth.identityClientID }}"
7374
USE_MANAGED_IDENTITY_FOR_POD: "true"
7475
{{- end }}
7576
{{- end }}

pkg/azure/azure.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,16 @@ func RouteTableID(subscriptionID SubscriptionID, resourceGroup ResourceGroup, ro
5555
return ResourceID(subscriptionID, resourceGroup, "Microsoft.Network", "routeTables", string(routeTableName))
5656
}
5757

58+
// ApplicationGatewayID generates a application gateway resource id
59+
func ApplicationGatewayID(subscriptionID SubscriptionID, resourceGroup ResourceGroup, applicationGatewayName ResourceName) string {
60+
return ResourceID(subscriptionID, resourceGroup, "Microsoft.Network", "applicationGateways", string(applicationGatewayName))
61+
}
62+
63+
// ResourceGroupID generates a resource group resource id
64+
func ResourceGroupID(subscriptionID SubscriptionID, resourceGroup ResourceGroup) string {
65+
return fmt.Sprintf("/subscriptions/%s/resourceGroups/%s", subscriptionID, resourceGroup)
66+
}
67+
5868
// ConvertToClusterResourceGroup converts infra resource group to aks cluster ID
5969
func ConvertToClusterResourceGroup(subscriptionID SubscriptionID, resourceGroup ResourceGroup, err error) (string, error) {
6070
if err != nil {

pkg/azure/azure_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,20 @@ var _ = Describe("Azure", func() {
124124
})
125125
})
126126

127+
Context("test ApplicationGatewayID func", func() {
128+
It("generate correct application gateway ID", func() {
129+
expectedGatewayID := "/subscriptions/subID/resourceGroups/resGp/providers/Microsoft.Network/applicationGateways/gateway"
130+
Expect(ApplicationGatewayID(SubscriptionID("subID"), ResourceGroup("resGp"), ResourceName("gateway"))).To(Equal(expectedGatewayID))
131+
})
132+
})
133+
134+
Context("test ResourceGroupID func", func() {
135+
It("generate correct resource group ID", func() {
136+
expectedGroupID := "/subscriptions/subID/resourceGroups/resGp"
137+
Expect(ResourceGroupID(SubscriptionID("subID"), ResourceGroup("resGp"))).To(Equal(expectedGroupID))
138+
})
139+
})
140+
127141
Context("test ParseSubResourceID func", func() {
128142
It("parses sub resource ID correctly", func() {
129143
subResourceID := "/subscriptions/subID/resourceGroups/resGp/providers/Microsoft.Network/applicationGateways/appgw/sslCertificates/cert"

pkg/azure/client.go

Lines changed: 67 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"context"
1010
"encoding/json"
1111
"fmt"
12-
"time"
1312

1413
r "github.com/Azure/azure-sdk-for-go/profiles/latest/resources/mgmt/resources"
1514
n "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2019-09-01/network"
@@ -22,17 +21,12 @@ import (
2221
"github.com/Azure/application-gateway-kubernetes-ingress/pkg/version"
2322
)
2423

25-
const (
26-
retryPause = 10 * time.Second
27-
retryCount = 3
28-
extendedRetryCount = 60
29-
)
30-
3124
// AzClient is an interface for client to Azure
3225
type AzClient interface {
3326
SetAuthorizer(authorizer autorest.Authorizer)
3427

3528
ApplyRouteTable(string, string) error
29+
WaitForGetAccessOnGateway() error
3630
GetGateway() (n.ApplicationGateway, error)
3731
UpdateGateway(*n.ApplicationGateway) error
3832
DeployGatewayWithVnet(ResourceGroup, ResourceName, ResourceName, string) error
@@ -49,6 +43,7 @@ type azClient struct {
4943
routeTablesClient n.RouteTablesClient
5044
groupsClient r.GroupsClient
5145
deploymentsClient r.DeploymentsClient
46+
clientID string
5247

5348
subscriptionID SubscriptionID
5449
resourceGroupName ResourceGroup
@@ -59,7 +54,7 @@ type azClient struct {
5954
}
6055

6156
// NewAzClient returns an Azure Client
62-
func NewAzClient(subscriptionID SubscriptionID, resourceGroupName ResourceGroup, appGwName ResourceName) AzClient {
57+
func NewAzClient(subscriptionID SubscriptionID, resourceGroupName ResourceGroup, appGwName ResourceName, clientID string) AzClient {
6358
settings, err := auth.GetSettingsFromEnvironment()
6459
if err != nil {
6560
return nil
@@ -74,6 +69,7 @@ func NewAzClient(subscriptionID SubscriptionID, resourceGroupName ResourceGroup,
7469
routeTablesClient: n.NewRouteTablesClientWithBaseURI(settings.Environment.ResourceManagerEndpoint, string(subscriptionID)),
7570
groupsClient: r.NewGroupsClientWithBaseURI(settings.Environment.ResourceManagerEndpoint, string(subscriptionID)),
7671
deploymentsClient: r.NewDeploymentsClientWithBaseURI(settings.Environment.ResourceManagerEndpoint, string(subscriptionID)),
72+
clientID: clientID,
7773

7874
subscriptionID: subscriptionID,
7975
resourceGroupName: resourceGroupName,
@@ -118,50 +114,80 @@ func (az *azClient) SetAuthorizer(authorizer autorest.Authorizer) {
118114
az.deploymentsClient.Authorizer = authorizer
119115
}
120116

121-
func (az *azClient) GetGateway() (response n.ApplicationGateway, err error) {
122-
err = utils.Retry(retryCount, retryPause,
117+
func (az *azClient) WaitForGetAccessOnGateway() (err error) {
118+
glog.V(5).Info("Getting Application Gateway configuration.")
119+
err = utils.Retry(-1, retryPause,
123120
func() (utils.Retriable, error) {
124-
response, err = az.appGatewaysClient.Get(az.ctx, string(az.resourceGroupName), string(az.appGwName))
121+
response, err := az.appGatewaysClient.Get(az.ctx, string(az.resourceGroupName), string(az.appGwName))
125122
if err == nil {
126-
return utils.Retriable(false), nil
123+
return utils.Retriable(true), nil
127124
}
128125

129-
// Reasons for 403 errors
130-
if response.Response.Response != nil && response.Response.StatusCode == 403 {
131-
glog.Error("Following might be potential reasons:\n" +
132-
" AKS Service Principal requires 'Managed Identity Operator' access on Controller Identity\n" +
133-
" 'identityResourceID' and/or 'identityClientID' are incorrect in the Helm config\n" +
134-
" AGIC Identity requires 'Contributor' access on Application Gateway and 'Reader' access on Application Gateway's Resource Group\n" +
135-
" Please check the AAD Pod Identity mni and nmi pod logs to find potential issues.")
136-
}
126+
e := controllererrors.NewErrorWithInnerErrorf(
127+
controllererrors.ErrorGetApplicationGatewayError,
128+
err,
129+
"Failed fetching configuration for Application Gateway. Will retry in %v.", retryPause,
130+
)
137131

138-
if response.Response.Response != nil && response.Response.StatusCode == 404 {
139-
err := controllererrors.NewErrorWithInnerError(
140-
controllererrors.ErrorApplicationGatewayNotFound,
132+
if response.Response.Response != nil {
133+
e = controllererrors.NewErrorWithInnerErrorf(
134+
controllererrors.ErrorApplicationGatewayUnexpectedStatusCode,
141135
err,
142-
"received 404 NOT FOUND status code on getting Application Gateway from ARM.",
136+
"Unexpected status code '%d' while performing a GET on Application Gateway.", response.Response.StatusCode,
143137
)
144-
glog.Error(err.Error())
145-
return utils.Retriable(false), err
138+
139+
if response.Response.StatusCode == 404 {
140+
e.Code = controllererrors.ErrorApplicationGatewayNotFound
141+
}
142+
143+
if response.Response.StatusCode == 403 {
144+
e.Code = controllererrors.ErrorApplicationGatewayForbidden
145+
146+
clientID := "<agic-client-id>"
147+
if az.clientID != "" {
148+
clientID = az.clientID
149+
}
150+
151+
groupID := ResourceGroupID(az.subscriptionID, az.resourceGroupName)
152+
applicationGatewayID := ApplicationGatewayID(az.subscriptionID, az.resourceGroupName, az.appGwName)
153+
roleAssignmentCmd := fmt.Sprintf("az role assignment create --role Reader --scope %s --assignee %s;"+
154+
" az role assignment create --role Contributor --scope %s --assignee %s",
155+
groupID,
156+
clientID,
157+
applicationGatewayID,
158+
clientID,
159+
)
160+
161+
e.Message += fmt.Sprintf(" You can use '%s' to assign permissions."+
162+
" AGIC Identity needs atleast has 'Contributor' access to Application Gateway '%s' and 'Reader' access to Application Gateway's Resource Group '%s'.",
163+
roleAssignmentCmd,
164+
string(az.appGwName),
165+
string(az.resourceGroupName),
166+
)
167+
}
146168
}
147169

148-
if response.Response.Response != nil && response.Response.StatusCode != 200 {
149-
// for example, getting 401. This is not expected as we are getting a token before making the call.
150-
glog.Error("unexpected ARM status code on GET existing App Gateway config: ", response.Response.StatusCode)
170+
glog.Errorf(e.Error())
171+
172+
if controllererrors.IsErrorCode(e, controllererrors.ErrorApplicationGatewayNotFound) {
173+
return utils.Retriable(false), e
151174
}
152175

153-
err := controllererrors.NewErrorWithInnerErrorf(
154-
controllererrors.ErrorGetApplicationGatewayError,
155-
err,
156-
"failed fetching config for App Gateway instance. Will retry in %v.", retryPause,
157-
)
158-
glog.Errorf(err.Error())
159-
return utils.Retriable(true), err
176+
return utils.Retriable(true), e
160177
})
161178

162-
if err != nil && !controllererrors.IsErrorCode(err, controllererrors.ErrorApplicationGatewayNotFound) {
163-
glog.Errorf("Tried %d times to authenticate with ARM; Error: %s", retryCount, err)
164-
}
179+
return
180+
}
181+
182+
func (az *azClient) GetGateway() (gateway n.ApplicationGateway, err error) {
183+
err = utils.Retry(retryCount, retryPause,
184+
func() (utils.Retriable, error) {
185+
gateway, err = az.appGatewaysClient.Get(az.ctx, string(az.resourceGroupName), string(az.appGwName))
186+
if err != nil {
187+
glog.Errorf("Error while getting application gateway '%s': %s", string(az.appGwName), err)
188+
}
189+
return utils.Retriable(true), err
190+
})
165191
return
166192
}
167193

@@ -459,7 +485,8 @@ func getTemplate() map[string]interface{} {
459485
"apiVersion": "2018-08-01",
460486
"location": "[resourceGroup().location]",
461487
"tags": {
462-
"managed-by-k8s-ingress": "true"
488+
"managed-by-k8s-ingress": "true",
489+
"created-by": "ingress-appgw"
463490
},
464491
"properties": {
465492
"sku": {

pkg/azure/consts.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// -------------------------------------------------------------------------------------------
2+
// Copyright (c) Microsoft Corporation. All rights reserved.
3+
// Licensed under the MIT License. See License.txt in the project root for license information.
4+
// --------------------------------------------------------------------------------------------
5+
6+
package azure
7+
8+
import "time"
9+
10+
const (
11+
retryPause = 10 * time.Second
12+
retryCount = 3
13+
maxAuthRetryCount = 10
14+
extendedRetryCount = 60
15+
)

pkg/azure/fake.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,20 @@ func (az *FakeAzClient) GetGateway() (n.ApplicationGateway, error) {
5252
return n.ApplicationGateway{}, nil
5353
}
5454

55+
// WaitForGetAccessOnGateway runs GetGatewayFunc until it returns a gateway
56+
func (az *FakeAzClient) WaitForGetAccessOnGateway() error {
57+
if az.GetGatewayFunc != nil {
58+
for {
59+
_, err := az.GetGatewayFunc()
60+
if err == nil {
61+
return nil
62+
}
63+
}
64+
}
65+
66+
return nil
67+
}
68+
5569
// UpdateGateway runs UpdateGatewayFunc and return a gateway
5670
func (az *FakeAzClient) UpdateGateway(appGwObj *n.ApplicationGateway) (err error) {
5771
if az.UpdateGatewayFunc != nil {

pkg/controllererrors/errorcodes.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,12 @@ const (
5959
ErrorInvalidContent ErrorCode = "ErrorInvalidContent"
6060

6161
// azure package
62-
ErrorGetApplicationGatewayError ErrorCode = "ErrorGetApplicationGatewayError"
63-
ErrorApplicationGatewayNotFound ErrorCode = "ErrorApplicationGatewayNotFound"
64-
ErrorSubnetNotFound ErrorCode = "ErrorSubnetNotFound"
65-
ErrorMissingResourceGroup ErrorCode = "ErrorMissingResourceGroup"
62+
ErrorGetApplicationGatewayError ErrorCode = "ErrorGetApplicationGatewayError"
63+
ErrorApplicationGatewayNotFound ErrorCode = "ErrorApplicationGatewayNotFound"
64+
ErrorApplicationGatewayForbidden ErrorCode = "ErrorApplicationGatewayForbidden"
65+
ErrorApplicationGatewayUnexpectedStatusCode ErrorCode = "ErrorApplicationGatewayUnexpectedStatusCode"
66+
ErrorSubnetNotFound ErrorCode = "ErrorSubnetNotFound"
67+
ErrorMissingResourceGroup ErrorCode = "ErrorMissingResourceGroup"
6668

6769
// main package
6870
ErrorNoSuchNamespace ErrorCode = "ErrorNoSuchNamespace"

pkg/controllererrors/types.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@ func NewErrorWithInnerErrorf(code ErrorCode, innerError error, message string, a
5656
// Error implements error interface to return error
5757
func (e *Error) Error() string {
5858
if e.InnerError != nil {
59-
return fmt.Sprintf("Code: %s, Message: %s, InnerError: %s", e.Code, e.Message, e.InnerError.Error())
59+
return fmt.Sprintf("Code=\"%s\" Message=\"%s\" InnerError=\"%s\"", e.Code, e.Message, e.InnerError.Error())
6060
}
6161

62-
return fmt.Sprintf("Code: %s, Message: %s", e.Code, e.Message)
62+
return fmt.Sprintf("Code=\"%s\" Message=\"%s\"", e.Code, e.Message)
6363
}
6464

6565
// IsErrorCode matches error code to the error

pkg/environment/environment.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ const (
2020
// CloudProviderConfigLocationVarName is an environment variable name. This file is available on azure cluster.
2121
CloudProviderConfigLocationVarName = "AZURE_CLOUD_PROVIDER_LOCATION"
2222

23+
// ClientIDVarName is an environment variable which stores the client id provided through user assigned identity
24+
ClientIDVarName = "AZURE_CLIENT_ID"
25+
2326
// SubscriptionIDVarName is the name of the APPGW_SUBSCRIPTION_ID
2427
SubscriptionIDVarName = "APPGW_SUBSCRIPTION_ID"
2528

@@ -101,6 +104,7 @@ var (
101104
// EnvVariables is a struct storing values for environment variables.
102105
type EnvVariables struct {
103106
CloudProviderConfigLocation string
107+
ClientID string
104108
SubscriptionID string
105109
ResourceGroupName string
106110
AppGwName string
@@ -158,6 +162,7 @@ func (env *EnvVariables) Consolidate(cpConfig *azure.CloudProviderConfig) {
158162
func GetEnv() EnvVariables {
159163
env := EnvVariables{
160164
CloudProviderConfigLocation: os.Getenv(CloudProviderConfigLocationVarName),
165+
ClientID: os.Getenv(ClientIDVarName),
161166
SubscriptionID: os.Getenv(SubscriptionIDVarName),
162167
ResourceGroupName: os.Getenv(ResourceGroupNameVarName),
163168
AppGwName: os.Getenv(AppGwNameVarName),

0 commit comments

Comments
 (0)