Skip to content

Commit 8e113d9

Browse files
committed
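Add the --max-wait-managed-resources flag (maxWaitActiveUpdateDelete, default 30 minutes) to make the maximum wait for managed EKS resources to become active, finish updating, or be deleted configurable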
Signed-off-by: Pankaj Walke <[email protected]>
1 parent 715851c commit 8e113d9

File tree

19 files changed

+151
-122
lines changed

19 files changed

+151
-122
lines changed

cmd/clusterawsadm/cmd/eks/addons/list_available.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/aws/aws-sdk-go-v2/config"
2626
"github.com/aws/aws-sdk-go-v2/service/eks"
2727
"github.com/spf13/cobra"
28+
"k8s.io/utils/ptr"
2829

2930
cmdout "sigs.k8s.io/cluster-api-provider-aws/v2/cmd/clusterawsadm/printers"
3031
)
@@ -52,13 +53,10 @@ func listAvailableCmd() *cobra.Command {
5253
}
5354

5455
func listAvailableAddons(region, clusterName, printerType *string) error {
55-
var regionOptsFunc config.LoadOptionsFunc
5656
ctx := context.TODO()
57-
if *region != "" {
58-
regionOptsFunc = config.WithRegion(*region)
59-
}
57+
6058
optFns := []func(*config.LoadOptions) error{
61-
regionOptsFunc,
59+
config.WithRegion(ptr.Deref(region, "")),
6260
}
6361

6462
cfg, err := config.LoadDefaultConfig(context.Background(), optFns...)

controlplane/eks/controllers/awsmanagedcontrolplane_controller.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ type AWSManagedControlPlaneReconciler struct {
102102
ExternalResourceGC bool
103103
AlternativeGCStrategy bool
104104
WaitInfraPeriod time.Duration
105+
MaxWaitActiveUpdateDelete time.Duration
105106
TagUnmanagedNetworkResources bool
106107
}
107108

@@ -245,6 +246,7 @@ func (r *AWSManagedControlPlaneReconciler) Reconcile(ctx context.Context, req ct
245246
Cluster: cluster,
246247
ControlPlane: awsManagedControlPlane,
247248
ControllerName: strings.ToLower(awsManagedControlPlaneKind),
249+
MaxWaitActiveUpdateDelete: r.MaxWaitActiveUpdateDelete,
248250
EnableIAM: r.EnableIAM,
249251
AllowAdditionalRoles: r.AllowAdditionalRoles,
250252
Endpoints: r.Endpoints,

controlplane/eks/controllers/awsmanagedcontrolplane_controller_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ import (
6060
"sigs.k8s.io/cluster-api/util/patch"
6161
)
6262

63+
const (
64+
maxActiveUpdateDeleteWait = 30 * time.Minute
65+
)
66+
6367
func TestAWSManagedControlPlaneReconcilerIntegrationTests(t *testing.T) {
6468
var (
6569
reconciler AWSManagedControlPlaneReconciler
@@ -886,7 +890,7 @@ func mockedEKSCluster(ctx context.Context, g *WithT, eksRec *mock_eksiface.MockE
886890

887891
waitUntilClusterActiveCall := eksRec.WaitUntilClusterActive(ctx, &eks.DescribeClusterInput{
888892
Name: aws.String("test-cluster"),
889-
}).After(createClusterCall).Return(nil)
893+
}, maxActiveUpdateDeleteWait).After(createClusterCall).Return(nil)
890894

891895
clusterActive := clusterCreating // copy
892896
clusterActive.Status = ekstypes.ClusterStatusActive

controlplane/eks/controllers/helpers_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ import (
3131
func getAWSManagedControlPlaneScope(cluster *clusterv1.Cluster, awsManagedControlPlane *ekscontrolplanev1.AWSManagedControlPlane) *scope.ManagedControlPlaneScope {
3232
scope, err := scope.NewManagedControlPlaneScope(
3333
scope.ManagedControlPlaneScopeParams{
34-
Client: testEnv.Client,
35-
Cluster: cluster,
36-
ControlPlane: awsManagedControlPlane,
37-
EnableIAM: true,
34+
Client: testEnv.Client,
35+
Cluster: cluster,
36+
ControlPlane: awsManagedControlPlane,
37+
EnableIAM: true,
38+
MaxWaitActiveUpdateDelete: maxActiveUpdateDeleteWait,
3839
},
3940
)
4041
utilruntime.Must(err)

exp/controllers/awsmanagedmachinepool_controller.go

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package controllers
1818

1919
import (
2020
"context"
21+
"time"
2122

2223
"github.com/pkg/errors"
2324
corev1 "k8s.io/api/core/v1"
@@ -58,6 +59,7 @@ type AWSManagedMachinePoolReconciler struct {
5859
AllowAdditionalRoles bool
5960
WatchFilterValue string
6061
TagUnmanagedNetworkResources bool
62+
MaxWaitActiveUpdateDelete time.Duration
6163
}
6264

6365
// SetupWithManager is used to setup the controller.
@@ -155,16 +157,17 @@ func (r *AWSManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctr
155157
}
156158

157159
machinePoolScope, err := scope.NewManagedMachinePoolScope(scope.ManagedMachinePoolScopeParams{
158-
Client: r.Client,
159-
ControllerName: "awsmanagedmachinepool",
160-
Cluster: cluster,
161-
ControlPlane: controlPlane,
162-
MachinePool: machinePool,
163-
ManagedMachinePool: awsPool,
164-
EnableIAM: r.EnableIAM,
165-
AllowAdditionalRoles: r.AllowAdditionalRoles,
166-
Endpoints: r.Endpoints,
167-
InfraCluster: managedControlPlaneScope,
160+
Client: r.Client,
161+
ControllerName: "awsmanagedmachinepool",
162+
Cluster: cluster,
163+
ControlPlane: controlPlane,
164+
MachinePool: machinePool,
165+
ManagedMachinePool: awsPool,
166+
EnableIAM: r.EnableIAM,
167+
AllowAdditionalRoles: r.AllowAdditionalRoles,
168+
Endpoints: r.Endpoints,
169+
InfraCluster: managedControlPlaneScope,
170+
MaxWaitActiveUpdateDelete: r.MaxWaitActiveUpdateDelete,
168171
})
169172
if err != nil {
170173
return ctrl.Result{}, errors.Wrap(err, "failed to create scope")

main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ var (
105105
instanceStateConcurrency int
106106
awsMachineConcurrency int
107107
waitInfraPeriod time.Duration
108+
maxWaitActiveUpdateDelete time.Duration
108109
syncPeriod time.Duration
109110
webhookPort int
110111
webhookCertDir string
@@ -441,6 +442,7 @@ func setupEKSReconcilersAndWebhooks(ctx context.Context, mgr ctrl.Manager, awsSe
441442
ExternalResourceGC: externalResourceGC,
442443
AlternativeGCStrategy: alternativeGCStrategy,
443444
WaitInfraPeriod: waitInfraPeriod,
445+
MaxWaitActiveUpdateDelete: maxWaitActiveUpdateDelete,
444446
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
445447
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsClusterConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
446448
setupLog.Error(err, "unable to create controller", "controller", "AWSManagedControlPlane")
@@ -494,6 +496,7 @@ func setupEKSReconcilersAndWebhooks(ctx context.Context, mgr ctrl.Manager, awsSe
494496
Recorder: mgr.GetEventRecorderFor("awsmanagedmachinepool-reconciler"),
495497
WatchFilterValue: watchFilterValue,
496498
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
499+
MaxWaitActiveUpdateDelete: maxWaitActiveUpdateDelete,
497500
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: instanceStateConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
498501
setupLog.Error(err, "unable to create controller", "controller", "AWSManagedMachinePool")
499502
os.Exit(1)
@@ -585,6 +588,12 @@ func initFlags(fs *pflag.FlagSet) {
585588
"The minimum interval at which reconcile process wait for infrastructure to be ready.",
586589
)
587590

591+
fs.DurationVar(&maxWaitActiveUpdateDelete,
592+
"max-wait-managed-resources",
593+
30*time.Minute,
594+
"The maximum duration to wait for managed AWS resources to be ready.",
595+
)
596+
588597
fs.DurationVar(&syncPeriod,
589598
"sync-period",
590599
10*time.Minute,

pkg/cloud/scope/managedcontrolplane.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,14 @@ func init() {
5757

5858
// ManagedControlPlaneScopeParams defines the input parameters used to create a new Scope.
5959
type ManagedControlPlaneScopeParams struct {
60-
Client client.Client
61-
Logger *logger.Logger
62-
Cluster *clusterv1.Cluster
63-
ControlPlane *ekscontrolplanev1.AWSManagedControlPlane
64-
ControllerName string
65-
Endpoints []ServiceEndpoint
66-
Session awsclient.ConfigProvider
60+
Client client.Client
61+
Logger *logger.Logger
62+
Cluster *clusterv1.Cluster
63+
ControlPlane *ekscontrolplanev1.AWSManagedControlPlane
64+
ControllerName string
65+
Endpoints []ServiceEndpoint
66+
Session awsclient.ConfigProvider
67+
MaxWaitActiveUpdateDelete time.Duration
6768

6869
EnableIAM bool
6970
AllowAdditionalRoles bool
@@ -89,6 +90,7 @@ func NewManagedControlPlaneScope(params ManagedControlPlaneScopeParams) (*Manage
8990
Client: params.Client,
9091
Cluster: params.Cluster,
9192
ControlPlane: params.ControlPlane,
93+
MaxWaitActiveUpdateDelete: params.MaxWaitActiveUpdateDelete,
9294
patchHelper: nil,
9395
session: nil,
9496
serviceLimiters: nil,
@@ -127,8 +129,9 @@ type ManagedControlPlaneScope struct {
127129
Client client.Client
128130
patchHelper *patch.Helper
129131

130-
Cluster *clusterv1.Cluster
131-
ControlPlane *ekscontrolplanev1.AWSManagedControlPlane
132+
Cluster *clusterv1.Cluster
133+
ControlPlane *ekscontrolplanev1.AWSManagedControlPlane
134+
MaxWaitActiveUpdateDelete time.Duration
132135

133136
session awsclient.ConfigProvider
134137
sessionV2 awsv2.Config

pkg/cloud/scope/managednodegroup.go

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package scope
1919
import (
2020
"context"
2121
"fmt"
22+
"time"
2223

2324
awsv2 "github.com/aws/aws-sdk-go-v2/aws"
2425
awsclient "github.com/aws/aws-sdk-go/aws/client"
@@ -45,15 +46,16 @@ import (
4546

4647
// ManagedMachinePoolScopeParams defines the input parameters used to create a new Scope.
4748
type ManagedMachinePoolScopeParams struct {
48-
Client client.Client
49-
Logger *logger.Logger
50-
Cluster *clusterv1.Cluster
51-
ControlPlane *ekscontrolplanev1.AWSManagedControlPlane
52-
ManagedMachinePool *expinfrav1.AWSManagedMachinePool
53-
MachinePool *expclusterv1.MachinePool
54-
ControllerName string
55-
Endpoints []ServiceEndpoint
56-
Session awsclient.ConfigProvider
49+
Client client.Client
50+
Logger *logger.Logger
51+
Cluster *clusterv1.Cluster
52+
ControlPlane *ekscontrolplanev1.AWSManagedControlPlane
53+
ManagedMachinePool *expinfrav1.AWSManagedMachinePool
54+
MachinePool *expclusterv1.MachinePool
55+
ControllerName string
56+
Endpoints []ServiceEndpoint
57+
Session awsclient.ConfigProvider
58+
MaxWaitActiveUpdateDelete time.Duration
5759

5860
EnableIAM bool
5961
AllowAdditionalRoles bool
@@ -79,11 +81,12 @@ func NewManagedMachinePoolScope(params ManagedMachinePoolScopeParams) (*ManagedM
7981
}
8082

8183
managedScope := &ManagedControlPlaneScope{
82-
Logger: *params.Logger,
83-
Client: params.Client,
84-
Cluster: params.Cluster,
85-
ControlPlane: params.ControlPlane,
86-
controllerName: params.ControllerName,
84+
Logger: *params.Logger,
85+
Client: params.Client,
86+
Cluster: params.Cluster,
87+
MaxWaitActiveUpdateDelete: params.MaxWaitActiveUpdateDelete,
88+
ControlPlane: params.ControlPlane,
89+
controllerName: params.ControllerName,
8790
}
8891
session, serviceLimiters, err := sessionForClusterWithRegion(params.Client, managedScope, params.ControlPlane.Spec.Region, params.Endpoints, params.Logger)
8992
if err != nil {
@@ -132,11 +135,12 @@ type ManagedMachinePoolScope struct {
132135
patchHelper *patch.Helper
133136
capiMachinePoolPatchHelper *patch.Helper
134137

135-
Cluster *clusterv1.Cluster
136-
ControlPlane *ekscontrolplanev1.AWSManagedControlPlane
137-
ManagedMachinePool *expinfrav1.AWSManagedMachinePool
138-
MachinePool *expclusterv1.MachinePool
139-
EC2Scope EC2Scope
138+
Cluster *clusterv1.Cluster
139+
ControlPlane *ekscontrolplanev1.AWSManagedControlPlane
140+
ManagedMachinePool *expinfrav1.AWSManagedMachinePool
141+
MachinePool *expclusterv1.MachinePool
142+
EC2Scope EC2Scope
143+
MaxWaitActiveUpdateDelete time.Duration
140144

141145
session awsclient.ConfigProvider
142146
sessionV2 awsv2.Config

pkg/cloud/services/eks/addons.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package eks
1919
import (
2020
"context"
2121
"fmt"
22+
"time"
2223

2324
"github.com/aws/aws-sdk-go-v2/aws"
2425
"github.com/aws/aws-sdk-go-v2/service/eks"
@@ -61,7 +62,7 @@ func (s *Service) reconcileAddons(ctx context.Context) error {
6162

6263
// Compute operations to move installed to desired
6364
s.scope.Debug("creating eks addons plan", "cluster", eksClusterName, "numdesired", len(desiredAddons), "numinstalled", len(installed))
64-
addonsPlan := eksaddons.NewPlan(eksClusterName, desiredAddons, installed, s.EKSClient)
65+
addonsPlan := eksaddons.NewPlan(eksClusterName, desiredAddons, installed, s.EKSClient, s.scope.MaxWaitActiveUpdateDelete)
6566
procedures, err := addonsPlan.Create(ctx)
6667
if err != nil {
6768
s.scope.Error(err, "failed creating eks addons plane")
@@ -210,13 +211,13 @@ func (s *Service) translateAPIToAddon(addons []ekscontrolplanev1.Addon) []*eksad
210211
return converted
211212
}
212213

213-
// WaitUntilAddonDeleted is blocking function to wait until EKS Nodegroup is Deleted.
214-
func (k *EKSClient) WaitUntilAddonDeleted(ctx context.Context, input *eks.DescribeAddonInput) error {
214+
// WaitUntilAddonDeleted is blocking function to wait until EKS Addon is Deleted.
215+
func (k *EKSClient) WaitUntilAddonDeleted(ctx context.Context, input *eks.DescribeAddonInput, maxWait time.Duration) error {
215216
waiter := eks.NewAddonDeletedWaiter(k, func(o *eks.AddonDeletedWaiterOptions) {
216217
o.LogWaitAttempts = true
217218
})
218219

219-
return waiter.Wait(ctx, input, maxActiveUpdateDeleteWait)
220+
return waiter.Wait(ctx, input, maxWait)
220221
}
221222

222223
func convertConflictResolution(conflict ekscontrolplanev1.AddonResolution) *string {

pkg/cloud/services/eks/cluster.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222
"net"
23+
"time"
2324

2425
"github.com/aws/aws-sdk-go-v2/aws"
2526
"github.com/aws/aws-sdk-go-v2/service/eks"
@@ -266,7 +267,7 @@ func (s *Service) deleteClusterAndWait(ctx context.Context, cluster *ekstypes.Cl
266267
Name: cluster.Name,
267268
}
268269

269-
err = s.EKSClient.WaitUntilClusterDeleted(ctx, waitInput)
270+
err = s.EKSClient.WaitUntilClusterDeleted(ctx, waitInput, s.scope.MaxWaitActiveUpdateDelete)
270271
if err != nil {
271272
return errors.Wrapf(err, "failed waiting for eks cluster %s to delete", *cluster.Name)
272273
}
@@ -494,7 +495,7 @@ func (s *Service) waitForClusterActive(ctx context.Context) (*ekstypes.Cluster,
494495
req := eks.DescribeClusterInput{
495496
Name: aws.String(eksClusterName),
496497
}
497-
if err := s.EKSClient.WaitUntilClusterActive(ctx, &req); err != nil {
498+
if err := s.EKSClient.WaitUntilClusterActive(ctx, &req, s.scope.MaxWaitActiveUpdateDelete); err != nil {
498499
return nil, errors.Wrapf(err, "failed to wait for eks control plane %q", *req.Name)
499500
}
500501

@@ -586,7 +587,6 @@ func publicAccessCIDRsEqual(as []string, bs []string) bool {
586587
bsDefault = true
587588
}
588589
if sets.NewString(as...).Equal(sets.NewString(bs...)) {
589-
fmt.Println("Found IPV6 true")
590590
return true
591591
}
592592

@@ -702,6 +702,7 @@ func (s *Service) reconcileClusterVersion(ctx context.Context, cluster *ekstypes
702702
if err := s.EKSClient.WaitUntilClusterUpdating(
703703
ctx,
704704
&eks.DescribeClusterInput{Name: aws.String(s.scope.KubernetesClusterName())},
705+
s.scope.MaxWaitActiveUpdateDelete,
705706
); err != nil {
706707
return false, err
707708
}
@@ -752,6 +753,7 @@ func (s *Service) updateEncryptionConfig(ctx context.Context, updatedEncryptionC
752753
if err := s.EKSClient.WaitUntilClusterUpdating(
753754
ctx,
754755
&eks.DescribeClusterInput{Name: aws.String(s.scope.KubernetesClusterName())},
756+
s.scope.MaxWaitActiveUpdateDelete,
755757
); err != nil {
756758
return false, err
757759
}
@@ -791,29 +793,29 @@ func getKeyArn(encryptionConfig ekstypes.EncryptionConfig) string {
791793
}
792794

793795
// WaitUntilClusterActive is blocking function to wait until EKS Cluster is Active.
794-
func (k *EKSClient) WaitUntilClusterActive(ctx context.Context, input *eks.DescribeClusterInput) error {
796+
func (k *EKSClient) WaitUntilClusterActive(ctx context.Context, input *eks.DescribeClusterInput, maxWait time.Duration) error {
795797
waiter := eks.NewClusterActiveWaiter(k, func(o *eks.ClusterActiveWaiterOptions) {
796798
o.LogWaitAttempts = true
797799
})
798800

799-
return waiter.Wait(ctx, input, maxActiveUpdateDeleteWait)
801+
return waiter.Wait(ctx, input, maxWait)
800802
}
801803

802804
// WaitUntilClusterDeleted is blocking function to wait until EKS Cluster is Deleted.
803-
func (k *EKSClient) WaitUntilClusterDeleted(ctx context.Context, input *eks.DescribeClusterInput) error {
805+
func (k *EKSClient) WaitUntilClusterDeleted(ctx context.Context, input *eks.DescribeClusterInput, maxWait time.Duration) error {
804806
waiter := eks.NewClusterDeletedWaiter(k)
805807

806-
return waiter.Wait(ctx, input, maxActiveUpdateDeleteWait)
808+
return waiter.Wait(ctx, input, maxWait)
807809
}
808810

809811
// WaitUntilClusterUpdating is blocking function to wait until EKS Cluster is Updating.
810-
func (k *EKSClient) WaitUntilClusterUpdating(ctx context.Context, input *eks.DescribeClusterInput) error {
812+
func (k *EKSClient) WaitUntilClusterUpdating(ctx context.Context, input *eks.DescribeClusterInput, maxWait time.Duration) error {
811813
waiter := eks.NewClusterActiveWaiter(k, func(o *eks.ClusterActiveWaiterOptions) {
812814
o.LogWaitAttempts = true
813815
o.Retryable = clusterUpdatingStateRetryable
814816
})
815817

816-
return waiter.Wait(ctx, input, maxActiveUpdateDeleteWait)
818+
return waiter.Wait(ctx, input, maxWait)
817819
}
818820

819821
// clusterUpdatingStateRetryable is adapted from aws-sdk-go-v2/service/eks/api_op_DescribeCluster.go.

0 commit comments

Comments
 (0)