Skip to content

Commit 25fed45

Browse files
authored
Refine update patterns for Cluster and Nodegroup resources (#100)
Prior to this patch, we observed that there was some missing logic for handling `Cluster` and `Nodegroup` updates, especially when dealing with version upgrades - making it a painful/incomplete UX when using the eks-controller. This commit enhances the logic for managing updates and upgrades of cluster and nodegroups resources, enabling users to experience a better declarative resource model. Here's what the controller can do now: `Cluster` resources: - Introduces `Cluster` version rolling update, allowing users to specify any version higher than the "current" one, even if the EKS API doesn't allow it. The controller will incrementally upgrade the the cluster version until it reaches the desired state. e.g `1.25` -> `1.29` `Nodegroup` resources: - Allows updates for `ReleaseVersion` and `LaunchTemplate` - Enhances the delta/comparison logic for both `Version` and `ReleaseVersion` fields. - Carefully crafts the `UpdateNodeGroupVersion` API input to avoid situations where the controller/resource can get trapped in a infinite update loop (`1.27` -> `1.28` -> `1.27`) - Set `DiskSize`, `RemoteAccess`, `NodeRole` and `Subnets` as immutable fields Additionally this commit is also adding e2e tests for the features/bug fixes mentioned above Signed-off-by: Amine Hilaly <[email protected]> By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent f868a17 commit 25fed45

File tree

15 files changed

+792
-38
lines changed

15 files changed

+792
-38
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
ack_generate_info:
2-
build_date: "2024-02-03T01:09:21Z"
2+
build_date: "2024-02-08T04:09:29Z"
33
build_hash: 5b4565ec2712d29988b8123aeeed6a4af57467bf
44
go_version: go1.21.5
55
version: v0.29.2-4-g5b4565e
66
api_directory_checksum: d960a9f06b58cc445e5ab21fb26ee6d92c441374
77
api_version: v1alpha1
88
aws_sdk_go_version: v1.49.13
99
generator_config_info:
10-
file_checksum: 7473dd4afcf1e30d99f20e77cbad94e2df5c7093
10+
file_checksum: 89a00ff29a62ca0e86a8c08b275a6e9e708004b0
1111
original_file_name: generator.yaml
1212
last_modification:
1313
reason: API generation

apis/v1alpha1/generator.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,11 @@ resources:
232232
service_name: iam
233233
resource: Role
234234
path: Status.ACKResourceMetadata.ARN
235+
is_immutable: true
236+
DiskSize:
237+
is_immutable: true
238+
RemoteAccess:
239+
is_immutable: true
235240
RemoteAccess.SourceSecurityGroups:
236241
references:
237242
service_name: ec2
@@ -242,9 +247,16 @@ resources:
242247
service_name: ec2
243248
resource: Subnet
244249
path: Status.SubnetID
250+
is_immutable: true
245251
Taints:
246252
compare:
247253
is_ignored: true
254+
ReleaseVersion:
255+
compare:
256+
is_ignored: true
257+
Version:
258+
compare:
259+
is_ignored: true
248260
renames:
249261
operations:
250262
CreateNodegroup:

generator.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,11 @@ resources:
232232
service_name: iam
233233
resource: Role
234234
path: Status.ACKResourceMetadata.ARN
235+
is_immutable: true
236+
DiskSize:
237+
is_immutable: true
238+
RemoteAccess:
239+
is_immutable: true
235240
RemoteAccess.SourceSecurityGroups:
236241
references:
237242
service_name: ec2
@@ -242,9 +247,16 @@ resources:
242247
service_name: ec2
243248
resource: Subnet
244249
path: Status.SubnetID
250+
is_immutable: true
245251
Taints:
246252
compare:
247253
is_ignored: true
254+
ReleaseVersion:
255+
compare:
256+
is_ignored: true
257+
Version:
258+
compare:
259+
is_ignored: true
248260
renames:
249261
operations:
250262
CreateNodegroup:

pkg/resource/cluster/hook.go

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,16 @@ import (
1919
"fmt"
2020
"time"
2121

22-
"github.com/aws-controllers-k8s/eks-controller/pkg/tags"
2322
ackcompare "github.com/aws-controllers-k8s/runtime/pkg/compare"
2423
ackcondition "github.com/aws-controllers-k8s/runtime/pkg/condition"
2524
ackerr "github.com/aws-controllers-k8s/runtime/pkg/errors"
2625
ackrequeue "github.com/aws-controllers-k8s/runtime/pkg/requeue"
2726
ackrtlog "github.com/aws-controllers-k8s/runtime/pkg/runtime/log"
2827
svcsdk "github.com/aws/aws-sdk-go/service/eks"
2928
corev1 "k8s.io/api/core/v1"
29+
30+
"github.com/aws-controllers-k8s/eks-controller/pkg/tags"
31+
"github.com/aws-controllers-k8s/eks-controller/pkg/util"
3032
)
3133

3234
const (
@@ -246,7 +248,7 @@ func (rm *resourceManager) customUpdate(
246248
}
247249

248250
if delta.DifferentAt("Spec.Version") {
249-
if err := rm.updateVersion(ctx, desired); err != nil {
251+
if err := rm.updateVersion(ctx, desired, latest); err != nil {
250252
awserr, ok := ackerr.AWSError(err)
251253

252254
// Check to see if we've raced an async update call and need to
@@ -264,16 +266,45 @@ func (rm *resourceManager) customUpdate(
264266
return updatedRes, nil
265267
}
266268

269+
// updateVersion updates the cluster version to the next possible version.
270+
//
271+
// This function isn't supposed to blindly update the cluster version to the
272+
// desired version. It should increment the minor version of the observed
273+
// version and update the cluster to that version. The reconciliation mechanism
274+
// should ensure that the desired version is eventually reached.
267275
func (rm *resourceManager) updateVersion(
268276
ctx context.Context,
269-
r *resource,
277+
desired, latest *resource,
270278
) (err error) {
271279
rlog := ackrtlog.FromContext(ctx)
272280
exit := rlog.Trace("rm.updateVersion")
273281
defer exit(err)
282+
283+
// If the desired version is less than the observed version, we can't update
284+
// the cluster to an older version.
285+
// Note that the desired and observed versions are guaranteed to be never be
286+
// equal at this stage, as the delta comparison would have caught that.
287+
compareResult, err := util.CompareEKSKubernetesVersions(*desired.ko.Spec.Version, *latest.ko.Spec.Version)
288+
if err != nil {
289+
return ackerr.NewTerminalError(fmt.Errorf("failed to compare the desired and observed versions: %v", err))
290+
}
291+
if compareResult != 1 {
292+
return ackerr.NewTerminalError(
293+
fmt.Errorf("desired cluster version is less than the observed version: %s < %s",
294+
*desired.ko.Spec.Version, *latest.ko.Spec.Version,
295+
),
296+
)
297+
}
298+
299+
// Compure the next minor version of the desired version
300+
nextVersion, err := util.IncrementEKSMinorVersion(*latest.ko.Spec.Version)
301+
if err != nil {
302+
return ackerr.NewTerminalError(fmt.Errorf("failed to compute the next minor version: %v", err))
303+
}
304+
274305
input := &svcsdk.UpdateClusterVersionInput{
275-
Name: r.ko.Spec.Name,
276-
Version: r.ko.Spec.Version,
306+
Name: desired.ko.Spec.Name,
307+
Version: &nextVersion,
277308
}
278309

279310
_, err = rm.sdkapi.UpdateClusterVersionWithContext(ctx, input)

pkg/resource/nodegroup/delta.go

Lines changed: 0 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/resource/nodegroup/hook.go

Lines changed: 96 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ import (
1818
"fmt"
1919
"reflect"
2020
"strconv"
21+
"strings"
2122
"time"
2223

2324
ackcompare "github.com/aws-controllers-k8s/runtime/pkg/compare"
2425
ackcondition "github.com/aws-controllers-k8s/runtime/pkg/condition"
26+
ackerr "github.com/aws-controllers-k8s/runtime/pkg/errors"
2527
ackrequeue "github.com/aws-controllers-k8s/runtime/pkg/requeue"
2628
ackrtlog "github.com/aws-controllers-k8s/runtime/pkg/runtime/log"
2729
svcsdk "github.com/aws/aws-sdk-go/service/eks"
@@ -30,6 +32,7 @@ import (
3032

3133
svcapitypes "github.com/aws-controllers-k8s/eks-controller/apis/v1alpha1"
3234
"github.com/aws-controllers-k8s/eks-controller/pkg/tags"
35+
"github.com/aws-controllers-k8s/eks-controller/pkg/util"
3336
)
3437

3538
// Taken from the list of nodegroup statuses on the boto3 documentation
@@ -104,6 +107,20 @@ func customPreCompare(
104107
}
105108
}
106109
}
110+
// only compare releaseVersion if it's provided by the user. Note that there will always be a
111+
// ReleaseVersion in the observed state (after a successful creation).
112+
if a.ko.Spec.ReleaseVersion != nil && *a.ko.Spec.ReleaseVersion != "" {
113+
if *a.ko.Spec.ReleaseVersion != *b.ko.Spec.ReleaseVersion {
114+
delta.Add("Spec.ReleaseVersion", a.ko.Spec.ReleaseVersion, b.ko.Spec.ReleaseVersion)
115+
}
116+
}
117+
// only compare version if it's provided by the user. Note that there will always be a
118+
// Version in the observed state (after a successful creation).
119+
if a.ko.Spec.Version != nil && *a.ko.Spec.Version != "" {
120+
if *a.ko.Spec.Version != *b.ko.Spec.Version {
121+
delta.Add("Spec.Version", a.ko.Spec.Version, b.ko.Spec.Version)
122+
}
123+
}
107124
}
108125

109126
func getDesiredSizeManagedByAnnotation(nodegroup *svcapitypes.Nodegroup) (string, bool) {
@@ -239,6 +256,12 @@ func (rm *resourceManager) customUpdate(
239256
rlog := ackrtlog.FromContext(ctx)
240257
exit := rlog.Trace("rm.customUpdate")
241258
defer exit(err)
259+
260+
if immutableFieldChanges := rm.getImmutableFieldChanges(delta); len(immutableFieldChanges) > 0 {
261+
msg := fmt.Sprintf("Immutable Spec fields have been modified: %s", strings.Join(immutableFieldChanges, ","))
262+
return nil, ackerr.NewTerminalError(fmt.Errorf(msg))
263+
}
264+
242265
if delta.DifferentAt("Spec.Tags") {
243266
err := tags.SyncTags(
244267
ctx, rm.sdkapi, rm.metrics,
@@ -282,8 +305,55 @@ func (rm *resourceManager) customUpdate(
282305
}
283306
return returnNodegroupUpdating(updatedRes)
284307
}
285-
if delta.DifferentAt("Spec.Version") {
286-
if err := rm.updateVersion(ctx, desired); err != nil {
308+
309+
// At the stage we know that at least one of Version, ReleaseVersion or
310+
// LaunchTemplate has changed. The API does not allow using LaunchTemplate
311+
// with either Version or ReleaseVersion. So we need to check if the user
312+
// has provided a valid desired state.
313+
// There is no need to manually set a terminal condition here as the
314+
// controller will automatically set a terminal condition if the api
315+
// returns an InvalidParameterException
316+
317+
if delta.DifferentAt("Spec.Version") || delta.DifferentAt("Spec.ReleaseVersion") || delta.DifferentAt("Spec.LaunchTemplate") {
318+
// Before trying to trigger a Nodegroup version update, we need to ensure that the user
319+
// has provided a valid desired state. For context the EKS UpdateNodegroupVersion API
320+
// accepts optional parameters Version and ReleaseVersion.
321+
//
322+
// The following are the valid combinations of the Version and ReleaseVersion parameters:
323+
// 1. None of the parameters are provided
324+
// 2. Only the Version parameter is provided
325+
// 3. Only the ReleaseVersion parameter is provided
326+
// 4. Both the Version and ReleaseVersion parameters are provided and they match
327+
//
328+
// The first case is not applicable here as it's counterintuitive in a declarative
329+
// model to not provide a desired state and have the controller trigger a blind update.
330+
331+
// We need to set a terminal condition if the user provides both a version and release version
332+
// and they do not match. This is needed because the controller could potentially start alternating
333+
// between the non-matching version and release version in the spec and the observed state.
334+
if desired.ko.Spec.Version != nil && desired.ko.Spec.ReleaseVersion != nil &&
335+
*desired.ko.Spec.Version != "" && *desired.ko.Spec.ReleaseVersion != "" {
336+
337+
// First parse the user provided release version and desired release
338+
desiredReleaseVersionTrimmed, err := util.GetEKSVersionFromReleaseVersion(*desired.ko.Spec.ReleaseVersion)
339+
if err != nil {
340+
return nil, ackerr.NewTerminalError(err)
341+
}
342+
343+
// Set a terminal condition if the release version and version do not match.
344+
// e.g if the user provides a release version of 1.16.8-20211201 and a version of 1.17
345+
// They will either need to provide one of the following:
346+
// 2. A version
347+
// 1. A release version
348+
// 3. A version and release version that matches (e.g 1.16 and 1.16.8-20211201)
349+
if desiredReleaseVersionTrimmed != *desired.ko.Spec.Version {
350+
return nil, ackerr.NewTerminalError(
351+
fmt.Errorf("version and release version do not match: %s and %s", *desired.ko.Spec.Version, desiredReleaseVersionTrimmed),
352+
)
353+
}
354+
}
355+
356+
if err := rm.updateVersion(ctx, delta, desired); err != nil {
287357
return nil, err
288358
}
289359
return returnNodegroupUpdating(updatedRes)
@@ -380,39 +450,51 @@ func newUpdateTaintsPayload(
380450
}
381451

382452
func newUpdateNodegroupVersionPayload(
453+
delta *ackcompare.Delta,
383454
desired *resource,
384455
) *svcsdk.UpdateNodegroupVersionInput {
385456
input := &svcsdk.UpdateNodegroupVersionInput{
386-
NodegroupName: desired.ko.Spec.Name,
387-
ClusterName: desired.ko.Spec.ClusterName,
388-
Version: desired.ko.Spec.Version,
389-
ReleaseVersion: desired.ko.Spec.ReleaseVersion,
457+
NodegroupName: desired.ko.Spec.Name,
458+
ClusterName: desired.ko.Spec.ClusterName,
459+
}
460+
461+
if delta.DifferentAt("Spec.Version") {
462+
input.Version = desired.ko.Spec.Version
463+
}
464+
465+
if delta.DifferentAt("Spec.ReleaseVersion") {
466+
input.ReleaseVersion = desired.ko.Spec.ReleaseVersion
390467
}
391468

392-
if desired.ko.Spec.LaunchTemplate != nil {
393-
input.SetLaunchTemplate(&svcsdk.LaunchTemplateSpecification{
394-
Id: desired.ko.Spec.LaunchTemplate.ID,
395-
Name: desired.ko.Spec.LaunchTemplate.Name,
396-
Version: desired.ko.Spec.LaunchTemplate.Version,
397-
})
469+
if delta.DifferentAt("Spec.LaunchTemplate") {
470+
// We need to be careful here to not access a nil pointer
471+
if desired.ko.Spec.LaunchTemplate != nil {
472+
input.SetLaunchTemplate(&svcsdk.LaunchTemplateSpecification{
473+
Id: desired.ko.Spec.LaunchTemplate.ID,
474+
Name: desired.ko.Spec.LaunchTemplate.Name,
475+
Version: desired.ko.Spec.LaunchTemplate.Version,
476+
})
477+
}
398478
}
399479

480+
// If the force annotation is set, we set the force flag on the input
481+
// payload.
400482
if getUpdateNodeGroupForceAnnotation(desired.ko.ObjectMeta) {
401483
input.SetForce(true)
402484
}
403-
404485
return input
405486
}
406487

407488
func (rm *resourceManager) updateVersion(
408489
ctx context.Context,
490+
delta *ackcompare.Delta,
409491
r *resource,
410492
) (err error) {
411493
rlog := ackrtlog.FromContext(ctx)
412494
exit := rlog.Trace("rm.updateVersion")
413495
defer exit(err)
414496

415-
input := newUpdateNodegroupVersionPayload(r)
497+
input := newUpdateNodegroupVersionPayload(delta, r)
416498

417499
_, err = rm.sdkapi.UpdateNodegroupVersionWithContext(ctx, input)
418500
rm.metrics.RecordAPICall("UPDATE", "UpdateNodegroupVersion", err)

pkg/resource/nodegroup/hook_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,10 @@ func Test_resourceManager_newUpdateScalingConfigPayload_ManagedByDefault(t *test
424424
}
425425

426426
func Test_newUpdateNodegroupPayload(t *testing.T) {
427+
delta := ackcompare.NewDelta()
428+
delta.Add("Spec.Version", nil, nil)
429+
delta.Add("Spec.LaunchTemplate", nil, nil)
430+
427431
type args struct {
428432
r *resource
429433
}
@@ -511,7 +515,7 @@ func Test_newUpdateNodegroupPayload(t *testing.T) {
511515
}
512516
for _, tt := range tests {
513517
t.Run(tt.name, func(t *testing.T) {
514-
got := newUpdateNodegroupVersionPayload(tt.args.r)
518+
got := newUpdateNodegroupVersionPayload(delta, tt.args.r)
515519
assert.Equal(t, tt.wantVersion, *got.Version)
516520
if tt.wantForce {
517521
assert.NotNil(t, got.Force)

0 commit comments

Comments
 (0)