Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,17 @@ spec:
- name
type: object
type: array
nodeRepairConfig:
description: NodeRepairConfig specifies the node auto repair configuration
for the managed node group.
properties:
enabled:
default: false
description: |-
Enabled specifies whether node auto repair is enabled for the node group.
When enabled, EKS will automatically repair unhealthy nodes by replacing them.
type: boolean
type: object
providerIDList:
description: |-
ProviderIDList are the provider IDs of instances in the
Expand Down
4 changes: 4 additions & 0 deletions exp/api/v1beta1/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ func (src *AWSManagedMachinePool) ConvertTo(dstRaw conversion.Hub) error {
dst.Spec.RolePath = restored.Spec.RolePath
dst.Spec.RolePermissionsBoundary = restored.Spec.RolePermissionsBoundary

if restored.Spec.NodeRepairConfig != nil {
dst.Spec.NodeRepairConfig = restored.Spec.NodeRepairConfig
}

return nil
}

Expand Down
1 change: 1 addition & 0 deletions exp/api/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions exp/api/v1beta2/awsmanagedmachinepool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ type AWSManagedMachinePoolSpec struct {
// AWSLifecycleHooks specifies lifecycle hooks for the managed node group.
// +optional
AWSLifecycleHooks []AWSLifecycleHook `json:"lifecycleHooks,omitempty"`

// NodeRepairConfig specifies the node auto repair configuration for the managed node group.
// +optional
NodeRepairConfig *NodeRepairConfig `json:"nodeRepairConfig,omitempty"`
}

// ManagedMachinePoolScaling specifies scaling options.
Expand Down Expand Up @@ -297,6 +301,15 @@ type AWSManagedMachinePoolStatus struct {
Conditions clusterv1.Conditions `json:"conditions,omitempty"`
}

// NodeRepairConfig defines the node auto repair configuration for managed node groups.
type NodeRepairConfig struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a chance that this struct will be extended with more fields? If not, adding an autoNodeRepair boolean field directly under spec would be better.

Copy link
Contributor Author

@afarbos afarbos Oct 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are more fields; see https://pkg.go.dev/github.com/aws/aws-sdk-go-v2/service/eks/types#NodeRepairConfig and https://docs.aws.amazon.com/eks/latest/APIReference/API_NodeRepairConfig.html. Do you want me to add support for all of them?
Note: those did not exist when this PR was created

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine as a struct, given that more options may be supported in the future. Is enabled enough as default to get the feature to work nicely? If yes, I think we can live with only the enabled field for now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, those fields did not exist in version 1.73.0 (that is, until recently), which is why I did not include them from the start; see https://pkg.go.dev/github.com/aws/aws-sdk-go-v2/service/eks@v1.73.0/types#NodeRepairConfig

Also, the API reference linked above does not mark any of these fields as required; each one is listed as "Required: No".

// Enabled specifies whether node auto repair is enabled for the node group.
// When enabled, EKS will automatically repair unhealthy nodes by replacing them.
// +optional
// +kubebuilder:default=false
Enabled *bool `json:"enabled,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:resource:path=awsmanagedmachinepools,scope=Namespaced,categories=cluster-api,shortName=awsmmp
// +kubebuilder:storageversion
Expand Down
25 changes: 25 additions & 0 deletions exp/api/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions pkg/cloud/converters/eks.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,20 @@ func NodegroupUpdateconfigFromSDK(ngUpdateConfig *ekstypes.NodegroupUpdateConfig
return converted
}

// NodeRepairConfigToSDK converts a CAPA NodeRepairConfig to the AWS SDK NodeRepairConfig.
//
// The result is never nil and always carries an explicit Enabled value. A nil
// repairConfig, or one whose Enabled field is unset, converts to a disabled
// configuration so that omitting the setting never changes behavior. The
// Enabled value is copied, so the returned struct does not alias the input.
func NodeRepairConfigToSDK(repairConfig *expinfrav1.NodeRepairConfig) *ekstypes.NodeRepairConfig {
	// aws.ToBool maps a nil pointer to false, the documented default of the
	// Enabled field. Normalizing here keeps an unset value from being reported
	// as a difference when the result is compared against an explicit false.
	enabled := repairConfig != nil && aws.ToBool(repairConfig.Enabled)

	return &ekstypes.NodeRepairConfig{
		Enabled: aws.Bool(enabled),
	}
}

// AMITypeToSDK converts a CAPA ManagedMachineAMIType to AWS SDK AMIType.
func AMITypeToSDK(amiType expinfrav1.ManagedMachineAMIType) ekstypes.AMITypes {
switch amiType {
Expand Down
65 changes: 65 additions & 0 deletions pkg/cloud/converters/eks_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package converters

import (
"testing"

"github.com/aws/aws-sdk-go-v2/aws"
ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"

expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
)

// TestNodeRepairConfigToSDK checks that NodeRepairConfigToSDK produces the
// expected AWS SDK value for a nil config, an enabled config and a disabled
// config.
func TestNodeRepairConfigToSDK(t *testing.T) {
	type testCase struct {
		name     string
		input    *expinfrav1.NodeRepairConfig
		expected *ekstypes.NodeRepairConfig
	}

	// The SDK type carries unexported fields that go-cmp cannot compare by default.
	ignoreUnexported := cmpopts.IgnoreUnexported(ekstypes.NodeRepairConfig{})

	cases := []testCase{
		{
			name:     "nil input returns default disabled",
			input:    nil,
			expected: &ekstypes.NodeRepairConfig{Enabled: aws.Bool(false)},
		},
		{
			name:     "enabled repair config",
			input:    &expinfrav1.NodeRepairConfig{Enabled: aws.Bool(true)},
			expected: &ekstypes.NodeRepairConfig{Enabled: aws.Bool(true)},
		},
		{
			name:     "disabled repair config",
			input:    &expinfrav1.NodeRepairConfig{Enabled: aws.Bool(false)},
			expected: &ekstypes.NodeRepairConfig{Enabled: aws.Bool(false)},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := NodeRepairConfigToSDK(tc.input)
			// cmp.Diff is empty exactly when cmp.Equal reports true for the same options.
			if diff := cmp.Diff(tc.expected, got, ignoreUnexported); diff != "" {
				t.Errorf("NodeRepairConfigToSDK() diff (-want +got):\n%s", diff)
			}
		})
	}
}
17 changes: 17 additions & 0 deletions pkg/cloud/services/eks/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
iamtypes "github.com/aws/aws-sdk-go-v2/service/iam/types"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/version"

Expand Down Expand Up @@ -119,6 +120,11 @@ func (s *NodegroupService) updateConfig() (*ekstypes.NodegroupUpdateConfig, erro
return converters.NodegroupUpdateconfigToSDK(updateConfig)
}

// nodeRepairConfig returns the AWS SDK node repair configuration converted
// from the NodeRepairConfig set in the managed machine pool spec.
func (s *NodegroupService) nodeRepairConfig() *ekstypes.NodeRepairConfig {
	return converters.NodeRepairConfigToSDK(s.scope.ManagedMachinePool.Spec.NodeRepairConfig)
}

func (s *NodegroupService) roleArn(ctx context.Context) (*string, error) {
var role *iamtypes.Role
if s.scope.RoleName() != "" {
Expand Down Expand Up @@ -249,6 +255,9 @@ func (s *NodegroupService) createNodegroup(ctx context.Context) (*ekstypes.Nodeg
Version: s.scope.ManagedMachinePool.Status.LaunchTemplateVersion,
}
}
if managedPool.NodeRepairConfig != nil {
input.NodeRepairConfig = s.nodeRepairConfig()
}

out, err := s.EKSClient.CreateNodegroup(ctx, input)
if err != nil {
Expand Down Expand Up @@ -480,6 +489,14 @@ func (s *NodegroupService) reconcileNodegroupConfig(ctx context.Context, ng *eks
input.UpdateConfig = updatedConfig
needsUpdate = true
}

specRepairConfig := s.nodeRepairConfig()
if !cmp.Equal(ng.NodeRepairConfig, specRepairConfig, cmpopts.IgnoreUnexported(ekstypes.NodeRepairConfig{})) {
s.Debug("Nodegroup repair configuration differs from spec, updating the nodegroup repair config", "nodegroup", ng.NodegroupName)
input.NodeRepairConfig = specRepairConfig
needsUpdate = true
}

if !needsUpdate {
s.Debug("node group config update not needed", "cluster", eksClusterName, "name", *ng.NodegroupName)
return nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,5 @@ spec:
maxSize: 2
updateConfig:
maxUnavailable: 2
nodeRepairConfig:
enabled: false