6 changes: 6 additions & 0 deletions bootstrap/eks/api/v1beta1/conversion.go
@@ -38,6 +38,9 @@ func (r *EKSConfig) ConvertTo(dstRaw conversion.Hub) error {
return err
}

if restored.Spec.NodeType != "" {
dst.Spec.NodeType = restored.Spec.NodeType
}
if restored.Spec.PreBootstrapCommands != nil {
dst.Spec.PreBootstrapCommands = restored.Spec.PreBootstrapCommands
}
@@ -105,6 +108,9 @@ func (r *EKSConfigTemplate) ConvertTo(dstRaw conversion.Hub) error {
return err
}

if restored.Spec.Template.Spec.NodeType != "" {
dst.Spec.Template.Spec.NodeType = restored.Spec.Template.Spec.NodeType
}
if restored.Spec.Template.Spec.PreBootstrapCommands != nil {
dst.Spec.Template.Spec.PreBootstrapCommands = restored.Spec.Template.Spec.PreBootstrapCommands
}
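For context, a minimal sketch of the full spoke-to-hub method these hunks extend; this is an assumption based on cluster-api's standard utilconversion annotation round-trip and the generated converters, not code copied from the PR. Hub-only fields such as NodeType are stashed in an annotation when converting down and restored when converting up, so v1beta1/v1beta2 round-trips stay lossless:

	func (r *EKSConfig) ConvertTo(dstRaw conversion.Hub) error {
		dst := dstRaw.(*eksbootstrapv1.EKSConfig)

		// The generated converter copies every field common to both versions.
		if err := Convert_v1beta1_EKSConfig_To_v1beta2_EKSConfig(r, dst, nil); err != nil {
			return err
		}

		// Recover hub-only fields that ConvertFrom previously stashed in an
		// annotation via utilconversion.MarshalData, so data survives a
		// downgrade/upgrade cycle.
		restored := &eksbootstrapv1.EKSConfig{}
		if ok, err := utilconversion.UnmarshalData(r, restored); err != nil || !ok {
			return err
		}
		if restored.Spec.NodeType != "" {
			dst.Spec.NodeType = restored.Spec.NodeType
		}
		return nil
	}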
1 change: 1 addition & 0 deletions bootstrap/eks/api/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default.

12 changes: 12 additions & 0 deletions bootstrap/eks/api/v1beta2/eksconfig_types.go
@@ -22,8 +22,20 @@ import (
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// NodeType specifies the type of node.
// +kubebuilder:validation:Enum=al2023
type NodeType string
Comment on lines +25 to +27 (Member):
See my comments below regarding the naming of this field.


const (
// NodeTypeAL2023 represents the AL2023 node type.
NodeTypeAL2023 NodeType = "al2023"
)
Comment on lines +29 to +32 (Member):
Same here: add a NodeTypeAL2.

(Member):
We could use the ones you defined for the AMIFamilyType here too:

	// AMIFamilyAL2 is the Amazon Linux 2 AMI family.
	AMIFamilyAL2 = "AmazonLinux2"
	// AMIFamilyAL2023 is the Amazon Linux 2023 AMI family.
	AMIFamilyAL2023 = "AmazonLinux2023"

// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
type EKSConfigSpec struct {
// NodeType specifies the type of node (e.g., "al2023")
// +optional
NodeType NodeType `json:"nodeType,omitempty"`
Comment on lines +36 to +38 (Member):
I think NodeType is a bit weak as a field name because Type is quite generic. How about AMIFamilyType, like the field we have in the NodeInput struct? Then we can pass that straight down to it.

(Member):
We would also need to specify what the default is (I guess AmazonLinux2) and add enum validation for this:

	// +kubebuilder:validation:Enum="";AmazonLinux2;AmazonLinux2023
	// +optional

(Member):
The enum is added on the type definition (https://github.com/kubernetes-sigs/cluster-api-provider-aws/pull/5553/files#diff-64e2d3d6a1323c5ae37a84cfc459f926fe7e329e83a4f695df28de89f19a3172R26). Only al2023 is allowed at this point; no other values are required to change behaviour, and the default (unset) keeps the current behaviour.

(Contributor):
This API is a little confusing to me. We're essentially specifying an AMI family here. We have a field like this already in AWSMachine:

EKSOptimizedLookupType *EKSAMILookupType `json:"eksLookupType,omitempty"`

I think we shouldn't repeat this information here.

(Contributor, @faiq, Sep 2, 2025):
Another thing worth noting here is that Amazon Linux 2 will reach EOL very soon, in June 2026 (source: https://aws.amazon.com/amazon-linux-2/faqs/).

It seems like EKS has largely moved away from AL2 to either AL2023 or Bottlerocket.

Bottlerocket seems to have its own setbacks; see kubernetes-sigs/cluster-api#7840.

So realistically the only option is going to be AL2023.

EDIT:

[!IMPORTANT] Amazon EKS will no longer publish EKS-optimized Amazon Linux 2 (AL2) AMIs after November 26th, 2025. Additionally, Kubernetes version 1.32 is the last version for which Amazon EKS will release AL2 AMIs. From version 1.33 onwards, Amazon EKS will continue to release AL2023 and Bottlerocket based AMIs. To learn more, see Guide to EKS AL2 & AL2-Accelerated AMIs transition features.

Source: https://awslabs.github.io/amazon-eks-ami/

It looks like they won't be building this AMI anymore.

// KubeletExtraArgs passes the specified kubelet args into the Amazon EKS machine bootstrap script
// +optional
KubeletExtraArgs map[string]string `json:"kubeletExtraArgs,omitempty"`
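For illustration, a hypothetical snippet (names are placeholders, not from the PR) showing how a client would opt into the new field; leaving NodeType unset keeps today's AL2 behaviour, matching the enum that only admits "al2023":

	import (
		metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
		eksbootstrapv1 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/api/v1beta2"
	)

	// newAL2023Config builds an EKSConfig that requests AL2023-style userdata.
	func newAL2023Config() *eksbootstrapv1.EKSConfig {
		return &eksbootstrapv1.EKSConfig{
			ObjectMeta: metav1.ObjectMeta{Name: "workers", Namespace: "default"},
			Spec: eksbootstrapv1.EKSConfigSpec{
				NodeType: eksbootstrapv1.NodeTypeAL2023,
			},
		}
	}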
151 changes: 133 additions & 18 deletions bootstrap/eks/controllers/eksconfig_controller.go
@@ -20,6 +20,9 @@ package controllers
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"os"
"time"

"github.com/pkg/errors"
@@ -28,6 +31,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
@@ -39,14 +43,16 @@ import (
eksbootstrapv1 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/internal/userdata"
ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/eks/api/v1beta2"
expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/feature"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
"sigs.k8s.io/cluster-api-provider-aws/v2/util/paused"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
bsutil "sigs.k8s.io/cluster-api/bootstrap/util"
expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/feature"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/conditions"
kubeconfigutil "sigs.k8s.io/cluster-api/util/kubeconfig"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/predicates"
)
@@ -143,7 +149,7 @@ func (r *EKSConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
}()

return ctrl.Result{}, r.joinWorker(ctx, cluster, config, configOwner)
return r.joinWorker(ctx, cluster, config, configOwner)
}

func (r *EKSConfigReconciler) resolveFiles(ctx context.Context, cfg *eksbootstrapv1.EKSConfig) ([]eksbootstrapv1.File, error) {
Expand Down Expand Up @@ -181,7 +187,7 @@ func (r *EKSConfigReconciler) resolveSecretFileContent(ctx context.Context, ns s
return data, nil
}

func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1.Cluster, config *eksbootstrapv1.EKSConfig, configOwner *bsutil.ConfigOwner) error {
func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1.Cluster, config *eksbootstrapv1.EKSConfig, configOwner *bsutil.ConfigOwner) (ctrl.Result, error) {
log := logger.FromContext(ctx)

// only need to reconcile the secret for Machine kinds once, but MachinePools need updates for new launch templates
@@ -195,15 +201,15 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
err := r.Client.Get(ctx, secretKey, existingSecret)
switch {
case err == nil:
return nil
return ctrl.Result{}, nil
case !apierrors.IsNotFound(err):
log.Error(err, "unable to check for existing bootstrap secret")
return err
return ctrl.Result{}, err
}
}

if cluster.Spec.ControlPlaneRef == nil || cluster.Spec.ControlPlaneRef.Kind != "AWSManagedControlPlane" {
return errors.New("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider")
return ctrl.Result{}, errors.New("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider")
}

if !cluster.Status.InfrastructureReady {
@@ -212,30 +218,54 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.WaitingForClusterInfrastructureReason,
clusterv1.ConditionSeverityInfo, "")
return nil
return ctrl.Result{}, nil
}

if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
log.Info("Control Plane has not yet been initialized")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.WaitingForControlPlaneInitializationReason, clusterv1.ConditionSeverityInfo, "")
return nil
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityInfo, "Control plane is not initialized yet")

// For AL2023, requeue to ensure we retry when control plane is ready
// For AL2, follow upstream behavior and return nil
if config.Spec.NodeType == eksbootstrapv1.NodeTypeAL2023 {
log.Info("AL2023 detected, returning requeue after 30 seconds")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
log.Info("AL2 detected, returning no requeue")
return ctrl.Result{}, nil
}

// Get the AWSManagedControlPlane
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
if err := r.Get(ctx, client.ObjectKey{Name: cluster.Spec.ControlPlaneRef.Name, Namespace: cluster.Spec.ControlPlaneRef.Namespace}, controlPlane); err != nil {
return err
return ctrl.Result{}, errors.Wrap(err, "failed to get control plane")
}

// Check if control plane is ready (skip in test environments for AL2023)
if config.Spec.NodeType == eksbootstrapv1.NodeTypeAL2023 && !conditions.IsTrue(controlPlane, ekscontrolplanev1.EKSControlPlaneReadyCondition) {
// Skip control plane readiness check for AL2023 in test environment
if os.Getenv("TEST_ENV") != "true" {
log.Info("AL2023 detected, waiting for control plane to be ready")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityInfo, "Control plane is not ready yet")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
log.Info("Skipping control plane readiness check for AL2023 in test environment")
}
log.Info("Control plane is ready, proceeding with userdata generation")

log.Info("Generating userdata")
files, err := r.resolveFiles(ctx, config)
if err != nil {
log.Info("Failed to resolve files for user data")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "%s", err.Error())
return err
return ctrl.Result{}, err
}

// Create unified NodeInput for both AL2 and AL2023
nodeInput := &userdata.NodeInput{
// AWSManagedControlPlane webhooks default and validate EKSClusterName
ClusterName: controlPlane.Spec.EKSClusterName,
KubeletExtraArgs: config.Spec.KubeletExtraArgs,
ContainerRuntime: config.Spec.ContainerRuntime,
@@ -251,7 +281,9 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
DiskSetup: config.Spec.DiskSetup,
Mounts: config.Spec.Mounts,
Files: files,
ClusterCIDR: controlPlane.Spec.NetworkSpec.VPC.CidrBlock,
}

if config.Spec.PauseContainer != nil {
nodeInput.PauseContainerAccount = &config.Spec.PauseContainer.AccountNumber
nodeInput.PauseContainerVersion = &config.Spec.PauseContainer.Version
@@ -271,22 +303,85 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
nodeInput.IPFamily = ptr.To[string]("ipv6")
}

// generate userdata
// Set AMI family type and AL2023-specific fields if needed
if config.Spec.NodeType == eksbootstrapv1.NodeTypeAL2023 {
log.Info("Processing AL2023 node type")
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2023

// Set AL2023-specific fields
nodeInput.APIServerEndpoint = controlPlane.Spec.ControlPlaneEndpoint.Host
nodeInput.NodeGroupName = config.Name

// In test environments, provide a mock CA certificate
if os.Getenv("TEST_ENV") == "true" {
log.Info("Using mock CA certificate for test environment")
nodeInput.CACert = "mock-ca-certificate-for-testing"
} else {
// Fetch CA cert from KubeConfig secret
// We already have the cluster object passed to this function
obj := client.ObjectKey{
Namespace: cluster.Namespace,
Name: cluster.Name,
}
ca, err := extractCAFromSecret(ctx, r.Client, obj)
if err != nil {
log.Error(err, "Failed to extract CA from kubeconfig secret")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityWarning,
"Failed to extract CA from kubeconfig secret: %v", err)
return ctrl.Result{}, err
}
nodeInput.CACert = ca
}

// Get AMI ID from AWSManagedMachinePool's launch template if specified
if configOwner.GetKind() == "AWSManagedMachinePool" {
(Contributor):
Is there any reason that users who don't use managed machine pools can't set AMI IDs like users of AWSMachineTemplate?

amp := &expinfrav1.AWSManagedMachinePool{}
if err := r.Get(ctx, client.ObjectKey{Namespace: config.Namespace, Name: configOwner.GetName()}, amp); err == nil {
log.Info("Found AWSManagedMachinePool", "name", amp.Name, "launchTemplate", amp.Spec.AWSLaunchTemplate != nil)
if amp.Spec.AWSLaunchTemplate != nil && amp.Spec.AWSLaunchTemplate.AMI.ID != nil {
nodeInput.AMIImageID = *amp.Spec.AWSLaunchTemplate.AMI.ID
log.Info("Set AMI ID from launch template", "amiID", nodeInput.AMIImageID)
} else {
log.Info("No AMI ID found in launch template")
}
if amp.Spec.CapacityType != nil {
nodeInput.CapacityType = amp.Spec.CapacityType
log.Info("Set capacity type from AWSManagedMachinePool", "capacityType", *amp.Spec.CapacityType)
} else {
log.Info("No capacity type found in AWSManagedMachinePool")
}
} else {
log.Info("Failed to get AWSManagedMachinePool", "error", err)
}
}

log.Info("Generating AL2023 userdata",
"cluster", controlPlane.Spec.EKSClusterName,
"endpoint", nodeInput.APIServerEndpoint)
} else {
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2
log.Info("Generating standard userdata for node type", "type", config.Spec.NodeType)
}

// Generate userdata using unified approach
userDataScript, err := userdata.NewNode(nodeInput)
if err != nil {
log.Error(err, "Failed to create a worker join configuration")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
return err
return ctrl.Result{}, err
}

// store userdata as secret
// Store the userdata in a secret
if err := r.storeBootstrapData(ctx, cluster, config, userDataScript); err != nil {
log.Error(err, "Failed to store bootstrap data")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
return err
return ctrl.Result{}, err
}

return nil
conditions.MarkTrue(config, eksbootstrapv1.DataSecretAvailableCondition)
return ctrl.Result{}, nil
}
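A note on the signature change above: moving joinWorker from error to (ctrl.Result, error) lets wait states request a timed requeue instead of relying on watch events alone. A minimal sketch of the controller-runtime idiom (ready and err are placeholders; the 30-second interval is the one used in the PR):

	// Wait state: requeue on a timer without reporting an error (no backoff).
	if !ready {
		return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
	}
	// Real failure: return the error; controller-runtime retries with backoff.
	if err != nil {
		return ctrl.Result{}, err
	}
	// Terminal success: empty Result, no requeue.
	return ctrl.Result{}, nil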

func (r *EKSConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, option controller.Options) error {
Expand Down Expand Up @@ -466,3 +561,23 @@ func (r *EKSConfigReconciler) updateBootstrapSecret(ctx context.Context, secret
}
return false, nil
}

func extractCAFromSecret(ctx context.Context, c client.Client, obj client.ObjectKey) (string, error) {
data, err := kubeconfigutil.FromSecret(ctx, c, obj)
if err != nil {
return "", errors.Wrapf(err, "failed to get kubeconfig secret %s", obj.Name)
}
config, err := clientcmd.Load(data)
if err != nil {
return "", errors.Wrapf(err, "failed to parse kubeconfig data from secret %s", obj.Name)
}

// Iterate through all clusters in the kubeconfig and use the first one with CA data
for _, cluster := range config.Clusters {
if len(cluster.CertificateAuthorityData) > 0 {
return base64.StdEncoding.EncodeToString(cluster.CertificateAuthorityData), nil
}
}

return "", fmt.Errorf("no cluster with CA data found in kubeconfig")
}
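To make the helper's contract concrete, here is a hypothetical unit-test sketch (not part of the PR), assuming cluster-api's "<cluster-name>-kubeconfig" secret convention with the kubeconfig bytes under the "value" key, controller-runtime's fake client, and the usual corev1/metav1/clientcmd/clientcmdapi imports:

	func TestExtractCAFromSecret(t *testing.T) {
		kubeconfig := clientcmdapi.NewConfig()
		kubeconfig.Clusters["test"] = &clientcmdapi.Cluster{
			Server:                   "https://example.eks.amazonaws.com",
			CertificateAuthorityData: []byte("fake-ca-bytes"),
		}
		raw, err := clientcmd.Write(*kubeconfig)
		if err != nil {
			t.Fatal(err)
		}
		secret := &corev1.Secret{
			ObjectMeta: metav1.ObjectMeta{Name: "my-cluster-kubeconfig", Namespace: "default"},
			Data:       map[string][]byte{"value": raw},
		}
		c := fake.NewClientBuilder().WithObjects(secret).Build()

		// The helper is handed the cluster key; kubeconfigutil.FromSecret
		// appends the "-kubeconfig" suffix when resolving the secret.
		ca, err := extractCAFromSecret(context.Background(), c, client.ObjectKey{
			Namespace: "default",
			Name:      "my-cluster",
		})
		if err != nil {
			t.Fatal(err)
		}
		if ca == "" {
			t.Fatal("expected base64-encoded CA data")
		}
	}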