Skip to content

✨ Add support for EKSConfig LaunchTemplate bootstrapping for AL2023 using nodeadm #5553

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
6 changes: 6 additions & 0 deletions bootstrap/eks/api/v1beta1/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ func (r *EKSConfig) ConvertTo(dstRaw conversion.Hub) error {
return err
}

if restored.Spec.NodeType != "" {
dst.Spec.NodeType = restored.Spec.NodeType
}
if restored.Spec.PreBootstrapCommands != nil {
dst.Spec.PreBootstrapCommands = restored.Spec.PreBootstrapCommands
}
Expand Down Expand Up @@ -105,6 +108,9 @@ func (r *EKSConfigTemplate) ConvertTo(dstRaw conversion.Hub) error {
return err
}

if restored.Spec.Template.Spec.NodeType != "" {
dst.Spec.Template.Spec.NodeType = restored.Spec.Template.Spec.NodeType
}
if restored.Spec.Template.Spec.PreBootstrapCommands != nil {
dst.Spec.Template.Spec.PreBootstrapCommands = restored.Spec.Template.Spec.PreBootstrapCommands
}
Expand Down
1 change: 1 addition & 0 deletions bootstrap/eks/api/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions bootstrap/eks/api/v1beta2/eksconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import (

// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
type EKSConfigSpec struct {
// NodeType specifies the type of node (e.g., "al2023")
// +optional
NodeType string `json:"nodeType,omitempty"`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any way to derive this from the AMI being used rather than asking the user to specify in the API?

Comment on lines 25 to +29
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only required when using AL2023 so perhaps let's add an enum for this and only accept al2023? Something like this:

Suggested change
// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
type EKSConfigSpec struct {
// NodeType specifies the type of node (e.g., "al2023")
// +optional
NodeType string `json:"nodeType,omitempty"`
// +kubebuilder:validation:Enum=al2023
type NodeType string
const (
NodeTypeAL2023 NodeType = "al2023"
)
// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
type EKSConfigSpec struct {
// NodeType specifies the type of node (e.g., "al2023")
// +optional
NodeType NodeType `json:"nodeType,omitempty"`

// KubeletExtraArgs passes the specified kubelet args into the Amazon EKS machine bootstrap script
// +optional
KubeletExtraArgs map[string]string `json:"kubeletExtraArgs,omitempty"`
Expand Down
149 changes: 132 additions & 17 deletions bootstrap/eks/controllers/eksconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,13 @@ package controllers
import (
"bytes"
"context"
"fmt"
"os"
"time"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/eks"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -39,6 +44,7 @@ import (
eksbootstrapv1 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/internal/userdata"
ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/eks/api/v1beta2"
expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
"sigs.k8s.io/cluster-api-provider-aws/v2/util/paused"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
Expand All @@ -51,6 +57,11 @@ import (
"sigs.k8s.io/cluster-api/util/predicates"
)

const (
	// NodeTypeAL2023 represents the AL2023 node type; when set on
	// EKSConfigSpec.NodeType it selects the nodeadm-based (AL2023)
	// bootstrap path instead of the legacy AL2 bootstrap script.
	NodeTypeAL2023 = "al2023"
)

// EKSConfigReconciler reconciles a EKSConfig object.
type EKSConfigReconciler struct {
client.Client
Expand Down Expand Up @@ -143,7 +154,7 @@ func (r *EKSConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
}()

return ctrl.Result{}, r.joinWorker(ctx, cluster, config, configOwner)
return r.joinWorker(ctx, cluster, config, configOwner)
}

func (r *EKSConfigReconciler) resolveFiles(ctx context.Context, cfg *eksbootstrapv1.EKSConfig) ([]eksbootstrapv1.File, error) {
Expand Down Expand Up @@ -181,8 +192,9 @@ func (r *EKSConfigReconciler) resolveSecretFileContent(ctx context.Context, ns s
return data, nil
}

func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1.Cluster, config *eksbootstrapv1.EKSConfig, configOwner *bsutil.ConfigOwner) error {
func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1.Cluster, config *eksbootstrapv1.EKSConfig, configOwner *bsutil.ConfigOwner) (ctrl.Result, error) {
log := logger.FromContext(ctx)
log.Info("joinWorker called", "config", config.Name, "nodeType", config.Spec.NodeType, "cluster", cluster.Name)

// only need to reconcile the secret for Machine kinds once, but MachinePools need updates for new launch templates
if config.Status.DataSecretName != nil && configOwner.GetKind() == "Machine" {
Expand All @@ -195,15 +207,15 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
err := r.Client.Get(ctx, secretKey, existingSecret)
switch {
case err == nil:
return nil
return ctrl.Result{}, nil
case !apierrors.IsNotFound(err):
log.Error(err, "unable to check for existing bootstrap secret")
return err
return ctrl.Result{}, err
}
}

if cluster.Spec.ControlPlaneRef == nil || cluster.Spec.ControlPlaneRef.Kind != "AWSManagedControlPlane" {
return errors.New("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider")
return ctrl.Result{}, errors.New("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider")
}

if !cluster.Status.InfrastructureReady {
Expand All @@ -212,30 +224,54 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.WaitingForClusterInfrastructureReason,
clusterv1.ConditionSeverityInfo, "")
return nil
return ctrl.Result{}, nil
}

if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
log.Info("Control Plane has not yet been initialized")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.WaitingForControlPlaneInitializationReason, clusterv1.ConditionSeverityInfo, "")
return nil
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityInfo, "Control plane is not initialized yet")

// For AL2023, requeue to ensure we retry when control plane is ready
// For AL2, follow upstream behavior and return nil
if config.Spec.NodeType == NodeTypeAL2023 {
log.Info("AL2023 detected, returning requeue after 30 seconds")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
log.Info("AL2 detected, returning no requeue")
return ctrl.Result{}, nil
}

// Get the AWSManagedControlPlane
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
if err := r.Get(ctx, client.ObjectKey{Name: cluster.Spec.ControlPlaneRef.Name, Namespace: cluster.Spec.ControlPlaneRef.Namespace}, controlPlane); err != nil {
return err
return ctrl.Result{}, errors.Wrap(err, "failed to get control plane")
}

// Check if control plane is ready (skip in test environments for AL2023)
if config.Spec.NodeType == NodeTypeAL2023 && !conditions.IsTrue(controlPlane, ekscontrolplanev1.EKSControlPlaneReadyCondition) {
// Skip control plane readiness check for AL2023 in test environment
if os.Getenv("TEST_ENV") != "true" {
log.Info("AL2023 detected, waiting for control plane to be ready")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityInfo, "Control plane is not ready yet")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
log.Info("Skipping control plane readiness check for AL2023 in test environment")
}
log.Info("Control plane is ready, proceeding with userdata generation")

log.Info("Generating userdata")
files, err := r.resolveFiles(ctx, config)
if err != nil {
log.Info("Failed to resolve files for user data")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "%s", err.Error())
return err
return ctrl.Result{}, err
}

// Create unified NodeInput for both AL2 and AL2023
nodeInput := &userdata.NodeInput{
// AWSManagedControlPlane webhooks default and validate EKSClusterName
ClusterName: controlPlane.Spec.EKSClusterName,
KubeletExtraArgs: config.Spec.KubeletExtraArgs,
ContainerRuntime: config.Spec.ContainerRuntime,
Expand All @@ -251,7 +287,9 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
DiskSetup: config.Spec.DiskSetup,
Mounts: config.Spec.Mounts,
Files: files,
ClusterCIDR: controlPlane.Spec.NetworkSpec.VPC.CidrBlock,
}

if config.Spec.PauseContainer != nil {
nodeInput.PauseContainerAccount = &config.Spec.PauseContainer.AccountNumber
nodeInput.PauseContainerVersion = &config.Spec.PauseContainer.Version
Expand All @@ -271,22 +309,99 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
nodeInput.IPFamily = ptr.To[string]("ipv6")
}

// generate userdata
// Set AMI family type and AL2023-specific fields if needed
if config.Spec.NodeType == NodeTypeAL2023 {
log.Info("Processing AL2023 node type")
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2023

// Set AL2023-specific fields
nodeInput.APIServerEndpoint = controlPlane.Spec.ControlPlaneEndpoint.Host
nodeInput.NodeGroupName = config.Name

// In test environments, provide a mock CA certificate
if os.Getenv("TEST_ENV") == "true" {
log.Info("Using mock CA certificate for test environment")
nodeInput.CACert = "mock-ca-certificate-for-testing"
} else {
// Fetch CA cert from EKS API
sess, err := session.NewSession(&aws.Config{Region: aws.String(controlPlane.Spec.Region)})
if err != nil {
log.Error(err, "Failed to create AWS session for EKS API")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityWarning,
"Failed to create AWS session: %v", err)
return ctrl.Result{}, err
}
eksClient := eks.New(sess)
describeInput := &eks.DescribeClusterInput{Name: aws.String(controlPlane.Spec.EKSClusterName)}
clusterOut, err := eksClient.DescribeCluster(describeInput)
if err != nil {
log.Error(err, "Failed to describe EKS cluster for CA cert fetch")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityWarning,
"Failed to describe EKS cluster: %v", err)
return ctrl.Result{}, err
} else if clusterOut.Cluster != nil && clusterOut.Cluster.CertificateAuthority != nil && clusterOut.Cluster.CertificateAuthority.Data != nil {
nodeInput.CACert = *clusterOut.Cluster.CertificateAuthority.Data
} else {
log.Error(nil, "CA certificate not found in EKS cluster response")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityWarning,
"CA certificate not found in EKS cluster response")
return ctrl.Result{}, fmt.Errorf("CA certificate not found in EKS cluster response")
}
}

// Get AMI ID from AWSManagedMachinePool's launch template if specified
if configOwner.GetKind() == "AWSManagedMachinePool" {
amp := &expinfrav1.AWSManagedMachinePool{}
if err := r.Get(ctx, client.ObjectKey{Namespace: config.Namespace, Name: configOwner.GetName()}, amp); err == nil {
log.Info("Found AWSManagedMachinePool", "name", amp.Name, "launchTemplate", amp.Spec.AWSLaunchTemplate != nil)
if amp.Spec.AWSLaunchTemplate != nil && amp.Spec.AWSLaunchTemplate.AMI.ID != nil {
nodeInput.AMIImageID = *amp.Spec.AWSLaunchTemplate.AMI.ID
log.Info("Set AMI ID from launch template", "amiID", nodeInput.AMIImageID)
} else {
log.Info("No AMI ID found in launch template")
}
if amp.Spec.CapacityType != nil {
nodeInput.CapacityType = amp.Spec.CapacityType
log.Info("Set capacity type from AWSManagedMachinePool", "capacityType", *amp.Spec.CapacityType)
} else {
log.Info("No capacity type found in AWSManagedMachinePool")
}
} else {
log.Info("Failed to get AWSManagedMachinePool", "error", err)
}
}

log.Info("Generating AL2023 userdata",
"cluster", controlPlane.Spec.EKSClusterName,
"endpoint", nodeInput.APIServerEndpoint)
} else {
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2
log.Info("Generating standard userdata for node type", "type", config.Spec.NodeType)
}

// Generate userdata using unified approach
userDataScript, err := userdata.NewNode(nodeInput)
if err != nil {
log.Error(err, "Failed to create a worker join configuration")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
return err
return ctrl.Result{}, err
}

// store userdata as secret
// Store the userdata in a secret
if err := r.storeBootstrapData(ctx, cluster, config, userDataScript); err != nil {
log.Error(err, "Failed to store bootstrap data")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
return err
return ctrl.Result{}, err
}

return nil
conditions.MarkTrue(config, eksbootstrapv1.DataSecretAvailableCondition)
return ctrl.Result{}, nil
}

func (r *EKSConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, option controller.Options) error {
Expand Down
Loading