
✨ Add support for EKSConfig LaunchTemplate bootstrapping for AL2023 using nodeadm #5553

Open · wants to merge 15 commits into base: main

6 changes: 6 additions & 0 deletions bootstrap/eks/api/v1beta1/conversion.go
@@ -38,6 +38,9 @@ func (r *EKSConfig) ConvertTo(dstRaw conversion.Hub) error {
return err
}

if restored.Spec.NodeType != "" {
dst.Spec.NodeType = restored.Spec.NodeType
}
if restored.Spec.PreBootstrapCommands != nil {
dst.Spec.PreBootstrapCommands = restored.Spec.PreBootstrapCommands
}
@@ -105,6 +108,9 @@ func (r *EKSConfigTemplate) ConvertTo(dstRaw conversion.Hub) error {
return err
}

if restored.Spec.Template.Spec.NodeType != "" {
dst.Spec.Template.Spec.NodeType = restored.Spec.Template.Spec.NodeType
}
if restored.Spec.Template.Spec.PreBootstrapCommands != nil {
dst.Spec.Template.Spec.PreBootstrapCommands = restored.Spec.Template.Spec.PreBootstrapCommands
}
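Note: v1beta1 has no NodeType field, so the round trip relies on the conversion annotation. The restored object is the v1beta2 spec recovered earlier in ConvertTo (via utilconversion.UnmarshalData), which is how a config converted down to v1beta1 and back keeps its nodeType.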
1 change: 1 addition & 0 deletions bootstrap/eks/api/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions bootstrap/eks/api/v1beta2/eksconfig_types.go
@@ -24,6 +24,9 @@ import (

// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
type EKSConfigSpec struct {
// NodeType specifies the type of node (e.g. "al2023"); when empty, the default AL2 bootstrap userdata is generated
// +optional
NodeType string `json:"nodeType,omitempty"`
// KubeletExtraArgs passes the specified kubelet args into the Amazon EKS machine bootstrap script
// +optional
KubeletExtraArgs map[string]string `json:"kubeletExtraArgs,omitempty"`
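For orientation (not part of this diff): a minimal sketch of setting the new field programmatically, with a hypothetical helper name; the YAML equivalent is spec.nodeType: al2023.

package example

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	eksbootstrapv1 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/api/v1beta2"
)

// buildAL2023Config shows the new field in use (hypothetical helper).
func buildAL2023Config() *eksbootstrapv1.EKSConfig {
	return &eksbootstrapv1.EKSConfig{
		ObjectMeta: metav1.ObjectMeta{Name: "pool-0", Namespace: "default"},
		Spec: eksbootstrapv1.EKSConfigSpec{
			// Leaving NodeType empty (or any other value) keeps the AL2 path.
			NodeType: "al2023",
		},
	}
}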
156 changes: 138 additions & 18 deletions bootstrap/eks/controllers/eksconfig_controller.go
@@ -20,6 +20,9 @@ package controllers
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"os"
"time"

"github.com/pkg/errors"
@@ -28,6 +31,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
@@ -39,18 +43,25 @@
eksbootstrapv1 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/internal/userdata"
ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/eks/api/v1beta2"
expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/feature"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
"sigs.k8s.io/cluster-api-provider-aws/v2/util/paused"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
bsutil "sigs.k8s.io/cluster-api/bootstrap/util"
expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/conditions"
kubeconfigutil "sigs.k8s.io/cluster-api/util/kubeconfig"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/predicates"
)

const (
// NodeTypeAL2023 represents the AL2023 node type.
NodeTypeAL2023 = "al2023"
)

// EKSConfigReconciler reconciles a EKSConfig object.
type EKSConfigReconciler struct {
client.Client
@@ -143,7 +154,7 @@ func (r *EKSConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
}()

return r.joinWorker(ctx, cluster, config, configOwner)
}
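Note the signature change: joinWorker now returns a ctrl.Result alongside the error, so the AL2023 paths below can request an explicit requeue (for example while waiting for the control plane) instead of relying solely on watch events.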

func (r *EKSConfigReconciler) resolveFiles(ctx context.Context, cfg *eksbootstrapv1.EKSConfig) ([]eksbootstrapv1.File, error) {
@@ -181,7 +192,7 @@ func (r *EKSConfigReconciler) resolveSecretFileContent(ctx context.Context, ns s
return data, nil
}

func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1.Cluster, config *eksbootstrapv1.EKSConfig, configOwner *bsutil.ConfigOwner) (ctrl.Result, error) {
log := logger.FromContext(ctx)

// only need to reconcile the secret for Machine kinds once, but MachinePools need updates for new launch templates
@@ -195,15 +206,15 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
err := r.Client.Get(ctx, secretKey, existingSecret)
switch {
case err == nil:
return ctrl.Result{}, nil
case !apierrors.IsNotFound(err):
log.Error(err, "unable to check for existing bootstrap secret")
return ctrl.Result{}, err
}
}

if cluster.Spec.ControlPlaneRef == nil || cluster.Spec.ControlPlaneRef.Kind != "AWSManagedControlPlane" {
return ctrl.Result{}, errors.New("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider")
}

if !cluster.Status.InfrastructureReady {
@@ -212,30 +223,54 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.WaitingForClusterInfrastructureReason,
clusterv1.ConditionSeverityInfo, "")
return ctrl.Result{}, nil
}

if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
log.Info("Control Plane has not yet been initialized")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityInfo, "Control plane is not initialized yet")

// For AL2023, requeue to ensure we retry when control plane is ready
// For AL2, follow upstream behavior and return nil
if config.Spec.NodeType == NodeTypeAL2023 {
log.Info("AL2023 detected, returning requeue after 30 seconds")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
log.Info("AL2 detected, returning no requeue")
return ctrl.Result{}, nil
}

// Get the AWSManagedControlPlane
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
if err := r.Get(ctx, client.ObjectKey{Name: cluster.Spec.ControlPlaneRef.Name, Namespace: cluster.Spec.ControlPlaneRef.Namespace}, controlPlane); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to get control plane")
}

// Check if control plane is ready (skip in test environments for AL2023)
if config.Spec.NodeType == NodeTypeAL2023 && !conditions.IsTrue(controlPlane, ekscontrolplanev1.EKSControlPlaneReadyCondition) {
// Skip control plane readiness check for AL2023 in test environment
if os.Getenv("TEST_ENV") != "true" {
log.Info("AL2023 detected, waiting for control plane to be ready")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityInfo, "Control plane is not ready yet")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
log.Info("Skipping control plane readiness check for AL2023 in test environment")
}
log.Info("Control plane is ready, proceeding with userdata generation")

log.Info("Generating userdata")
files, err := r.resolveFiles(ctx, config)
if err != nil {
log.Info("Failed to resolve files for user data")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "%s", err.Error())
return ctrl.Result{}, err
}

// Create unified NodeInput for both AL2 and AL2023
nodeInput := &userdata.NodeInput{
// AWSManagedControlPlane webhooks default and validate EKSClusterName
ClusterName: controlPlane.Spec.EKSClusterName,
KubeletExtraArgs: config.Spec.KubeletExtraArgs,
ContainerRuntime: config.Spec.ContainerRuntime,
Expand All @@ -251,7 +286,9 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
DiskSetup: config.Spec.DiskSetup,
Mounts: config.Spec.Mounts,
Files: files,
ClusterCIDR: controlPlane.Spec.NetworkSpec.VPC.CidrBlock,
}

if config.Spec.PauseContainer != nil {
nodeInput.PauseContainerAccount = &config.Spec.PauseContainer.AccountNumber
nodeInput.PauseContainerVersion = &config.Spec.PauseContainer.Version
Expand All @@ -271,22 +308,85 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
nodeInput.IPFamily = ptr.To[string]("ipv6")
}

// Set AMI family type and AL2023-specific fields if needed
if config.Spec.NodeType == NodeTypeAL2023 {
log.Info("Processing AL2023 node type")
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2023

// Set AL2023-specific fields
nodeInput.APIServerEndpoint = controlPlane.Spec.ControlPlaneEndpoint.Host
nodeInput.NodeGroupName = config.Name

// In test environments, provide a mock CA certificate
if os.Getenv("TEST_ENV") == "true" {
log.Info("Using mock CA certificate for test environment")
nodeInput.CACert = "mock-ca-certificate-for-testing"
} else {
// Fetch CA cert from KubeConfig secret
// We already have the cluster object passed to this function
obj := client.ObjectKey{
Namespace: cluster.Namespace,
Name: cluster.Name,
}
ca, err := extractCAFromSecret(ctx, r.Client, obj)
if err != nil {
log.Error(err, "Failed to extract CA from kubeconfig secret")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityWarning,
"Failed to extract CA from kubeconfig secret: %v", err)
return ctrl.Result{}, err
}
nodeInput.CACert = ca
}

// Get AMI ID from AWSManagedMachinePool's launch template if specified
if configOwner.GetKind() == "AWSManagedMachinePool" {
amp := &expinfrav1.AWSManagedMachinePool{}
if err := r.Get(ctx, client.ObjectKey{Namespace: config.Namespace, Name: configOwner.GetName()}, amp); err == nil {
log.Info("Found AWSManagedMachinePool", "name", amp.Name, "launchTemplate", amp.Spec.AWSLaunchTemplate != nil)
if amp.Spec.AWSLaunchTemplate != nil && amp.Spec.AWSLaunchTemplate.AMI.ID != nil {
nodeInput.AMIImageID = *amp.Spec.AWSLaunchTemplate.AMI.ID
log.Info("Set AMI ID from launch template", "amiID", nodeInput.AMIImageID)
} else {
log.Info("No AMI ID found in launch template")
}
if amp.Spec.CapacityType != nil {
nodeInput.CapacityType = amp.Spec.CapacityType
log.Info("Set capacity type from AWSManagedMachinePool", "capacityType", *amp.Spec.CapacityType)
} else {
log.Info("No capacity type found in AWSManagedMachinePool")
}
} else {
log.Info("Failed to get AWSManagedMachinePool", "error", err)
}
}

log.Info("Generating AL2023 userdata",
"cluster", controlPlane.Spec.EKSClusterName,
"endpoint", nodeInput.APIServerEndpoint)
} else {
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2
log.Info("Generating standard userdata for node type", "type", config.Spec.NodeType)
}

// Generate userdata using unified approach
userDataScript, err := userdata.NewNode(nodeInput)
if err != nil {
log.Error(err, "Failed to create a worker join configuration")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
return ctrl.Result{}, err
}

// Store the userdata in a secret
if err := r.storeBootstrapData(ctx, cluster, config, userDataScript); err != nil {
log.Error(err, "Failed to store bootstrap data")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
return ctrl.Result{}, err
}

conditions.MarkTrue(config, eksbootstrapv1.DataSecretAvailableCondition)
return ctrl.Result{}, nil
}
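For orientation, a minimal sketch (hypothetical helper, placeholder values; userdata is this repo's internal package, so this only compiles in-tree) of what the AL2023 branch above hands to userdata.NewNode, which renders nodeadm-style userdata rather than the AL2 bootstrap.sh invocation:

func renderAL2023(caBase64 string) ([]byte, error) {
	input := &userdata.NodeInput{
		ClusterName:       "my-cluster",                        // controlPlane.Spec.EKSClusterName
		AMIFamilyType:     userdata.AMIFamilyAL2023,            // selects the nodeadm path
		APIServerEndpoint: "https://example.eks.amazonaws.com", // ControlPlaneEndpoint.Host
		CACert:            caBase64,                            // from extractCAFromSecret
		NodeGroupName:     "pool-0",                            // config.Name
		ClusterCIDR:       "10.0.0.0/16",                       // NetworkSpec.VPC.CidrBlock
	}
	return userdata.NewNode(input)
}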

func (r *EKSConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, option controller.Options) error {
@@ -466,3 +566,23 @@ func (r *EKSConfigReconciler) updateBootstrapSecret(ctx context.Context, secret
}
return false, nil
}

// extractCAFromSecret loads the CAPI-generated kubeconfig secret for the
// cluster and returns the first cluster CA bundle it contains, re-encoded
// as base64 for embedding in nodeadm configuration.
func extractCAFromSecret(ctx context.Context, c client.Client, obj client.ObjectKey) (string, error) {
data, err := kubeconfigutil.FromSecret(ctx, c, obj)
if err != nil {
return "", errors.Wrapf(err, "failed to get kubeconfig secret %s", obj.Name)
}
config, err := clientcmd.Load(data)
if err != nil {
return "", errors.Wrapf(err, "failed to parse kubeconfig data from secret %s", obj.Name)
}

// Iterate through all clusters in the kubeconfig and use the first one with CA data
for _, cluster := range config.Clusters {
if len(cluster.CertificateAuthorityData) > 0 {
return base64.StdEncoding.EncodeToString(cluster.CertificateAuthorityData), nil
}
}

return "", fmt.Errorf("no cluster with CA data found in kubeconfig")
}
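As a standalone illustration (hypothetical example, not part of the PR) of the round trip this helper depends on: clientcmd.Load returns CertificateAuthorityData as raw bytes, which is why they are re-encoded to base64 before being embedded in nodeadm configuration.

package example

import (
	"encoding/base64"
	"fmt"

	"k8s.io/client-go/tools/clientcmd"
	clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
)

func caRoundTrip() (string, error) {
	// Build a kubeconfig shaped like the CAPI "<cluster>-kubeconfig" secret.
	cfg := clientcmdapi.NewConfig()
	cfg.Clusters["eks"] = &clientcmdapi.Cluster{
		Server:                   "https://example.eks.amazonaws.com",
		CertificateAuthorityData: []byte("-----BEGIN CERTIFICATE-----\n..."),
	}
	raw, err := clientcmd.Write(*cfg)
	if err != nil {
		return "", err
	}
	// Mirror extractCAFromSecret: parse, pick a cluster with CA data,
	// and re-encode for nodeadm.
	parsed, err := clientcmd.Load(raw)
	if err != nil {
		return "", err
	}
	for _, c := range parsed.Clusters {
		if len(c.CertificateAuthorityData) > 0 {
			return base64.StdEncoding.EncodeToString(c.CertificateAuthorityData), nil
		}
	}
	return "", fmt.Errorf("no CA data found")
}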