✨ Add support for EKSConfig LaunchTemplate bootstrapping for AL2023 using nodeadm #5553

Open · wants to merge 18 commits into main

Changes from 15 commits
6 changes: 6 additions & 0 deletions bootstrap/eks/api/v1beta1/conversion.go
@@ -38,6 +38,9 @@ func (r *EKSConfig) ConvertTo(dstRaw conversion.Hub) error {
return err
}

if restored.Spec.NodeType != "" {
dst.Spec.NodeType = restored.Spec.NodeType
}
if restored.Spec.PreBootstrapCommands != nil {
dst.Spec.PreBootstrapCommands = restored.Spec.PreBootstrapCommands
}
@@ -105,6 +108,9 @@ func (r *EKSConfigTemplate) ConvertTo(dstRaw conversion.Hub) error {
return err
}

if restored.Spec.Template.Spec.NodeType != "" {
dst.Spec.Template.Spec.NodeType = restored.Spec.Template.Spec.NodeType
}
if restored.Spec.Template.Spec.PreBootstrapCommands != nil {
dst.Spec.Template.Spec.PreBootstrapCommands = restored.Spec.Template.Spec.PreBootstrapCommands
}
1 change: 1 addition & 0 deletions bootstrap/eks/api/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions bootstrap/eks/api/v1beta2/eksconfig_types.go
@@ -24,6 +24,9 @@ import (

// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
type EKSConfigSpec struct {
// NodeType specifies the type of node (e.g., "al2023")
// +optional
NodeType string `json:"nodeType,omitempty"`
Member commented:

Is there any way to derive this from the AMI being used rather than asking the user to specify in the API?

Member commented:
This is only required when using AL2023, so perhaps let's add an enum for this and only accept al2023? Something like this:

Suggested change
// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
type EKSConfigSpec struct {
// NodeType specifies the type of node (e.g., "al2023")
// +optional
NodeType string `json:"nodeType,omitempty"`
// +kubebuilder:validation:Enum=al2023
type NodeType string
const (
NodeTypeAL2023 = "al2023
)
// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
type EKSConfigSpec struct {
// NodeType specifies the type of node (e.g., "al2023")
// +optional
NodeType NodeType `json:"nodeType,omitempty"`

Contributor (author) commented:

Thanks for the suggestion. I was already using a constant for NodeTypeAL2023. There was a syntax error in the suggested change, so I applied it manually, adding the enum to both the code and the CRDs.
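
For reference, a compilable version of the suggested enum (the suggestion above is missing the closing quote on "al2023" and the constant's NodeType type) might read as follows; this is a sketch of the intent, not the exact code applied in this PR:

// NodeType specifies the bootstrap node family.
// +kubebuilder:validation:Enum=al2023
type NodeType string

const (
	// NodeTypeAL2023 selects the Amazon Linux 2023 (nodeadm) bootstrap flow.
	NodeTypeAL2023 NodeType = "al2023"
)

// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
type EKSConfigSpec struct {
	// NodeType specifies the type of node (e.g., "al2023")
	// +optional
	NodeType NodeType `json:"nodeType,omitempty"`
}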

// KubeletExtraArgs passes the specified kubelet args into the Amazon EKS machine bootstrap script
// +optional
KubeletExtraArgs map[string]string `json:"kubeletExtraArgs,omitempty"`
156 changes: 138 additions & 18 deletions bootstrap/eks/controllers/eksconfig_controller.go
@@ -20,6 +20,9 @@ package controllers
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"os"
"time"

"github.com/pkg/errors"
@@ -28,6 +31,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
@@ -39,18 +43,25 @@ import (
eksbootstrapv1 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/internal/userdata"
ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/eks/api/v1beta2"
expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/feature"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
"sigs.k8s.io/cluster-api-provider-aws/v2/util/paused"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
bsutil "sigs.k8s.io/cluster-api/bootstrap/util"
expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/feature"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/conditions"
kubeconfigutil "sigs.k8s.io/cluster-api/util/kubeconfig"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/predicates"
)

const (
// NodeTypeAL2023 represents the AL2023 node type.
NodeTypeAL2023 = "al2023"
)

// EKSConfigReconciler reconciles a EKSConfig object.
type EKSConfigReconciler struct {
client.Client
@@ -143,7 +154,7 @@ func (r *EKSConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
}()

return ctrl.Result{}, r.joinWorker(ctx, cluster, config, configOwner)
return r.joinWorker(ctx, cluster, config, configOwner)
}

func (r *EKSConfigReconciler) resolveFiles(ctx context.Context, cfg *eksbootstrapv1.EKSConfig) ([]eksbootstrapv1.File, error) {
Expand Down Expand Up @@ -181,7 +192,7 @@ func (r *EKSConfigReconciler) resolveSecretFileContent(ctx context.Context, ns s
return data, nil
}

func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1.Cluster, config *eksbootstrapv1.EKSConfig, configOwner *bsutil.ConfigOwner) error {
func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1.Cluster, config *eksbootstrapv1.EKSConfig, configOwner *bsutil.ConfigOwner) (ctrl.Result, error) {
log := logger.FromContext(ctx)

// only need to reconcile the secret for Machine kinds once, but MachinePools need updates for new launch templates
@@ -195,15 +206,15 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
err := r.Client.Get(ctx, secretKey, existingSecret)
switch {
case err == nil:
return nil
return ctrl.Result{}, nil
case !apierrors.IsNotFound(err):
log.Error(err, "unable to check for existing bootstrap secret")
return err
return ctrl.Result{}, err
}
}

if cluster.Spec.ControlPlaneRef == nil || cluster.Spec.ControlPlaneRef.Kind != "AWSManagedControlPlane" {
return errors.New("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider")
return ctrl.Result{}, errors.New("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider")
}

if !cluster.Status.InfrastructureReady {
@@ -212,30 +223,54 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.WaitingForClusterInfrastructureReason,
clusterv1.ConditionSeverityInfo, "")
return nil
return ctrl.Result{}, nil
}

if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
log.Info("Control Plane has not yet been initialized")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.WaitingForControlPlaneInitializationReason, clusterv1.ConditionSeverityInfo, "")
return nil
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityInfo, "Control plane is not initialized yet")

// For AL2023, requeue to ensure we retry when control plane is ready
// For AL2, follow upstream behavior and return nil
if config.Spec.NodeType == NodeTypeAL2023 {
log.Info("AL2023 detected, returning requeue after 30 seconds")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
log.Info("AL2 detected, returning no requeue")
return ctrl.Result{}, nil
}

// Get the AWSManagedControlPlane
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
if err := r.Get(ctx, client.ObjectKey{Name: cluster.Spec.ControlPlaneRef.Name, Namespace: cluster.Spec.ControlPlaneRef.Namespace}, controlPlane); err != nil {
return err
return ctrl.Result{}, errors.Wrap(err, "failed to get control plane")
}

// Check if control plane is ready (skip in test environments for AL2023)
if config.Spec.NodeType == NodeTypeAL2023 && !conditions.IsTrue(controlPlane, ekscontrolplanev1.EKSControlPlaneReadyCondition) {
// Skip control plane readiness check for AL2023 in test environment
if os.Getenv("TEST_ENV") != "true" {
log.Info("AL2023 detected, waiting for control plane to be ready")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityInfo, "Control plane is not ready yet")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
log.Info("Skipping control plane readiness check for AL2023 in test environment")
}
log.Info("Control plane is ready, proceeding with userdata generation")

log.Info("Generating userdata")
files, err := r.resolveFiles(ctx, config)
if err != nil {
log.Info("Failed to resolve files for user data")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "%s", err.Error())
return err
return ctrl.Result{}, err
}

// Create unified NodeInput for both AL2 and AL2023
nodeInput := &userdata.NodeInput{
// AWSManagedControlPlane webhooks default and validate EKSClusterName
ClusterName: controlPlane.Spec.EKSClusterName,
KubeletExtraArgs: config.Spec.KubeletExtraArgs,
ContainerRuntime: config.Spec.ContainerRuntime,
@@ -251,7 +286,9 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
DiskSetup: config.Spec.DiskSetup,
Mounts: config.Spec.Mounts,
Files: files,
ClusterCIDR: controlPlane.Spec.NetworkSpec.VPC.CidrBlock,
}

if config.Spec.PauseContainer != nil {
nodeInput.PauseContainerAccount = &config.Spec.PauseContainer.AccountNumber
nodeInput.PauseContainerVersion = &config.Spec.PauseContainer.Version
@@ -271,22 +308,85 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
nodeInput.IPFamily = ptr.To[string]("ipv6")
}

// generate userdata
// Set AMI family type and AL2023-specific fields if needed
if config.Spec.NodeType == NodeTypeAL2023 {
log.Info("Processing AL2023 node type")
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2023

// Set AL2023-specific fields
nodeInput.APIServerEndpoint = controlPlane.Spec.ControlPlaneEndpoint.Host
nodeInput.NodeGroupName = config.Name

// In test environments, provide a mock CA certificate
if os.Getenv("TEST_ENV") == "true" {
log.Info("Using mock CA certificate for test environment")
nodeInput.CACert = "mock-ca-certificate-for-testing"
} else {
// Fetch CA cert from KubeConfig secret
// We already have the cluster object passed to this function
obj := client.ObjectKey{
Namespace: cluster.Namespace,
Name: cluster.Name,
}
ca, err := extractCAFromSecret(ctx, r.Client, obj)
if err != nil {
log.Error(err, "Failed to extract CA from kubeconfig secret")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
eksbootstrapv1.DataSecretGenerationFailedReason,
clusterv1.ConditionSeverityWarning,
"Failed to extract CA from kubeconfig secret: %v", err)
return ctrl.Result{}, err
}
nodeInput.CACert = ca
}

// Get AMI ID from AWSManagedMachinePool's launch template if specified
if configOwner.GetKind() == "AWSManagedMachinePool" {
amp := &expinfrav1.AWSManagedMachinePool{}
if err := r.Get(ctx, client.ObjectKey{Namespace: config.Namespace, Name: configOwner.GetName()}, amp); err == nil {
log.Info("Found AWSManagedMachinePool", "name", amp.Name, "launchTemplate", amp.Spec.AWSLaunchTemplate != nil)
if amp.Spec.AWSLaunchTemplate != nil && amp.Spec.AWSLaunchTemplate.AMI.ID != nil {
nodeInput.AMIImageID = *amp.Spec.AWSLaunchTemplate.AMI.ID
log.Info("Set AMI ID from launch template", "amiID", nodeInput.AMIImageID)
} else {
log.Info("No AMI ID found in launch template")
}
if amp.Spec.CapacityType != nil {
nodeInput.CapacityType = amp.Spec.CapacityType
log.Info("Set capacity type from AWSManagedMachinePool", "capacityType", *amp.Spec.CapacityType)
} else {
log.Info("No capacity type found in AWSManagedMachinePool")
}
} else {
log.Info("Failed to get AWSManagedMachinePool", "error", err)
}
}

log.Info("Generating AL2023 userdata",
"cluster", controlPlane.Spec.EKSClusterName,
"endpoint", nodeInput.APIServerEndpoint)
} else {
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2
log.Info("Generating standard userdata for node type", "type", config.Spec.NodeType)
}

// Generate userdata using unified approach
userDataScript, err := userdata.NewNode(nodeInput)
if err != nil {
log.Error(err, "Failed to create a worker join configuration")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
return err
return ctrl.Result{}, err
}

// store userdata as secret
// Store the userdata in a secret
if err := r.storeBootstrapData(ctx, cluster, config, userDataScript); err != nil {
log.Error(err, "Failed to store bootstrap data")
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
return err
return ctrl.Result{}, err
}

return nil
conditions.MarkTrue(config, eksbootstrapv1.DataSecretAvailableCondition)
return ctrl.Result{}, nil
}

func (r *EKSConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, option controller.Options) error {
@@ -466,3 +566,23 @@ func (r *EKSConfigReconciler) updateBootstrapSecret(ctx context.Context, secret
}
return false, nil
}

func extractCAFromSecret(ctx context.Context, c client.Client, obj client.ObjectKey) (string, error) {
data, err := kubeconfigutil.FromSecret(ctx, c, obj)
if err != nil {
return "", errors.Wrapf(err, "failed to get kubeconfig secret %s", obj.Name)
}
config, err := clientcmd.Load(data)
if err != nil {
return "", errors.Wrapf(err, "failed to parse kubeconfig data from secret %s", obj.Name)
}

// Iterate through all clusters in the kubeconfig and use the first one with CA data
for _, cluster := range config.Clusters {
if len(cluster.CertificateAuthorityData) > 0 {
return base64.StdEncoding.EncodeToString(cluster.CertificateAuthorityData), nil
}
}

return "", fmt.Errorf("no cluster with CA data found in kubeconfig")
}