Skip to content

Commit d12643c

Browse files
committed
Add support for EKSConfig LaunchTemplate bootstrapping for AL2023 using nodeadm
1 parent 6725536 commit d12643c

File tree

7 files changed

+272
-78
lines changed

7 files changed

+272
-78
lines changed

bootstrap/eks/api/v1beta1/zz_generated.conversion.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bootstrap/eks/api/v1beta2/eksconfig_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ import (
2424

2525
// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
2626
type EKSConfigSpec struct {
27+
// NodeType specifies the type of node (e.g., "al2023")
28+
// +optional
29+
NodeType string `json:"nodeType,omitempty"`
2730
// KubeletExtraArgs passes the specified kubelet args into the Amazon EKS machine bootstrap script
2831
// +optional
2932
KubeletExtraArgs map[string]string `json:"kubeletExtraArgs,omitempty"`

bootstrap/eks/controllers/eksconfig_controller.go

Lines changed: 173 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@ package controllers
2020
import (
2121
"bytes"
2222
"context"
23+
"fmt"
2324
"time"
2425

26+
"github.com/aws/aws-sdk-go/aws"
27+
"github.com/aws/aws-sdk-go/aws/session"
28+
"github.com/aws/aws-sdk-go/service/eks"
2529
"github.com/pkg/errors"
2630
corev1 "k8s.io/api/core/v1"
2731
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -39,6 +43,7 @@ import (
3943
eksbootstrapv1 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/api/v1beta2"
4044
"sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/internal/userdata"
4145
ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/eks/api/v1beta2"
46+
expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
4247
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
4348
"sigs.k8s.io/cluster-api-provider-aws/v2/util/paused"
4449
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
@@ -221,9 +226,19 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
221226
return nil
222227
}
223228

229+
// Get the AWSManagedControlPlane
224230
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
225231
if err := r.Get(ctx, client.ObjectKey{Name: cluster.Spec.ControlPlaneRef.Name, Namespace: cluster.Spec.ControlPlaneRef.Namespace}, controlPlane); err != nil {
226-
return err
232+
return errors.Wrap(err, "failed to get control plane")
233+
}
234+
235+
// Check if control plane is ready
236+
if !conditions.IsTrue(controlPlane, ekscontrolplanev1.EKSControlPlaneReadyCondition) {
237+
log.Info("Control plane is not ready yet, waiting...")
238+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
239+
eksbootstrapv1.DataSecretGenerationFailedReason,
240+
clusterv1.ConditionSeverityInfo, "Control plane is not ready yet")
241+
return nil
227242
}
228243

229244
log.Info("Generating userdata")
@@ -234,61 +249,174 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
234249
return err
235250
}
236251

237-
nodeInput := &userdata.NodeInput{
238-
// AWSManagedControlPlane webhooks default and validate EKSClusterName
239-
ClusterName: controlPlane.Spec.EKSClusterName,
240-
KubeletExtraArgs: config.Spec.KubeletExtraArgs,
241-
ContainerRuntime: config.Spec.ContainerRuntime,
242-
DNSClusterIP: config.Spec.DNSClusterIP,
243-
DockerConfigJSON: config.Spec.DockerConfigJSON,
244-
APIRetryAttempts: config.Spec.APIRetryAttempts,
245-
UseMaxPods: config.Spec.UseMaxPods,
246-
PreBootstrapCommands: config.Spec.PreBootstrapCommands,
247-
PostBootstrapCommands: config.Spec.PostBootstrapCommands,
248-
BootstrapCommandOverride: config.Spec.BootstrapCommandOverride,
249-
NTP: config.Spec.NTP,
250-
Users: config.Spec.Users,
251-
DiskSetup: config.Spec.DiskSetup,
252-
Mounts: config.Spec.Mounts,
253-
Files: files,
254-
}
255-
if config.Spec.PauseContainer != nil {
256-
nodeInput.PauseContainerAccount = &config.Spec.PauseContainer.AccountNumber
257-
nodeInput.PauseContainerVersion = &config.Spec.PauseContainer.Version
258-
}
259-
260-
// Check if IPv6 was provided to the user configuration first
261-
// If not, we also check if the cluster is ipv6 based.
262-
if config.Spec.ServiceIPV6Cidr != nil && *config.Spec.ServiceIPV6Cidr != "" {
263-
nodeInput.ServiceIPV6Cidr = config.Spec.ServiceIPV6Cidr
264-
nodeInput.IPFamily = ptr.To[string]("ipv6")
265-
}
266-
267-
// we don't want to override any manually set configuration options.
268-
if config.Spec.ServiceIPV6Cidr == nil && controlPlane.Spec.NetworkSpec.VPC.IsIPv6Enabled() {
269-
log.Info("Adding ipv6 data to userdata....")
270-
nodeInput.ServiceIPV6Cidr = ptr.To[string](controlPlane.Spec.NetworkSpec.VPC.IPv6.CidrBlock)
271-
nodeInput.IPFamily = ptr.To[string]("ipv6")
272-
}
273-
274-
// generate userdata
275-
userDataScript, err := userdata.NewNode(nodeInput)
276-
if err != nil {
277-
log.Error(err, "Failed to create a worker join configuration")
278-
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
279-
return err
252+
// Generate userdata based on node type
253+
var userDataScript []byte
254+
255+
if config.Spec.NodeType == "al2023" {
256+
// Use the ControlPlaneEndpoint from the AWSManagedControlPlane spec
257+
apiServerEndpoint := controlPlane.Spec.ControlPlaneEndpoint.Host
258+
259+
log.Info("Generating AL2023 userdata",
260+
"cluster", controlPlane.Spec.EKSClusterName,
261+
"endpoint", apiServerEndpoint)
262+
263+
// Fetch CA cert directly from EKS API
264+
sess, err := session.NewSession(&aws.Config{Region: aws.String(controlPlane.Spec.Region)})
265+
if err != nil {
266+
log.Error(err, "Failed to create AWS session for EKS API")
267+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
268+
eksbootstrapv1.DataSecretGenerationFailedReason,
269+
clusterv1.ConditionSeverityWarning,
270+
"Failed to create AWS session: %v", err)
271+
return err
272+
}
273+
eksClient := eks.New(sess)
274+
describeInput := &eks.DescribeClusterInput{Name: aws.String(controlPlane.Spec.EKSClusterName)}
275+
clusterOut, err := eksClient.DescribeCluster(describeInput)
276+
if err != nil {
277+
log.Error(err, "Failed to describe EKS cluster for CA cert fetch")
278+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
279+
eksbootstrapv1.DataSecretGenerationFailedReason,
280+
clusterv1.ConditionSeverityWarning,
281+
"Failed to describe EKS cluster: %v", err)
282+
return err
283+
}
284+
285+
caCert := ""
286+
if clusterOut.Cluster != nil && clusterOut.Cluster.CertificateAuthority != nil && clusterOut.Cluster.CertificateAuthority.Data != nil {
287+
caCert = *clusterOut.Cluster.CertificateAuthority.Data
288+
} else {
289+
log.Error(nil, "CA certificate not found in EKS cluster response")
290+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
291+
eksbootstrapv1.DataSecretGenerationFailedReason,
292+
clusterv1.ConditionSeverityWarning,
293+
"CA certificate not found in EKS cluster response")
294+
return fmt.Errorf("CA certificate not found in EKS cluster response")
295+
}
296+
297+
// Get AMI ID from AWSManagedMachinePool's launch template if specified
298+
var amiID string
299+
if configOwner.GetKind() == "MachinePool" {
300+
amp := &expinfrav1.AWSManagedMachinePool{}
301+
if err := r.Get(ctx, client.ObjectKey{Namespace: config.Namespace, Name: configOwner.GetName()}, amp); err == nil {
302+
if amp.Spec.AWSLaunchTemplate != nil && amp.Spec.AWSLaunchTemplate.AMI.ID != nil {
303+
amiID = *amp.Spec.AWSLaunchTemplate.AMI.ID
304+
}
305+
}
306+
}
307+
308+
input := &userdata.AL2023UserDataInput{
309+
ClusterName: controlPlane.Spec.EKSClusterName,
310+
APIServerEndpoint: apiServerEndpoint,
311+
CACert: caCert,
312+
NodeGroupName: config.Name, // Use the config name as nodegroup name
313+
MaxPods: getMaxPods(config), // Get from config or use default
314+
ClusterDNS: getClusterDNS(config), // Get from config or use default
315+
AMIImageID: amiID, // Use launch template AMI if specified
316+
CapacityType: getCapacityType(config), // Get from config or use default
317+
}
318+
319+
// Try to generate userdata with retries
320+
var userDataErr error
321+
for i := 0; i < 3; i++ { // Retry up to 3 times
322+
userDataScript, userDataErr = userdata.GenerateAL2023UserData(input)
323+
if userDataErr == nil {
324+
break
325+
}
326+
log.Error(userDataErr, "Failed to generate AL2023 userdata, retrying",
327+
"attempt", i+1,
328+
"cluster", input.ClusterName)
329+
time.Sleep(time.Second * time.Duration(i+1)) // Exponential backoff
330+
}
331+
332+
if userDataErr != nil {
333+
log.Error(userDataErr, "Failed to generate AL2023 userdata after retries")
334+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
335+
eksbootstrapv1.DataSecretGenerationFailedReason,
336+
clusterv1.ConditionSeverityWarning,
337+
"Failed to generate AL2023 userdata: %v", userDataErr)
338+
return userDataErr
339+
}
340+
} else {
341+
log.Info("Generating standard userdata for node type", "type", config.Spec.NodeType)
342+
nodeInput := &userdata.NodeInput{
343+
// AWSManagedControlPlane webhooks default and validate EKSClusterName
344+
ClusterName: controlPlane.Spec.EKSClusterName,
345+
KubeletExtraArgs: config.Spec.KubeletExtraArgs,
346+
ContainerRuntime: config.Spec.ContainerRuntime,
347+
DNSClusterIP: config.Spec.DNSClusterIP,
348+
DockerConfigJSON: config.Spec.DockerConfigJSON,
349+
APIRetryAttempts: config.Spec.APIRetryAttempts,
350+
UseMaxPods: config.Spec.UseMaxPods,
351+
PreBootstrapCommands: config.Spec.PreBootstrapCommands,
352+
PostBootstrapCommands: config.Spec.PostBootstrapCommands,
353+
BootstrapCommandOverride: config.Spec.BootstrapCommandOverride,
354+
NTP: config.Spec.NTP,
355+
Users: config.Spec.Users,
356+
DiskSetup: config.Spec.DiskSetup,
357+
Mounts: config.Spec.Mounts,
358+
Files: files,
359+
}
360+
361+
if config.Spec.PauseContainer != nil {
362+
nodeInput.PauseContainerAccount = &config.Spec.PauseContainer.AccountNumber
363+
nodeInput.PauseContainerVersion = &config.Spec.PauseContainer.Version
364+
}
365+
366+
// Check if IPv6 was provided to the user configuration first
367+
// If not, we also check if the cluster is ipv6 based.
368+
if config.Spec.ServiceIPV6Cidr != nil && *config.Spec.ServiceIPV6Cidr != "" {
369+
nodeInput.ServiceIPV6Cidr = config.Spec.ServiceIPV6Cidr
370+
nodeInput.IPFamily = ptr.To[string]("ipv6")
371+
}
372+
373+
// we don't want to override any manually set configuration options.
374+
if config.Spec.ServiceIPV6Cidr == nil && controlPlane.Spec.NetworkSpec.VPC.IsIPv6Enabled() {
375+
log.Info("Adding ipv6 data to userdata....")
376+
nodeInput.ServiceIPV6Cidr = ptr.To[string](controlPlane.Spec.NetworkSpec.VPC.IPv6.CidrBlock)
377+
nodeInput.IPFamily = ptr.To[string]("ipv6")
378+
}
379+
380+
userDataScript, err = userdata.NewNode(nodeInput)
381+
if err != nil {
382+
log.Error(err, "Failed to create a worker join configuration")
383+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
384+
return err
385+
}
280386
}
281387

282-
// store userdata as secret
388+
// Store the userdata in a secret
283389
if err := r.storeBootstrapData(ctx, cluster, config, userDataScript); err != nil {
284390
log.Error(err, "Failed to store bootstrap data")
285391
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
286392
return err
287393
}
288394

395+
conditions.MarkTrue(config, eksbootstrapv1.DataSecretAvailableCondition)
289396
return nil
290397
}
291398

399+
// Helper functions that supply values for the AL2023 userdata input, falling back to hard-coded defaults.
400+
func getMaxPods(config *eksbootstrapv1.EKSConfig) int {
401+
if config.Spec.UseMaxPods != nil && *config.Spec.UseMaxPods {
402+
return 58 // Default value when UseMaxPods is true
403+
}
404+
return 110 // Default value when UseMaxPods is false
405+
}
406+
407+
func getClusterDNS(config *eksbootstrapv1.EKSConfig) string {
408+
if config.Spec.DNSClusterIP != nil && *config.Spec.DNSClusterIP != "" {
409+
return *config.Spec.DNSClusterIP
410+
}
411+
return "10.96.0.10" // Default value
412+
}
413+
414+
func getCapacityType(config *eksbootstrapv1.EKSConfig) string {
415+
// TODO: Get from AWSManagedMachinePool spec if available
416+
// For now, return default
417+
return "ON_DEMAND"
418+
}
419+
292420
func (r *EKSConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, option controller.Options) error {
293421
b := ctrl.NewControllerManagedBy(mgr).
294422
For(&eksbootstrapv1.EKSConfig{}).

bootstrap/eks/internal/userdata/node.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package userdata
1919
import (
2020
"bytes"
2121
"fmt"
22+
"strings"
2223
"text/template"
2324

2425
"github.com/alessio/shellescape"
@@ -138,3 +139,91 @@ func NewNode(input *NodeInput) ([]byte, error) {
138139

139140
return out.Bytes(), nil
140141
}
142+
143+
// AL2023UserDataInput carries the values that are substituted into the
// Amazon Linux 2023 (nodeadm NodeConfig) userdata document. Every field is
// mandatory; see ValidateAL2023UserDataInput for the exact checks.
type AL2023UserDataInput struct {
	// ClusterName is rendered as the NodeConfig cluster name.
	ClusterName string
	// APIServerEndpoint is rendered as the cluster apiServerEndpoint and must
	// start with "https://".
	APIServerEndpoint string
	// CACert is rendered verbatim as the cluster certificateAuthority.
	CACert string
	// NodeGroupName is the value of the eks.amazonaws.com/nodegroup node label.
	NodeGroupName string
	// MaxPods is rendered as the kubelet maxPods setting and must be positive.
	MaxPods int
	// ClusterDNS is rendered as the single kubelet clusterDNS entry.
	ClusterDNS string
	// AMIImageID is the value of the eks.amazonaws.com/nodegroup-image node label.
	AMIImageID string
	// CapacityType is the value of the eks.amazonaws.com/capacityType node label.
	CapacityType string
}
154+
155+
// ValidateAL2023UserDataInput validates the input for AL2023 userdata generation
156+
func ValidateAL2023UserDataInput(input *AL2023UserDataInput) error {
157+
if input.ClusterName == "" {
158+
return fmt.Errorf("cluster name is required")
159+
}
160+
if input.APIServerEndpoint == "" {
161+
return fmt.Errorf("API server endpoint is required")
162+
}
163+
if !strings.HasPrefix(input.APIServerEndpoint, "https://") {
164+
return fmt.Errorf("API server endpoint must start with https://")
165+
}
166+
if input.CACert == "" {
167+
return fmt.Errorf("CA certificate is required")
168+
}
169+
if input.NodeGroupName == "" {
170+
return fmt.Errorf("node group name is required")
171+
}
172+
if input.MaxPods <= 0 {
173+
return fmt.Errorf("max pods must be greater than 0")
174+
}
175+
if input.ClusterDNS == "" {
176+
return fmt.Errorf("cluster DNS is required")
177+
}
178+
if input.AMIImageID == "" {
179+
return fmt.Errorf("AMI image ID is required")
180+
}
181+
if input.CapacityType == "" {
182+
return fmt.Errorf("capacity type is required")
183+
}
184+
return nil
185+
}
186+
187+
// GenerateAL2023UserData generates userdata for Amazon Linux 2023 nodes with validation and retry
188+
func GenerateAL2023UserData(input *AL2023UserDataInput) ([]byte, error) {
189+
// Validate input
190+
if err := ValidateAL2023UserDataInput(input); err != nil {
191+
return nil, fmt.Errorf("invalid input: %w", err)
192+
}
193+
194+
// Generate userdata with validated input
195+
userData := fmt.Sprintf(`MIME-Version: 1.0
196+
Content-Type: multipart/mixed; boundary="//"
197+
198+
--//
199+
Content-Type: application/node.eks.aws
200+
201+
---
202+
apiVersion: node.eks.aws/v1alpha1
203+
kind: NodeConfig
204+
spec:
205+
cluster:
206+
apiServerEndpoint: %s
207+
certificateAuthority: %s
208+
cidr: 10.96.0.0/12
209+
name: %s
210+
kubelet:
211+
config:
212+
maxPods: %d
213+
clusterDNS:
214+
- %s
215+
flags:
216+
- "--node-labels=eks.amazonaws.com/nodegroup-image=%s,eks.amazonaws.com/capacityType=%s,eks.amazonaws.com/nodegroup=%s"
217+
218+
--//--`,
219+
input.APIServerEndpoint,
220+
input.CACert,
221+
input.ClusterName,
222+
input.MaxPods,
223+
input.ClusterDNS,
224+
input.AMIImageID,
225+
input.CapacityType,
226+
input.NodeGroupName)
227+
228+
return []byte(userData), nil
229+
}

0 commit comments

Comments
 (0)