Skip to content

Commit ab6be84

Browse files
authored
Merge pull request #5319 from AndiDog/awsmachinepool-awsmachines
✨ Add AWSMachines to back the EC2 instances in AWSMachinePools and AWSManagedMachinePools
2 parents 2bf86d4 + 3d7c9a8 commit ab6be84

16 files changed

+607
-72
lines changed

config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,10 @@ spec:
11611161
can be added as events to the Machine object and/or logged in the
11621162
controller's output.
11631163
type: string
1164+
infrastructureMachineKind:
1165+
description: InfrastructureMachineKind is the kind of the infrastructure
1166+
resources behind MachinePool Machines.
1167+
type: string
11641168
instances:
11651169
description: Instances contains the status for each instance in the
11661170
pool

config/rbac/role.yaml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ rules:
7272
- clusters
7373
- clusters/status
7474
- machinedeployments
75-
- machines
7675
- machines/status
7776
verbs:
7877
- get
@@ -88,6 +87,15 @@ rules:
8887
- list
8988
- patch
9089
- watch
90+
- apiGroups:
91+
- cluster.x-k8s.io
92+
resources:
93+
- machines
94+
verbs:
95+
- delete
96+
- get
97+
- list
98+
- watch
9199
- apiGroups:
92100
- controlplane.cluster.x-k8s.io
93101
resources:
@@ -150,7 +158,6 @@ rules:
150158
- awsclusters
151159
- awsfargateprofiles
152160
- awsmachinepools
153-
- awsmachines
154161
- awsmanagedclusters
155162
- awsmanagedmachinepools
156163
- rosaclusters
@@ -186,6 +193,18 @@ rules:
186193
- patch
187194
- update
188195
- watch
196+
- apiGroups:
197+
- infrastructure.cluster.x-k8s.io
198+
resources:
199+
- awsmachines
200+
verbs:
201+
- create
202+
- delete
203+
- get
204+
- list
205+
- patch
206+
- update
207+
- watch
189208
- apiGroups:
190209
- infrastructure.cluster.x-k8s.io
191210
resources:

controllers/awsmachine_controller.go

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -142,10 +142,11 @@ func (r *AWSMachineReconciler) getObjectStoreService(scope scope.S3Scope) servic
142142
return s3.NewService(scope)
143143
}
144144

145-
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines,verbs=get;list;watch;update;patch;delete
146-
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines/status,verbs=get;update;patch
147145
// +kubebuilder:rbac:groups=controlplane.cluster.x-k8s.io,resources=*,verbs=get;list;watch
148-
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch
146+
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines,verbs=create;get;list;watch;update;patch;delete
147+
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines/status,verbs=get;update;patch
148+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch;delete
149+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines/status,verbs=get;list;watch
149150
// +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch
150151
// +kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch
151152
// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
@@ -458,6 +459,7 @@ func (r *AWSMachineReconciler) findInstance(machineScope *scope.MachineScope, ec
458459
return instance, nil
459460
}
460461

462+
//nolint:gocyclo
461463
func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *scope.MachineScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope, elbScope scope.ELBScope, objectStoreScope scope.S3Scope) (ctrl.Result, error) {
462464
machineScope.Trace("Reconciling AWSMachine")
463465

@@ -481,7 +483,7 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
481483
}
482484

483485
// Make sure bootstrap data is available and populated.
484-
if machineScope.Machine.Spec.Bootstrap.DataSecretName == nil {
486+
if !machineScope.IsMachinePoolMachine() && machineScope.Machine.Spec.Bootstrap.DataSecretName == nil {
485487
machineScope.Info("Bootstrap data secret reference is not yet available")
486488
conditions.MarkFalse(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "")
487489
return ctrl.Result{}, nil
@@ -496,6 +498,12 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
496498
conditions.MarkUnknown(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, "%s", err.Error())
497499
return ctrl.Result{}, err
498500
}
501+
if instance == nil && machineScope.IsMachinePoolMachine() {
502+
err = errors.New("no instance found for machine pool")
503+
machineScope.Error(err, "unable to find instance")
504+
conditions.MarkUnknown(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, "%s", err.Error())
505+
return ctrl.Result{}, err
506+
}
499507

500508
// If the AWSMachine doesn't have our finalizer, add it.
501509
if controllerutil.AddFinalizer(machineScope.AWSMachine, infrav1.MachineFinalizer) {
@@ -585,9 +593,18 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
585593
conditions.MarkTrue(machineScope.AWSMachine, infrav1.InstanceReadyCondition)
586594
case infrav1.InstanceStateShuttingDown, infrav1.InstanceStateTerminated:
587595
machineScope.SetNotReady()
588-
machineScope.Info("Unexpected EC2 instance termination", "state", instance.State, "instance-id", *machineScope.GetInstanceID())
589-
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "InstanceUnexpectedTermination", "Unexpected EC2 instance termination")
590-
conditions.MarkFalse(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceTerminatedReason, clusterv1.ConditionSeverityError, "")
596+
597+
if machineScope.IsMachinePoolMachine() {
598+
// In an auto-scaling group, instance termination is perfectly normal on scale-down
599+
// and therefore should not be reported as an error.
600+
machineScope.Info("EC2 instance of machine pool was terminated", "state", instance.State, "instance-id", *machineScope.GetInstanceID())
601+
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeNormal, infrav1.InstanceTerminatedReason, "EC2 instance termination")
602+
conditions.MarkFalse(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceTerminatedReason, clusterv1.ConditionSeverityInfo, "")
603+
} else {
604+
machineScope.Info("Unexpected EC2 instance termination", "state", instance.State, "instance-id", *machineScope.GetInstanceID())
605+
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "InstanceUnexpectedTermination", "Unexpected EC2 instance termination")
606+
conditions.MarkFalse(machineScope.AWSMachine, infrav1.InstanceReadyCondition, infrav1.InstanceTerminatedReason, clusterv1.ConditionSeverityError, "")
607+
}
591608
default:
592609
machineScope.SetNotReady()
593610
machineScope.Info("EC2 instance state is undefined", "state", instance.State, "instance-id", *machineScope.GetInstanceID())
@@ -598,14 +615,18 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
598615
}
599616

600617
// reconcile the deletion of the bootstrap data secret now that we have updated instance state
601-
if deleteSecretErr := r.deleteBootstrapData(machineScope, clusterScope, objectStoreScope); deleteSecretErr != nil {
602-
r.Log.Error(deleteSecretErr, "unable to delete secrets")
603-
return ctrl.Result{}, deleteSecretErr
604-
}
618+
if !machineScope.IsMachinePoolMachine() {
619+
if deleteSecretErr := r.deleteBootstrapData(machineScope, clusterScope, objectStoreScope); deleteSecretErr != nil {
620+
r.Log.Error(deleteSecretErr, "unable to delete secrets")
621+
return ctrl.Result{}, deleteSecretErr
622+
}
605623

606-
if instance.State == infrav1.InstanceStateTerminated {
607-
machineScope.SetFailureReason("UpdateError")
608-
machineScope.SetFailureMessage(errors.Errorf("EC2 instance state %q is unexpected", instance.State))
624+
// For machine pool machines, it is expected that the ASG terminates instances at any time,
625+
// so no failure reason or failure message is set for those.
626+
if instance.State == infrav1.InstanceStateTerminated {
627+
machineScope.SetFailureReason("UpdateError")
628+
machineScope.SetFailureMessage(errors.Errorf("EC2 instance state %q is unexpected", instance.State))
629+
}
609630
}
610631

611632
// tasks that can take place during all known instance states
@@ -875,9 +896,13 @@ func getIgnitionVersion(scope *scope.MachineScope) string {
875896
}
876897

877898
func (r *AWSMachineReconciler) deleteBootstrapData(machineScope *scope.MachineScope, clusterScope cloud.ClusterScoper, objectStoreScope scope.S3Scope) error {
878-
_, userDataFormat, err := machineScope.GetRawBootstrapDataWithFormat()
879-
if client.IgnoreNotFound(err) != nil {
880-
return errors.Wrap(err, "failed to get raw userdata")
899+
var userDataFormat string
900+
var err error
901+
if machineScope.Machine.Spec.Bootstrap.DataSecretName != nil {
902+
_, userDataFormat, err = machineScope.GetRawBootstrapDataWithFormat()
903+
if client.IgnoreNotFound(err) != nil {
904+
return errors.Wrap(err, "failed to get raw userdata")
905+
}
881906
}
882907

883908
if machineScope.UseSecretsManager(userDataFormat) {

exp/api/v1beta1/conversion.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ func (src *AWSMachinePool) ConvertTo(dstRaw conversion.Hub) error {
5252
if restored.Spec.AvailabilityZoneSubnetType != nil {
5353
dst.Spec.AvailabilityZoneSubnetType = restored.Spec.AvailabilityZoneSubnetType
5454
}
55+
dst.Status.InfrastructureMachineKind = restored.Status.InfrastructureMachineKind
5556

5657
if restored.Spec.AWSLaunchTemplate.PrivateDNSName != nil {
5758
dst.Spec.AWSLaunchTemplate.PrivateDNSName = restored.Spec.AWSLaunchTemplate.PrivateDNSName
@@ -90,7 +91,6 @@ func (src *AWSMachinePoolList) ConvertTo(dstRaw conversion.Hub) error {
9091
// ConvertFrom converts the v1beta2 AWSMachinePoolList hub into this v1beta1 AWSMachinePoolList receiver.
9192
func (r *AWSMachinePoolList) ConvertFrom(srcRaw conversion.Hub) error {
9293
src := srcRaw.(*infrav1exp.AWSMachinePoolList)
93-
9494
return Convert_v1beta2_AWSMachinePoolList_To_v1beta1_AWSMachinePoolList(src, r, nil)
9595
}
9696

@@ -149,6 +149,10 @@ func Convert_v1beta2_AWSManagedMachinePoolSpec_To_v1beta1_AWSManagedMachinePoolS
149149
return autoConvert_v1beta2_AWSManagedMachinePoolSpec_To_v1beta1_AWSManagedMachinePoolSpec(in, out, s)
150150
}
151151

152+
func Convert_v1beta2_AWSMachinePoolStatus_To_v1beta1_AWSMachinePoolStatus(in *infrav1exp.AWSMachinePoolStatus, out *AWSMachinePoolStatus, s apiconversion.Scope) error {
153+
return autoConvert_v1beta2_AWSMachinePoolStatus_To_v1beta1_AWSMachinePoolStatus(in, out, s)
154+
}
155+
152156
// ConvertTo converts the v1beta1 AWSManagedMachinePoolList receiver to a v1beta2 AWSManagedMachinePoolList.
153157
func (src *AWSManagedMachinePoolList) ConvertTo(dstRaw conversion.Hub) error {
154158
dst := dstRaw.(*infrav1exp.AWSManagedMachinePoolList)

exp/api/v1beta1/zz_generated.conversion.go

Lines changed: 6 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exp/api/v1beta2/awsmachinepool_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,10 @@ type AWSMachinePoolStatus struct {
209209
// +optional
210210
LaunchTemplateVersion *string `json:"launchTemplateVersion,omitempty"`
211211

212+
// InfrastructureMachineKind is the kind of the infrastructure resources behind MachinePool Machines.
213+
// +optional
214+
InfrastructureMachineKind string `json:"infrastructureMachineKind,omitempty"`
215+
212216
// FailureReason will be set in the event that there is a terminal problem
213217
// reconciling the Machine and will contain a succinct value suitable
214218
// for machine interpretation.

exp/api/v1beta2/conditions_consts.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ const (
5454
InstanceRefreshNotReadyReason = "InstanceRefreshNotReady"
5555
// InstanceRefreshFailedReason is used to report when the instance refresh is not initiated.
5656
InstanceRefreshFailedReason = "InstanceRefreshFailed"
57+
58+
// AWSMachineCreationFailed reports if creating AWSMachines to represent ASG (machine pool) machines failed.
59+
AWSMachineCreationFailed = "AWSMachineCreationFailed"
60+
// AWSMachineDeletionFailed reports if deleting AWSMachines failed.
61+
AWSMachineDeletionFailed = "AWSMachineDeletionFailed"
5762
)
5863

5964
const (

exp/api/v1beta2/types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ import (
2222
infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
2323
)
2424

25+
const (
26+
// KindMachinePool is the Kind of the MachinePool resource.
27+
KindMachinePool string = "MachinePool"
28+
)
29+
2530
// EBS can be used to automatically set up EBS volumes when an instance is launched.
2631
type EBS struct {
2732
// Encrypted is whether the volume should be encrypted or not.

0 commit comments

Comments
 (0)