Skip to content

Commit 59f8000

Browse files
committed
Suspend and Resume ASG Processes
1 parent 13c0c2e commit 59f8000

13 files changed

+359
-50
lines changed

config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,13 @@ spec:
846846
type: string
847847
type: object
848848
type: array
849+
suspendProcesses:
850+
description: SuspendProcesses defines a list of processes to suspend
851+
for the given ASG. This is constantly reconciled. If a process is
852+
removed from this list it will automatically be resumed.
853+
items:
854+
type: string
855+
type: array
849856
required:
850857
- awsLaunchTemplate
851858
- maxSize
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Suspend ASG Processes
2+
3+
- **Feature status:** Experimental
4+
- **Feature gate:** MachinePool=true
5+
6+
MachinePool allows users to manage many machines as a single entity. Infrastructure providers implement a separate CRD that handles the infrastructure side of the feature.
7+
8+
## Suspend Processes
9+
10+
It's possible to suspend certain processes for an ASG. The list of processes can be found in the [AWS SuspendProcesses API reference](https://docs.aws.amazon.com/autoscaling/ec2/APIReference/API_SuspendProcesses.html).
11+
12+
To use this feature, list the processes that should be suspended under `suspendProcesses`.
13+
14+
```yaml
15+
---
16+
apiVersion: infrastructure.cluster.x-k8s.io/v1beta2
17+
kind: AWSMachinePool
18+
metadata:
19+
name: capa-mp-0
20+
spec:
21+
minSize: 1
22+
maxSize: 10
23+
availabilityZones:
24+
- "${AWS_AVAILABILITY_ZONE}"
25+
awsLaunchTemplate:
26+
instanceType: "${AWS_CONTROL_PLANE_MACHINE_TYPE}"
27+
sshKeyName: "${AWS_SSH_KEY_NAME}"
28+
suspendProcesses:
29+
- Launch
30+
- AlarmNotification
31+
- AZRebalance
32+
---
33+
```
34+
35+
## Resume Processes
36+
37+
To resume a process, simply remove it from the list of suspended processes. The reconciler will then
38+
resume any process that is not part of the desired suspended processes list.
39+
40+
```yaml
41+
---
42+
apiVersion: infrastructure.cluster.x-k8s.io/v1beta2
43+
kind: AWSMachinePool
44+
metadata:
45+
name: capa-mp-0
46+
spec:
47+
minSize: 1
48+
maxSize: 10
49+
availabilityZones:
50+
- "${AWS_AVAILABILITY_ZONE}"
51+
awsLaunchTemplate:
52+
instanceType: "${AWS_CONTROL_PLANE_MACHINE_TYPE}"
53+
sshKeyName: "${AWS_SSH_KEY_NAME}"
54+
suspendProcesses:
55+
- Launch
56+
---
57+
```
58+
59+
_Note_ that now `AlarmNotification` and `AZRebalance` will be resumed, but the reconciler will not try to suspend
60+
`Launch` again, so it doesn't incur additional expensive, redundant API calls.

exp/api/v1beta1/conversion.go

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
infrav1beta1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1"
2222
infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta2"
2323
infrav1exp "sigs.k8s.io/cluster-api-provider-aws/exp/api/v1beta2"
24+
utilconversion "sigs.k8s.io/cluster-api/util/conversion"
2425
"sigs.k8s.io/controller-runtime/pkg/conversion"
2526
)
2627

@@ -30,7 +31,19 @@ func (src *AWSMachinePool) ConvertTo(dstRaw conversion.Hub) error {
3031
if err := Convert_v1beta1_AWSMachinePool_To_v1beta2_AWSMachinePool(src, dst, nil); err != nil {
3132
return err
3233
}
33-
34+
35+
// Manually restore data.
36+
restored := &infrav1exp.AWSMachinePool{}
37+
if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok {
38+
return err
39+
}
40+
41+
if restored.Spec.SuspendProcesses != nil {
42+
restoredProcesses := make([]string, len(restored.Spec.SuspendProcesses))
43+
copy(restoredProcesses, restored.Spec.SuspendProcesses)
44+
dst.Spec.SuspendProcesses = restoredProcesses
45+
}
46+
3447
return nil
3548
}
3649

@@ -41,8 +54,8 @@ func (r *AWSMachinePool) ConvertFrom(srcRaw conversion.Hub) error {
4154
if err := Convert_v1beta2_AWSMachinePool_To_v1beta1_AWSMachinePool(src, r, nil); err != nil {
4255
return err
4356
}
44-
45-
return nil
57+
58+
return utilconversion.MarshalData(src, r)
4659
}
4760

4861
// ConvertTo converts the v1beta1 AWSMachinePoolList receiver to a v1beta2 AWSMachinePoolList.
@@ -75,7 +88,7 @@ func (r *AWSManagedMachinePool) ConvertFrom(srcRaw conversion.Hub) error {
7588
if err := Convert_v1beta2_AWSManagedMachinePool_To_v1beta1_AWSManagedMachinePool(src, r, nil); err != nil {
7689
return err
7790
}
78-
91+
7992
return nil
8093
}
8194

@@ -147,3 +160,20 @@ func Convert_v1beta1_Instance_To_v1beta2_Instance(in *infrav1beta1.Instance, out
147160
func Convert_v1beta2_AWSLaunchTemplate_To_v1beta1_AWSLaunchTemplate(in *infrav1exp.AWSLaunchTemplate, out *AWSLaunchTemplate, s apiconversion.Scope) error {
148161
return autoConvert_v1beta2_AWSLaunchTemplate_To_v1beta1_AWSLaunchTemplate(in, out, s)
149162
}
163+
164+
func Convert_v1beta1_AWSMachinePoolSpec_To_v1beta2_AWSMachinePoolSpec(in *AWSMachinePoolSpec, out *infrav1exp.AWSMachinePoolSpec, s apiconversion.Scope) error {
165+
return autoConvert_v1beta1_AWSMachinePoolSpec_To_v1beta2_AWSMachinePoolSpec(in, out, s)
166+
}
167+
168+
func Convert_v1beta2_AWSMachinePoolSpec_To_v1beta1_AWSMachinePoolSpec(in *infrav1exp.AWSMachinePoolSpec, out *AWSMachinePoolSpec, s apiconversion.Scope) error {
169+
return autoConvert_v1beta2_AWSMachinePoolSpec_To_v1beta1_AWSMachinePoolSpec(in, out, s)
170+
}
171+
172+
func Convert_v1beta1_AutoScalingGroup_To_v1beta2_AutoScalingGroup(in *AutoScalingGroup, out *infrav1exp.AutoScalingGroup, s apiconversion.Scope) error {
173+
return autoConvert_v1beta1_AutoScalingGroup_To_v1beta2_AutoScalingGroup(in, out, s)
174+
}
175+
176+
func Convert_v1beta2_AutoScalingGroup_To_v1beta1_AutoScalingGroup(in *infrav1exp.AutoScalingGroup, out *AutoScalingGroup, s apiconversion.Scope) error {
177+
// explicitly ignore CurrentlySuspendProcesses.
178+
return autoConvert_v1beta2_AutoScalingGroup_To_v1beta1_AutoScalingGroup(in, out, s)
179+
}

exp/api/v1beta1/zz_generated.conversion.go

Lines changed: 22 additions & 40 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exp/api/v1beta2/awsmachinepool_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ type AWSMachinePoolSpec struct {
8383
// Enable or disable the capacity rebalance autoscaling group feature
8484
// +optional
8585
CapacityRebalance bool `json:"capacityRebalance,omitempty"`
86+
87+
// SuspendProcesses defines a list of processes to suspend for the given ASG. This is constantly reconciled.
88+
// If a process is removed from this list it will automatically be resumed.
89+
SuspendProcesses []string `json:"suspendProcesses,omitempty"`
8690
}
8791

8892
// RefreshPreferences defines the specs for instance refreshing.

exp/api/v1beta2/types.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,10 @@ type AutoScalingGroup struct {
193193
DefaultCoolDown metav1.Duration `json:"defaultCoolDown,omitempty"`
194194
CapacityRebalance bool `json:"capacityRebalance,omitempty"`
195195

196-
MixedInstancesPolicy *MixedInstancesPolicy `json:"mixedInstancesPolicy,omitempty"`
197-
Status ASGStatus
198-
Instances []infrav1.Instance `json:"instances,omitempty"`
196+
MixedInstancesPolicy *MixedInstancesPolicy `json:"mixedInstancesPolicy,omitempty"`
197+
Status ASGStatus
198+
Instances []infrav1.Instance `json:"instances,omitempty"`
199+
CurrentlySuspendProcesses []string `json:"currentlySuspendProcesses,omitempty"`
199200
}
200201

201202
// ASGStatus is a status string returned by the autoscaling API.

exp/api/v1beta2/zz_generated.deepcopy.go

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exp/controllers/awsmachinepool_controller.go

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -374,16 +374,64 @@ func (r *AWSMachinePoolReconciler) reconcileDelete(machinePoolScope *scope.Machi
374374
}
375375

376376
func (r *AWSMachinePoolReconciler) updatePool(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, existingASG *expinfrav1.AutoScalingGroup) error {
377+
asgSvc := r.getASGService(clusterScope)
377378
if asgNeedsUpdates(machinePoolScope, existingASG) {
378379
machinePoolScope.Info("updating AutoScalingGroup")
379-
asgSvc := r.getASGService(clusterScope)
380380

381381
if err := asgSvc.UpdateASG(machinePoolScope); err != nil {
382382
r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedUpdate", "Failed to update ASG: %v", err)
383383
return errors.Wrap(err, "unable to update ASG")
384384
}
385385
}
386386

387+
if !cmp.Equal(existingASG.CurrentlySuspendProcesses, machinePoolScope.AWSMachinePool.Spec.SuspendProcesses) {
388+
var (
389+
toBeSuspended []string
390+
toBeResumed []string
391+
392+
currentlySuspended = make(map[string]struct{})
393+
desiredSuspended = make(map[string]struct{})
394+
)
395+
396+
// Convert the items to a map, so it's easy to create an effective diff from these two slices.
397+
for _, p := range existingASG.CurrentlySuspendProcesses {
398+
currentlySuspended[p] = struct{}{}
399+
}
400+
401+
for _, p := range machinePoolScope.AWSMachinePool.Spec.SuspendProcesses {
402+
desiredSuspended[p] = struct{}{}
403+
}
404+
405+
// Anything that remains in the desired items is not currently suspended so must be suspended.
406+
// Anything that remains in the currentlySuspended list must be resumed since they were not part of
407+
// desiredSuspended.
408+
for k := range desiredSuspended {
409+
if _, ok := currentlySuspended[k]; ok {
410+
delete(desiredSuspended, k)
411+
}
412+
delete(currentlySuspended, k)
413+
}
414+
415+
// Convert them back into lists so they can be passed to the suspend/resume calls below.
416+
for k := range desiredSuspended {
417+
toBeSuspended = append(toBeSuspended, k)
418+
}
419+
420+
for k := range currentlySuspended {
421+
toBeResumed = append(toBeResumed, k)
422+
}
423+
424+
if len(toBeSuspended) > 0 {
425+
if err := asgSvc.SuspendProcesses(existingASG.Name, toBeSuspended); err != nil {
426+
return errors.Wrapf(err, "failed to suspend processes while trying update pool")
427+
}
428+
}
429+
if len(toBeResumed) > 0 {
430+
if err := asgSvc.ResumeProcesses(existingASG.Name, toBeResumed); err != nil {
431+
return errors.Wrapf(err, "failed to resume processes while trying update pool")
432+
}
433+
}
434+
}
387435
return nil
388436
}
389437

@@ -397,7 +445,9 @@ func (r *AWSMachinePoolReconciler) createPool(machinePoolScope *scope.MachinePoo
397445
if err != nil {
398446
return nil, errors.Wrapf(err, "failed to create AWSMachinePool")
399447
}
400-
448+
if err := asgsvc.SuspendProcesses(asg.Name, machinePoolScope.AWSMachinePool.Spec.SuspendProcesses); err != nil {
449+
return nil, errors.Wrapf(err, "failed to suspend processes while trying to create Pool")
450+
}
401451
return asg, nil
402452
}
403453

0 commit comments

Comments
 (0)