Skip to content

Commit 8188689

Browse files
Implement autoscaling from zero by auto-populating AWSMachineTemplate capacity
1 parent 15a2d14 commit 8188689

File tree

2 files changed

+209
-0
lines changed

2 files changed

+209
-0
lines changed
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
/*
2+
Copyright 2024 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package controllers
18+
19+
import (
20+
"context"
21+
22+
"github.com/aws/aws-sdk-go-v2/service/ec2"
23+
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
24+
"github.com/pkg/errors"
25+
corev1 "k8s.io/api/core/v1"
26+
apierrors "k8s.io/apimachinery/pkg/api/errors"
27+
"k8s.io/apimachinery/pkg/api/resource"
28+
ctrl "sigs.k8s.io/controller-runtime"
29+
"sigs.k8s.io/controller-runtime/pkg/client"
30+
"sigs.k8s.io/controller-runtime/pkg/controller"
31+
32+
infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
33+
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
34+
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
35+
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
36+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
37+
"sigs.k8s.io/cluster-api/util/predicates"
38+
)
39+
40+
// AWSMachineTemplateReconciler reconciles AWSMachineTemplate objects.
41+
//
42+
// This controller automatically populates capacity information for AWSMachineTemplate resources
43+
// to enable autoscaling from zero. The capacity is queried from AWS EC2 API when:
44+
// 1. The template has a Cluster in its ownerReferences (e.g., Hypershift)
45+
// 2. AWS credentials are available via the default credential chain
46+
// 3. The status.capacity field is not already set
47+
//
48+
// For templates without owner references, capacity must be set manually.
49+
//
50+
// See: https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
51+
type AWSMachineTemplateReconciler struct {
52+
client.Client
53+
WatchFilterValue string
54+
}
55+
56+
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinetemplates,verbs=get;list;watch
57+
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinetemplates/status,verbs=get;update;patch
58+
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusters,verbs=get;list;watch
59+
// +kubebuilder:rbac:groups=controlplane.cluster.x-k8s.io,resources=awsmanagedcontrolplanes,verbs=get;list;watch
60+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch
61+
// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
62+
63+
// Reconcile populates capacity information for AWSMachineTemplate.
64+
func (r *AWSMachineTemplateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
65+
log := logger.FromContext(ctx)
66+
67+
// Fetch the AWSMachineTemplate
68+
awsMachineTemplate := &infrav1.AWSMachineTemplate{}
69+
if err := r.Get(ctx, req.NamespacedName, awsMachineTemplate); err != nil {
70+
if apierrors.IsNotFound(err) {
71+
return ctrl.Result{}, nil
72+
}
73+
return ctrl.Result{}, err
74+
}
75+
76+
// Skip if capacity is already set
77+
if awsMachineTemplate.Status.Capacity != nil && len(awsMachineTemplate.Status.Capacity) > 0 {
78+
return ctrl.Result{}, nil
79+
}
80+
81+
// Get instance type from spec
82+
instanceType := awsMachineTemplate.Spec.Template.Spec.InstanceType
83+
if instanceType == "" {
84+
return ctrl.Result{}, nil
85+
}
86+
87+
// Find the region by checking ownerReferences
88+
region, err := r.getRegion(ctx, awsMachineTemplate)
89+
if err != nil {
90+
return ctrl.Result{}, nil
91+
}
92+
if region == "" {
93+
return ctrl.Result{}, nil
94+
}
95+
96+
// Create global scope for this region
97+
// Reference: exp/instancestate/awsinstancestate_controller.go:68-76
98+
globalScope, err := scope.NewGlobalScope(scope.GlobalScopeParams{
99+
ControllerName: "awsmachinetemplate",
100+
Region: region,
101+
})
102+
if err != nil {
103+
record.Warnf(awsMachineTemplate, "AWSSessionFailed", "Failed to create AWS session for region %q: %v", region, err)
104+
return ctrl.Result{}, nil
105+
}
106+
107+
// Query instance type capacity
108+
capacity, err := r.getInstanceTypeCapacity(ctx, globalScope, instanceType)
109+
if err != nil {
110+
record.Warnf(awsMachineTemplate, "CapacityQueryFailed", "Failed to query capacity for instance type %q: %v", instanceType, err)
111+
return ctrl.Result{}, nil
112+
}
113+
114+
// Update status with capacity
115+
awsMachineTemplate.Status.Capacity = capacity
116+
117+
if err := r.Status().Update(ctx, awsMachineTemplate); err != nil {
118+
return ctrl.Result{}, errors.Wrap(err, "failed to update AWSMachineTemplate status")
119+
}
120+
121+
log.Info("Successfully populated capacity information", "instanceType", instanceType, "region", region, "capacity", capacity)
122+
return ctrl.Result{}, nil
123+
}
124+
125+
// getRegion finds the region by checking the template's owner cluster reference.
126+
func (r *AWSMachineTemplateReconciler) getRegion(ctx context.Context, template *infrav1.AWSMachineTemplate) (string, error) {
127+
// Check if the template has a Cluster owner reference (e.g., in Hypershift scenarios)
128+
for _, owner := range template.GetOwnerReferences() {
129+
if owner.Kind == "Cluster" && owner.APIVersion == clusterv1.GroupVersion.String() {
130+
cluster := &clusterv1.Cluster{}
131+
clusterKey := client.ObjectKey{
132+
Namespace: template.Namespace,
133+
Name: owner.Name,
134+
}
135+
if err := r.Get(ctx, clusterKey, cluster); err != nil {
136+
continue
137+
}
138+
139+
// Try to get region from AWSCluster (standard cluster)
140+
if cluster.Spec.InfrastructureRef != nil && cluster.Spec.InfrastructureRef.Kind == "AWSCluster" {
141+
awsCluster := &infrav1.AWSCluster{}
142+
if err := r.Get(ctx, client.ObjectKey{
143+
Namespace: cluster.Namespace,
144+
Name: cluster.Spec.InfrastructureRef.Name,
145+
}, awsCluster); err == nil && awsCluster.Spec.Region != "" {
146+
return awsCluster.Spec.Region, nil
147+
}
148+
}
149+
}
150+
}
151+
152+
return "", nil
153+
}
154+
155+
// getInstanceTypeCapacity queries AWS EC2 API for instance type capacity.
156+
func (r *AWSMachineTemplateReconciler) getInstanceTypeCapacity(ctx context.Context, globalScope *scope.GlobalScope, instanceType string) (corev1.ResourceList, error) {
157+
// Create EC2 client from global scope
158+
ec2Client := ec2.NewFromConfig(globalScope.Session())
159+
160+
// Query instance type information
161+
input := &ec2.DescribeInstanceTypesInput{
162+
InstanceTypes: []ec2types.InstanceType{ec2types.InstanceType(instanceType)},
163+
}
164+
165+
result, err := ec2Client.DescribeInstanceTypes(ctx, input)
166+
if err != nil {
167+
return nil, errors.Wrapf(err, "failed to describe instance type %q", instanceType)
168+
}
169+
170+
if len(result.InstanceTypes) == 0 {
171+
return nil, errors.Errorf("no information found for instance type %q", instanceType)
172+
}
173+
174+
// Extract capacity information
175+
info := result.InstanceTypes[0]
176+
resourceList := corev1.ResourceList{}
177+
178+
// CPU
179+
if info.VCpuInfo != nil && info.VCpuInfo.DefaultVCpus != nil {
180+
resourceList[corev1.ResourceCPU] = *resource.NewQuantity(int64(*info.VCpuInfo.DefaultVCpus), resource.DecimalSI)
181+
}
182+
183+
// Memory
184+
if info.MemoryInfo != nil && info.MemoryInfo.SizeInMiB != nil {
185+
memoryBytes := *info.MemoryInfo.SizeInMiB * 1024 * 1024
186+
resourceList[corev1.ResourceMemory] = *resource.NewQuantity(memoryBytes, resource.BinarySI)
187+
}
188+
return resourceList, nil
189+
}
190+
191+
// SetupWithManager sets up the controller with the Manager.
192+
func (r *AWSMachineTemplateReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
193+
log := logger.FromContext(ctx)
194+
195+
return ctrl.NewControllerManagedBy(mgr).
196+
For(&infrav1.AWSMachineTemplate{}).
197+
WithOptions(options).
198+
WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), log.GetLogger(), r.WatchFilterValue)).
199+
Complete(r)
200+
}

main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,15 @@ func setupReconcilersAndWebhooks(ctx context.Context, mgr ctrl.Manager,
402402
}
403403
}
404404

405+
setupLog.Debug("enabling AWSMachineTemplate controller")
406+
if err := (&controllers.AWSMachineTemplateReconciler{
407+
Client: mgr.GetClient(),
408+
WatchFilterValue: watchFilterValue,
409+
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsClusterConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
410+
setupLog.Error(err, "unable to create controller", "controller", "AWSMachineTemplate")
411+
os.Exit(1)
412+
}
413+
405414
if err := (&infrav1.AWSMachineTemplate{}).SetupWebhookWithManager(mgr); err != nil {
406415
setupLog.Error(err, "unable to create webhook", "webhook", "AWSMachineTemplate")
407416
os.Exit(1)

0 commit comments

Comments
 (0)