Skip to content

Commit 292862f

Browse files
committed
feat: support capacity property for autoscale from zero
1 parent b71127b commit 292862f

10 files changed

+341
-2
lines changed

api/v1beta1/azuremachinetemplate_types.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,20 @@ limitations under the License.
1717
package v1beta1
1818

1919
import (
20+
corev1 "k8s.io/api/core/v1"
2021
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2122
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2223
)
2324

25+
// AzureMachineTemplateStatus defines the observed state for an AzureMachineTemplate.
26+
type AzureMachineTemplateStatus struct {
27+
// Capacity defines the resource capacity for this machine.
28+
// This value is used for autoscaling from zero operations as defined in:
29+
// https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
30+
// +optional
31+
Capacity corev1.ResourceList `json:"capacity,omitempty"`
32+
}
33+
2434
// AzureMachineTemplateSpec defines the desired state of AzureMachineTemplate.
2535
type AzureMachineTemplateSpec struct {
2636
Template AzureMachineTemplateResource `json:"template"`
@@ -35,7 +45,8 @@ type AzureMachineTemplate struct {
3545
metav1.TypeMeta `json:",inline"`
3646
metav1.ObjectMeta `json:"metadata,omitempty"`
3747

38-
Spec AzureMachineTemplateSpec `json:"spec,omitempty"`
48+
Spec AzureMachineTemplateSpec `json:"spec,omitempty"`
49+
Status AzureMachineTemplateStatus `json:"status,omitempty"`
3950
}
4051

4152
// +kubebuilder:object:root=true

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

azure/services/resourceskus/sku.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,13 @@ limitations under the License.
1717
package resourceskus
1818

1919
import (
20+
"fmt"
2021
"strconv"
2122
"strings"
2223

24+
corev1 "k8s.io/api/core/v1"
25+
"k8s.io/apimachinery/pkg/api/resource"
26+
2327
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
2428
"github.com/pkg/errors"
2529
"k8s.io/utils/ptr"
@@ -169,3 +173,19 @@ func (s SKU) HasLocationCapability(capabilityName, location, zone string) bool {
169173
}
170174
return false
171175
}
176+
177+
func MapCapabilitiesToResourceList(capabilities []*armcompute.ResourceSKUCapabilities) corev1.ResourceList {
178+
rl := make(corev1.ResourceList)
179+
for _, c := range capabilities {
180+
if c != nil || c.Name == nil || c.Value == nil {
181+
continue
182+
}
183+
switch *c.Name {
184+
case VCPUs:
185+
rl[corev1.ResourceCPU] = resource.MustParse(*c.Value)
186+
case MemoryGB:
187+
rl[corev1.ResourceMemory] = resource.MustParse(fmt.Sprintf("%sG", *c.Value))
188+
}
189+
}
190+
return rl
191+
}

config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepools.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,18 @@ spec:
796796
status:
797797
description: AzureMachinePoolStatus defines the observed state of AzureMachinePool.
798798
properties:
799+
capacity:
800+
additionalProperties:
801+
anyOf:
802+
- type: integer
803+
- type: string
804+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
805+
x-kubernetes-int-or-string: true
806+
description: |-
807+
Capacity defines the resource capacity for this machine.
808+
This value is used for autoscaling from zero operations as defined in:
809+
https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
810+
type: object
799811
conditions:
800812
description: Conditions defines current service state of the AzureMachinePool.
801813
items:

config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinetemplates.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,23 @@ spec:
748748
required:
749749
- template
750750
type: object
751+
status:
752+
description: AzureMachineTemplateStatus defines the observed state for
753+
an AzureMachineTemplate.
754+
properties:
755+
capacity:
756+
additionalProperties:
757+
anyOf:
758+
- type: integer
759+
- type: string
760+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
761+
x-kubernetes-int-or-string: true
762+
description: |-
763+
Capacity defines the resource capacity for this machine.
764+
This value is used for autoscaling from zero operations as defined in:
765+
https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
766+
type: object
767+
type: object
751768
type: object
752769
served: true
753770
storage: true
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
package controllers
2+
3+
import (
4+
"context"
5+
6+
"sigs.k8s.io/cluster-api/util/patch"
7+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
8+
9+
"k8s.io/apimachinery/pkg/types"
10+
"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
11+
"sigs.k8s.io/cluster-api-provider-azure/azure/services/resourceskus"
12+
13+
apierrors "k8s.io/apimachinery/pkg/api/errors"
14+
"sigs.k8s.io/cluster-api/util/annotations"
15+
16+
"github.com/pkg/errors"
17+
"k8s.io/client-go/tools/record"
18+
infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
19+
"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
20+
"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
21+
"sigs.k8s.io/cluster-api-provider-azure/util/tele"
22+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
23+
"sigs.k8s.io/cluster-api/util"
24+
"sigs.k8s.io/cluster-api/util/predicates"
25+
ctrl "sigs.k8s.io/controller-runtime"
26+
"sigs.k8s.io/controller-runtime/pkg/builder"
27+
"sigs.k8s.io/controller-runtime/pkg/client"
28+
"sigs.k8s.io/controller-runtime/pkg/handler"
29+
)
30+
31+
type AzureMachineTemplateReconciler struct {
32+
client.Client
33+
Recorder record.EventRecorder
34+
Timeouts reconciler.Timeouts
35+
WatchFilterValue string
36+
}
37+
38+
func NewAzureMachineTemplateReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureMachineTemplateReconciler {
39+
return &AzureMachineTemplateReconciler{
40+
Client: client,
41+
Recorder: recorder,
42+
Timeouts: timeouts,
43+
WatchFilterValue: watchFilterValue,
44+
}
45+
}
46+
47+
func (amtr *AzureMachineTemplateReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options Options) error {
48+
ctx, log, done := tele.StartSpanWithLogger(ctx,
49+
"controllers.AzureMachineTemplateReconciler.SetupWithManager",
50+
tele.KVP("controller", "AzureMachineTemplate"),
51+
)
52+
defer done()
53+
54+
var r reconcile.Reconciler = amtr
55+
coalescing.NewReconciler(amtr, options.Cache, log)
56+
if options.Cache != nil {
57+
r = coalescing.NewReconciler(amtr, options.Cache, log)
58+
}
59+
60+
azureMachineTemplateMapper, err := util.ClusterToTypedObjectsMapper(amtr.Client, &infrav1.AzureMachineTemplateList{}, mgr.GetScheme())
61+
if err != nil {
62+
return errors.Wrap(err, "failed to create mapper for Cluster to AzureMachineTemplates")
63+
}
64+
65+
return ctrl.NewControllerManagedBy(mgr).
66+
WithOptions(options.Options).
67+
For(&infrav1.AzureMachineTemplate{}).
68+
WithEventFilter(predicates.ResourceHasFilterLabel(log, amtr.WatchFilterValue)).
69+
// Add a watch on Clusters to requeue when the infraRef is set. This is needed because the infraRef is not initially
70+
// set in Clusters created from a ClusterClass.
71+
Watches(
72+
&clusterv1.Cluster{},
73+
handler.EnqueueRequestsFromMapFunc(azureMachineTemplateMapper),
74+
builder.WithPredicates(
75+
predicates.ClusterUnpausedAndInfrastructureReady(log),
76+
predicates.ResourceNotPausedAndHasFilterLabel(log, amtr.WatchFilterValue),
77+
),
78+
).
79+
Complete(r)
80+
}
81+
82+
func (amtr *AzureMachineTemplateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
83+
ctx, cancel := context.WithTimeout(ctx, amtr.Timeouts.DefaultedLoopTimeout())
84+
defer cancel()
85+
86+
ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachineTemplateReconciler.Reconcile",
87+
tele.KVP("namespace", req.Namespace),
88+
tele.KVP("name", req.Name),
89+
tele.KVP("kind", "AzureMachineTemplate"),
90+
)
91+
defer done()
92+
93+
// Fetch the AzureMachineTemplate instance
94+
azureMachineTemplate := &infrav1.AzureMachineTemplate{}
95+
err := amtr.Get(ctx, req.NamespacedName, azureMachineTemplate)
96+
if err != nil {
97+
if apierrors.IsNotFound(err) {
98+
log.Info("object was not found")
99+
return reconcile.Result{}, nil
100+
}
101+
return ctrl.Result{}, err
102+
}
103+
104+
if !azureMachineTemplate.ObjectMeta.DeletionTimestamp.IsZero() {
105+
return ctrl.Result{}, nil
106+
}
107+
108+
// Fetch the Cluster.
109+
cluster, err := util.GetOwnerCluster(ctx, amtr.Client, azureMachineTemplate.ObjectMeta)
110+
if err != nil {
111+
return ctrl.Result{}, err
112+
}
113+
if cluster == nil {
114+
log.Info("Cluster Controller has not yet set OwnerRef")
115+
return ctrl.Result{}, nil
116+
}
117+
118+
log = log.WithValues("cluster", cluster.Name)
119+
120+
// Return early if the object or Cluster is paused.
121+
if annotations.IsPaused(cluster, azureMachineTemplate) {
122+
log.Info("AzureMachineTemplate or linked Cluster is marked as paused. Won't reconcile")
123+
return ctrl.Result{}, nil
124+
}
125+
126+
// only look at azure clusters
127+
if cluster.Spec.InfrastructureRef == nil {
128+
log.Info("infra ref is nil")
129+
return ctrl.Result{}, nil
130+
}
131+
if cluster.Spec.InfrastructureRef.Kind != infrav1.AzureClusterKind {
132+
log.WithValues("kind", cluster.Spec.InfrastructureRef.Kind).Info("infra ref was not an AzureCluster")
133+
return ctrl.Result{}, nil
134+
}
135+
136+
// fetch the corresponding azure cluster
137+
azureCluster := &infrav1.AzureCluster{}
138+
azureClusterName := types.NamespacedName{
139+
Namespace: req.Namespace,
140+
Name: cluster.Spec.InfrastructureRef.Name,
141+
}
142+
143+
if err := amtr.Get(ctx, azureClusterName, azureCluster); err != nil {
144+
log.Error(err, "failed to fetch AzureCluster")
145+
return ctrl.Result{}, err
146+
}
147+
148+
// Create the scope.
149+
clusterScope, err := scope.NewClusterScope(ctx, scope.ClusterScopeParams{
150+
Client: amtr.Client,
151+
Cluster: cluster,
152+
AzureCluster: azureCluster,
153+
Timeouts: amtr.Timeouts,
154+
})
155+
if err != nil {
156+
return ctrl.Result{}, errors.Wrap(err, "failed to create scope")
157+
}
158+
159+
if azureMachineTemplate.Status.Capacity != nil {
160+
log.V(4).Info("capacity already set, done reconciling")
161+
return ctrl.Result{}, nil
162+
}
163+
164+
helper, err := patch.NewHelper(azureMachineTemplate, amtr.Client)
165+
if err != nil {
166+
return ctrl.Result{}, errors.Wrap(err, "failed to init patch helper")
167+
}
168+
169+
defer func() {
170+
if err := helper.Patch(ctx, azureMachineTemplate); err != nil {
171+
reterr = err
172+
}
173+
}()
174+
175+
skuCache, err := resourceskus.GetCache(clusterScope, clusterScope.Location())
176+
if err != nil {
177+
return ctrl.Result{}, err
178+
}
179+
180+
vmSKU, err := skuCache.Get(ctx, azureMachineTemplate.Spec.Template.Spec.VMSize, resourceskus.VirtualMachines)
181+
if err != nil {
182+
return ctrl.Result{}, errors.Wrapf(err, "failed to get VM SKU %s in compute api", azureMachineTemplate.Spec.Template.Spec.VMSize)
183+
}
184+
185+
azureMachineTemplate.Status.Capacity = resourceskus.MapCapabilitiesToResourceList(vmSKU.Capabilities)
186+
187+
return ctrl.Result{}, nil
188+
}

exp/api/v1beta1/azuremachinepool_types.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package v1beta1
1818

1919
import (
20+
corev1 "k8s.io/api/core/v1"
2021
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2122
"k8s.io/apimachinery/pkg/util/intstr"
2223
infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
@@ -315,6 +316,12 @@ type (
315316
// InfrastructureMachineKind is the kind of the infrastructure resources behind MachinePool Machines.
316317
// +optional
317318
InfrastructureMachineKind string `json:"infrastructureMachineKind,omitempty"`
319+
320+
// Capacity defines the resource capacity for this machine.
321+
// This value is used for autoscaling from zero operations as defined in:
322+
// https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
323+
// +optional
324+
Capacity corev1.ResourceList `json:"capacity,omitempty"`
318325
}
319326

320327
// AzureMachinePoolInstanceStatus provides status information for each instance in the VMSS.

exp/api/v1beta1/zz_generated.deepcopy.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exp/controllers/azuremachinepool_reconciler.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,13 @@ func (s *azureMachinePoolService) Reconcile(ctx context.Context) error {
8484
}
8585
}
8686

87+
vmSKU, err := s.skuCache.Get(ctx, s.scope.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines)
88+
if err != nil {
89+
return errors.Wrapf(err, "failed to get VM SKU %s in compute api", s.scope.AzureMachinePool.Spec.Template.VMSize)
90+
}
91+
92+
s.scope.AzureMachinePool.Status.Capacity = resourceskus.MapCapabilitiesToResourceList(vmSKU.Capabilities)
93+
8794
return nil
8895
}
8996

0 commit comments

Comments
 (0)