Skip to content

Commit ed43258

Browse files
committed
Update cluster-api provider to use machineTemplate.status.nodeInfo for architecture-aware autoscale from zero
kubernetes-sigs/cluster-api#11962 introduced the nodeInfo field for MachineTemplates. Providers can reconcile this field in the status subresource to inform the autoscaler about the architecture and operating system that the MachineTemplate's nodes will run. Previously, the cluster autoscaler implemented this behavior by leveraging the labels capacity annotation and, as a fallback, default values set in environment variables at cluster-autoscaler deployment time. With this commit, the cluster autoscaler computes the future architecture of a node using the following priority order (highest first):
- Labels set on existing nodes, for cases that are not autoscale-from-zero
- Labels set in the labels capacity annotation of the machine template, machine set, and machine deployment
- Values in the status.nodeInfo of MachineTemplates
- Generic/default labels set in the environment of the cluster autoscaler
1 parent 9a256e5 commit ed43258

File tree

2 files changed

+57
-3
lines changed

2 files changed

+57
-3
lines changed

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,17 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
361361
},
362362
}
363363

364+
nsi := ng.scalableResource.InstanceSystemInfo()
365+
if nsi != nil {
366+
node.Status.NodeInfo = *nsi
367+
}
368+
364369
node.Status.Capacity = capacity
365370
node.Status.Allocatable = capacity
366371
node.Status.Conditions = cloudprovider.BuildReadyConditions()
367372
node.Spec.Taints = ng.scalableResource.Taints()
368373

369-
node.Labels, err = ng.buildTemplateLabels(nodeName)
374+
node.Labels, err = ng.buildTemplateLabels(nodeName, nsi)
370375
if err != nil {
371376
return nil, err
372377
}
@@ -380,8 +385,19 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
380385
return nodeInfo, nil
381386
}
382387

383-
func (ng *nodegroup) buildTemplateLabels(nodeName string) (map[string]string, error) {
384-
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), ng.scalableResource.Labels())
388+
func (ng *nodegroup) buildTemplateLabels(nodeName string, nsi *corev1.NodeSystemInfo) (map[string]string, error) {
389+
nsiLabels := make(map[string]string)
390+
if nsi != nil {
391+
nsiLabels[corev1.LabelArchStable] = nsi.Architecture
392+
nsiLabels[corev1.LabelOSStable] = nsi.OperatingSystem
393+
}
394+
395+
// The order of priority is:
396+
// - Labels set in existing nodes for not-autoscale-from-zero cases
397+
// - Labels set in the labels capacity annotation of machine template, machine set, and machine deployment.
398+
// - Values in the status.nodeInfo of MachineTemplates
399+
// - Generic/default labels set in the environment of the cluster autoscaler
400+
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), nsiLabels, ng.scalableResource.Labels())
385401

386402
nodes, err := ng.Nodes()
387403
if err != nil {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
type unstructuredScalableResource struct {
4444
controller *machineController
4545
unstructured *unstructured.Unstructured
46+
infraObj *unstructured.Unstructured
4647
maxSize int
4748
minSize int
4849
autoscalingOptions map[string]string
@@ -304,6 +305,17 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
304305
return capacity, nil
305306
}
306307

308+
// InstanceSystemInfo returns the NodeSystemInfo read from the infrastructure reference resource.
309+
// If the infrastructure reference resource cannot be read or is not found, it returns nil.
310+
func (r unstructuredScalableResource) InstanceSystemInfo() *apiv1.NodeSystemInfo {
311+
infraObj, err := r.readInfrastructureReferenceResource()
312+
if err != nil || infraObj == nil {
313+
return nil
314+
}
315+
nsiObj := systemInfoFromInfrastructureObject(infraObj)
316+
return &nsiObj
317+
}
318+
307319
func (r unstructuredScalableResource) InstanceResourceSlices(nodeName string) ([]*resourceapi.ResourceSlice, error) {
308320
var result []*resourceapi.ResourceSlice
309321
driver := r.InstanceDRADriver()
@@ -375,6 +387,11 @@ func (r unstructuredScalableResource) InstanceDRADriver() string {
375387
}
376388

377389
func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) {
390+
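// Cache with lazy loading of the infrastructure reference resource.
// NOTE(review): this method has a value receiver, so the assignment to r.infraObj
// further down is made on a copy — confirm the cached value actually persists across calls.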
391+
if r.infraObj != nil {
392+
return r.infraObj, nil
393+
}
394+
378395
infraref, found, err := unstructured.NestedStringMap(r.unstructured.Object, "spec", "template", "spec", "infrastructureRef")
379396
if !found || err != nil {
380397
return nil, nil
@@ -403,6 +420,8 @@ func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*un
403420
return nil, err
404421
}
405422

423+
r.infraObj = infra
424+
406425
return infra, nil
407426
}
408427

@@ -440,6 +459,25 @@ func resourceCapacityFromInfrastructureObject(infraobj *unstructured.Unstructure
440459
return capacity
441460
}
442461

462+
func systemInfoFromInfrastructureObject(infraobj *unstructured.Unstructured) apiv1.NodeSystemInfo {
463+
nsi := apiv1.NodeSystemInfo{}
464+
infransi, found, err := unstructured.NestedStringMap(infraobj.Object, "status", "nodeInfo")
465+
if !found || err != nil {
466+
return nsi
467+
}
468+
469+
for k, v := range infransi {
470+
switch k {
471+
case "architecture":
472+
nsi.Architecture = v
473+
case "operatingSystem":
474+
nsi.OperatingSystem = v
475+
}
476+
}
477+
478+
return nsi
479+
}
480+
443481
// adapted from https://github.com/kubernetes/kubernetes/blob/release-1.25/pkg/util/taints/taints.go#L39
444482
func parseTaint(st string) (apiv1.Taint, error) {
445483
var taint apiv1.Taint

0 commit comments

Comments
 (0)