Skip to content

Commit 627f307

Browse files
committed
Update cluster-api provider to use machineTemplate.status.nodeInfo for architecture-aware autoscale from zero
kubernetes-sigs/cluster-api#11962 introduced the nodeInfo field for MachineTemplates. Providers can reconcile this field in the status subresource to inform the autoscaler about the architecture and operating system that the MachineTemplate's nodes will run. Previously, the cluster autoscaler implemented this behavior by leveraging the labels capacity annotation and, as a fallback, default values set in environment variables at cluster-autoscaler deployment time. With this commit, the cluster autoscaler computes the future architecture and operating system of a node with the following priority order (highest first):
- Labels set on existing nodes, for cases that are not autoscale-from-zero.
- Labels set in the labels capacity annotation of the machine template, machine set, and machine deployment.
- Values in the status.nodeInfo of MachineTemplates.
- Generic/default labels set in the environment of the cluster autoscaler.
1 parent 9a256e5 commit 627f307

File tree

2 files changed

+64
-3
lines changed

2 files changed

+64
-3
lines changed

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,17 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
361361
},
362362
}
363363

364+
nsi := ng.scalableResource.InstanceSystemInfo()
365+
if nsi != nil {
366+
node.Status.NodeInfo = *nsi
367+
}
368+
364369
node.Status.Capacity = capacity
365370
node.Status.Allocatable = capacity
366371
node.Status.Conditions = cloudprovider.BuildReadyConditions()
367372
node.Spec.Taints = ng.scalableResource.Taints()
368373

369-
node.Labels, err = ng.buildTemplateLabels(nodeName)
374+
node.Labels, err = ng.buildTemplateLabels(nodeName, nsi)
370375
if err != nil {
371376
return nil, err
372377
}
@@ -380,8 +385,19 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
380385
return nodeInfo, nil
381386
}
382387

383-
func (ng *nodegroup) buildTemplateLabels(nodeName string) (map[string]string, error) {
384-
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), ng.scalableResource.Labels())
388+
func (ng *nodegroup) buildTemplateLabels(nodeName string, nsi *corev1.NodeSystemInfo) (map[string]string, error) {
389+
nsiLabels := make(map[string]string)
390+
if nsi != nil {
391+
nsiLabels[corev1.LabelArchStable] = nsi.Architecture
392+
nsiLabels[corev1.LabelOSStable] = nsi.OperatingSystem
393+
}
394+
395+
// The order of priority is:
396+
// - Labels set in existing nodes for not-autoscale-from-zero cases
397+
// - Labels set in the labels capacity annotation of machine template, machine set, and machine deployment.
398+
// - Values in the status.nodeSystemInfo of MachineTemplates
399+
// - Generic/default labels set in the environment of the cluster autoscaler
400+
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), nsiLabels, ng.scalableResource.Labels())
385401

386402
nodes, err := ng.Nodes()
387403
if err != nil {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"path"
2424
"strconv"
2525
"strings"
26+
"sync"
2627
"time"
2728

2829
"github.com/pkg/errors"
@@ -43,6 +44,8 @@ import (
4344
type unstructuredScalableResource struct {
4445
controller *machineController
4546
unstructured *unstructured.Unstructured
47+
infraObj *unstructured.Unstructured
48+
infraMutex sync.RWMutex
4649
maxSize int
4750
minSize int
4851
autoscalingOptions map[string]string
@@ -304,6 +307,17 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
304307
return capacity, nil
305308
}
306309

310+
// InstanceSystemInfo sets the nodeSystemInfo from the infrastructure reference resource.
311+
// If the infrastructure reference resource is not found, returns nil.
312+
func (r unstructuredScalableResource) InstanceSystemInfo() *apiv1.NodeSystemInfo {
313+
infraObj, err := r.readInfrastructureReferenceResource()
314+
if err != nil || infraObj == nil {
315+
return nil
316+
}
317+
nsiObj := systemInfoFromInfrastructureObject(infraObj)
318+
return &nsiObj
319+
}
320+
307321
func (r unstructuredScalableResource) InstanceResourceSlices(nodeName string) ([]*resourceapi.ResourceSlice, error) {
308322
var result []*resourceapi.ResourceSlice
309323
driver := r.InstanceDRADriver()
@@ -375,6 +389,16 @@ func (r unstructuredScalableResource) InstanceDRADriver() string {
375389
}
376390

377391
func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) {
392+
// Cache w/ lazy loading of the infrastructure reference resource.
393+
r.infraMutex.RLock()
394+
if r.infraObj != nil {
395+
defer r.infraMutex.RUnlock()
396+
return r.infraObj, nil
397+
}
398+
r.infraMutex.RUnlock()
399+
r.infraMutex.Lock()
400+
defer r.infraMutex.Unlock()
401+
378402
infraref, found, err := unstructured.NestedStringMap(r.unstructured.Object, "spec", "template", "spec", "infrastructureRef")
379403
if !found || err != nil {
380404
return nil, nil
@@ -403,6 +427,8 @@ func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*un
403427
return nil, err
404428
}
405429

430+
r.infraObj = infra
431+
406432
return infra, nil
407433
}
408434

@@ -440,6 +466,25 @@ func resourceCapacityFromInfrastructureObject(infraobj *unstructured.Unstructure
440466
return capacity
441467
}
442468

469+
func systemInfoFromInfrastructureObject(infraobj *unstructured.Unstructured) apiv1.NodeSystemInfo {
470+
nsi := apiv1.NodeSystemInfo{}
471+
infransi, found, err := unstructured.NestedStringMap(infraobj.Object, "status", "nodeInfo")
472+
if !found || err != nil {
473+
return nsi
474+
}
475+
476+
for k, v := range infransi {
477+
switch k {
478+
case "architecture":
479+
nsi.Architecture = v
480+
case "operatingSystem":
481+
nsi.OperatingSystem = v
482+
}
483+
}
484+
485+
return nsi
486+
}
487+
443488
// adapted from https://github.com/kubernetes/kubernetes/blob/release-1.25/pkg/util/taints/taints.go#L39
444489
func parseTaint(st string) (apiv1.Taint, error) {
445490
var taint apiv1.Taint

0 commit comments

Comments
 (0)