Skip to content

Commit 3ed953a

Browse files
committed
Update cluster-api provider to use machineTemplate.status.nodeInfo for architecture-aware autoscale from zero
kubernetes-sigs/cluster-api#11962 introduced the nodeInfo field for MachineTemplates. Providers can reconcile this field in the status subresource to inform the autoscaler about the architecture and operating system that the MachineTemplate's nodes will run. Previously, this behavior was implemented in the cluster autoscaler by leveraging the labels capacity annotation and, as a fallback, default values set in environment variables at cluster-autoscaler deployment time. With this commit, the cluster autoscaler computes the future architecture of a node with the following priority order:
- Labels set in existing nodes, for cases that are not autoscale-from-zero.
- Labels set in the labels capacity annotation of the machine template, machine set, and machine deployment.
- Values in the status.nodeInfo of MachineTemplates.
- Generic/default labels set in the environment of the cluster autoscaler.

# Conflicts:
#   cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go
1 parent f15a08a commit 3ed953a

File tree

2 files changed

+64
-3
lines changed

2 files changed

+64
-3
lines changed

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,17 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
361361
},
362362
}
363363

364+
nsi := ng.scalableResource.InstanceSystemInfo()
365+
if nsi != nil {
366+
node.Status.NodeInfo = *nsi
367+
}
368+
364369
node.Status.Capacity = capacity
365370
node.Status.Allocatable = capacity
366371
node.Status.Conditions = cloudprovider.BuildReadyConditions()
367372
node.Spec.Taints = ng.scalableResource.Taints()
368373

369-
node.Labels, err = ng.buildTemplateLabels(nodeName)
374+
node.Labels, err = ng.buildTemplateLabels(nodeName, nsi)
370375
if err != nil {
371376
return nil, err
372377
}
@@ -380,8 +385,19 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
380385
return nodeInfo, nil
381386
}
382387

383-
func (ng *nodegroup) buildTemplateLabels(nodeName string) (map[string]string, error) {
384-
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), ng.scalableResource.Labels())
388+
func (ng *nodegroup) buildTemplateLabels(nodeName string, nsi *corev1.NodeSystemInfo) (map[string]string, error) {
389+
nsiLabels := make(map[string]string)
390+
if nsi != nil {
391+
nsiLabels[corev1.LabelArchStable] = nsi.Architecture
392+
nsiLabels[corev1.LabelOSStable] = nsi.OperatingSystem
393+
}
394+
395+
// The order of priority is:
396+
// - Labels set in existing nodes for not-autoscale-from-zero cases
397+
// - Labels set in the labels capacity annotation of machine template, machine set, and machine deployment.
398+
// - Values in the status.nodeSystemInfo of MachineTemplates
399+
// - Generic/default labels set in the environment of the cluster autoscaler
400+
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), nsiLabels, ng.scalableResource.Labels())
385401

386402
nodes, err := ng.Nodes()
387403
if err != nil {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"path"
2424
"strconv"
2525
"strings"
26+
"sync"
2627
"time"
2728

2829
"github.com/pkg/errors"
@@ -42,6 +43,8 @@ import (
4243
type unstructuredScalableResource struct {
4344
controller *machineController
4445
unstructured *unstructured.Unstructured
46+
infraObj *unstructured.Unstructured
47+
infraMutex sync.RWMutex
4548
maxSize int
4649
minSize int
4750
autoscalingOptions map[string]string
@@ -321,6 +324,17 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
321324
return capacity, nil
322325
}
323326

327+
// InstanceSystemInfo sets the nodeSystemInfo from the infrastructure reference resource.
328+
// If the infrastructure reference resource is not found, returns nil.
329+
func (r unstructuredScalableResource) InstanceSystemInfo() *apiv1.NodeSystemInfo {
330+
infraObj, err := r.readInfrastructureReferenceResource()
331+
if err != nil || infraObj == nil {
332+
return nil
333+
}
334+
nsiObj := systemInfoFromInfrastructureObject(infraObj)
335+
return &nsiObj
336+
}
337+
324338
func (r unstructuredScalableResource) InstanceResourceSlices(nodeName string) ([]*resourceapi.ResourceSlice, error) {
325339
var result []*resourceapi.ResourceSlice
326340
driver := r.InstanceDRADriver()
@@ -395,6 +409,16 @@ func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*un
395409
obKind := r.unstructured.GetKind()
396410
obName := r.unstructured.GetName()
397411

412+
// Cache w/ lazy loading of the infrastructure reference resource.
413+
r.infraMutex.RLock()
414+
if r.infraObj != nil {
415+
defer r.infraMutex.RUnlock()
416+
return r.infraObj, nil
417+
}
418+
r.infraMutex.RUnlock()
419+
r.infraMutex.Lock()
420+
defer r.infraMutex.Unlock()
421+
398422
infraref, found, err := unstructured.NestedStringMap(r.unstructured.Object, "spec", "template", "spec", "infrastructureRef")
399423
if !found || err != nil {
400424
return nil, nil
@@ -442,6 +466,8 @@ func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*un
442466
return nil, err
443467
}
444468

469+
r.infraObj = infra
470+
445471
return infra, nil
446472
}
447473

@@ -479,6 +505,25 @@ func resourceCapacityFromInfrastructureObject(infraobj *unstructured.Unstructure
479505
return capacity
480506
}
481507

508+
func systemInfoFromInfrastructureObject(infraobj *unstructured.Unstructured) apiv1.NodeSystemInfo {
509+
nsi := apiv1.NodeSystemInfo{}
510+
infransi, found, err := unstructured.NestedStringMap(infraobj.Object, "status", "nodeInfo")
511+
if !found || err != nil {
512+
return nsi
513+
}
514+
515+
for k, v := range infransi {
516+
switch k {
517+
case "architecture":
518+
nsi.Architecture = v
519+
case "operatingSystem":
520+
nsi.OperatingSystem = v
521+
}
522+
}
523+
524+
return nsi
525+
}
526+
482527
// adapted from https://github.com/kubernetes/kubernetes/blob/release-1.25/pkg/util/taints/taints.go#L39
483528
func parseTaint(st string) (apiv1.Taint, error) {
484529
var taint apiv1.Taint

0 commit comments

Comments
 (0)