Skip to content

Commit 2a84032

Browse files
committed
Update cluster-api provider to use machineTemplate.status.nodeInfo for architecture-aware autoscale from zero
kubernetes-sigs/cluster-api#11962 introduced the nodeInfo field for MachineTemplates. Providers can reconcile this field in the status subresource to inform the autoscaler about the architecture and operating system that the MachineTemplate's nodes will run. Previously, the cluster autoscaler implemented this behavior by leveraging the labels capacity annotation and, as a fallback, default values set in environment variables at cluster-autoscaler deployment time. With this commit, the cluster autoscaler computes the future architecture of a node with the following priority order: - Labels set on existing nodes, for cases that are not autoscale-from-zero. - Labels set in the labels capacity annotation of the machine template, machine set, and machine deployment. - Values in the status.nodeInfo of MachineTemplates. - Generic/default labels set in the environment of the cluster autoscaler. # Conflicts: # cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go
1 parent f15a08a commit 2a84032

File tree

2 files changed

+65
-3
lines changed

2 files changed

+65
-3
lines changed

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,17 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
361361
},
362362
}
363363

364+
nsi := ng.scalableResource.InstanceSystemInfo()
365+
if nsi != nil {
366+
node.Status.NodeInfo = *nsi
367+
}
368+
364369
node.Status.Capacity = capacity
365370
node.Status.Allocatable = capacity
366371
node.Status.Conditions = cloudprovider.BuildReadyConditions()
367372
node.Spec.Taints = ng.scalableResource.Taints()
368373

369-
node.Labels, err = ng.buildTemplateLabels(nodeName)
374+
node.Labels, err = ng.buildTemplateLabels(nodeName, nsi)
370375
if err != nil {
371376
return nil, err
372377
}
@@ -380,8 +385,19 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
380385
return nodeInfo, nil
381386
}
382387

383-
func (ng *nodegroup) buildTemplateLabels(nodeName string) (map[string]string, error) {
384-
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), ng.scalableResource.Labels())
388+
func (ng *nodegroup) buildTemplateLabels(nodeName string, nsi *corev1.NodeSystemInfo) (map[string]string, error) {
389+
nsiLabels := make(map[string]string)
390+
if nsi != nil {
391+
nsiLabels[corev1.LabelArchStable] = nsi.Architecture
392+
nsiLabels[corev1.LabelOSStable] = nsi.OperatingSystem
393+
}
394+
395+
// The order of priority is:
396+
// - Labels set in existing nodes for not-autoscale-from-zero cases
397+
// - Labels set in the labels capacity annotation of machine template, machine set, and machine deployment.
398+
// - Values in the status.nodeSystemInfo of MachineTemplates
399+
// - Generic/default labels set in the environment of the cluster autoscaler
400+
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), nsiLabels, ng.scalableResource.Labels())
385401

386402
nodes, err := ng.Nodes()
387403
if err != nil {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"path"
2424
"strconv"
2525
"strings"
26+
"sync"
2627
"time"
2728

2829
"github.com/pkg/errors"
@@ -42,6 +43,8 @@ import (
4243
type unstructuredScalableResource struct {
4344
controller *machineController
4445
unstructured *unstructured.Unstructured
46+
infraObj *unstructured.Unstructured
47+
infraMutex sync.RWMutex
4548
maxSize int
4649
minSize int
4750
autoscalingOptions map[string]string
@@ -321,6 +324,17 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
321324
return capacity, nil
322325
}
323326

327+
// InstanceSystemInfo sets the nodeSystemInfo from the infrastructure reference resource.
328+
// If the infrastructure reference resource is not found, returns nil.
329+
func (r unstructuredScalableResource) InstanceSystemInfo() *apiv1.NodeSystemInfo {
330+
infraObj, err := r.readInfrastructureReferenceResource()
331+
if err != nil || infraObj == nil {
332+
return nil
333+
}
334+
nsiObj := systemInfoFromInfrastructureObject(infraObj)
335+
return &nsiObj
336+
}
337+
324338
func (r unstructuredScalableResource) InstanceResourceSlices(nodeName string) ([]*resourceapi.ResourceSlice, error) {
325339
var result []*resourceapi.ResourceSlice
326340
driver := r.InstanceDRADriver()
@@ -392,6 +406,17 @@ func (r unstructuredScalableResource) InstanceDRADriver() string {
392406
}
393407

394408
func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) {
409+
// Cache w/ lazy loading of the infrastructure reference resource.
410+
r.infraMutex.RLock()
411+
if r.infraObj != nil {
412+
defer r.infraMutex.RUnlock()
413+
return r.infraObj, nil
414+
}
415+
r.infraMutex.RUnlock()
416+
417+
r.infraMutex.Lock()
418+
defer r.infraMutex.Unlock()
419+
395420
obKind := r.unstructured.GetKind()
396421
obName := r.unstructured.GetName()
397422

@@ -442,6 +467,8 @@ func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*un
442467
return nil, err
443468
}
444469

470+
r.infraObj = infra
471+
445472
return infra, nil
446473
}
447474

@@ -479,6 +506,25 @@ func resourceCapacityFromInfrastructureObject(infraobj *unstructured.Unstructure
479506
return capacity
480507
}
481508

509+
func systemInfoFromInfrastructureObject(infraobj *unstructured.Unstructured) apiv1.NodeSystemInfo {
510+
nsi := apiv1.NodeSystemInfo{}
511+
infransi, found, err := unstructured.NestedStringMap(infraobj.Object, "status", "nodeInfo")
512+
if !found || err != nil {
513+
return nsi
514+
}
515+
516+
for k, v := range infransi {
517+
switch k {
518+
case "architecture":
519+
nsi.Architecture = v
520+
case "operatingSystem":
521+
nsi.OperatingSystem = v
522+
}
523+
}
524+
525+
return nsi
526+
}
527+
482528
// adapted from https://github.com/kubernetes/kubernetes/blob/release-1.25/pkg/util/taints/taints.go#L39
483529
func parseTaint(st string) (apiv1.Taint, error) {
484530
var taint apiv1.Taint

0 commit comments

Comments
 (0)