Skip to content

Commit ca88570

Browse files
committed
Update cluster-api provider to use machineTemplate.status.nodeInfo for architecture-aware autoscale from zero
kubernetes-sigs/cluster-api#11962 introduced the nodeInfo field for MachineTemplates. Providers can reconcile this field in the status subresource to inform the autoscaler about the architecture and operating system that the MachineTemplate's nodes will run. Previously, the cluster autoscaler implemented this behavior by relying on the labels capacity annotation and, as a fallback, on default values set in environment variables at cluster-autoscaler deployment time. With this commit, the cluster autoscaler computes the future architecture of a node using the following sources, in decreasing order of priority:
- Labels set on existing nodes (when the node group is not being scaled from zero).
- Labels set in the labels capacity annotation of the machine template, machine set, or machine deployment.
- Values in the status.nodeInfo field of MachineTemplates.
- Generic/default labels set in the environment of the cluster autoscaler.
1 parent 9a256e5 commit ca88570

File tree

2 files changed

+49
-3
lines changed

2 files changed

+49
-3
lines changed

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,17 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
361361
},
362362
}
363363

364+
nsi := ng.scalableResource.InstanceSystemInfo()
365+
if nsi != nil {
366+
node.Status.NodeInfo = *nsi
367+
}
368+
364369
node.Status.Capacity = capacity
365370
node.Status.Allocatable = capacity
366371
node.Status.Conditions = cloudprovider.BuildReadyConditions()
367372
node.Spec.Taints = ng.scalableResource.Taints()
368373

369-
node.Labels, err = ng.buildTemplateLabels(nodeName)
374+
node.Labels, err = ng.buildTemplateLabels(nodeName, nsi)
370375
if err != nil {
371376
return nil, err
372377
}
@@ -380,8 +385,19 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
380385
return nodeInfo, nil
381386
}
382387

383-
func (ng *nodegroup) buildTemplateLabels(nodeName string) (map[string]string, error) {
384-
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), ng.scalableResource.Labels())
388+
func (ng *nodegroup) buildTemplateLabels(nodeName string, nsi *corev1.NodeSystemInfo) (map[string]string, error) {
389+
nsiLabels := make(map[string]string)
390+
if nsi != nil {
391+
nsiLabels[corev1.LabelArchStable] = nsi.Architecture
392+
nsiLabels[corev1.LabelOSStable] = nsi.OperatingSystem
393+
}
394+
395+
// The order of priority is:
396+
// - Labels set in existing nodes for not-autoscale-from-zero cases
397+
// - Labels set in the labels capacity annotation of machine template, machine set, and machine deployment.
398+
// - Values in the status.nodeSystemInfo of MachineTemplates
399+
// - Generic/default labels set in the environment of the cluster autoscaler
400+
labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), nsiLabels, ng.scalableResource.Labels())
385401

386402
nodes, err := ng.Nodes()
387403
if err != nil {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,17 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
304304
return capacity, nil
305305
}
306306

307+
// InstanceSystemInfo sets the nodeSystemInfo from the infrastructure reference resource.
308+
// If the infrastructure reference resource is not found, returns nil.
309+
func (r unstructuredScalableResource) InstanceSystemInfo() *apiv1.NodeSystemInfo {
310+
infraObj, err := r.readInfrastructureReferenceResource()
311+
if err != nil || infraObj == nil {
312+
return nil
313+
}
314+
nsiObj := systemInfoFromInfrastructureObject(infraObj)
315+
return &nsiObj
316+
}
317+
307318
func (r unstructuredScalableResource) InstanceResourceSlices(nodeName string) ([]*resourceapi.ResourceSlice, error) {
308319
var result []*resourceapi.ResourceSlice
309320
driver := r.InstanceDRADriver()
@@ -440,6 +451,25 @@ func resourceCapacityFromInfrastructureObject(infraobj *unstructured.Unstructure
440451
return capacity
441452
}
442453

454+
func systemInfoFromInfrastructureObject(infraobj *unstructured.Unstructured) apiv1.NodeSystemInfo {
455+
nsi := apiv1.NodeSystemInfo{}
456+
infransi, found, err := unstructured.NestedStringMap(infraobj.Object, "status", "nodeInfo")
457+
if !found || err != nil {
458+
return nsi
459+
}
460+
461+
for k, v := range infransi {
462+
switch k {
463+
case "architecture":
464+
nsi.Architecture = v
465+
case "operatingSystem":
466+
nsi.OperatingSystem = v
467+
}
468+
}
469+
470+
return nsi
471+
}
472+
443473
// adapted from https://github.com/kubernetes/kubernetes/blob/release-1.25/pkg/util/taints/taints.go#L39
444474
func parseTaint(st string) (apiv1.Taint, error) {
445475
var taint apiv1.Taint

0 commit comments

Comments
 (0)