Skip to content

Commit 46c3568

Browse files
committed
Handle the kubernetes.io/architecture label based on the Azure VMSize
Handling the kubernetes.io/architecture label via the labels capacity annotation allows the autoscaler to scale from 0 when workloads define strong predicates, such as required affinities on the architecture of the nodes.
1 parent e3b7287 commit 46c3568

File tree

3 files changed

+139
-0
lines changed

3 files changed

+139
-0
lines changed

pkg/cloud/azure/actuators/machineset/controller.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/go-logr/logr"
2424
machinev1 "github.com/openshift/api/machine/v1beta1"
2525
mapierrors "github.com/openshift/machine-api-operator/pkg/controller/machine"
26+
"github.com/openshift/machine-api-operator/pkg/util"
2627
"github.com/openshift/machine-api-provider-azure/pkg/cloud/azure/actuators"
2728
"github.com/openshift/machine-api-provider-azure/pkg/cloud/azure/services/resourceskus"
2829
corev1 "k8s.io/api/core/v1"
@@ -35,13 +36,24 @@ import (
3536
"sigs.k8s.io/controller-runtime/pkg/controller"
3637
)
3738

39+
// We define a new type to represent the normalized architecture as the Azure APIs use a different format.
40+
type normalizedArch string
41+
42+
const (
43+
// ArchitectureAmd64 is the normalized architecture for amd64.
44+
ArchitectureAmd64 normalizedArch = "amd64"
45+
// ArchitectureArm64 is the normalized architecture for arm64.
46+
ArchitectureArm64 normalizedArch = "arm64"
47+
)
48+
3849
const (
3950
// This exposes compute information based on the providerSpec input.
4051
// This is needed by the autoscaler to foresee upcoming capacity when scaling from zero.
4152
// https://github.com/openshift/enhancements/pull/186
4253
cpuKey = "machine.openshift.io/vCPU"
4354
memoryKey = "machine.openshift.io/memoryMb"
4455
gpuKey = "machine.openshift.io/GPU"
56+
labelsKey = "machine.openshift.io/labels"
4557
)
4658

4759
// Reconciler reconciles machineSets.
@@ -231,6 +243,17 @@ func updateMachineSetAnnotations(machineSet *machinev1.MachineSet, sku resources
231243
machineSet.Annotations[gpuKey] = gpuCap
232244
}
233245

246+
// Architecture
247+
architecture, ok := sku.GetCapability(resourceskus.CPUArchitectureType)
248+
if !ok {
249+
klog.V(2).Infof("SKU '%s' does not have the CPUArchitecture capability. Defaulting to amd64", *sku.Name)
250+
}
251+
// We guarantee that any existing labels provided via the capacity annotations are preserved.
252+
// See https://github.com/kubernetes/autoscaler/pull/5382 and https://github.com/kubernetes/autoscaler/pull/5697
253+
machineSet.Annotations[labelsKey] = util.MergeCommaSeparatedKeyValuePairs(
254+
fmt.Sprintf("kubernetes.io/arch=%s", normalizedArchitecture(architecture)),
255+
machineSet.Annotations[labelsKey])
256+
234257
return nil
235258
}
236259

@@ -248,3 +271,19 @@ func memoryGiBtoMiB(memoryGiB string) (string, error) {
248271
func getproviderConfig(machineSet *machinev1.MachineSet) (*machinev1.AzureMachineProviderSpec, error) {
249272
return actuators.MachineConfigFromProviderSpec(machineSet.Spec.Template.Spec.ProviderSpec)
250273
}
274+
275+
// normalizedArchitecture adapts the value of the architecture capability for the SKU to the
276+
// one required by the kubernetes APIs.
277+
// For example, the x86_64 architecture is called "x64" in the Azure API.
278+
// The kubernetes.io/arch label, instead, expects the Golang/LLVM names.
279+
func normalizedArchitecture(architecture string) normalizedArch {
280+
switch architecture {
281+
case resourceskus.X64:
282+
return ArchitectureAmd64
283+
case resourceskus.Arm64:
284+
return ArchitectureArm64
285+
}
286+
klog.V(2).Infof("unknown architecture '%s'. Defaulting to amd64", architecture)
287+
// Default to amd64 if we don't recognize the architecture.
288+
return ArchitectureAmd64
289+
}

pkg/cloud/azure/actuators/machineset/controller_test.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ var _ = Describe("Reconciler", func() {
102102
Name: to.StringPtr(resourceskus.MemoryGB),
103103
Value: to.StringPtr("16"),
104104
},
105+
{
106+
Name: to.StringPtr(resourceskus.CPUArchitectureType),
107+
Value: to.StringPtr(resourceskus.X64),
108+
},
105109
},
106110
},
107111
{
@@ -120,6 +124,60 @@ var _ = Describe("Reconciler", func() {
120124
Name: to.StringPtr(resourceskus.GPUs),
121125
Value: to.StringPtr("4"),
122126
},
127+
{
128+
Name: to.StringPtr(resourceskus.CPUArchitectureType),
129+
Value: to.StringPtr(resourceskus.X64),
130+
},
131+
},
132+
},
133+
{
134+
Name: to.StringPtr("Standard_D4ps_v5"),
135+
ResourceType: to.StringPtr("virtualMachines"),
136+
Capabilities: &[]compute.ResourceSkuCapabilities{
137+
{
138+
Name: to.StringPtr(resourceskus.VCPUs),
139+
Value: to.StringPtr("4"),
140+
},
141+
{
142+
Name: to.StringPtr(resourceskus.MemoryGB),
143+
Value: to.StringPtr("16"),
144+
},
145+
{
146+
Name: to.StringPtr(resourceskus.CPUArchitectureType),
147+
Value: to.StringPtr(resourceskus.Arm64),
148+
},
149+
},
150+
},
151+
{
152+
Name: to.StringPtr("Standard_D4s_v3_wrong-arch"),
153+
ResourceType: to.StringPtr("virtualMachines"),
154+
Capabilities: &[]compute.ResourceSkuCapabilities{
155+
{
156+
Name: to.StringPtr(resourceskus.VCPUs),
157+
Value: to.StringPtr("4"),
158+
},
159+
{
160+
Name: to.StringPtr(resourceskus.MemoryGB),
161+
Value: to.StringPtr("16"),
162+
},
163+
{
164+
Name: to.StringPtr(resourceskus.CPUArchitectureType),
165+
Value: to.StringPtr("wrong-arch"),
166+
},
167+
},
168+
},
169+
{
170+
Name: to.StringPtr("Standard_D4s_v3_missing-arch"),
171+
ResourceType: to.StringPtr("virtualMachines"),
172+
Capabilities: &[]compute.ResourceSkuCapabilities{
173+
{
174+
Name: to.StringPtr(resourceskus.VCPUs),
175+
Value: to.StringPtr("4"),
176+
},
177+
{
178+
Name: to.StringPtr(resourceskus.MemoryGB),
179+
Value: to.StringPtr("16"),
180+
},
123181
},
124182
},
125183
}, "centralus")
@@ -190,6 +248,7 @@ var _ = Describe("Reconciler", func() {
190248
cpuKey: "4",
191249
memoryKey: "16384",
192250
gpuKey: "0",
251+
labelsKey: "kubernetes.io/arch=amd64",
193252
},
194253
expectedEvents: []string{},
195254
}),
@@ -200,6 +259,7 @@ var _ = Describe("Reconciler", func() {
200259
cpuKey: "24",
201260
memoryKey: "229376",
202261
gpuKey: "4",
262+
labelsKey: "kubernetes.io/arch=amd64",
203263
},
204264
expectedEvents: []string{},
205265
}),
@@ -215,6 +275,7 @@ var _ = Describe("Reconciler", func() {
215275
cpuKey: "24",
216276
memoryKey: "229376",
217277
gpuKey: "4",
278+
labelsKey: "kubernetes.io/arch=amd64",
218279
},
219280
expectedEvents: []string{},
220281
}),
@@ -230,6 +291,39 @@ var _ = Describe("Reconciler", func() {
230291
},
231292
expectedEvents: []string{"FailedUpdate"},
232293
}),
294+
Entry("with a Standard_D4ps_v5 (aarch64)", reconcileTestCase{
295+
vmSize: "Standard_D4ps_v5",
296+
existingAnnotations: make(map[string]string),
297+
expectedAnnotations: map[string]string{
298+
cpuKey: "4",
299+
memoryKey: "16384",
300+
gpuKey: "0",
301+
labelsKey: "kubernetes.io/arch=arm64",
302+
},
303+
expectedEvents: []string{},
304+
}),
305+
Entry("with a vmSize missing the architecture capability", reconcileTestCase{
306+
vmSize: "Standard_D4s_v3_missing-arch",
307+
existingAnnotations: make(map[string]string),
308+
expectedAnnotations: map[string]string{
309+
cpuKey: "4",
310+
memoryKey: "16384",
311+
gpuKey: "0",
312+
labelsKey: "kubernetes.io/arch=amd64",
313+
},
314+
expectedEvents: []string{},
315+
}),
316+
Entry("with a vmSize missing an unknown architecture", reconcileTestCase{
317+
vmSize: "Standard_D4s_v3_wrong-arch",
318+
existingAnnotations: make(map[string]string),
319+
expectedAnnotations: map[string]string{
320+
cpuKey: "4",
321+
memoryKey: "16384",
322+
gpuKey: "0",
323+
labelsKey: "kubernetes.io/arch=amd64",
324+
},
325+
expectedEvents: []string{},
326+
}),
233327
)
234328
})
235329

pkg/cloud/azure/services/resourceskus/sku.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ const (
7070
MaximumPlatformFaultDomainCount = "MaximumPlatformFaultDomainCount"
7171
// UltraSSDAvailable identifies the capability for the support of UltraSSD data disks.
7272
UltraSSDAvailable = "UltraSSDAvailable"
73+
// CPUArchitectureType identifies the capability for the CPU architecture.
74+
CPUArchitectureType = "CpuArchitecture"
75+
// X64 and Arm64 are the possible values for CPUArchitectureType, in the Azure APIs. We will adapt them in the controller
76+
// to the ones Kubernetes expects.
77+
X64 = "x64"
78+
Arm64 = "Arm64"
7379
)
7480

7581
// HasCapability return true for a capability which can be either

0 commit comments

Comments
 (0)