Skip to content

Commit 388836c

Browse files
Add variable for setting the GPUs on cluster (#397)
The change also includes patches that set the GPUs on the NutanixMachineTemplate resources for the control plane and the worker machine deployments.
1 parent 2e57c08 commit 388836c

File tree

4 files changed

+226
-15
lines changed

4 files changed

+226
-15
lines changed

templates/cluster-template-clusterclass.yaml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,19 @@ spec:
184184
matchResources:
185185
controlPlane: true
186186
name: update-control-plane-machine-template
187+
- definitions:
188+
- jsonPatches:
189+
- op: add
190+
path: /spec/template/spec/gpus
191+
valueFrom:
192+
variable: controlPlaneMachineDetails.gpus
193+
selector:
194+
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
195+
kind: NutanixMachineTemplate
196+
matchResources:
197+
controlPlane: true
198+
enabledIf: '{{if .controlPlaneMachineDetails.gpus}}true{{end}}'
199+
name: update-control-plane-machine-template-gpus
187200
- definitions:
188201
- jsonPatches:
189202
- op: add
@@ -224,6 +237,21 @@ spec:
224237
names:
225238
- nutanix-quick-start-worker
226239
name: update-worker-machine-template
240+
- definitions:
241+
- jsonPatches:
242+
- op: add
243+
path: /spec/template/spec/gpus
244+
valueFrom:
245+
variable: workerMachineDetails.gpus
246+
selector:
247+
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
248+
kind: NutanixMachineTemplate
249+
matchResources:
250+
machineDeploymentClass:
251+
names:
252+
- nutanix-quick-start-worker
253+
enabledIf: '{{if .workerMachineDetails.gpus}}true{{end}}'
254+
name: update-worker-machine-template-gpus
227255
- definitions:
228256
- jsonPatches:
229257
- op: replace
@@ -308,6 +336,7 @@ spec:
308336
required: true
309337
schema:
310338
openAPIV3Schema:
339+
description: IP and port of the control plane endpoint.
311340
properties:
312341
IP:
313342
type: string
@@ -318,6 +347,7 @@ spec:
318347
required: true
319348
schema:
320349
openAPIV3Schema:
350+
description: Endpoint and credentials of the Prism Central.
321351
properties:
322352
additionalTrustBundle:
323353
type: string
@@ -334,11 +364,23 @@ spec:
334364
required: true
335365
schema:
336366
openAPIV3Schema:
367+
description: Details of the control plane machine deployment.
337368
properties:
338369
bootType:
339370
type: string
340371
clusterName:
341372
type: string
373+
gpus:
374+
items:
375+
properties:
376+
deviceID:
377+
type: integer
378+
name:
379+
type: string
380+
type:
381+
type: string
382+
type: object
383+
type: array
342384
imageName:
343385
type: string
344386
memorySize:
@@ -356,11 +398,23 @@ spec:
356398
required: true
357399
schema:
358400
openAPIV3Schema:
401+
description: Details of the worker machine deployment.
359402
properties:
360403
bootType:
361404
type: string
362405
clusterName:
363406
type: string
407+
gpus:
408+
items:
409+
properties:
410+
deviceID:
411+
type: integer
412+
name:
413+
type: string
414+
type:
415+
type: string
416+
type: object
417+
type: array
364418
imageName:
365419
type: string
366420
memorySize:
@@ -378,6 +432,7 @@ spec:
378432
required: false
379433
schema:
380434
openAPIV3Schema:
435+
description: List of failure domains to be used in the cluster.
381436
items:
382437
properties:
383438
cluster:
@@ -432,6 +487,8 @@ spec:
432487
required: false
433488
schema:
434489
openAPIV3Schema:
490+
description: Additional categories to be added to the machine deployment in
491+
cluster.
435492
items:
436493
properties:
437494
key:

templates/clusterclass/clusterclass.yaml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,19 @@ spec:
200200
template: |
201201
- type: name
202202
name: {{ .controlPlaneMachineDetails.subnetName }}
203+
- name: update-control-plane-machine-template-gpus
204+
enabledIf: "{{if .controlPlaneMachineDetails.gpus}}true{{end}}"
205+
definitions:
206+
- selector:
207+
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
208+
kind: NutanixMachineTemplate
209+
matchResources:
210+
controlPlane: true
211+
jsonPatches:
212+
- op: add
213+
path: /spec/template/spec/gpus
214+
valueFrom:
215+
variable: controlPlaneMachineDetails.gpus
203216
- name: update-worker-machine-template
204217
definitions:
205218
- selector:
@@ -240,6 +253,21 @@ spec:
240253
template: |
241254
- type: name
242255
name: {{ .controlPlaneMachineDetails.subnetName }}
256+
- name: update-worker-machine-template-gpus
257+
enabledIf: "{{if .workerMachineDetails.gpus}}true{{end}}"
258+
definitions:
259+
- selector:
260+
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
261+
kind: NutanixMachineTemplate
262+
matchResources:
263+
machineDeploymentClass:
264+
names:
265+
- nutanix-quick-start-worker
266+
jsonPatches:
267+
- op: add
268+
path: /spec/template/spec/gpus
269+
valueFrom:
270+
variable: workerMachineDetails.gpus
243271
- name: add-failure-domains
244272
enabledIf: "{{if .failureDomains}}true{{end}}"
245273
definitions:
@@ -324,6 +352,7 @@ spec:
324352
required: true
325353
schema:
326354
openAPIV3Schema:
355+
description: IP and port of the control plane endpoint.
327356
properties:
328357
IP:
329358
type: string
@@ -334,6 +363,7 @@ spec:
334363
required: true
335364
schema:
336365
openAPIV3Schema:
366+
description: Endpoint and credentials of the Prism Central.
337367
properties:
338368
address:
339369
type: string
@@ -350,6 +380,7 @@ spec:
350380
required: true
351381
schema:
352382
openAPIV3Schema:
383+
description: Details of the control plane machine deployment.
353384
properties:
354385
bootType:
355386
type: string
@@ -367,11 +398,23 @@ spec:
367398
type: string
368399
subnetName:
369400
type: string
401+
gpus:
402+
type: array
403+
items:
404+
type: object
405+
properties:
406+
name:
407+
type: string
408+
deviceID:
409+
type: integer
410+
type:
411+
type: string
370412
type: object
371413
- name: workerMachineDetails
372414
required: true
373415
schema:
374416
openAPIV3Schema:
417+
description: Details of the worker machine deployment.
375418
properties:
376419
bootType:
377420
type: string
@@ -389,11 +432,23 @@ spec:
389432
type: string
390433
subnetName:
391434
type: string
435+
gpus:
436+
type: array
437+
items:
438+
type: object
439+
properties:
440+
name:
441+
type: string
442+
deviceID:
443+
type: integer
444+
type:
445+
type: string
392446
type: object
393447
- name: failureDomains
394448
required: false
395449
schema:
396450
openAPIV3Schema:
451+
description: List of failure domains to be used in the cluster.
397452
type: array
398453
items:
399454
type: object
@@ -448,6 +503,7 @@ spec:
448503
required: false
449504
schema:
450505
openAPIV3Schema:
506+
description: Additional categories to be added to the machine deployment in cluster.
451507
type: array
452508
items:
453509
type: object

templates/template_test.go

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"k8s.io/client-go/kubernetes/scheme"
2020
"k8s.io/client-go/tools/clientcmd"
2121
"k8s.io/klog/v2/textlogger"
22+
"k8s.io/utils/ptr"
2223
capiv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2324
clusterctllog "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
2425
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
@@ -238,6 +239,29 @@ func fetchControlPlaneMachineTemplate(clnt client.Client, clusterName string) (*
238239
return nil, fmt.Errorf("no control plane NutanixMachineTemplate found for cluster %s", clusterName)
239240
}
240241

242+
func fetchWorkerMachineTemplates(clnt client.Client, clusterName string) ([]*v1beta1.NutanixMachineTemplate, error) {
243+
nmts, err := fetchMachineTemplates(clnt, clusterName)
244+
if err != nil {
245+
return nil, err
246+
}
247+
248+
kcp, err := fetchKubeadmControlPlane(clnt, clusterName)
249+
if err != nil {
250+
return nil, err
251+
}
252+
253+
workerNmts := make([]*v1beta1.NutanixMachineTemplate, 0)
254+
for _, nmt := range nmts {
255+
if nmt.ObjectMeta.Name == kcp.Spec.MachineTemplate.InfrastructureRef.Name {
256+
continue
257+
}
258+
259+
workerNmts = append(workerNmts, nmt)
260+
}
261+
262+
return workerNmts, nil
263+
}
264+
241265
func TestClusterClassTemplateSuite(t *testing.T) {
242266
RegisterFailHandler(Fail)
243267
BeforeSuite(func() {
@@ -338,11 +362,6 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() {
338362
err = clnt.Create(context.Background(), obj) // Create the cluster
339363
Expect(err).NotTo(HaveOccurred())
340364

341-
Eventually(func() error {
342-
_, err = fetchNutanixCluster(clnt, obj.GetName())
343-
return err
344-
}).Within(time.Minute).Should(Succeed())
345-
346365
Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) {
347366
return fetchMachineTemplates(clnt, obj.GetName())
348367
}).Within(time.Minute).Should(And(HaveLen(2),
@@ -361,11 +380,6 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() {
361380
err = clnt.Create(context.Background(), obj) // Create the cluster
362381
Expect(err).NotTo(HaveOccurred())
363382

364-
Eventually(func() error {
365-
_, err = fetchNutanixCluster(clnt, obj.GetName())
366-
return err
367-
}).Within(time.Minute).Should(Succeed())
368-
369383
Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) {
370384
return fetchMachineTemplates(clnt, obj.GetName())
371385
}).Within(time.Minute).Should(And(HaveLen(2),
@@ -384,11 +398,6 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() {
384398
err = clnt.Create(context.Background(), obj) // Create the cluster
385399
Expect(err).NotTo(HaveOccurred())
386400

387-
Eventually(func() error {
388-
_, err = fetchNutanixCluster(clnt, obj.GetName())
389-
return err
390-
}).Within(time.Minute).Should(Succeed())
391-
392401
Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) {
393402
return fetchMachineTemplates(clnt, obj.GetName())
394403
}).Within(time.Minute).Should(And(HaveLen(2),
@@ -400,4 +409,35 @@ var _ = Describe("Cluster Class Template Patches Test Suite", Ordered, func() {
400409
})))))
401410
})
402411
})
412+
413+
Describe("patches for GPUs", func() {
414+
It("should have correct GPUs", func() {
415+
clusterManifest := "testdata/cluster-with-gpu.yaml"
416+
obj, err := getClusterManifest(clusterManifest)
417+
Expect(err).NotTo(HaveOccurred())
418+
419+
err = clnt.Create(context.Background(), obj) // Create the cluster
420+
Expect(err).NotTo(HaveOccurred())
421+
422+
Eventually(func() (*v1beta1.NutanixMachineTemplate, error) {
423+
return fetchControlPlaneMachineTemplate(clnt, obj.GetName())
424+
}).Within(time.Minute).Should(And(HaveExistingField("Spec.Template.Spec.GPUs"),
425+
HaveField("Spec.Template.Spec.GPUs", HaveLen(1)),
426+
HaveField("Spec.Template.Spec.GPUs", ContainElement(v1beta1.NutanixGPU{
427+
Type: v1beta1.NutanixGPUIdentifierDeviceID,
428+
DeviceID: ptr.To(int64(42)),
429+
}))))
430+
431+
Eventually(func() ([]*v1beta1.NutanixMachineTemplate, error) {
432+
return fetchWorkerMachineTemplates(clnt, obj.GetName())
433+
}).Within(time.Minute).Should(And(HaveLen(1),
434+
HaveEach(HaveExistingField("Spec.Template.Spec.GPUs")),
435+
HaveEach(HaveField("Spec.Template.Spec.GPUs", HaveLen(1))),
436+
HaveEach(HaveField("Spec.Template.Spec.GPUs", ContainElement(v1beta1.NutanixGPU{
437+
Type: v1beta1.NutanixGPUIdentifierName,
438+
Name: ptr.To("fake-gpu"),
439+
}))),
440+
))
441+
})
442+
})
403443
})

0 commit comments

Comments
 (0)