Skip to content

Commit 5f2aa05

Browse files
authored
Merge pull request #1341 from jwmay2012/guest-accelerators
Add GPU/Accelerator support to VMs in GCPMachineTemplate
2 parents 5e0eb53 + aed6b4c commit 5f2aa05

File tree

6 files changed

+136
-0
lines changed

6 files changed

+136
-0
lines changed

api/v1beta1/gcpmachine_types.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,25 @@ type GCPMachineSpec struct {
360360
// RootDiskEncryptionKey defines the KMS key to be used to encrypt the root disk.
361361
// +optional
362362
RootDiskEncryptionKey *CustomerEncryptionKey `json:"rootDiskEncryptionKey,omitempty"`
363+
364+
// GuestAccelerators is a list of the type and count of accelerator cards
365+
// attached to the instance.
366+
// +optional
367+
GuestAccelerators []Accelerator `json:"guestAccelerators,omitempty"`
368+
}
369+
370+
// Accelerator is a specification of the type and number of accelerator
371+
// cards attached to the instance.
372+
type Accelerator struct {
373+
// Count is the number of the guest accelerator cards exposed to this
374+
// instance.
375+
Count int64 `json:"count,omitempty"`
376+
// Type is the full or partial URL of the accelerator type resource to
377+
// attach to this instance. For example:
378+
// projects/my-project/zones/us-central1-c/acceleratorTypes/nvidia-tesla-p100
379+
// If you are creating an instance template, specify only the accelerator name.
380+
// See GPUs on Compute Engine for a full list of accelerator types.
381+
Type string `json:"type,omitempty"`

// NOTE(review): neither field carries a validation marker, and the CRD schema
// generated for this type only constrains count to an integer and type to a
// string. Confirm whether a minimum count or a required type should be enforced.
363382
}
364383

365384
// MetadataItem defines a single piece of metadata associated with an instance.

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cloud/scope/machine.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,22 @@ func (m *MachineScope) InstanceAdditionalMetadataSpec() *compute.Metadata {
378378
return metadata
379379
}
380380

381+
// InstanceGuestAcceleratorsSpec returns the guest accelerator configs for the
// instance, built from GCPMachine.Spec.GuestAccelerators.
//
// It returns nil when the spec lists no accelerators. Otherwise the result has
// one *compute.AcceleratorConfig per spec entry, in spec order, with the entry's
// Type and Count copied over unchanged.
382+
func (m *MachineScope) InstanceGuestAcceleratorsSpec() []*compute.AcceleratorConfig {
383+
// No accelerators requested: return nil rather than an empty slice.
if len(m.GCPMachine.Spec.GuestAccelerators) == 0 {
384+
return nil
385+
}
386+
// The final length is known, so allocate the backing array once.
accelConfigs := make([]*compute.AcceleratorConfig, 0, len(m.GCPMachine.Spec.GuestAccelerators))
387+
for _, accel := range m.GCPMachine.Spec.GuestAccelerators {
388+
accelConfig := &compute.AcceleratorConfig{
389+
AcceleratorType: accel.Type,
390+
AcceleratorCount: accel.Count,
391+
}
392+
accelConfigs = append(accelConfigs, accelConfig)
393+
}
394+
return accelConfigs
395+
}
396+
381397
// InstanceSpec returns instance spec.
382398
func (m *MachineScope) InstanceSpec(log logr.Logger) *compute.Instance {
383399
ctx := context.TODO()
@@ -472,6 +488,11 @@ func (m *MachineScope) InstanceSpec(log logr.Logger) *compute.Instance {
472488
instance.Metadata = m.InstanceAdditionalMetadataSpec()
473489
instance.ServiceAccounts = append(instance.ServiceAccounts, m.InstanceServiceAccountsSpec())
474490
instance.NetworkInterfaces = append(instance.NetworkInterfaces, m.InstanceNetworkInterfaceSpec())
491+
instance.GuestAccelerators = m.InstanceGuestAcceleratorsSpec()
492+
if len(instance.GuestAccelerators) > 0 {
493+
instance.Scheduling.OnHostMaintenance = "TERMINATE"
494+
}
495+
475496
return instance
476497
}
477498

config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachines.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,31 @@ spec:
208208
- AMDEncryptedVirtualizationNestedPaging
209209
- IntelTrustedDomainExtensions
210210
type: string
211+
guestAccelerators:
212+
description: |-
213+
GuestAccelerators is a list of the type and count of accelerator cards
214+
attached to the instance.
215+
items:
216+
description: |-
217+
Accelerator is a specification of the type and number of accelerator
218+
cards attached to the instance.
219+
properties:
220+
count:
221+
description: |-
222+
Count is the number of the guest accelerator cards exposed to this
223+
instance.
224+
format: int64
225+
type: integer
226+
type:
227+
description: |-
228+
Type is the full or partial URL of the accelerator type resource to
229+
attach to this instance. For example:
230+
projects/my-project/zones/us-central1-c/acceleratorTypes/nvidia-tesla-p100
231+
If you are creating an instance template, specify only the accelerator name.
232+
See GPUs on Compute Engine for a full list of accelerator types.
233+
type: string
234+
type: object
235+
type: array
211236
image:
212237
description: |-
213238
Image is the full reference to a valid image to be used for this machine.

config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinetemplates.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,31 @@ spec:
223223
- AMDEncryptedVirtualizationNestedPaging
224224
- IntelTrustedDomainExtensions
225225
type: string
226+
guestAccelerators:
227+
description: |-
228+
GuestAccelerators is a list of the type and count of accelerator cards
229+
attached to the instance.
230+
items:
231+
description: |-
232+
Accelerator is a specification of the type and number of accelerator
233+
cards attached to the instance.
234+
properties:
235+
count:
236+
description: |-
237+
Count is the number of the guest accelerator cards exposed to this
238+
instance.
239+
format: int64
240+
type: integer
241+
type:
242+
description: |-
243+
Type is the full or partial URL of the accelerator type resource to
244+
attach to this instance. For example:
245+
projects/my-project/zones/us-central1-c/acceleratorTypes/nvidia-tesla-p100
246+
If you are creating an instance template, specify only the accelerator name.
247+
See GPUs on Compute Engine for a full list of accelerator types.
248+
type: string
249+
type: object
250+
type: array
226251
image:
227252
description: |-
228253
Image is the full reference to a valid image to be used for this machine.

docs/book/src/topics/gpus.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# GPUs
2+
3+
Add GPUs via the `guestAccelerators` field in `GCPMachineTemplate`.
4+
5+
```yaml
6+
---
7+
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
8+
kind: GCPMachineTemplate
9+
metadata:
10+
name: mygcpmachinetemplate
11+
namespace: mynamespace
12+
spec:
13+
template:
14+
spec:
15+
image: projects/myproject/global/images/myimage
16+
instanceType: n1-standard-2
17+
guestAccelerators:
18+
- type: projects/myproject/zones/us-central1-c/acceleratorTypes/nvidia-tesla-t4
19+
count: 1
20+
```
21+
22+
See [GPUs on Compute Engine](https://cloud.google.com/compute/docs/gpus) for the full list of accelerator types.
23+
24+
NOTE: Instances with accelerators/GPUs do NOT support live migration.
25+
Therefore, `onHostMaintenance` is always set to `TERMINATE` for these instances.
26+
See <https://cloud.google.com/compute/docs/instances/setting-vm-host-options> for details on host maintenance options.

0 commit comments

Comments
 (0)