Skip to content

Commit f6ad184

Browse files
authored
Feature: Add support for GPU with KVM hosts (#11143)
This PR adds support for attaching GPU devices to an Instance on KVM via PCI, mdev, or VF. It allows the operator to discover the GPU devices on a KVM host and create a Compute Offering with GPU support based on the GPU devices available on that host. Once the operator has created the Compute Offering, users can use it to launch Instances with GPU devices.
1 parent b92c196 commit f6ad184

File tree

183 files changed

+16220
-596
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

183 files changed

+16220
-596
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ jobs:
140140
smoke/test_vm_deployment_planner
141141
smoke/test_vm_strict_host_tags
142142
smoke/test_vm_schedule
143+
smoke/test_deploy_vgpu_enabled_vm
143144
smoke/test_vm_life_cycle
144145
smoke/test_vm_lifecycle_unmanage_import
145146
smoke/test_vm_snapshot_kvm

agent/src/main/java/com/cloud/agent/properties/AgentProperties.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,15 @@ public class AgentProperties{
221221
*/
222222
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_XML_TRANSFORMER_SCRIPT = new Property<>("agent.hooks.libvirt_vm_xml_transformer.script", "libvirt-vm-xml-transformer.groovy");
223223

224+
/**
225+
* This property is used with the agent.hooks.basedir property to define the Libvirt VM XML transformer shell script.<br>
226+
* The shell script is used to execute the Libvirt VM XML transformer script.<br>
227+
* For more information see the agent.properties file.<br>
228+
* Data type: String.<br>
229+
* Default value: <code>libvirt-vm-xml-transformer.sh</code>
230+
*/
231+
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_XML_TRANSFORMER_SHELL_SCRIPT = new Property<>("agent.hooks.libvirt_vm_xml_transformer.shell_script", "libvirt-vm-xml-transformer.sh");
232+
224233
/**
225234
* This property is used with the agent.hooks.basedir and agent.hooks.libvirt_vm_xml_transformer.script properties to define the Libvirt VM XML transformer method.<br>
226235
* Libvirt XML transformer hook does XML-to-XML transformation.<br>
@@ -241,6 +250,15 @@ public class AgentProperties{
241250
*/
242251
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_ON_START_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_start.script", "libvirt-vm-state-change.groovy");
243252

253+
/**
254+
* This property is used with the agent.hooks.basedir property to define the Libvirt VM on start shell script.<br>
255+
* The shell script is used to execute the Libvirt VM on start script.<br>
256+
* For more information see the agent.properties file.<br>
257+
* Data type: String.<br>
258+
* Default value: <code>libvirt-vm-state-change.sh</code>
259+
*/
260+
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_ON_START_SHELL_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_start.shell_script", "libvirt-vm-state-change.sh");
261+
244262
/**
245263
* This property is used with the agent.hooks.basedir and agent.hooks.libvirt_vm_on_start.script properties to define the Libvirt VM on start method.<br>
246264
* The hook is called right after Libvirt successfully launched the VM.<br>
@@ -260,6 +278,15 @@ public class AgentProperties{
260278
*/
261279
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_ON_STOP_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_stop.script", "libvirt-vm-state-change.groovy");
262280

281+
/**
282+
* This property is used with the agent.hooks.basedir property to define the Libvirt VM on stop shell script.<br>
283+
* The shell script is used to execute the Libvirt VM on stop script.<br>
284+
* For more information see the agent.properties file.<br>
285+
* Data type: String.<br>
286+
* Default value: <code>libvirt-vm-state-change.sh</code>
287+
*/
288+
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_ON_STOP_SHELL_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_stop.shell_script", "libvirt-vm-state-change.sh");
289+
263290
/**
264291
* This property is used with the agent.hooks.basedir and agent.hooks.libvirt_vm_on_stop.script properties to define the Libvirt VM on stop method.<br>
265292
* The hook is called right after libvirt successfully stopped the VM.<br>

api/src/main/java/com/cloud/agent/api/VgpuTypesInfo.java

Lines changed: 162 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,32 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717
package com.cloud.agent.api;
18+
19+
import org.apache.cloudstack.gpu.GpuDevice;
20+
1821
public class VgpuTypesInfo {
1922

23+
private boolean passthroughEnabled = true;
24+
private GpuDevice.DeviceType deviceType;
25+
private String parentBusAddress;
26+
private String busAddress;
27+
private String numaNode;
28+
private String pciRoot;
29+
private String deviceId;
30+
private String deviceName;
31+
private String vendorId;
32+
private String vendorName;
2033
private String modelName;
2134
private String groupName;
35+
private String vmName;
2236
private Long maxHeads;
2337
private Long videoRam;
2438
private Long maxResolutionX;
2539
private Long maxResolutionY;
2640
private Long maxVgpuPerGpu;
2741
private Long remainingCapacity;
2842
private Long maxCapacity;
43+
private boolean display = false;
2944

3045
public String getModelName() {
3146
return modelName;
@@ -39,22 +54,42 @@ public Long getVideoRam() {
3954
return videoRam;
4055
}
4156

57+
public void setVideoRam(Long videoRam) {
58+
this.videoRam = videoRam;
59+
}
60+
4261
public Long getMaxHeads() {
4362
return maxHeads;
4463
}
4564

65+
public void setMaxHeads(Long maxHeads) {
66+
this.maxHeads = maxHeads;
67+
}
68+
4669
public Long getMaxResolutionX() {
4770
return maxResolutionX;
4871
}
4972

73+
public void setMaxResolutionX(Long maxResolutionX) {
74+
this.maxResolutionX = maxResolutionX;
75+
}
76+
5077
public Long getMaxResolutionY() {
5178
return maxResolutionY;
5279
}
5380

81+
public void setMaxResolutionY(Long maxResolutionY) {
82+
this.maxResolutionY = maxResolutionY;
83+
}
84+
5485
public Long getMaxVpuPerGpu() {
5586
return maxVgpuPerGpu;
5687
}
5788

89+
public void setMaxVgpuPerGpu(Long maxVgpuPerGpu) {
90+
this.maxVgpuPerGpu = maxVgpuPerGpu;
91+
}
92+
5893
public Long getRemainingCapacity() {
5994
return remainingCapacity;
6095
}
@@ -71,8 +106,133 @@ public void setMaxVmCapacity(Long maxCapacity) {
71106
this.maxCapacity = maxCapacity;
72107
}
73108

74-
public VgpuTypesInfo(String groupName, String modelName, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY, Long maxVgpuPerGpu,
75-
Long remainingCapacity, Long maxCapacity) {
109+
public boolean isPassthroughEnabled() {
110+
return passthroughEnabled;
111+
}
112+
113+
public void setPassthroughEnabled(boolean passthroughEnabled) {
114+
this.passthroughEnabled = passthroughEnabled;
115+
}
116+
117+
public GpuDevice.DeviceType getDeviceType() {
118+
return deviceType;
119+
}
120+
121+
public void setDeviceType(GpuDevice.DeviceType deviceType) {
122+
this.deviceType = deviceType;
123+
}
124+
125+
public String getParentBusAddress() {
126+
return parentBusAddress;
127+
}
128+
129+
public void setParentBusAddress(String parentBusAddress) {
130+
this.parentBusAddress = parentBusAddress;
131+
}
132+
133+
public String getBusAddress() {
134+
return busAddress;
135+
}
136+
137+
public void setBusAddress(String busAddress) {
138+
this.busAddress = busAddress;
139+
}
140+
141+
public String getNumaNode() {
142+
return numaNode;
143+
}
144+
145+
public void setNumaNode(String numaNode) {
146+
this.numaNode = numaNode;
147+
}
148+
149+
public String getPciRoot() {
150+
return pciRoot;
151+
}
152+
153+
public void setPciRoot(String pciRoot) {
154+
this.pciRoot = pciRoot;
155+
}
156+
157+
public String getDeviceId() {
158+
return deviceId;
159+
}
160+
161+
public void setDeviceId(String deviceId) {
162+
this.deviceId = deviceId;
163+
}
164+
165+
public String getDeviceName() {
166+
return deviceName;
167+
}
168+
169+
public void setDeviceName(String deviceName) {
170+
this.deviceName = deviceName;
171+
}
172+
173+
public String getVendorId() {
174+
return vendorId;
175+
}
176+
177+
public void setVendorId(String vendorId) {
178+
this.vendorId = vendorId;
179+
}
180+
181+
public String getVendorName() {
182+
return vendorName;
183+
}
184+
185+
public void setVendorName(String vendorName) {
186+
this.vendorName = vendorName;
187+
}
188+
189+
public String getVmName() {
190+
return vmName;
191+
}
192+
193+
public void setVmName(String vmName) {
194+
this.vmName = vmName;
195+
}
196+
197+
public boolean isDisplay() {
198+
return display;
199+
}
200+
201+
public void setDisplay(boolean display) {
202+
this.display = display;
203+
}
204+
205+
public VgpuTypesInfo(GpuDevice.DeviceType deviceType, String groupName, String modelName, String busAddress,
206+
String vendorId, String vendorName, String deviceId, String deviceName, String numaNode, String pciRoot
207+
) {
208+
this.deviceType = deviceType;
209+
this.groupName = groupName;
210+
this.modelName = modelName;
211+
this.busAddress = busAddress;
212+
this.deviceId = deviceId;
213+
this.deviceName = deviceName;
214+
this.vendorId = vendorId;
215+
this.vendorName = vendorName;
216+
this.numaNode = numaNode;
217+
this.pciRoot = pciRoot;
218+
}
219+
220+
public VgpuTypesInfo(GpuDevice.DeviceType deviceType, String groupName, String modelName, String busAddress,
221+
String vendorId, String vendorName, String deviceId, String deviceName
222+
) {
223+
this.deviceType = deviceType;
224+
this.groupName = groupName;
225+
this.modelName = modelName;
226+
this.busAddress = busAddress;
227+
this.deviceId = deviceId;
228+
this.deviceName = deviceName;
229+
this.vendorId = vendorId;
230+
this.vendorName = vendorName;
231+
}
232+
233+
public VgpuTypesInfo(String groupName, String modelName, Long videoRam, Long maxHeads, Long maxResolutionX,
234+
Long maxResolutionY, Long maxVgpuPerGpu, Long remainingCapacity, Long maxCapacity
235+
) {
76236
this.groupName = groupName;
77237
this.modelName = modelName;
78238
this.videoRam = videoRam;

api/src/main/java/com/cloud/agent/api/to/GPUDeviceTO.java

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,33 @@
1616
// under the License.
1717
package com.cloud.agent.api.to;
1818

19+
import java.util.ArrayList;
1920
import java.util.HashMap;
21+
import java.util.List;
2022

2123
import com.cloud.agent.api.VgpuTypesInfo;
2224

2325
public class GPUDeviceTO {
2426

2527
private String gpuGroup;
2628
private String vgpuType;
29+
private int gpuCount;
2730
private HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = new HashMap<String, HashMap<String, VgpuTypesInfo>>();
31+
private List<VgpuTypesInfo> gpuDevices = new ArrayList<>();
2832

29-
public GPUDeviceTO( String gpuGroup, String vgpuType, HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails) {
33+
public GPUDeviceTO(String gpuGroup, String vgpuType, int gpuCount,
34+
HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails,
35+
List<VgpuTypesInfo> gpuDevices) {
36+
this.gpuGroup = gpuGroup;
37+
this.vgpuType = vgpuType;
38+
this.groupDetails = groupDetails;
39+
this.gpuCount = gpuCount;
40+
this.gpuDevices = gpuDevices;
41+
42+
}
43+
44+
public GPUDeviceTO(String gpuGroup, String vgpuType,
45+
HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails) {
3046
this.gpuGroup = gpuGroup;
3147
this.vgpuType = vgpuType;
3248
this.groupDetails = groupDetails;
@@ -48,6 +64,14 @@ public void setVgpuType(String vgpuType) {
4864
this.vgpuType = vgpuType;
4965
}
5066

67+
public int getGpuCount() {
68+
return gpuCount;
69+
}
70+
71+
public void setGpuCount(int gpuCount) {
72+
this.gpuCount = gpuCount;
73+
}
74+
5175
public HashMap<String, HashMap<String, VgpuTypesInfo>> getGroupDetails() {
5276
return groupDetails;
5377
}
@@ -56,4 +80,11 @@ public void setGroupDetails(HashMap<String, HashMap<String, VgpuTypesInfo>> grou
5680
this.groupDetails = groupDetails;
5781
}
5882

83+
public List<VgpuTypesInfo> getGpuDevices() {
84+
return gpuDevices;
85+
}
86+
87+
public void setGpuDevices(List<VgpuTypesInfo> gpuDevices) {
88+
this.gpuDevices = gpuDevices;
89+
}
5990
}

api/src/main/java/com/cloud/configuration/Resource.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ enum ResourceType { // All storage type resources are allocated_storage and not
3737
backup("backup", 12),
3838
backup_storage("backup_storage", 13),
3939
bucket("bucket", 14),
40-
object_storage("object_storage", 15);
40+
object_storage("object_storage", 15),
41+
gpu("gpu", 16);
4142

4243
private String name;
4344
private int ordinal;

api/src/main/java/com/cloud/event/EventTypes.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
import org.apache.cloudstack.datacenter.DataCenterIpv4GuestSubnet;
3232
import org.apache.cloudstack.extension.Extension;
3333
import org.apache.cloudstack.extension.ExtensionCustomAction;
34+
import org.apache.cloudstack.gpu.GpuCard;
35+
import org.apache.cloudstack.gpu.GpuDevice;
36+
import org.apache.cloudstack.gpu.VgpuProfile;
3437
import org.apache.cloudstack.ha.HAConfig;
3538
import org.apache.cloudstack.network.BgpPeer;
3639
import org.apache.cloudstack.network.Ipv4GuestSubnetNetworkMap;
@@ -378,6 +381,21 @@ public class EventTypes {
378381
public static final String EVENT_DISK_OFFERING_EDIT = "DISK.OFFERING.EDIT";
379382
public static final String EVENT_DISK_OFFERING_DELETE = "DISK.OFFERING.DELETE";
380383

384+
// GPU Cards
385+
public static final String EVENT_GPU_CARD_CREATE = "GPU.CARD.CREATE";
386+
public static final String EVENT_GPU_CARD_EDIT = "GPU.CARD.EDIT";
387+
public static final String EVENT_GPU_CARD_DELETE = "GPU.CARD.DELETE";
388+
389+
// vGPU Profiles
390+
public static final String EVENT_VGPU_PROFILE_CREATE = "VGPU.PROFILE.CREATE";
391+
public static final String EVENT_VGPU_PROFILE_EDIT = "VGPU.PROFILE.EDIT";
392+
public static final String EVENT_VGPU_PROFILE_DELETE = "VGPU.PROFILE.DELETE";
393+
394+
// GPU Devices
395+
public static final String EVENT_GPU_DEVICE_CREATE = "GPU.DEVICE.CREATE";
396+
public static final String EVENT_GPU_DEVICE_EDIT = "GPU.DEVICE.EDIT";
397+
public static final String EVENT_GPU_DEVICE_DELETE = "GPU.DEVICE.DELETE";
398+
381399
// Network offerings
382400
public static final String EVENT_NETWORK_OFFERING_CREATE = "NETWORK.OFFERING.CREATE";
383401
public static final String EVENT_NETWORK_OFFERING_ASSIGN = "NETWORK.OFFERING.ASSIGN";
@@ -1026,6 +1044,21 @@ public class EventTypes {
10261044
entityEventDetails.put(EVENT_DISK_OFFERING_EDIT, DiskOffering.class);
10271045
entityEventDetails.put(EVENT_DISK_OFFERING_DELETE, DiskOffering.class);
10281046

1047+
// GPU Cards
1048+
entityEventDetails.put(EVENT_GPU_CARD_CREATE, GpuCard.class);
1049+
entityEventDetails.put(EVENT_GPU_CARD_EDIT, GpuCard.class);
1050+
entityEventDetails.put(EVENT_GPU_CARD_DELETE, GpuCard.class);
1051+
1052+
// vGPU Profiles
1053+
entityEventDetails.put(EVENT_VGPU_PROFILE_CREATE, VgpuProfile.class);
1054+
entityEventDetails.put(EVENT_VGPU_PROFILE_EDIT, VgpuProfile.class);
1055+
entityEventDetails.put(EVENT_VGPU_PROFILE_DELETE, VgpuProfile.class);
1056+
1057+
// GPU Devices
1058+
entityEventDetails.put(EVENT_GPU_DEVICE_CREATE, GpuDevice.class);
1059+
entityEventDetails.put(EVENT_GPU_DEVICE_EDIT, GpuDevice.class);
1060+
entityEventDetails.put(EVENT_GPU_DEVICE_DELETE, GpuDevice.class);
1061+
10291062
// Network offerings
10301063
entityEventDetails.put(EVENT_NETWORK_OFFERING_CREATE, NetworkOffering.class);
10311064
entityEventDetails.put(EVENT_NETWORK_OFFERING_ASSIGN, NetworkOffering.class);

0 commit comments

Comments
 (0)