Skip to content

Commit 2c493d1

Browse files
authored
Add support for nvidia vGPU support with vendor specific framework (#11432)
1 parent 9fd2b90 commit 2c493d1

File tree

2 files changed

+163
-10
lines changed

2 files changed

+163
-10
lines changed

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public String toString() {
4747

4848
private void generateMdevXml(StringBuilder gpuBuilder) {
4949
String mdevUuid = vgpuType.getBusAddress(); // For MDEV devices, busAddress contains the UUID
50-
String displayAttribute = vgpuType.isDisplay() ? "on" : "off";
50+
String displayAttribute = vgpuType.isDisplay() ? "on' ramfb='on" : "off";
5151

5252
gpuBuilder.append("<hostdev mode='subsystem' type='mdev' model='vfio-pci' display='").append(displayAttribute).append("'>\n");
5353
gpuBuilder.append(" <source>\n");
@@ -65,11 +65,16 @@ private void generatePciXml(StringBuilder gpuBuilder) {
6565
// - UEFI/OVMF firmware environments
6666
// - ARM64 hosts (cache coherency issues with traditional VGA)
6767
// - Multi-monitor VDI setups (primary display)
68+
String managed = "yes";
69+
// To support passthrough NVIDIA GPUs with SR-IOV & vendor specific GPU integration
70+
if (vgpuType.getVendorId().equals("10de") && !vgpuType.getModelName().equals("passthrough")) {
71+
managed = "no";
72+
}
6873
if (vgpuType.isDisplay()) {
69-
gpuBuilder.append("<hostdev mode='subsystem' type='pci' managed='yes' display='on' ramfb='on'>\n");
74+
gpuBuilder.append("<hostdev mode='subsystem' type='pci' managed='").append(managed).append("' display='on' ramfb='on'>\n");
7075
} else {
7176
// Compute-only workloads don't need display or ramfb
72-
gpuBuilder.append("<hostdev mode='subsystem' type='pci' managed='yes' display='off'>\n");
77+
gpuBuilder.append("<hostdev mode='subsystem' type='pci' managed='").append(managed).append("' display='off'>\n");
7378
}
7479
gpuBuilder.append(" <driver name='vfio'/>\n");
7580
gpuBuilder.append(" <source>\n");

scripts/vm/hypervisor/kvm/gpudiscovery.sh

Lines changed: 155 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,130 @@ json_escape() {
349349
# Cache for nodedev XML data to avoid repeated virsh calls
350350
declare -A nodedev_cache
351351

352+
# Cache for nvidia-smi vgpu profile data
353+
declare -A nvidia_vgpu_profiles
354+
355+
# Parse nvidia-smi vgpu -s -v output and populate profile cache
356+
# Fill the global associative array nvidia_vgpu_profiles from the output of
# `nvidia-smi vgpu -s -v`.
#
# Globals:   nvidia_vgpu_profiles (written). Key is "<bus:slot.func>:<type id>",
#            both lowercase, the type id being hex without the "0x" prefix.
#            Value is "name|max_instances|fb_memory|max_heads|max_x|max_y",
#            where every numeric field that was not seen is stored as 0.
# Arguments: none
# Returns:   0; does nothing when nvidia-smi is not available.
parse_nvidia_vgpu_profiles() {
    # State of the record currently being assembled. The nested helper below
    # reads these through bash's dynamic scoping.
    local gpu_address="" profile_id="" profile_name=""
    local max_instances="" fb_memory="" max_heads=""
    local max_x_res="" max_y_res=""

    # Line matchers, kept in variables so the dispatch loop stays readable.
    local re_gpu='^GPU[[:space:]]+([0-9A-Fa-f:]+\.[0-9A-Fa-f]+)'
    local re_domain='[0-9A-Fa-f]+:([0-9A-Fa-f]+:[0-9A-Fa-f]+\.[0-9A-Fa-f]+)'
    local re_type='^[[:space:]]*vGPU[[:space:]]+Type[[:space:]]+ID[[:space:]]*:[[:space:]]*0x([0-9A-Fa-f]+)'
    local re_name='^[[:space:]]*Name[[:space:]]*:[[:space:]]*(.+)$'
    local re_instances='^[[:space:]]*Max[[:space:]]+Instances[[:space:]]*:[[:space:]]*([0-9]+)'
    local re_fb='^[[:space:]]*FB[[:space:]]+Memory[[:space:]]*:[[:space:]]*([0-9]+)[[:space:]]*MiB'
    local re_heads='^[[:space:]]*Display[[:space:]]+Heads[[:space:]]*:[[:space:]]*([0-9]+)'
    local re_xres='^[[:space:]]*Maximum[[:space:]]+X[[:space:]]+Resolution[[:space:]]*:[[:space:]]*([0-9]+)'
    local re_yres='^[[:space:]]*Maximum[[:space:]]+Y[[:space:]]+Resolution[[:space:]]*:[[:space:]]*([0-9]+)'

    # Commit the record under construction to the cache. A record is only
    # complete once a GPU address, a type id and a name have all been seen.
    store_profile_data() {
        [[ -n "$gpu_address" && -n "$profile_id" && -n "$profile_name" ]] || return 0
        nvidia_vgpu_profiles["${gpu_address}:${profile_id}"]="$profile_name|${max_instances:-0}|${fb_memory:-0}|${max_heads:-0}|${max_x_res:-0}|${max_y_res:-0}"
    }

    # Nothing to parse on hosts without the NVIDIA tooling.
    command -v nvidia-smi >/dev/null 2>&1 || return 0

    while IFS= read -r line; do
        if [[ $line =~ $re_gpu ]]; then
            # A new GPU section begins: flush whatever profile was pending.
            store_profile_data

            gpu_address="${BASH_REMATCH[1]}"
            # Drop the PCI domain (00000000:AF:00.0 -> AF:00.0) when present.
            if [[ $gpu_address =~ $re_domain ]]; then
                gpu_address="${BASH_REMATCH[1]}"
            fi
            gpu_address="${gpu_address,,}"

            profile_id=""
            profile_name="" max_instances="" fb_memory=""
            max_heads="" max_x_res="" max_y_res=""
        elif [[ $line =~ $re_type ]]; then
            # A new profile begins within the same GPU: flush the previous one.
            store_profile_data

            profile_id="${BASH_REMATCH[1],,}"
            profile_name="" max_instances="" fb_memory=""
            max_heads="" max_x_res="" max_y_res=""
        elif [[ $line =~ $re_name ]]; then
            profile_name="${BASH_REMATCH[1]}"
        elif [[ $line =~ $re_instances ]]; then
            max_instances="${BASH_REMATCH[1]}"
        elif [[ $line =~ $re_fb ]]; then
            fb_memory="${BASH_REMATCH[1]}"
        elif [[ $line =~ $re_heads ]]; then
            max_heads="${BASH_REMATCH[1]}"
        elif [[ $line =~ $re_xres ]]; then
            max_x_res="${BASH_REMATCH[1]}"
        elif [[ $line =~ $re_yres ]]; then
            max_y_res="${BASH_REMATCH[1]}"
        fi
    done < <(nvidia-smi vgpu -s -v 2>/dev/null || true)

    # The final profile has no following header line to trigger its flush.
    store_profile_data
}
431+
432+
# Get current vGPU type ID for a VF from sysfs
433+
# Report the vGPU type currently assigned to an NVIDIA SR-IOV virtual function.
#
# Arguments: $1 - sysfs directory of the VF; the value is read from
#                 "$1/nvidia/current_vgpu_type".
# Outputs:   the type id on stdout as lowercase hex without a "0x" prefix,
#            or "0" when the file is missing, unreadable, or does not hold a
#            recognisable number.
# Returns:   0
get_current_vgpu_type() {
    local vf_path="$1"
    local current_type_file="$vf_path/nvidia/current_vgpu_type"
    local type_id

    if [[ ! -f "$current_type_file" ]]; then
        echo "0"
        return 0
    fi

    # An unreadable file is treated like unknown content (falls through to "0").
    type_id=$(<"$current_type_file") || type_id=""

    # Strip every whitespace character (spaces, tabs, CR/LF), not only spaces,
    # so the anchored patterns below see just the number.
    type_id="${type_id//[[:space:]]/}"

    if [[ $type_id =~ ^0[xX]([0-9A-Fa-f]+)$ ]]; then
        # Hex with prefix (e.g. "0x252").
        echo "${BASH_REMATCH[1],,}"
    elif [[ $type_id =~ ^[0-9]+$ ]]; then
        # All-digit input is always treated as decimal (e.g. "594" -> "252").
        # Force base 10: a bare leading zero would otherwise make printf read
        # the value as octal ("010" -> 8) or reject it outright ("0594").
        printf '%x\n' "$((10#$type_id))"
    elif [[ $type_id =~ ^[0-9A-Fa-f]+$ ]]; then
        # Hex without prefix. Only reachable when at least one a-f digit is
        # present (e.g. "2af"); digit-only strings take the decimal branch.
        echo "${type_id,,}"
    else
        # Unknown format.
        echo "0"
    fi
}
462+
463+
# Get profile information from nvidia-smi cache
464+
# Look up one vGPU profile in the nvidia_vgpu_profiles cache.
#
# Globals:   nvidia_vgpu_profiles (read)
# Arguments: $1 - GPU address part of the cache key
#            $2 - profile (vGPU type) id part of the cache key
# Outputs:   the cached "name|max_instances|fb_memory|max_heads|max_x|max_y"
#            record, or "|0|0|0|0|0" (empty name, zeroed fields) when the
#            key is absent or its value is empty.
get_nvidia_profile_info() {
    local gpu_address="$1"
    local profile_id="$2"
    local lookup_key="${gpu_address}:${profile_id}"
    local cached="${nvidia_vgpu_profiles[$lookup_key]:-}"

    # Cache miss: hand back a record with the same field count as a real one.
    if [[ -z "$cached" ]]; then
        printf '%s\n' "|0|0|0|0|0"
        return
    fi

    printf '%s\n' "$cached"
}
475+
352476
# Get nodedev name for a PCI address (e.g. "00:02.0" -> "pci_0000_00_02_0")
353477
get_nodedev_name() {
354478
local addr="$1"
@@ -567,6 +691,9 @@ process_mdev_instances() {
567691

568692
# === GPU Discovery ===
569693

694+
# Parse nvidia-smi vgpu profiles once at the beginning
695+
parse_nvidia_vgpu_profiles
696+
570697
mapfile -t LINES < <(lspci -nnm)
571698

572699
echo '{ "gpus": ['
@@ -575,7 +702,7 @@ first_gpu=true
575702
for LINE in "${LINES[@]}"; do
576703
# Parse lspci -nnm fields: SLOT "CLASS [CODE]" "VENDOR [VID]" "DEVICE [DID]" ...
577704
if [[ $LINE =~ ^([^[:space:]]+)[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\" ]]; then
578-
PCI_ADDR="${BASH_REMATCH[1]}"
705+
PCI_ADDR="${BASH_REMATCH[1],,}" # Normalize to lowercase
579706
PCI_CLASS="${BASH_REMATCH[2]}"
580707
VENDOR_FIELD="${BASH_REMATCH[3]}"
581708
DEVICE_FIELD="${BASH_REMATCH[4]}"
@@ -662,12 +789,33 @@ for LINE in "${LINES[@]}"; do
662789
SLOT="0x${VF_BDF:3:2}"
663790
FUNC="0x${VF_BDF:6:1}"
664791

665-
# Determine vf_profile
792+
# Determine vf_profile using nvidia-smi information
666793
VF_PROFILE=""
667-
if VF_LINE=$(lspci -nnm -s "$VF_BDF" 2>/dev/null); then
668-
if [[ $VF_LINE =~ \"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\" ]]; then
669-
VF_DEVICE_FIELD="${BASH_REMATCH[4]}"
670-
VF_PROFILE=$(sed -E 's/ \[[0-9A-Fa-f]{4}\]$//' <<<"$VF_DEVICE_FIELD")
794+
VF_PROFILE_NAME=""
795+
VF_MAX_INSTANCES="null"
796+
VF_VIDEO_RAM="null"
797+
VF_MAX_HEADS="null"
798+
VF_MAX_RESOLUTION_X="null"
799+
VF_MAX_RESOLUTION_Y="null"
800+
801+
if [[ "$VENDOR_ID" == "10de" ]]; then
802+
# For NVIDIA GPUs, check current vGPU type
803+
current_vgpu_type=$(get_current_vgpu_type "$VF_PATH")
804+
if [[ "$current_vgpu_type" != "0" ]]; then
805+
# Get profile info from nvidia-smi cache
806+
profile_info=$(get_nvidia_profile_info "$PCI_ADDR" "$current_vgpu_type")
807+
IFS='|' read -r VF_PROFILE_NAME VF_MAX_INSTANCES VF_VIDEO_RAM VF_MAX_HEADS VF_MAX_RESOLUTION_X VF_MAX_RESOLUTION_Y <<< "$profile_info"
808+
VF_PROFILE="$VF_PROFILE_NAME"
809+
fi
810+
fi
811+
812+
# Fallback to lspci parsing if no nvidia-smi profile found
813+
if [[ -z "$VF_PROFILE" ]]; then
814+
if VF_LINE=$(lspci -nnm -s "$VF_BDF" 2>/dev/null); then
815+
if [[ $VF_LINE =~ \"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\"[[:space:]]\"([^\"]+)\" ]]; then
816+
VF_DEVICE_FIELD="${BASH_REMATCH[4]}"
817+
VF_PROFILE=$(sed -E 's/ \[[0-9A-Fa-f]{4}\]$//' <<<"$VF_DEVICE_FIELD")
818+
fi
671819
fi
672820
fi
673821
VF_PROFILE_JSON=$(json_escape "$VF_PROFILE")
@@ -677,7 +825,7 @@ for LINE in "${LINES[@]}"; do
677825
USED_JSON=$(to_json_vm "$raw")
678826

679827
flist+=(
680-
"{\"vf_pci_address\":\"$VF_BDF\",\"vf_profile\":$VF_PROFILE_JSON,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}")
828+
"{\"vf_pci_address\":\"$VF_BDF\",\"vf_profile\":$VF_PROFILE_JSON,\"max_instances\":$VF_MAX_INSTANCES,\"video_ram\":$VF_VIDEO_RAM,\"max_heads\":$VF_MAX_HEADS,\"max_resolution_x\":$VF_MAX_RESOLUTION_X,\"max_resolution_y\":$VF_MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}")
681829
done
682830
if [ ${#flist[@]} -gt 0 ]; then
683831
VF_ARRAY="[$(

0 commit comments

Comments
 (0)