Skip to content

Commit 0c36f1c

Browse files
committed
Use xmlstarlet for parsing XML in gpudiscovery script
1 parent c8e47e9 commit 0c36f1c

File tree

2 files changed

+35
-57
lines changed

2 files changed

+35
-57
lines changed

scripts/vm/hypervisor/kvm/gpudiscovery.sh

Lines changed: 34 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -377,14 +377,9 @@ get_iommu_group() {
377377
local addr="$1"
378378
local xml
379379
xml=$(get_nodedev_xml "$addr")
380-
381-
if [[ -n "$xml" ]]; then
382-
if [[ $xml =~ \<iommuGroup[^\>]*number=\'([0-9]+)\' ]]; then
383-
echo "${BASH_REMATCH[1]}"
384-
return
385-
fi
386-
fi
387-
echo "null"
380+
local group
381+
group=$(echo "$xml" | xmlstarlet sel -t -v "//iommuGroup/@number" 2>/dev/null || true)
382+
echo "${group:-null}"
388383
}
389384

390385
# Given a PCI address, output "TOTALVFS NUMVFS"
@@ -397,37 +392,27 @@ get_sriov_counts() {
397392
local numvfs=0
398393

399394
if [[ -n "$xml" ]]; then
400-
if [[ $xml =~ \<capability[^\>]*type=\'virt_functions\' ]]; then
401-
# Count max VFs from capability
402-
totalvfs=$(echo "$xml" | grep -o '<capability[^>]*type=.virt_functions.' | wc -l)
403-
if [[ $totalvfs -eq 0 ]]; then
404-
# Try alternative method - look for maxCount attribute
405-
if [[ $xml =~ maxCount=\'([0-9]+)\' ]]; then
406-
totalvfs="${BASH_REMATCH[1]}"
407-
fi
408-
fi
395+
# Check for SR-IOV capability before parsing
396+
local cap_xml
397+
cap_xml=$(echo "$xml" | xmlstarlet sel -t -c "//capability[@type='virt_functions']" 2>/dev/null || true)
409398

410-
# Count current VFs by looking for address elements within virt_functions capability
411-
numvfs=$(echo "$xml" | sed -n '/<capability[^>]*type=.virt_functions./,/<\/capability>/p' | grep -c '<address ')
399+
if [[ -n "$cap_xml" ]]; then
400+
totalvfs=$(echo "$cap_xml" | xmlstarlet sel -t -v "/capability/@maxCount" 2>/dev/null || true)
401+
numvfs=$(echo "$cap_xml" | xmlstarlet sel -t -v "count(/capability/address)" 2>/dev/null || true)
412402
fi
413403
fi
414404

415-
echo "$totalvfs $numvfs"
405+
echo "${totalvfs:-0} ${numvfs:-0}"
416406
}
417407

418408
# Given a PCI address, return its NUMA node (or -1 if none)
419409
get_numa_node() {
420410
local addr="$1"
421411
local xml
422412
xml=$(get_nodedev_xml "$addr")
423-
424-
if [[ -n "$xml" ]]; then
425-
if [[ $xml =~ \<numa[^\>]*node=\'([0-9]+)\' ]]; then
426-
echo "${BASH_REMATCH[1]}"
427-
return
428-
fi
429-
fi
430-
echo "-1"
413+
local node
414+
node=$(echo "$xml" | xmlstarlet sel -t -v "//numa/@node" 2>/dev/null || true)
415+
echo "${node:--1}"
431416
}
432417

433418
# Given a PCI address, return its PCI root (the top‐level bridge ID, e.g. "0000:00:03")
@@ -438,8 +423,9 @@ get_pci_root() {
438423

439424
if [[ -n "$xml" ]]; then
440425
# Extract the parent device from XML
441-
if [[ $xml =~ \<parent\>([^\<]+)\<\/parent\> ]]; then
442-
local parent="${BASH_REMATCH[1]}"
426+
local parent
427+
parent=$(echo "$xml" | xmlstarlet sel -t -v "/device/parent" 2>/dev/null || true)
428+
if [[ -n "$parent" ]]; then
443429
# If parent is a PCI device, recursively find its root
444430
if [[ $parent =~ ^pci_0000_([0-9A-Fa-f]{2})_([0-9A-Fa-f]{2})_([0-9A-Fa-f])$ ]]; then
445431
local parent_addr="${BASH_REMATCH[1]}:${BASH_REMATCH[2]}.${BASH_REMATCH[3]}"
@@ -470,32 +456,24 @@ for VM in "${VMS[@]}"; do
470456
continue
471457
fi
472458

473-
flat_xml=$(echo "$xml" | tr -d '\n\r')
474-
475-
# -- PCI hostdevs: locate <hostdev type='pci'> blocks and extract BDF --
476-
pci_dev_xml=$flat_xml
477-
while [[ $pci_dev_xml =~ (<hostdev.*type=\'pci\'.*?<\/hostdev>) ]]; do
478-
hostdev_block="${BASH_REMATCH[1]}"
479-
if [[ $hostdev_block =~ bus=\'0x([0-9a-fA-F]{2})\'[[:space:]]*slot=\'0x([0-9a-fA-F]{2})\'[[:space:]]*function=\'0x([0-9a-fA-F])\' ]]; then
480-
B="${BASH_REMATCH[1]}"
481-
S="${BASH_REMATCH[2]}"
482-
F="${BASH_REMATCH[3]}"
483-
BDF="${B}:${S}.${F}"
484-
pci_to_vm["$BDF"]="$VM"
485-
fi
486-
pci_dev_xml="${pci_dev_xml#*$hostdev_block}" # Move to next match
487-
done
488-
489-
# -- MDEV hostdevs: locate <hostdev type='mdev'> and extract UUID --
490-
mdev_xml=$flat_xml
491-
while [[ $mdev_xml =~ (<hostdev.*type=\'mdev\'.*?<\/hostdev>) ]]; do
492-
hostdev_block="${BASH_REMATCH[1]}"
493-
if [[ $hostdev_block =~ uuid=\'([0-9a-fA-F-]+)\' ]]; then
494-
UUID="${BASH_REMATCH[1]}"
495-
mdev_to_vm["$UUID"]="$VM"
496-
fi
497-
mdev_xml="${mdev_xml#*$hostdev_block}" # Move to next match
498-
done
459+
# -- PCI hostdevs: use xmlstarlet to extract BDF for all PCI host devices --
460+
while read -r bus slot func; do
461+
[[ -n "$bus" && -n "$slot" && -n "$func" ]] || continue
462+
# Format to match lspci output (e.g., 01:00.0) by padding with zeros
463+
bus_fmt=$(printf "%02x" "0x$bus")
464+
slot_fmt=$(printf "%02x" "0x$slot")
465+
func_fmt=$(printf "%x" "0x$func")
466+
BDF="$bus_fmt:$slot_fmt.$func_fmt"
467+
pci_to_vm["$BDF"]="$VM"
468+
done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='pci']/source/address" \
469+
-v "substring-after(@bus, '0x')" -o " " \
470+
-v "substring-after(@slot, '0x')" -o " " \
471+
-v "substring-after(@function, '0x')" -n 2>/dev/null || true)
472+
473+
# -- MDEV hostdevs: use xmlstarlet to extract UUIDs --
474+
while IFS= read -r UUID; do
475+
[[ -n "$UUID" ]] && mdev_to_vm["$UUID"]="$VM"
476+
done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']" -v "@uuid" -n 2>/dev/null || true)
499477
done
500478

501479
# Helper: convert a VM name to JSON value (quoted string or null)

server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1058,7 +1058,7 @@ private void setStateAndVmName(VgpuTypesInfo deviceInfo, GpuDeviceVO device, Gpu
10581058
device.setState(GpuDevice.State.Free);
10591059
} else {
10601060
VMInstanceVO vm = vmInstanceDao.findById(device.getVmId());
1061-
if (vm != null && vm.getState().equals(VirtualMachine.State.Stopped)) {
1061+
if (vm != null && vm.getState().equals(VirtualMachine.State.Stopped) && !GpuDetachOnStop.valueIn(vm.getDomainId())) {
10621062
device.setState(GpuDevice.State.Allocated);
10631063
} else {
10641064
logger.warn("VM with ID {} not found for GPU device {}. Allocated to a removed VM. Setting state to Free.",

0 commit comments

Comments
 (0)