Skip to content

Commit 32cf0c6

Browse files
committed
feat: Support gpu of schema v11
1 parent 90eca45 commit 32cf0c6

File tree

5 files changed

+367
-4
lines changed

5 files changed

+367
-4
lines changed

agent/utils/ai_tools/gpu/gpu.go

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ import (
1212

1313
"github.com/1Panel-dev/1Panel/agent/global"
1414
"github.com/1Panel-dev/1Panel/agent/utils/ai_tools/gpu/common"
15+
"github.com/1Panel-dev/1Panel/agent/utils/ai_tools/gpu/schema_v11"
1516
"github.com/1Panel-dev/1Panel/agent/utils/ai_tools/gpu/schema_v12"
1617
"github.com/1Panel-dev/1Panel/agent/utils/cmd"
1718
)
@@ -58,8 +59,14 @@ func (n NvidiaSMI) LoadGpuInfo() (*common.GpuInfo, error) {
5859
break
5960
}
6061

61-
if schema != "v12" {
62-
return &common.GpuInfo{}, nil
62+
switch schema {
63+
case "v12":
64+
return schema_v12.Parse(data)
65+
case "v11":
66+
return schema_v11.Parse(data)
67+
default:
68+
global.LOG.Errorf("dont support such schema version %s", schema)
6369
}
64-
return schema_v12.Parse(data)
70+
71+
return &common.GpuInfo{}, nil
6572
}
Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
package schema_v11
2+
3+
import (
4+
"encoding/xml"
5+
6+
"github.com/1Panel-dev/1Panel/agent/utils/ai_tools/gpu/common"
7+
)
8+
9+
func Parse(buf []byte) (*common.GpuInfo, error) {
10+
var (
11+
s smi
12+
info common.GpuInfo
13+
)
14+
if err := xml.Unmarshal(buf, &s); err != nil {
15+
return nil, err
16+
}
17+
18+
info.Type = "nvidia"
19+
info.CudaVersion = s.CudaVersion
20+
info.DriverVersion = s.DriverVersion
21+
if len(s.Gpu) == 0 {
22+
return &info, nil
23+
}
24+
for i := 0; i < len(s.Gpu); i++ {
25+
var gpuItem common.GPU
26+
gpuItem.Index = uint(i)
27+
gpuItem.ProductName = s.Gpu[i].ProductName
28+
gpuItem.PersistenceMode = s.Gpu[i].PersistenceMode
29+
gpuItem.BusID = s.Gpu[i].ID
30+
gpuItem.DisplayActive = s.Gpu[i].DisplayActive
31+
gpuItem.ECC = s.Gpu[i].EccErrors.Volatile.DramUncorrectable
32+
gpuItem.FanSpeed = s.Gpu[i].FanSpeed
33+
34+
gpuItem.Temperature = s.Gpu[i].Temperature.GpuTemp
35+
gpuItem.PerformanceState = s.Gpu[i].PerformanceState
36+
gpuItem.PowerDraw = s.Gpu[i].PowerReadings.PowerDraw
37+
gpuItem.MaxPowerLimit = s.Gpu[i].PowerReadings.MaxPowerLimit
38+
gpuItem.MemUsed = s.Gpu[i].FbMemoryUsage.Used
39+
gpuItem.MemTotal = s.Gpu[i].FbMemoryUsage.Total
40+
gpuItem.GPUUtil = s.Gpu[i].Utilization.GpuUtil
41+
gpuItem.ComputeMode = s.Gpu[i].ComputeMode
42+
gpuItem.MigMode = s.Gpu[i].MigMode.CurrentMig
43+
44+
for _, process := range s.Gpu[i].Processes.ProcessInfo {
45+
gpuItem.Processes = append(gpuItem.Processes, common.Process{
46+
Pid: process.Pid,
47+
Type: process.Type,
48+
ProcessName: process.ProcessName,
49+
UsedMemory: process.UsedMemory,
50+
})
51+
}
52+
info.GPUs = append(info.GPUs, gpuItem)
53+
}
54+
return &info, nil
55+
}
Lines changed: 285 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,285 @@
1+
package schema_v11
2+
3+
// smi is the decoding target for the XML GPU report handled by this package.
//
// Each field's xml tag names the element (or, for Gpu.ID, the attribute) it is
// filled from. Every value is kept as the raw string found in the document;
// no units are stripped and nothing is converted to a number. Elements that a
// document does not contain leave the corresponding field at its zero value,
// and elements that have no field here are ignored by encoding/xml.
type smi struct {
	AttachedGpus  string `xml:"attached_gpus"`
	CudaVersion   string `xml:"cuda_version"`
	DriverVersion string `xml:"driver_version"`
	Gpu           []struct {
		// ID is read from the id attribute of the gpu element, not from a
		// child element.
		ID                       string   `xml:"id,attr"`
		AccountedProcesses       struct{} `xml:"accounted_processes"`
		AccountingMode           string   `xml:"accounting_mode"`
		AccountingModeBufferSize string   `xml:"accounting_mode_buffer_size"`
		AddressingMode           string   `xml:"addressing_mode"`
		ApplicationsClocks       struct {
			GraphicsClock string `xml:"graphics_clock"`
			MemClock      string `xml:"mem_clock"`
		} `xml:"applications_clocks"`
		Bar1MemoryUsage struct {
			Free  string `xml:"free"`
			Total string `xml:"total"`
			Used  string `xml:"used"`
		} `xml:"bar1_memory_usage"`
		BoardID                string `xml:"board_id"`
		BoardPartNumber        string `xml:"board_part_number"`
		CcProtectedMemoryUsage struct {
			Free  string `xml:"free"`
			Total string `xml:"total"`
			Used  string `xml:"used"`
		} `xml:"cc_protected_memory_usage"`
		ClockPolicy struct {
			AutoBoost        string `xml:"auto_boost"`
			AutoBoostDefault string `xml:"auto_boost_default"`
		} `xml:"clock_policy"`
		Clocks struct {
			GraphicsClock string `xml:"graphics_clock"`
			MemClock      string `xml:"mem_clock"`
			SmClock       string `xml:"sm_clock"`
			VideoClock    string `xml:"video_clock"`
		} `xml:"clocks"`
		ClocksEventReasons struct {
			ClocksEventReasonApplicationsClocksSetting string `xml:"clocks_event_reason_applications_clocks_setting"`
			ClocksEventReasonDisplayClocksSetting      string `xml:"clocks_event_reason_display_clocks_setting"`
			ClocksEventReasonGpuIdle                   string `xml:"clocks_event_reason_gpu_idle"`
			ClocksEventReasonHwPowerBrakeSlowdown      string `xml:"clocks_event_reason_hw_power_brake_slowdown"`
			ClocksEventReasonHwSlowdown                string `xml:"clocks_event_reason_hw_slowdown"`
			ClocksEventReasonHwThermalSlowdown         string `xml:"clocks_event_reason_hw_thermal_slowdown"`
			ClocksEventReasonSwPowerCap                string `xml:"clocks_event_reason_sw_power_cap"`
			ClocksEventReasonSwThermalSlowdown         string `xml:"clocks_event_reason_sw_thermal_slowdown"`
			ClocksEventReasonSyncBoost                 string `xml:"clocks_event_reason_sync_boost"`
		} `xml:"clocks_event_reasons"`
		ComputeMode               string `xml:"compute_mode"`
		DefaultApplicationsClocks struct {
			GraphicsClock string `xml:"graphics_clock"`
			MemClock      string `xml:"mem_clock"`
		} `xml:"default_applications_clocks"`
		DeferredClocks struct {
			MemClock string `xml:"mem_clock"`
		} `xml:"deferred_clocks"`
		DisplayActive string `xml:"display_active"`
		DisplayMode   string `xml:"display_mode"`
		DriverModel   struct {
			CurrentDm string `xml:"current_dm"`
			PendingDm string `xml:"pending_dm"`
		} `xml:"driver_model"`
		EccErrors struct {
			Aggregate struct {
				DramCorrectable   string `xml:"dram_correctable"`
				DramUncorrectable string `xml:"dram_uncorrectable"`
				SramCorrectable   string `xml:"sram_correctable"`
				SramUncorrectable string `xml:"sram_uncorrectable"`
			} `xml:"aggregate"`
			Volatile struct {
				DramCorrectable   string `xml:"dram_correctable"`
				DramUncorrectable string `xml:"dram_uncorrectable"`
				SramCorrectable   string `xml:"sram_correctable"`
				SramUncorrectable string `xml:"sram_uncorrectable"`
			} `xml:"volatile"`
		} `xml:"ecc_errors"`
		EccMode struct {
			CurrentEcc string `xml:"current_ecc"`
			PendingEcc string `xml:"pending_ecc"`
		} `xml:"ecc_mode"`
		EncoderStats struct {
			AverageFps     string `xml:"average_fps"`
			AverageLatency string `xml:"average_latency"`
			SessionCount   string `xml:"session_count"`
		} `xml:"encoder_stats"`
		Fabric struct {
			State  string `xml:"state"`
			Status string `xml:"status"`
		} `xml:"fabric"`
		FanSpeed      string `xml:"fan_speed"`
		FbMemoryUsage struct {
			Free     string `xml:"free"`
			Reserved string `xml:"reserved"`
			Total    string `xml:"total"`
			Used     string `xml:"used"`
		} `xml:"fb_memory_usage"`
		FbcStats struct {
			AverageFps     string `xml:"average_fps"`
			AverageLatency string `xml:"average_latency"`
			SessionCount   string `xml:"session_count"`
		} `xml:"fbc_stats"`
		GpuFruPartNumber string `xml:"gpu_fru_part_number"`
		GpuModuleID      string `xml:"gpu_module_id"`
		GpuOperationMode struct {
			CurrentGom string `xml:"current_gom"`
			PendingGom string `xml:"pending_gom"`
		} `xml:"gpu_operation_mode"`
		GpuPartNumber  string `xml:"gpu_part_number"`
		GpuResetStatus struct {
			DrainAndResetRecommended string `xml:"drain_and_reset_recommended"`
			ResetRequired            string `xml:"reset_required"`
		} `xml:"gpu_reset_status"`
		GpuVirtualizationMode struct {
			HostVgpuMode       string `xml:"host_vgpu_mode"`
			VirtualizationMode string `xml:"virtualization_mode"`
		} `xml:"gpu_virtualization_mode"`
		GspFirmwareVersion string `xml:"gsp_firmware_version"`
		Ibmnpu             struct {
			RelaxedOrderingMode string `xml:"relaxed_ordering_mode"`
		} `xml:"ibmnpu"`
		InforomVersion struct {
			EccObject  string `xml:"ecc_object"`
			ImgVersion string `xml:"img_version"`
			OemObject  string `xml:"oem_object"`
			PwrObject  string `xml:"pwr_object"`
		} `xml:"inforom_version"`
		MaxClocks struct {
			GraphicsClock string `xml:"graphics_clock"`
			MemClock      string `xml:"mem_clock"`
			SmClock       string `xml:"sm_clock"`
			VideoClock    string `xml:"video_clock"`
		} `xml:"max_clocks"`
		MaxCustomerBoostClocks struct {
			GraphicsClock string `xml:"graphics_clock"`
		} `xml:"max_customer_boost_clocks"`
		MigDevices struct {
			MigDevice []struct {
				Index             string `xml:"index"`
				GpuInstanceID     string `xml:"gpu_instance_id"`
				ComputeInstanceID string `xml:"compute_instance_id"`
				EccErrorCount     struct {
					// Text receives the character data that sits
					// directly inside ecc_error_count.
					Text          string `xml:",chardata"`
					VolatileCount struct {
						SramUncorrectable string `xml:"sram_uncorrectable"`
					} `xml:"volatile_count"`
				} `xml:"ecc_error_count"`
				FbMemoryUsage struct {
					Total    string `xml:"total"`
					Reserved string `xml:"reserved"`
					Used     string `xml:"used"`
					Free     string `xml:"free"`
				} `xml:"fb_memory_usage"`
				Bar1MemoryUsage struct {
					Total string `xml:"total"`
					Used  string `xml:"used"`
					Free  string `xml:"free"`
				} `xml:"bar1_memory_usage"`
			} `xml:"mig_device"`
		} `xml:"mig_devices"`
		MigMode struct {
			CurrentMig string `xml:"current_mig"`
			PendingMig string `xml:"pending_mig"`
		} `xml:"mig_mode"`
		MinorNumber         string `xml:"minor_number"`
		ModulePowerReadings struct {
			CurrentPowerLimit   string `xml:"current_power_limit"`
			DefaultPowerLimit   string `xml:"default_power_limit"`
			MaxPowerLimit       string `xml:"max_power_limit"`
			MinPowerLimit       string `xml:"min_power_limit"`
			PowerDraw           string `xml:"power_draw"`
			PowerState          string `xml:"power_state"`
			RequestedPowerLimit string `xml:"requested_power_limit"`
		} `xml:"module_power_readings"`
		MultigpuBoard string `xml:"multigpu_board"`
		Pci           struct {
			AtomicCapsInbound  string `xml:"atomic_caps_inbound"`
			AtomicCapsOutbound string `xml:"atomic_caps_outbound"`
			PciBridgeChip      struct {
				BridgeChipFw   string `xml:"bridge_chip_fw"`
				BridgeChipType string `xml:"bridge_chip_type"`
			} `xml:"pci_bridge_chip"`
			PciBus         string `xml:"pci_bus"`
			PciBusID       string `xml:"pci_bus_id"`
			PciDevice      string `xml:"pci_device"`
			PciDeviceID    string `xml:"pci_device_id"`
			PciDomain      string `xml:"pci_domain"`
			PciGpuLinkInfo struct {
				LinkWidths struct {
					CurrentLinkWidth string `xml:"current_link_width"`
					MaxLinkWidth     string `xml:"max_link_width"`
				} `xml:"link_widths"`
				PcieGen struct {
					CurrentLinkGen       string `xml:"current_link_gen"`
					DeviceCurrentLinkGen string `xml:"device_current_link_gen"`
					MaxDeviceLinkGen     string `xml:"max_device_link_gen"`
					MaxHostLinkGen       string `xml:"max_host_link_gen"`
					MaxLinkGen           string `xml:"max_link_gen"`
				} `xml:"pcie_gen"`
			} `xml:"pci_gpu_link_info"`
			PciSubSystemID        string `xml:"pci_sub_system_id"`
			ReplayCounter         string `xml:"replay_counter"`
			ReplayRolloverCounter string `xml:"replay_rollover_counter"`
			RxUtil                string `xml:"rx_util"`
			TxUtil                string `xml:"tx_util"`
		} `xml:"pci"`
		PerformanceState string `xml:"performance_state"`
		PersistenceMode  string `xml:"persistence_mode"`
		PowerReadings    struct {
			PowerState         string `xml:"power_state"`
			PowerManagement    string `xml:"power_management"`
			PowerDraw          string `xml:"power_draw"`
			PowerLimit         string `xml:"power_limit"`
			DefaultPowerLimit  string `xml:"default_power_limit"`
			EnforcedPowerLimit string `xml:"enforced_power_limit"`
			MinPowerLimit      string `xml:"min_power_limit"`
			MaxPowerLimit      string `xml:"max_power_limit"`
		} `xml:"power_readings"`
		Processes struct {
			ProcessInfo []struct {
				Pid         string `xml:"pid"`
				Type        string `xml:"type"`
				ProcessName string `xml:"process_name"`
				UsedMemory  string `xml:"used_memory"`
			} `xml:"process_info"`
		} `xml:"processes"`
		ProductArchitecture string `xml:"product_architecture"`
		ProductBrand        string `xml:"product_brand"`
		ProductName         string `xml:"product_name"`
		RemappedRows        struct {
			// Manually added
			Correctable   string `xml:"remapped_row_corr"`
			Uncorrectable string `xml:"remapped_row_unc"`
			Pending       string `xml:"remapped_row_pending"`
			Failure       string `xml:"remapped_row_failure"`
		} `xml:"remapped_rows"`
		RetiredPages struct {
			DoubleBitRetirement struct {
				RetiredCount    string `xml:"retired_count"`
				RetiredPagelist string `xml:"retired_pagelist"`
			} `xml:"double_bit_retirement"`
			MultipleSingleBitRetirement struct {
				RetiredCount    string `xml:"retired_count"`
				RetiredPagelist string `xml:"retired_pagelist"`
			} `xml:"multiple_single_bit_retirement"`
			PendingBlacklist  string `xml:"pending_blacklist"`
			PendingRetirement string `xml:"pending_retirement"`
		} `xml:"retired_pages"`
		Serial          string `xml:"serial"`
		SupportedClocks struct {
			SupportedMemClock []struct {
				SupportedGraphicsClock []string `xml:"supported_graphics_clock"`
				Value                  string   `xml:"value"`
			} `xml:"supported_mem_clock"`
		} `xml:"supported_clocks"`
		SupportedGpuTargetTemp struct {
			GpuTargetTempMax string `xml:"gpu_target_temp_max"`
			GpuTargetTempMin string `xml:"gpu_target_temp_min"`
		} `xml:"supported_gpu_target_temp"`
		Temperature struct {
			GpuTargetTemperature   string `xml:"gpu_target_temperature"`
			GpuTemp                string `xml:"gpu_temp"`
			GpuTempMaxGpuThreshold string `xml:"gpu_temp_max_gpu_threshold"`
			GpuTempMaxMemThreshold string `xml:"gpu_temp_max_mem_threshold"`
			GpuTempMaxThreshold    string `xml:"gpu_temp_max_threshold"`
			GpuTempSlowThreshold   string `xml:"gpu_temp_slow_threshold"`
			GpuTempTlimit          string `xml:"gpu_temp_tlimit"`
			MemoryTemp             string `xml:"memory_temp"`
		} `xml:"temperature"`
		Utilization struct {
			DecoderUtil string `xml:"decoder_util"`
			EncoderUtil string `xml:"encoder_util"`
			GpuUtil     string `xml:"gpu_util"`
			JpegUtil    string `xml:"jpeg_util"`
			MemoryUtil  string `xml:"memory_util"`
			OfaUtil     string `xml:"ofa_util"`
		} `xml:"utilization"`
		UUID         string `xml:"uuid"`
		VbiosVersion string `xml:"vbios_version"`
		Voltage      struct {
			GraphicsVolt string `xml:"graphics_volt"`
		} `xml:"voltage"`
	} `xml:"gpu"`
	Timestamp string `xml:"timestamp"`
}

0 commit comments

Comments
 (0)