@@ -130,17 +130,21 @@ func (m *MonitorService) LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.Monit
130130 }
131131 if len (req .ProductName ) == 0 {
132132 if gpuExist {
133+ data .GPUType = "gpu"
133134 gpuInfo , err := gpuclient .LoadGpuInfo ()
134135 if err != nil || len (gpuInfo .GPUs ) == 0 {
136+ global .LOG .Error ("Load GPU info failed or no GPU found, err: " , err )
135137 return data , buserr .New ("ErrRecordNotFound" )
136138 }
137139 req .ProductName = gpuInfo .GPUs [0 ].ProductName
138140 for _ , item := range gpuInfo .GPUs {
139141 data .ProductNames = append (data .ProductNames , item .ProductName )
140142 }
141143 } else {
144+ data .GPUType = "xpu"
142145 xpuInfo , err := xpuClient .LoadGpuInfo ()
143146 if err != nil || len (xpuInfo .Xpu ) == 0 {
147+ global .LOG .Error ("Load XPU info failed or no XPU found, err: " , err )
144148 return data , buserr .New ("ErrRecordNotFound" )
145149 }
146150 req .ProductName = xpuInfo .Xpu [0 ].Basic .DeviceName
@@ -158,15 +162,18 @@ func (m *MonitorService) LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.Monit
158162 data .Date = append (data .Date , gpu .CreatedAt )
159163 data .GPUValue = append (data .GPUValue , gpu .GPUUtil )
160164 data .TemperatureValue = append (data .TemperatureValue , gpu .Temperature )
161- data .PowerValue = append (data .PowerValue , dto.GPUPowerUsageHelper {
162- Total : gpu .MaxPowerLimit ,
163- Used : gpu .PowerDraw ,
164- Percent : gpu .PowerDraw / gpu .MaxPowerLimit * 100 ,
165- })
165+ powerItem := dto.GPUPowerUsageHelper {
166+ Total : gpu .MaxPowerLimit ,
167+ Used : gpu .PowerDraw ,
168+ }
169+ if powerItem .Total != 0 {
170+ powerItem .Percent = powerItem .Used / powerItem .Total
171+ }
172+ data .PowerValue = append (data .PowerValue , powerItem )
166173 memItem := dto.GPUMemoryUsageHelper {
167174 Total : gpu .MemTotal ,
168175 Used : gpu .MemUsed ,
169- Percent : float64 ( gpu .MemUsed ) / float64 ( gpu .MemTotal ) * 100 ,
176+ Percent : gpu .MemUsed / gpu .MemTotal * 100 ,
170177 }
171178 var process []dto.GPUProcess
172179 if err := json .Unmarshal ([]byte (gpu .Processes ), & process ); err == nil {
@@ -563,14 +570,13 @@ func saveGPUDataToDB() {
563570 var list []model.MonitorGPU
564571 for _ , gpuItem := range gpuInfo .GPUs {
565572 item := model.MonitorGPU {
566- ProductName : gpuItem .ProductName ,
567- GPUUtil : loadGPUInfoFloat (gpuItem .GPUUtil ),
568- Temperature : loadGPUInfoInt (gpuItem .Temperature ),
569- PowerDraw : loadGPUInfoFloat (gpuItem .PowerDraw ),
570- MaxPowerLimit : loadGPUInfoFloat (gpuItem .MaxPowerLimit ),
571- MemUsed : loadGPUInfoInt (gpuItem .MemUsed ),
572- MemTotal : loadGPUInfoInt (gpuItem .MemTotal ),
573- FanSpeed : loadGPUInfoInt (gpuItem .FanSpeed ),
573+ ProductName : gpuItem .ProductName ,
574+ GPUUtil : loadGPUInfoFloat (gpuItem .GPUUtil ),
575+ Temperature : loadGPUInfoInt (gpuItem .Temperature ),
576+ PowerDraw : loadGPUInfoFloat (gpuItem .PowerDraw ),
577+ MemUsed : loadGPUInfoFloat (gpuItem .MemUsed ),
578+ MemTotal : loadGPUInfoFloat (gpuItem .MemTotal ),
579+ FanSpeed : loadGPUInfoInt (gpuItem .FanSpeed ),
574580 }
575581 process , _ := json .Marshal (gpuItem .Processes )
576582 if len (process ) != 0 {
@@ -595,25 +601,28 @@ func saveXPUDataToDB() {
595601 var list []model.MonitorGPU
596602 for _ , xpuItem := range xpuInfo .Xpu {
597603 item := model.MonitorGPU {
598- ProductName : xpuItem .Basic .DeviceName ,
599- GPUUtil : loadGPUInfoFloat (xpuItem .Stats .MemoryUtil ),
600- Temperature : loadGPUInfoInt (xpuItem .Stats .Temperature ),
601- PowerDraw : loadGPUInfoFloat (xpuItem .Stats .Power ),
602- MemUsed : loadGPUInfoInt (xpuItem .Stats .MemoryUsed ),
603- MemTotal : loadGPUInfoInt (xpuItem .Basic .Memory ),
604- }
605- var processItem []dto.GPUProcess
606- for _ , ps := range xpuItem .Processes {
607- processItem = append (processItem , dto.GPUProcess {
608- Pid : fmt .Sprintf ("%v" , ps .PID ),
609- Type : ps .SHR ,
610- ProcessName : ps .Command ,
611- UsedMemory : ps .Memory ,
612- })
613- }
614- process , _ := json .Marshal (processItem )
615- if len (process ) != 0 {
616- item .Processes = string (process )
604+ ProductName : xpuItem .Basic .DeviceName ,
605+ GPUUtil : loadGPUInfoFloat (xpuItem .Stats .MemoryUtil ),
606+ Temperature : loadGPUInfoInt (xpuItem .Stats .Temperature ),
607+ PowerDraw : loadGPUInfoFloat (xpuItem .Stats .Power ),
608+ MaxPowerLimit : float64 (xpuItem .Config .PowerLimit ),
609+ MemUsed : loadGPUInfoFloat (xpuItem .Stats .MemoryUsed ),
610+ MemTotal : loadGPUInfoFloat (xpuItem .Basic .Memory ),
611+ }
612+ if len (xpuItem .Processes ) != 0 {
613+ var processItem []dto.GPUProcess
614+ for _ , ps := range xpuItem .Processes {
615+ processItem = append (processItem , dto.GPUProcess {
616+ Pid : fmt .Sprintf ("%v" , ps .PID ),
617+ Type : ps .SHR ,
618+ ProcessName : ps .Command ,
619+ UsedMemory : ps .Memory ,
620+ })
621+ }
622+ process , _ := json .Marshal (processItem )
623+ if len (process ) != 0 {
624+ item .Processes = string (process )
625+ }
617626 }
618627 list = append (list , item )
619628 }
@@ -632,6 +641,7 @@ func loadGPUInfoInt(val string) int {
632641}
633642func loadGPUInfoFloat (val string ) float64 {
634643 valItem := strings .ReplaceAll (val , "W" , "" )
644+ valItem = strings .ReplaceAll (valItem , "MB" , "" )
635645 valItem = strings .ReplaceAll (valItem , "%" , "" )
636646 valItem = strings .TrimSpace (valItem )
637647 data , _ := strconv .ParseFloat (valItem , 64 )
0 commit comments