@@ -131,17 +131,21 @@ func (m *MonitorService) LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.Monit
131131 }
132132 if len (req .ProductName ) == 0 {
133133 if gpuExist {
134+ data .GPUType = "gpu"
134135 gpuInfo , err := gpuclient .LoadGpuInfo ()
135136 if err != nil || len (gpuInfo .GPUs ) == 0 {
137+ global .LOG .Error ("Load GPU info failed or no GPU found, err: " , err )
136138 return data , buserr .New ("ErrRecordNotFound" )
137139 }
138140 req .ProductName = gpuInfo .GPUs [0 ].ProductName
139141 for _ , item := range gpuInfo .GPUs {
140142 data .ProductNames = append (data .ProductNames , item .ProductName )
141143 }
142144 } else {
145+ data .GPUType = "xpu"
143146 xpuInfo , err := xpuClient .LoadGpuInfo ()
144147 if err != nil || len (xpuInfo .Xpu ) == 0 {
148+ global .LOG .Error ("Load XPU info failed or no XPU found, err: " , err )
145149 return data , buserr .New ("ErrRecordNotFound" )
146150 }
147151 req .ProductName = xpuInfo .Xpu [0 ].Basic .DeviceName
@@ -159,15 +163,18 @@ func (m *MonitorService) LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.Monit
159163 data .Date = append (data .Date , gpu .CreatedAt )
160164 data .GPUValue = append (data .GPUValue , gpu .GPUUtil )
161165 data .TemperatureValue = append (data .TemperatureValue , gpu .Temperature )
162- data .PowerValue = append (data .PowerValue , dto.GPUPowerUsageHelper {
163- Total : gpu .MaxPowerLimit ,
164- Used : gpu .PowerDraw ,
165- Percent : gpu .PowerDraw / gpu .MaxPowerLimit * 100 ,
166- })
166+ powerItem := dto.GPUPowerUsageHelper {
167+ Total : gpu .MaxPowerLimit ,
168+ Used : gpu .PowerDraw ,
169+ }
170+ if powerItem .Total != 0 {
171+ powerItem .Percent = powerItem .Used / powerItem .Total
172+ }
173+ data .PowerValue = append (data .PowerValue , powerItem )
167174 memItem := dto.GPUMemoryUsageHelper {
168175 Total : gpu .MemTotal ,
169176 Used : gpu .MemUsed ,
170- Percent : float64 ( gpu .MemUsed ) / float64 ( gpu .MemTotal ) * 100 ,
177+ Percent : gpu .MemUsed / gpu .MemTotal * 100 ,
171178 }
172179 var process []dto.GPUProcess
173180 if err := json .Unmarshal ([]byte (gpu .Processes ), & process ); err == nil {
@@ -564,14 +571,13 @@ func saveGPUDataToDB() {
564571 var list []model.MonitorGPU
565572 for _ , gpuItem := range gpuInfo .GPUs {
566573 item := model.MonitorGPU {
567- ProductName : gpuItem .ProductName ,
568- GPUUtil : loadGPUInfoFloat (gpuItem .GPUUtil ),
569- Temperature : loadGPUInfoInt (gpuItem .Temperature ),
570- PowerDraw : loadGPUInfoFloat (gpuItem .PowerDraw ),
571- MaxPowerLimit : loadGPUInfoFloat (gpuItem .MaxPowerLimit ),
572- MemUsed : loadGPUInfoInt (gpuItem .MemUsed ),
573- MemTotal : loadGPUInfoInt (gpuItem .MemTotal ),
574- FanSpeed : loadGPUInfoInt (gpuItem .FanSpeed ),
574+ ProductName : gpuItem .ProductName ,
575+ GPUUtil : loadGPUInfoFloat (gpuItem .GPUUtil ),
576+ Temperature : loadGPUInfoInt (gpuItem .Temperature ),
577+ PowerDraw : loadGPUInfoFloat (gpuItem .PowerDraw ),
578+ MemUsed : loadGPUInfoFloat (gpuItem .MemUsed ),
579+ MemTotal : loadGPUInfoFloat (gpuItem .MemTotal ),
580+ FanSpeed : loadGPUInfoInt (gpuItem .FanSpeed ),
575581 }
576582 process , _ := json .Marshal (gpuItem .Processes )
577583 if len (process ) != 0 {
@@ -596,25 +602,28 @@ func saveXPUDataToDB() {
596602 var list []model.MonitorGPU
597603 for _ , xpuItem := range xpuInfo .Xpu {
598604 item := model.MonitorGPU {
599- ProductName : xpuItem .Basic .DeviceName ,
600- GPUUtil : loadGPUInfoFloat (xpuItem .Stats .MemoryUtil ),
601- Temperature : loadGPUInfoInt (xpuItem .Stats .Temperature ),
602- PowerDraw : loadGPUInfoFloat (xpuItem .Stats .Power ),
603- MemUsed : loadGPUInfoInt (xpuItem .Stats .MemoryUsed ),
604- MemTotal : loadGPUInfoInt (xpuItem .Basic .Memory ),
605- }
606- var processItem []dto.GPUProcess
607- for _ , ps := range xpuItem .Processes {
608- processItem = append (processItem , dto.GPUProcess {
609- Pid : fmt .Sprintf ("%v" , ps .PID ),
610- Type : ps .SHR ,
611- ProcessName : ps .Command ,
612- UsedMemory : ps .Memory ,
613- })
614- }
615- process , _ := json .Marshal (processItem )
616- if len (process ) != 0 {
617- item .Processes = string (process )
605+ ProductName : xpuItem .Basic .DeviceName ,
606+ GPUUtil : loadGPUInfoFloat (xpuItem .Stats .MemoryUtil ),
607+ Temperature : loadGPUInfoInt (xpuItem .Stats .Temperature ),
608+ PowerDraw : loadGPUInfoFloat (xpuItem .Stats .Power ),
609+ MaxPowerLimit : float64 (xpuItem .Config .PowerLimit ),
610+ MemUsed : loadGPUInfoFloat (xpuItem .Stats .MemoryUsed ),
611+ MemTotal : loadGPUInfoFloat (xpuItem .Basic .Memory ),
612+ }
613+ if len (xpuItem .Processes ) != 0 {
614+ var processItem []dto.GPUProcess
615+ for _ , ps := range xpuItem .Processes {
616+ processItem = append (processItem , dto.GPUProcess {
617+ Pid : fmt .Sprintf ("%v" , ps .PID ),
618+ Type : ps .SHR ,
619+ ProcessName : ps .Command ,
620+ UsedMemory : ps .Memory ,
621+ })
622+ }
623+ process , _ := json .Marshal (processItem )
624+ if len (process ) != 0 {
625+ item .Processes = string (process )
626+ }
618627 }
619628 list = append (list , item )
620629 }
@@ -633,6 +642,7 @@ func loadGPUInfoInt(val string) int {
633642}
634643func loadGPUInfoFloat (val string ) float64 {
635644 valItem := strings .ReplaceAll (val , "W" , "" )
645+ valItem = strings .ReplaceAll (valItem , "MB" , "" )
636646 valItem = strings .ReplaceAll (valItem , "%" , "" )
637647 valItem = strings .TrimSpace (valItem )
638648 data , _ := strconv .ParseFloat (valItem , 64 )
0 commit comments