Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 12 additions & 17 deletions agent/app/dto/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,25 +48,20 @@ type MonitorGPUSearch struct {
EndTime time.Time `json:"endTime"`
}
type MonitorGPUData struct {
Date []time.Time `json:"date"`
GPUValue []float64 `json:"gpuValue"`
TemperatureValue []float64 `json:"temperatureValue"`
PowerValue []GPUPowerUsageHelper `json:"powerValue"`
MemoryValue []GPUMemoryUsageHelper `json:"memoryValue"`
SpeedValue []int `json:"speedValue"`
}
type GPUPowerUsageHelper struct {
Total float64 `json:"total"`
Used float64 `json:"used"`
Percent float64 `json:"percent"`
}
type GPUMemoryUsageHelper struct {
Total float64 `json:"total"`
Used float64 `json:"used"`
Percent float64 `json:"percent"`
Date []time.Time `json:"date"`
GPUValue []float64 `json:"gpuValue"`
TemperatureValue []float64 `json:"temperatureValue"`
PowerTotal []float64 `json:"powerTotal"`
PowerUsed []float64 `json:"powerUsed"`
PowerPercent []float64 `json:"powerPercent"`
MemoryTotal []float64 `json:"memoryTotal"`
MemoryUsed []float64 `json:"memoryUsed"`
MemoryPercent []float64 `json:"memoryPercent"`
SpeedValue []int `json:"speedValue"`

GPUProcesses []GPUProcess `json:"gpuProcesses"`
GPUProcesses [][]GPUProcess `json:"gpuProcesses"`
}

type GPUProcess struct {
Pid string `json:"pid"`
Type string `json:"type"`
Expand Down
30 changes: 16 additions & 14 deletions agent/app/service/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,24 +165,27 @@ func (m *MonitorService) LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.Monit
data.Date = append(data.Date, gpu.CreatedAt)
data.GPUValue = append(data.GPUValue, gpu.GPUUtil)
data.TemperatureValue = append(data.TemperatureValue, gpu.Temperature)
powerItem := dto.GPUPowerUsageHelper{
Total: gpu.MaxPowerLimit,
Used: gpu.PowerDraw,
}
if powerItem.Total != 0 {
powerItem.Percent = powerItem.Used / powerItem.Total
data.PowerUsed = append(data.PowerUsed, gpu.PowerDraw)
data.PowerTotal = append(data.PowerTotal, gpu.MaxPowerLimit)
if gpu.MaxPowerLimit != 0 {
data.PowerPercent = append(data.PowerPercent, gpu.PowerDraw/gpu.MaxPowerLimit*100)
} else {
data.PowerPercent = append(data.PowerPercent, float64(0))
}
data.PowerValue = append(data.PowerValue, powerItem)
memItem := dto.GPUMemoryUsageHelper{
Total: gpu.MemTotal,
Used: gpu.MemUsed,
Percent: gpu.MemUsed / gpu.MemTotal * 100,

data.MemoryTotal = append(data.MemoryTotal, gpu.MemTotal)
data.MemoryUsed = append(data.MemoryUsed, gpu.MemUsed)
if gpu.MemTotal != 0 {
data.MemoryPercent = append(data.MemoryPercent, gpu.MemUsed/gpu.MemTotal*100)
} else {
data.MemoryPercent = append(data.MemoryPercent, float64(0))
}
var process []dto.GPUProcess
if err := json.Unmarshal([]byte(gpu.Processes), &process); err == nil {
memItem.GPUProcesses = process
data.GPUProcesses = append(data.GPUProcesses, process)
} else {
data.GPUProcesses = append(data.GPUProcesses, []dto.GPUProcess{})
}
data.MemoryValue = append(data.MemoryValue, memItem)
data.SpeedValue = append(data.SpeedValue, gpu.FanSpeed)
}
return data, nil
Expand Down Expand Up @@ -606,7 +609,6 @@ func saveXPUDataToDB() {
for _, xpuItem := range xpuInfo.Xpu {
item := model.MonitorGPU{
ProductName: fmt.Sprintf("%d - %s", xpuItem.Basic.DeviceID, xpuItem.Basic.DeviceName),
GPUUtil: loadGPUInfoFloat(xpuItem.Stats.MemoryUtil),
Temperature: loadGPUInfoFloat(xpuItem.Stats.Temperature),
PowerDraw: loadGPUInfoFloat(xpuItem.Stats.Power),
MemUsed: loadGPUInfoFloat(xpuItem.Stats.MemoryUsed),
Expand Down
20 changes: 7 additions & 13 deletions frontend/src/api/interface/host.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,20 +174,14 @@ export namespace Host {
date: Array<Date>;
gpuValue: Array<number>;
temperatureValue: Array<number>;
powerValue: Array<GPUPowerUsageHelper>;
memoryValue: Array<GPUMemoryUsageHelper>;
powerTotal: Array<number>;
powerUsed: Array<number>;
powerPercent: Array<number>;
memoryTotal: Array<number>;
memoryUsed: Array<number>;
memoryPercent: Array<number>;
speedValue: Array<number>;
}
export interface GPUPowerUsageHelper {
total: number;
used: number;
percent: number;
}
export interface GPUMemoryUsageHelper {
total: number;
used: number;
percent: number;
gpuProcesses: Array<GPUProcess>;
gpuProcesses: Array<Array<GPUProcess>>;
}
export interface GPUProcess {
pid: string;
Expand Down
56 changes: 18 additions & 38 deletions frontend/src/lang/modules/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -680,42 +680,29 @@ const message = {
whiteListHelper: 'Restrict access to only IPs in the whitelist',
},
gpu: {
gpu: 'GPU Monitor',
base: 'Basic Information',
gpuHelper: 'NVIDIA-SMI or XPU-SMI command not detected on the current system. Please check and try again!',
driverVersion: 'Driver Version',
cudaVersion: 'CUDA Version',
gpu: 'GPU Monitoring',
gpuHelper: 'The system did not detect NVIDIA-SMI or XPU-SMI commands. Please check and try again!',
process: 'Process Information',
type: 'Type',
typeG: 'Graphics',
typeC: 'Compute',
typeCG: 'Compute + Graphics',
typeCG: 'Compute+Graphics',
processName: 'Process Name',
processMemoryUsage: 'Memory Usage',
temperatureHelper: 'High GPU temperature can cause GPU frequency throttling',
performanceStateHelper: 'From P0 (maximum performance) to P12 (minimum performance)',
busID: 'Bus ID',
persistenceMode: 'Persistence Mode',
enabled: 'Enabled',
disabled: 'Disabled',
persistenceModeHelper:
'Persistence mode allows quicker task responses but increases standby power consumption.',
displayActive: 'Graphics Card Initialized',
displayActiveT: 'Yes',
displayActiveF: 'No',
ecc: 'Error Correction and Check Technology',
computeMode: 'Compute Mode',
default: 'Default',
exclusiveProcess: 'Exclusive Process',
exclusiveThread: 'Exclusive Thread',
prohibited: 'Prohibited',
defaultHelper: 'Default: Processes can execute concurrently',
exclusiveProcessHelper:
'Exclusive Process: Only one CUDA context can use the GPU, but can be shared by multiple threads',
exclusiveThreadHelper: 'Exclusive Thread: Only one thread in a CUDA context can use the GPU',
prohibitedHelper: 'Prohibited: Processes are not allowed to execute simultaneously',
migModeHelper: 'Used to create MIG instances for physical isolation of the GPU at the user level.',
migModeNA: 'Not Supported',
shr: 'Shared Memory',
temperatureHelper: 'High GPU temperature may cause GPU frequency reduction',
gpuUtil: 'GPU Utilization',
temperature: 'Temperature',
performanceState: 'Performance State',
powerUsage: 'Power Consumption',
memoryUsage: 'Memory Utilization',
fanSpeed: 'Fan Speed',
power: 'Power',
powerCurrent: 'Current Power',
powerLimit: 'Power Limit',
memory: 'Memory',
memoryUsed: 'Memory Used',
memoryTotal: 'Total Memory',
percent: 'Utilization',
},
mcp: {
server: 'MCP Server',
Expand Down Expand Up @@ -1235,13 +1222,6 @@ const message = {
down: 'Down',
interval: 'Collection Interval',
intervalHelper: 'Please enter an appropriate monitoring collection interval (5 seconds - 12 hours)',

gpuUtil: 'GPU Utilization',
temperature: 'Temperature',
performanceState: 'Performance state',
powerUsage: 'Power usage',
memoryUsage: 'Memory usage',
fanSpeed: 'Fan speed',
},
terminal: {
local: 'Local',
Expand Down
60 changes: 20 additions & 40 deletions frontend/src/lang/modules/es-es.ts
Original file line number Diff line number Diff line change
Expand Up @@ -679,43 +679,29 @@ const message = {
whiteListHelper: 'Restringir el acceso solo a las IP incluidas en la lista blanca',
},
gpu: {
gpu: 'Monitor de GPU',
base: 'Información básica',
gpuHelper:
'No se detectó el comando NVIDIA-SMI o XPU-SMI en el sistema actual. ¡Por favor verifique e intente nuevamente!',
driverVersion: 'Versión del controlador',
cudaVersion: 'Versión de CUDA',
process: 'Información del proceso',
gpu: 'Monitoreo de GPU',
gpuHelper: 'El sistema no detectó comandos NVIDIA-SMI o XPU-SMI. ¡Compruebe e inténtelo de nuevo!',
process: 'Información del Proceso',
type: 'Tipo',
typeG: 'Gráficos',
typeC: 'Cómputo',
typeCG: 'Cómputo + Gráficos',
processName: 'Nombre del proceso',
processMemoryUsage: 'Uso de memoria',
temperatureHelper: 'Una temperatura alta de la GPU puede causar reducción de frecuencia',
performanceStateHelper: 'Desde P0 (máximo rendimiento) hasta P12 (mínimo rendimiento)',
busID: 'ID del bus',
persistenceMode: 'Modo de persistencia',
enabled: 'Habilitado',
disabled: 'Deshabilitado',
persistenceModeHelper:
'El modo de persistencia permite respuestas más rápidas, pero incrementa el consumo en reposo.',
displayActive: 'Tarjeta gráfica inicializada',
displayActiveT: 'Sí',
displayActiveF: 'No',
ecc: 'Tecnología de corrección y verificación de errores',
computeMode: 'Modo de cómputo',
default: 'Predeterminado',
exclusiveProcess: 'Proceso exclusivo',
exclusiveThread: 'Hilo exclusivo',
prohibited: 'Prohibido',
defaultHelper: 'Predeterminado: Los procesos pueden ejecutarse simultáneamente',
exclusiveProcessHelper:
'Proceso exclusivo: Solo un contexto CUDA puede usar la GPU, pero puede ser compartido por varios hilos',
exclusiveThreadHelper: 'Hilo exclusivo: Solo un hilo en un contexto CUDA puede usar la GPU',
prohibitedHelper: 'Prohibido: No se permite la ejecución simultánea de procesos',
migModeHelper: 'Se utiliza para crear instancias MIG y aislar físicamente la GPU a nivel de usuario.',
migModeNA: 'No compatible',
typeCG: 'Cómputo+Gráficos',
processName: 'Nombre del Proceso',
shr: 'Memoria Compartida',
temperatureHelper: 'La alta temperatura de la GPU puede causar una reducción de la frecuencia de la GPU',
gpuUtil: 'Utilización de GPU',
temperature: 'Temperatura',
performanceState: 'Estado de Rendimiento',
powerUsage: 'Consumo de Energía',
memoryUsage: 'Utilización de Memoria',
fanSpeed: 'Velocidad del Ventilador',
power: 'Energía',
powerCurrent: 'Energía Actual',
powerLimit: 'Límite de Energía',
memory: 'Memoria',
memoryUsed: 'Memoria Utilizada',
memoryTotal: 'Memoria Total',
percent: 'Utilización',
},
mcp: {
server: 'Servidor MCP',
Expand Down Expand Up @@ -1243,12 +1229,6 @@ const message = {
down: 'Bajada',
interval: 'Intervalo de Recolección',
intervalHelper: 'Ingrese un intervalo de recolección de monitoreo apropiado (5 segundos - 12 horas)',
gpuUtil: 'Uso de GPU',
temperature: 'Temperatura',
performanceState: 'Estado de rendimiento',
powerUsage: 'Consumo de energía',
memoryUsage: 'Uso de memoria',
fanSpeed: 'Velocidad del ventilador',
},
terminal: {
local: 'Local',
Expand Down
55 changes: 18 additions & 37 deletions frontend/src/lang/modules/ja.ts
Original file line number Diff line number Diff line change
Expand Up @@ -667,42 +667,30 @@ const message = {
whiteListHelper: 'ホワイトリスト内のIPのみアクセスを許可する',
},
gpu: {
gpu: 'GPUモニター',
base: '基本情報',
gpu: 'GPU 監視',
gpuHelper:
'現在のシステムでNVIDIA-SMIまたはXPU-SMIコマンドが検出されませんでした。確認して再試行してください!',
driverVersion: 'ドライバーバージョン',
cudaVersion: 'CUDAバージョン',
'システムが NVIDIA-SMI または XPU-SMI コマンドを検出しませんでした。確認して再試行してください!',
process: 'プロセス情報',
type: 'タイプ',
typeG: 'グラフィックス',
typeC: 'コンピュート',
typeCG: 'コンピュート + グラフィックス',
typeCG: 'コンピュート+グラフィックス',
processName: 'プロセス名',
processMemoryUsage: 'メモリ使用量',
temperatureHelper: '高いGPU温度はGPUの周波数制限を引き起こす可能性があります',
performanceStateHelper: 'P0(最大性能)からP12(最小性能)まで',
busID: 'バスID',
persistenceMode: '永続モード',
enabled: '有効',
disabled: '無効',
persistenceModeHelper: '永続モードはタスクの応答速度を速くしますが、待機時の消費電力が増加します。',
displayActive: 'グラフィックカード初期化済み',
displayActiveT: 'はい',
displayActiveF: 'いいえ',
ecc: 'エラー訂正およびチェック技術',
computeMode: 'コンピュートモード',
default: 'デフォルト',
exclusiveProcess: '専用プロセス',
exclusiveThread: '専用スレッド',
prohibited: '禁止',
defaultHelper: 'デフォルト:プロセスは並行して実行できます',
exclusiveProcessHelper:
'専用プロセス:1つのCUDAコンテキストのみがGPUを使用できますが、複数のスレッドで共有できます',
exclusiveThreadHelper: '専用スレッド:CUDAコンテキスト内の1つのスレッドのみがGPUを使用できます',
prohibitedHelper: '禁止:プロセスは同時に実行できません',
migModeHelper: 'ユーザーレベルでGPUの物理的分離を行うためのMIGインスタンスを作成するために使用されます。',
migModeNA: 'サポートされていません',
shr: '共有メモリ',
temperatureHelper: 'GPU 温度が高いと GPU 周波数が低下する可能性があります',
gpuUtil: 'GPU 使用率',
temperature: '温度',
performanceState: 'パフォーマンス状態',
powerUsage: '消費電力',
memoryUsage: 'メモリ使用率',
fanSpeed: 'ファン速度',
power: '電力',
powerCurrent: '現在の電力',
powerLimit: '電力上限',
memory: 'メモリ',
memoryUsed: '使用メモリ',
memoryTotal: '総メモリ',
percent: '使用率',
},
mcp: {
server: 'MCP サーバー',
Expand Down Expand Up @@ -1199,13 +1187,6 @@ const message = {
down: '下',
interval: '収集間隔',
intervalHelper: '適切な監視収集間隔を入力してください(5秒 - 12時間)',

gpuUtil: 'GPU利用',
temperature: '温度',
performanceState: 'パフォーマンス状態',
powerUsage: '電力使用量',
memoryUsage: 'メモリの使用',
fanSpeed: 'ファンの速度',
},
terminal: {
local: 'ローカル',
Expand Down
Loading
Loading