Skip to content

Commit 837cbe8

Browse files
committed
full implementation of most new metrics
1 parent 48d07c6 commit 837cbe8

File tree

3 files changed

+137
-49
lines changed

3 files changed

+137
-49
lines changed

IntelPresentMon/ControlLib/IntelPowerTelemetryAdapter.cpp

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ namespace pwr::intel
127127
IGCL_ERR(result);
128128
}
129129
pmlog_verb(v::gpu)("get memory state").pmwatch(GetName()).pmwatch(ref::DumpGenerated(memory_state));
130+
130131
if (const auto result = ctlMemoryGetBandwidth(memoryModules[0], &memory_bandwidth);
131132
result != CTL_RESULT_SUCCESS) {
132133
success = false;
@@ -498,7 +499,7 @@ namespace pwr::intel
498499
SetTelemetryCapBit(GpuTelemetryCapBits::gpu_current_limited);
499500
}
500501

501-
// these metrics only enabled with V1
502+
// these metrics only available with V1 struct
502503
if constexpr (std::same_as<T, ctl_power_telemetry2_t>) {
503504
result = GetInstantaneousPowerTelemetryItem(
504505
currentSample.gpuEffectiveClock,
@@ -507,6 +508,46 @@ namespace pwr::intel
507508
if (result != CTL_RESULT_SUCCESS) {
508509
return result;
509510
}
511+
512+
result = GetInstantaneousPowerTelemetryItem(
513+
currentSample.gpuVrTemp,
514+
pm_gpu_power_telemetry_info.gpu_voltage_regulator_temperature_c,
515+
GpuTelemetryCapBits::gpu_voltage_regulator_temperature);
516+
if (result != CTL_RESULT_SUCCESS) {
517+
return result;
518+
}
519+
520+
result = GetInstantaneousPowerTelemetryItem(
521+
currentSample.vramCurrentEffectiveFrequency,
522+
pm_gpu_power_telemetry_info.gpu_mem_effective_bandwidth_gbps,
523+
GpuTelemetryCapBits::gpu_mem_effective_bandwidth);
524+
if (result != CTL_RESULT_SUCCESS) {
525+
return result;
526+
}
527+
528+
result = GetInstantaneousPowerTelemetryItem(
529+
currentSample.gpuOverVoltagePercent,
530+
pm_gpu_power_telemetry_info.gpu_overvoltage_percent,
531+
GpuTelemetryCapBits::gpu_overvoltage_percent);
532+
if (result != CTL_RESULT_SUCCESS) {
533+
return result;
534+
}
535+
536+
result = GetInstantaneousPowerTelemetryItem(
537+
currentSample.gpuTemperaturePercent,
538+
pm_gpu_power_telemetry_info.gpu_temperature_percent,
539+
GpuTelemetryCapBits::gpu_temperature_percent);
540+
if (result != CTL_RESULT_SUCCESS) {
541+
return result;
542+
}
543+
544+
result = GetInstantaneousPowerTelemetryItem(
545+
currentSample.gpuPowerPercent,
546+
pm_gpu_power_telemetry_info.gpu_power_percent,
547+
GpuTelemetryCapBits::gpu_power_percent);
548+
if (result != CTL_RESULT_SUCCESS) {
549+
return result;
550+
}
510551
}
511552

512553
return result;

IntelPresentMon/ControlLib/PresentMonPowerTelemetry.h

Lines changed: 52 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,16 @@ struct PresentMonPowerTelemetryInfo {
2828
double gpu_sustained_power_limit_w;
2929
double gpu_voltage_v;
3030
double gpu_frequency_mhz;
31-
double gpu_effective_frequency_mhz;
3231
double gpu_temperature_c;
3332
double gpu_utilization;
3433
double gpu_render_compute_utilization;
3534
double gpu_media_utilization;
35+
double gpu_effective_frequency_mhz;
36+
double gpu_voltage_regulator_temperature_c;
37+
double gpu_mem_effective_bandwidth_gbps;
38+
double gpu_overvoltage_percent;
39+
double gpu_temperature_percent;
40+
double gpu_power_percent;
3641

3742
double vram_power_w;
3843
double vram_voltage_v;
@@ -67,52 +72,52 @@ struct PresentMonPowerTelemetryInfo {
6772
};
6873

6974
enum class GpuTelemetryCapBits {
70-
time_stamp,
71-
gpu_power,
72-
gpu_sustained_power_limit,
73-
gpu_voltage,
74-
gpu_frequency,
75-
gpu_temperature,
76-
gpu_utilization,
77-
gpu_render_compute_utilization,
78-
gpu_media_utilization,
79-
vram_power,
80-
vram_voltage,
81-
vram_frequency,
82-
vram_effective_frequency,
83-
vram_temperature,
84-
fan_speed_0,
85-
fan_speed_1,
86-
fan_speed_2,
87-
fan_speed_3,
88-
fan_speed_4,
89-
psu_info_0,
90-
psu_info_1,
91-
psu_info_2,
92-
psu_info_3,
93-
psu_info_4,
94-
gpu_mem_size,
95-
gpu_mem_used,
96-
gpu_mem_max_bandwidth,
97-
gpu_mem_write_bandwidth,
98-
gpu_mem_read_bandwidth,
99-
gpu_power_limited,
100-
gpu_temperature_limited,
101-
gpu_current_limited,
102-
gpu_voltage_limited,
103-
gpu_utilization_limited,
104-
vram_power_limited,
105-
vram_temperature_limited,
106-
vram_current_limited,
107-
vram_voltage_limited,
108-
vram_utilization_limited,
109-
gpu_effective_frequency,
110-
gpu_voltage_regulator_temperature,
111-
gpu_mem_effective_bandwidth,
112-
gpu_overvoltage_percent,
113-
gpu_temperature_percent,
114-
gpu_power_percent,
115-
gpu_telemetry_count,
75+
time_stamp,
76+
gpu_power,
77+
gpu_sustained_power_limit,
78+
gpu_voltage,
79+
gpu_frequency,
80+
gpu_temperature,
81+
gpu_utilization,
82+
gpu_render_compute_utilization,
83+
gpu_media_utilization,
84+
vram_power,
85+
vram_voltage,
86+
vram_frequency,
87+
vram_effective_frequency,
88+
vram_temperature,
89+
fan_speed_0,
90+
fan_speed_1,
91+
fan_speed_2,
92+
fan_speed_3,
93+
fan_speed_4,
94+
psu_info_0,
95+
psu_info_1,
96+
psu_info_2,
97+
psu_info_3,
98+
psu_info_4,
99+
gpu_mem_size,
100+
gpu_mem_used,
101+
gpu_mem_max_bandwidth,
102+
gpu_mem_write_bandwidth,
103+
gpu_mem_read_bandwidth,
104+
gpu_power_limited,
105+
gpu_temperature_limited,
106+
gpu_current_limited,
107+
gpu_voltage_limited,
108+
gpu_utilization_limited,
109+
vram_power_limited,
110+
vram_temperature_limited,
111+
vram_current_limited,
112+
vram_voltage_limited,
113+
vram_utilization_limited,
114+
gpu_effective_frequency,
115+
gpu_voltage_regulator_temperature,
116+
gpu_mem_effective_bandwidth,
117+
gpu_overvoltage_percent,
118+
gpu_temperature_percent,
119+
gpu_power_percent,
120+
gpu_telemetry_count,
116121
};
117122

118123
using GpuTelemetryBitset = std::bitset<static_cast<size_t>(GpuTelemetryCapBits::gpu_telemetry_count)>;

IntelPresentMon/PresentMonMiddleware/ConcreteMiddleware.cpp

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,24 @@ namespace pmon::mid
391391
break;
392392
}
393393
break;
394+
case PM_METRIC_GPU_EFFECTIVE_FREQUENCY:
395+
// GPU capability bits belong in the GPU accumulation bitset, not the CPU one.
pQuery->accumGpuBits.set(static_cast<size_t>(GpuTelemetryCapBits::gpu_effective_frequency));
396+
break;
397+
case PM_METRIC_GPU_VOLTAGE_REGULATOR_TEMPERATURE:
398+
// GPU capability bits belong in the GPU accumulation bitset, not the CPU one.
pQuery->accumGpuBits.set(static_cast<size_t>(GpuTelemetryCapBits::gpu_voltage_regulator_temperature));
399+
break;
400+
case PM_METRIC_GPU_MEM_EFFECTIVE_BANDWIDTH:
401+
// GPU capability bits belong in the GPU accumulation bitset, not the CPU one.
pQuery->accumGpuBits.set(static_cast<size_t>(GpuTelemetryCapBits::gpu_mem_effective_bandwidth));
402+
break;
403+
case PM_METRIC_GPU_OVERVOLTAGE_PERCENT:
404+
// GPU capability bits belong in the GPU accumulation bitset, not the CPU one.
pQuery->accumGpuBits.set(static_cast<size_t>(GpuTelemetryCapBits::gpu_overvoltage_percent));
405+
break;
406+
case PM_METRIC_GPU_TEMPERATURE_PERCENT:
407+
// GPU capability bits belong in the GPU accumulation bitset, not the CPU one.
pQuery->accumGpuBits.set(static_cast<size_t>(GpuTelemetryCapBits::gpu_temperature_percent));
408+
break;
409+
case PM_METRIC_GPU_POWER_PERCENT:
410+
// GPU capability bits belong in the GPU accumulation bitset, not the CPU one.
pQuery->accumGpuBits.set(static_cast<size_t>(GpuTelemetryCapBits::gpu_power_percent));
411+
break;
394412
case PM_METRIC_CPU_UTILIZATION:
395413
pQuery->accumCpuBits.set(static_cast<size_t>(CpuTelemetryCapBits::cpu_utilization));
396414
break;
@@ -1555,6 +1573,24 @@ static void ReportMetrics(
15551573
case GpuTelemetryCapBits::vram_utilization_limited:
15561574
metricInfo[PM_METRIC_GPU_MEM_UTILIZATION_LIMITED].data[0].emplace_back(power_telemetry_info.vram_utilization_limited);
15571575
break;
1576+
case GpuTelemetryCapBits::gpu_effective_frequency:
1577+
metricInfo[PM_METRIC_GPU_EFFECTIVE_FREQUENCY].data[0].emplace_back(power_telemetry_info.gpu_effective_frequency_mhz);
1578+
break;
1579+
case GpuTelemetryCapBits::gpu_voltage_regulator_temperature:
1580+
metricInfo[PM_METRIC_GPU_VOLTAGE_REGULATOR_TEMPERATURE].data[0].emplace_back(power_telemetry_info.gpu_voltage_regulator_temperature_c);
1581+
break;
1582+
case GpuTelemetryCapBits::gpu_mem_effective_bandwidth:
1583+
metricInfo[PM_METRIC_GPU_MEM_EFFECTIVE_BANDWIDTH].data[0].emplace_back(power_telemetry_info.gpu_mem_effective_bandwidth_gbps);
1584+
break;
1585+
case GpuTelemetryCapBits::gpu_overvoltage_percent:
1586+
metricInfo[PM_METRIC_GPU_OVERVOLTAGE_PERCENT].data[0].emplace_back(power_telemetry_info.gpu_overvoltage_percent);
1587+
break;
1588+
case GpuTelemetryCapBits::gpu_temperature_percent:
1589+
metricInfo[PM_METRIC_GPU_TEMPERATURE_PERCENT].data[0].emplace_back(power_telemetry_info.gpu_temperature_percent);
1590+
break;
1591+
case GpuTelemetryCapBits::gpu_power_percent:
1592+
metricInfo[PM_METRIC_GPU_POWER_PERCENT].data[0].emplace_back(power_telemetry_info.gpu_power_percent);
1593+
break;
15581594
default:
15591595
validGpuMetric = false;
15601596
break;
@@ -1803,6 +1839,12 @@ static void ReportMetrics(
18031839
case PM_METRIC_CPU_TEMPERATURE:
18041840
case PM_METRIC_CPU_FREQUENCY:
18051841
case PM_METRIC_CPU_CORE_UTILITY:
1842+
case PM_METRIC_GPU_EFFECTIVE_FREQUENCY:
1843+
case PM_METRIC_GPU_VOLTAGE_REGULATOR_TEMPERATURE:
1844+
case PM_METRIC_GPU_MEM_EFFECTIVE_BANDWIDTH:
1845+
case PM_METRIC_GPU_OVERVOLTAGE_PERCENT:
1846+
case PM_METRIC_GPU_TEMPERATURE_PERCENT:
1847+
case PM_METRIC_GPU_POWER_PERCENT:
18061848
CalculateGpuCpuMetric(metricInfo, qe, pBlob);
18071849
break;
18081850
case PM_METRIC_CPU_VENDOR:
@@ -1821,8 +1863,8 @@ static void ReportMetrics(
18211863
{
18221864
auto& output = reinterpret_cast<double&>(pBlob[qe.dataOffset]);
18231865
output = CalcGpuMemUtilization(qe.stat);
1866+
break;
18241867
}
1825-
break;
18261868
default:
18271869
break;
18281870
}

0 commit comments

Comments
 (0)