Skip to content

Commit dfaac30

Browse files
committed
refactor: try to get available temp in amd
Signed-off-by: thxCode <[email protected]>
1 parent 51729af commit dfaac30

File tree

1 file changed: 32 additions (+32) and 14 deletions (-14).

gpustack_runtime/detector/pyrocmsmi/__init__.py

Lines changed: 32 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -233,26 +233,44 @@ def rsmi_dev_temp_metric_get(device=0, sensor=None, metric=None):
233233
if not rocmsmiLib:
234234
raise ROCMSMIError(ROCMSMI_ERROR_UNINITIALIZED)
235235

236-
if sensor is None:
237-
sensor = rsmi_temperature_type_t.RSMI_TEMP_TYPE_JUNCTION
238236
if metric is None:
239237
metric = rsmi_temperature_metric_t.RSMI_TEMP_CURRENT
240-
c_temp = c_int64(0)
241-
ret = rocmsmiLib.rsmi_dev_temp_metric_get(
242-
c_uint32(device),
243-
sensor,
244-
metric,
245-
byref(c_temp),
246-
)
247-
_rocmsmiCheckReturn(ret)
248-
return c_temp.value // 1000
238+
239+
if sensor is None:
240+
sensor = rsmi_temperature_type_t.RSMI_TEMP_TYPE_EDGE
241+
242+
if sensor:
243+
c_temp = c_int64(0)
244+
ret = rocmsmiLib.rsmi_dev_temp_metric_get(
245+
c_uint32(device),
246+
sensor,
247+
metric,
248+
byref(c_temp),
249+
)
250+
_rocmsmiCheckReturn(ret)
251+
return c_temp.value // 1000
252+
253+
# If no sensor is specified,
254+
# try all sensors and return the first valid temperature.
255+
for sensor_i in range(7):
256+
c_temp = c_int64(0)
257+
ret = rocmsmiLib.rsmi_dev_temp_metric_get(
258+
c_uint32(device),
259+
sensor_i,
260+
metric,
261+
byref(c_temp),
262+
)
263+
if ret == rsmi_status_t.RSMI_STATUS_SUCCESS:
264+
return c_temp.value // 1000
265+
266+
return None
249267

250268

251269
def rsmi_dev_power_cap_get(device=0):
252270
if not rocmsmiLib:
253271
raise ROCMSMIError(ROCMSMI_ERROR_UNINITIALIZED)
254272

255-
c_power_cap = c_uint64()
273+
c_power_cap = c_uint64(0)
256274
ret = rocmsmiLib.rsmi_dev_power_cap_get(device, 0, byref(c_power_cap))
257275
_rocmsmiCheckReturn(ret)
258276
return c_power_cap.value // 1000000
@@ -263,7 +281,7 @@ def rsmi_dev_power_ave_get(device=0):
263281
raise ROCMSMIError(ROCMSMI_ERROR_UNINITIALIZED)
264282

265283
c_device_chip = c_uint32(0)
266-
c_power = c_uint64()
284+
c_power = c_uint64(0)
267285
ret = rocmsmiLib.rsmi_dev_power_ave_get(device, c_device_chip, byref(c_power))
268286
_rocmsmiCheckReturn(ret)
269287
return c_power.value // 1000000
@@ -274,7 +292,7 @@ def rsmi_dev_power_get(device=0):
274292
raise ROCMSMIError(ROCMSMI_ERROR_UNINITIALIZED)
275293

276294
try:
277-
c_power = c_uint64()
295+
c_power = c_uint64(0)
278296
c_power_type = rsmi_power_type_t()
279297
ret = rocmsmiLib.rsmi_dev_power_get(device, byref(c_power), byref(c_power_type))
280298
_rocmsmiCheckReturn(ret)

0 commit comments

Comments
 (0)