Skip to content

Commit 555aba8

Browse files
committed
undid var name change + added vars back + pytest fix
1 parent 5cbe96f commit 555aba8

File tree

4 files changed

+92
-40
lines changed

4 files changed

+92
-40
lines changed

nodescraper/plugins/inband/amdsmi/amdsmi_analyzer.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def static_consistancy_check(self, amdsmi_static_data: list[AmdSmiStatic]):
177177
"subsystem_id": {gpu.asic.subsystem_id for gpu in amdsmi_static_data},
178178
"device_id": {gpu.asic.device_id for gpu in amdsmi_static_data},
179179
"rev_id": {gpu.asic.rev_id for gpu in amdsmi_static_data},
180-
"num_compute_units": {gpu.asic.num_compute_units for gpu in amdsmi_static_data},
180+
"num_compute_units": {str(gpu.asic.num_compute_units) for gpu in amdsmi_static_data},
181181
"target_graphics_version": {
182182
gpu.asic.target_graphics_version for gpu in amdsmi_static_data
183183
},
@@ -330,10 +330,13 @@ def check_pldm_version(
330330
pldm_missing_gpus: list[int] = []
331331
for fw_data in amdsmi_fw_data:
332332
gpu = fw_data.gpu
333+
if isinstance(fw_data.fw_list, str):
334+
pldm_missing_gpus.append(gpu)
335+
continue
333336
for fw_info in fw_data.fw_list:
334-
if PLDM_STRING == fw_info.fw_name and expected_pldm_version != fw_info.fw_version:
337+
if PLDM_STRING == fw_info.fw_id and expected_pldm_version != fw_info.fw_version:
335338
mismatched_gpus.append(gpu)
336-
if PLDM_STRING == fw_info.fw_name:
339+
if PLDM_STRING == fw_info.fw_id:
337340
break
338341
else:
339342
pldm_missing_gpus.append(gpu)

nodescraper/plugins/inband/amdsmi/amdsmi_collector.py

Lines changed: 63 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -388,18 +388,15 @@ def get_process(self) -> Optional[list[Processes]]:
388388
enc=self._valueunit(eu.get("enc"), "ns"),
389389
)
390390

391-
cu_occ = self._valueunit(entry.get("cu_occupancy"), "")
392-
393391
try:
394392
plist.append(
395393
ProcessListItem(
396394
process_info=ProcessInfo(
397395
name=str(name),
398396
pid=pid,
399-
mem=mem_vu,
400397
memory_usage=mem_usage,
398+
mem_usage=mem_vu,
401399
usage=usage,
402-
cu_occupancy=cu_occ,
403400
)
404401
)
405402
)
@@ -525,7 +522,7 @@ def get_firmware(self) -> Optional[list[Fw]]:
525522
ver = e.get("fw_version")
526523
normalized.append(
527524
FwListItem(
528-
fw_name="" if fid is None else str(fid),
525+
fw_id="" if fid is None else str(fid),
529526
fw_version="" if ver is None else str(ver),
530527
)
531528
)
@@ -593,6 +590,22 @@ def get_static(self) -> Optional[list[AmdSmiStatic]]:
593590
)
594591

595592
# ASIC
593+
oam_id_raw = asic.get("oam_id")
594+
if oam_id_raw in (None, "", "N/A"):
595+
oam_id_val: Union[int, str] = "N/A"
596+
elif isinstance(oam_id_raw, str):
597+
oam_id_val = oam_id_raw
598+
else:
599+
oam_id_val = int(oam_id_raw) if oam_id_raw is not None else "N/A"
600+
601+
num_cu_raw = asic.get("num_compute_units")
602+
if num_cu_raw in (None, "", "N/A"):
603+
num_cu_val: Union[int, str] = "N/A"
604+
elif isinstance(num_cu_raw, str):
605+
num_cu_val = num_cu_raw
606+
else:
607+
num_cu_val = int(num_cu_raw) if num_cu_raw is not None else "N/A"
608+
596609
asic_model = StaticAsic(
597610
market_name=self._normalize(
598611
asic.get("market_name") or asic.get("asic_name"), default=""
@@ -604,8 +617,8 @@ def get_static(self) -> Optional[list[AmdSmiStatic]]:
604617
subsystem_id=str(asic.get("subsystem_id", "")),
605618
rev_id=str(asic.get("rev_id", "")),
606619
asic_serial=str(asic.get("asic_serial", "")),
607-
oam_id=int(asic.get("oam_id", 0) or 0),
608-
num_compute_units=int(asic.get("num_compute_units", 0) or 0),
620+
oam_id=oam_id_val,
621+
num_compute_units=num_cu_val,
609622
target_graphics_version=str(asic.get("target_graphics_version", "")),
610623
)
611624

@@ -621,12 +634,14 @@ def get_static(self) -> Optional[list[AmdSmiStatic]]:
621634
)
622635

623636
# Driver
624-
driver_model = None
625-
if driver:
626-
driver_model = StaticDriver(
627-
name=self._normalize(driver.get("driver_name"), default="unknown"),
628-
version=self._normalize(driver.get("driver_version"), default="unknown"),
629-
)
637+
driver_model = StaticDriver(
638+
name=self._normalize(
639+
driver.get("driver_name") if driver else None, default="unknown"
640+
),
641+
version=self._normalize(
642+
driver.get("driver_version") if driver else None, default="unknown"
643+
),
644+
)
630645

631646
# VBIOS
632647
vbios_model: Optional[StaticVbios] = None
@@ -640,8 +655,15 @@ def get_static(self) -> Optional[list[AmdSmiStatic]]:
640655

641656
# NUMA
642657
numa_node = int(numa.get("node", 0) or 0)
643-
affinity = int(numa.get("affinity", 0) or 0)
644-
numa_model = StaticNuma(node=numa_node, affinity=affinity)
658+
affinity_raw = numa.get("affinity")
659+
if affinity_raw in (None, "", "N/A"):
660+
affinity_val: Union[int, str] = "N/A"
661+
elif isinstance(affinity_raw, str):
662+
affinity_val = affinity_raw
663+
else:
664+
affinity_val = int(affinity_raw) if affinity_raw is not None else "N/A"
665+
666+
numa_model = StaticNuma(node=numa_node, affinity=affinity_val)
645667

646668
# VRAM
647669
vram_type = str(vram.get("vram_type", "") or "unknown")
@@ -672,7 +694,7 @@ def get_static(self) -> Optional[list[AmdSmiStatic]]:
672694
cache_info_model = self._parse_cache_info(cache)
673695

674696
# Clock
675-
clock_model = self._parse_clock(clock)
697+
clock_dict_model = self._parse_clock_dict(clock)
676698

677699
try:
678700
out.append(
@@ -691,7 +713,7 @@ def get_static(self) -> Optional[list[AmdSmiStatic]]:
691713
vram=vram_model,
692714
cache_info=cache_info_model,
693715
partition=None,
694-
clock=clock_model,
716+
clock=clock_dict_model,
695717
)
696718
)
697719
except ValidationError as e:
@@ -928,10 +950,33 @@ def _fmt(n: Optional[int]) -> Optional[str]:
928950
{"Level 0": level0, "Level 1": level1, "Level 2": level2}
929951
)
930952

931-
return StaticClockData(frequency=levels, current=current)
953+
# Use the alias "current level" as defined in the model
954+
return StaticClockData.model_validate(
955+
{"frequency_levels": levels, "current level": current}
956+
)
932957
except ValidationError:
933958
return None
934959

960+
def _parse_clock_dict(self, data: dict) -> Optional[dict[str, Union[StaticClockData, None]]]:
961+
"""Parse clock data into dictionary structure
962+
963+
Args:
964+
data (dict): Clock data from amd-smi
965+
966+
Returns:
967+
Optional[dict[str, Union[StaticClockData, None]]]: dictionary of clock data or None
968+
"""
969+
if not isinstance(data, dict):
970+
return None
971+
972+
clock_dict: dict[str, Union[StaticClockData, None]] = {}
973+
974+
clock_data = self._parse_clock(data)
975+
if clock_data:
976+
clock_dict["clk"] = clock_data
977+
978+
return clock_dict if clock_dict else None
979+
935980
def collect_data(
936981
self,
937982
args: Any = None,

nodescraper/plugins/inband/amdsmi/amdsmidata.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any, List, Mapping, Optional, Union
33

44
from pydantic import (
5+
AliasChoices,
56
BaseModel,
67
ConfigDict,
78
Field,
@@ -156,12 +157,10 @@ class ProcessUsage(BaseModel):
156157
class ProcessInfo(BaseModel):
157158
name: str
158159
pid: int
159-
160-
mem: Optional[ValueUnit] = None
161160
memory_usage: ProcessMemoryUsage
161+
mem_usage: Optional[ValueUnit]
162162
usage: ProcessUsage
163-
cu_occupancy: Optional[ValueUnit] = None
164-
na_validator = field_validator("mem", "cu_occupancy", mode="before")(na_to_none)
163+
na_validator = field_validator("mem_usage", mode="before")(na_to_none)
165164

166165

167166
class ProcessListItem(BaseModel):
@@ -175,13 +174,13 @@ class Processes(BaseModel):
175174

176175
# FW
177176
class FwListItem(BaseModel):
177+
fw_id: str
178178
fw_version: str
179-
fw_name: str
180179

181180

182181
class Fw(BaseModel):
183182
gpu: int
184-
fw_list: List[FwListItem]
183+
fw_list: Union[List[FwListItem], str]
185184

186185

187186
class AmdSmiListItem(BaseModel):
@@ -256,8 +255,8 @@ class StaticAsic(BaseModel):
256255
subsystem_id: str
257256
rev_id: str
258257
asic_serial: str
259-
oam_id: int
260-
num_compute_units: int
258+
oam_id: Union[int, str] # can be N/A
259+
num_compute_units: Union[int, str] # can be N/A
261260
target_graphics_version: str
262261

263262

@@ -320,8 +319,11 @@ class StaticBoard(BaseModel):
320319

321320

322321
class StaticPartition(BaseModel):
322+
# The compute_partition key has been renamed to accelerator_partition; both names are accepted for now
323323

324-
compute_partition: str
324+
compute_partition: str = Field(
325+
validation_alias=AliasChoices("compute_partition", "accelerator_partition")
326+
)
325327
memory_partition: str
326328
partition_id: int
327329

@@ -345,7 +347,7 @@ class StaticXgmiPlpd(BaseModel):
345347

346348
class StaticNuma(BaseModel):
347349
node: int
348-
affinity: int
350+
affinity: Union[int, str] # can be N/A
349351

350352

351353
class StaticVram(AmdSmiBaseModel):
@@ -383,10 +385,10 @@ class StaticClockData(BaseModel):
383385
model_config = ConfigDict(
384386
populate_by_name=True,
385387
)
386-
frequency: StaticFrequencyLevels
388+
frequency_levels: StaticFrequencyLevels
387389

388-
current: Optional[int] = Field(..., alias="current")
389-
na_validator = field_validator("current", mode="before")(na_to_none)
390+
current_level: Optional[int] = Field(..., alias="current level")
391+
na_validator = field_validator("current_level", mode="before")(na_to_none)
390392

391393

392394
class AmdSmiStatic(BaseModel):
@@ -397,16 +399,16 @@ class AmdSmiStatic(BaseModel):
397399
bus: StaticBus
398400
vbios: Optional[StaticVbios]
399401
limit: Optional[StaticLimit]
400-
driver: Optional[StaticDriver]
402+
driver: StaticDriver
401403
board: StaticBoard
402404
soc_pstate: Optional[StaticSocPstate]
403405
xgmi_plpd: Optional[StaticXgmiPlpd]
404406
process_isolation: str
405407
numa: StaticNuma
406408
vram: StaticVram
407409
cache_info: List[StaticCacheInfoItem]
408-
partition: Optional[StaticPartition] = None
409-
clock: Optional[StaticClockData] = None
410+
partition: Optional[StaticPartition] = None # This has been removed in Amd-smi 26.0.0+d30a0afe+
411+
clock: Optional[dict[str, Union[StaticClockData, None]]] = None
410412
na_validator_dict = field_validator("clock", mode="before")(na_to_none_dict)
411413
na_validator = field_validator("soc_pstate", "xgmi_plpd", "vbios", "limit", mode="before")(
412414
na_to_none

test/unit/plugin/test_amdsmi_collector.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def test_get_process(collector):
277277
p0 = procs[0].process_list[0].process_info
278278
assert p0.name == "python"
279279
assert p0.pid == 4242
280-
assert p0.mem is not None and p0.mem.unit == "B"
280+
assert p0.mem_usage is not None and p0.mem_usage.unit == "B"
281281
assert p0.usage.gfx is not None and p0.usage.gfx.unit == "ns"
282282

283283
p1 = procs[0].process_list[1].process_info
@@ -300,7 +300,7 @@ def test_get_firmware(collector):
300300
assert fw is not None and len(fw) == 1
301301
assert fw[0].gpu == 0
302302
assert len(fw[0].fw_list) == 2
303-
assert fw[0].fw_list[0].fw_name == "SMU"
303+
assert fw[0].fw_list[0].fw_id == "SMU"
304304
assert fw[0].fw_list[0].fw_version == "55.33"
305305

306306

@@ -335,7 +335,9 @@ def test_get_static(collector):
335335
assert cache.cache_properties
336336

337337
if s.clock is not None:
338-
assert s.clock.frequency is not None
338+
assert isinstance(s.clock, dict)
339+
if "clk" in s.clock and s.clock["clk"] is not None:
340+
assert s.clock["clk"].frequency_levels is not None
339341

340342

341343
def test_cache_properties_parsing(collector):

0 commit comments

Comments
 (0)