Skip to content

Commit e4deeea

Browse files
Merge branch 'development' into alex_amdsmi3
2 parents bc9e5f6 + 26b689e commit e4deeea

38 files changed

+1652
-72
lines changed

nodescraper/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,10 @@
2323
# SOFTWARE.
2424
#
2525
###############################################################################
26+
27+
from importlib.metadata import PackageNotFoundError, version
28+
29+
# Start from the placeholder and replace it only when the installed
# distribution metadata for "node-scraper" can be looked up.
__version__ = "unknown"
try:
    __version__ = version("node-scraper")
except PackageNotFoundError:
    # No installed distribution metadata: keep the placeholder.
    pass

nodescraper/cli/cli.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import sys
3333
from typing import Optional
3434

35+
import nodescraper
3536
from nodescraper.cli.constants import DEFAULT_CONFIG, META_VAR_MAP
3637
from nodescraper.cli.dynamicparserbuilder import DynamicParserBuilder
3738
from nodescraper.cli.helper import (
@@ -79,6 +80,12 @@ def build_parser(
7980
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
8081
)
8182

83+
parser.add_argument(
84+
"--version",
85+
action="version",
86+
version=f"%(prog)s {nodescraper.__version__}",
87+
)
88+
8289
parser.add_argument(
8390
"--sys-name", default=platform.node(), help="System name", metavar=META_VAR_MAP[str]
8491
)

nodescraper/models/systeminfo.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,4 @@ class SystemInfo(BaseModel):
4141
platform: Optional[str] = None
4242
metadata: Optional[dict] = Field(default_factory=dict)
4343
location: Optional[SystemLocation] = SystemLocation.LOCAL
44+
vendorid_ep: int = 0x1002
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from .analyzer_args import DeviceEnumerationAnalyzerArgs
27+
from .device_enumeration_plugin import DeviceEnumerationPlugin
28+
29+
__all__ = ["DeviceEnumerationPlugin", "DeviceEnumerationAnalyzerArgs"]
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from typing import Any, Optional
27+
28+
from pydantic import field_validator
29+
30+
from nodescraper.models import AnalyzerArgs
31+
32+
from .deviceenumdata import DeviceEnumerationDataModel
33+
34+
35+
class DeviceEnumerationAnalyzerArgs(AnalyzerArgs):
    # Accepted counts per device type. Each field is either None (unset) or
    # a list of acceptable integer counts.
    cpu_count: Optional[list[int]] = None
    gpu_count: Optional[list[int]] = None
    vf_count: Optional[list[int]] = None

    @field_validator("cpu_count", "gpu_count", "vf_count", mode="before")
    @classmethod
    def normalize_to_list(cls, v: Any) -> Optional[list[int]]:
        """Wrap a bare integer in a list so every count field is list-shaped.

        Args:
            v: Raw input value (an int, a list of ints, or None).

        Returns:
            Optional[list[int]]: ``[v]`` when ``v`` is an int; otherwise ``v``
            is passed through unchanged (including None).
        """
        # None is not an int, so it falls through untouched like any list.
        return [v] if isinstance(v, int) else v

    @classmethod
    def build_from_model(
        cls, datamodel: DeviceEnumerationDataModel
    ) -> "DeviceEnumerationAnalyzerArgs":
        """Create analyzer args whose accepted counts mirror a data model.

        Args:
            datamodel (DeviceEnumerationDataModel): data model for plugin

        Returns:
            DeviceEnumerationAnalyzerArgs: args where each count present on the
            model becomes a single-element list and each missing count is None
        """

        def _as_single_item_list(count: Optional[int]) -> Optional[list[int]]:
            # A missing count stays None; a present one becomes [count].
            if count is None:
                return None
            return [count]

        return cls(
            cpu_count=_as_single_item_list(datamodel.cpu_count),
            gpu_count=_as_single_item_list(datamodel.gpu_count),
            vf_count=_as_single_item_list(datamodel.vf_count),
        )
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from typing import Optional
27+
28+
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus
29+
from nodescraper.interfaces import DataAnalyzer
30+
from nodescraper.models import TaskResult
31+
32+
from .analyzer_args import DeviceEnumerationAnalyzerArgs
33+
from .deviceenumdata import DeviceEnumerationDataModel
34+
35+
36+
class DeviceEnumerationAnalyzer(
    DataAnalyzer[DeviceEnumerationDataModel, DeviceEnumerationAnalyzerArgs]
):
    """Check Device Enumeration matches expected cpu and gpu count
    supported by all OSs, SKUs, and platforms."""

    DATA_MODEL = DeviceEnumerationDataModel

    def analyze_data(
        self, data: DeviceEnumerationDataModel, args: Optional[DeviceEnumerationAnalyzerArgs] = None
    ) -> TaskResult:
        """Compare collected device counts against the accepted counts in args.

        Args:
            data (DeviceEnumerationDataModel): collected cpu/gpu/vf counts
            args (Optional[DeviceEnumerationAnalyzerArgs]): accepted counts per
                device type; fields that are None or empty are not checked

        Returns:
            TaskResult: NOT_RAN when no expected counts were supplied, ERROR
            when any collected count is not in its accepted list (one CRITICAL
            event is logged per mismatch), OK otherwise
        """
        skip_message = "Expected Device Enumeration data not provided, skipping analysis."

        if args is None:
            self.result.status = ExecutionStatus.NOT_RAN
            self.result.message = skip_message
            return self.result

        # Only fields with a non-empty accepted list take part in the check.
        checks = {}
        for name in ("cpu_count", "gpu_count", "vf_count"):
            accepted = getattr(args, name)
            if accepted is not None and accepted != []:
                checks[name] = accepted

        # Args were supplied but nothing was actually requested: report that
        # analysis was skipped instead of claiming a successful validation.
        if not checks:
            self.result.status = ExecutionStatus.NOT_RAN
            self.result.message = skip_message
            return self.result

        failures = []
        for check, accepted_counts in checks.items():
            actual_count = getattr(data, check)
            if actual_count not in accepted_counts:
                message = f"Expected {check} in {accepted_counts}, but got {actual_count}. "
                failures.append(message)
                self.result.status = ExecutionStatus.ERROR
                self._log_event(
                    category=EventCategory.PLATFORM,
                    description=message,
                    data={check: actual_count},
                    priority=EventPriority.CRITICAL,
                    console_log=True,
                )

        if failures:
            self.result.message = "".join(failures)
        else:
            self.result.status = ExecutionStatus.OK
            # Join the names explicitly; formatting checks.keys() directly
            # would render as "dict_keys([...])".
            self.result.message = f"Device Enumeration validated on {', '.join(checks)}."

        return self.result
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from typing import Optional
27+
28+
from nodescraper.base import InBandDataCollector
29+
from nodescraper.connection.inband.inband import CommandArtifact
30+
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily
31+
from nodescraper.models import TaskResult
32+
33+
from .deviceenumdata import DeviceEnumerationDataModel
34+
35+
36+
class DeviceEnumerationCollector(InBandDataCollector[DeviceEnumerationDataModel, None]):
    """Collect CPU and GPU count"""

    DATA_MODEL = DeviceEnumerationDataModel

    CMD_CPU_COUNT_LINUX = "lscpu | grep Socket | awk '{ print $2 }'"
    CMD_GPU_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'VGA\\|Display\\|3D' | wc -l"
    CMD_VF_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'Virtual Function' | wc -l"

    CMD_CPU_COUNT_WINDOWS = (
        'powershell -Command "(Get-WmiObject -Class Win32_Processor | Measure-Object).Count"'
    )
    CMD_GPU_COUNT_WINDOWS = 'powershell -Command "(wmic path win32_VideoController get name | findstr AMD | Measure-Object).Count"'
    CMD_VF_COUNT_WINDOWS = (
        'powershell -Command "(Get-VMHostPartitionableGpu | Measure-Object).Count"'
    )

    def _warning(
        self,
        description: str,
        command: CommandArtifact,
        category: EventCategory = EventCategory.PLATFORM,
    ) -> None:
        """Log a WARNING event that records a command and its outcome.

        Args:
            description (str): event description
            command (CommandArtifact): executed command whose command string,
                stdout, stderr and exit code are attached to the event
            category (EventCategory): event category, PLATFORM by default
        """
        self._log_event(
            category=category,
            description=description,
            data={
                "command": command.command,
                "stdout": command.stdout,
                "stderr": command.stderr,
                "exit_code": command.exit_code,
            },
            priority=EventPriority.WARNING,
        )

    def _parse_count(
        self,
        command: CommandArtifact,
        description: str,
        category: EventCategory = EventCategory.PLATFORM,
    ) -> Optional[int]:
        """Turn a count command's output into an int, warning on failure.

        Args:
            command (CommandArtifact): executed count command
            description (str): warning description used when no count is parsed
            category (EventCategory): category for the warning event

        Returns:
            Optional[int]: parsed count, or None when the command exited
            non-zero or its stdout is not an integer
        """
        count = None
        if command.exit_code == 0:
            # A shell pipeline reports the exit status of its last command, so
            # exit code 0 does not guarantee numeric output; guard the parse
            # instead of letting int() abort the whole collection.
            try:
                count = int(command.stdout)
            except (TypeError, ValueError):
                count = None
        if count is None:
            self._warning(description=description, command=command, category=category)
        return count

    def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumerationDataModel]]:
        """
        Read CPU and GPU count
        On Linux, use lscpu and lspci
        On Windows, use WMI and hyper-v cmdlets

        Returns:
            tuple[TaskResult, Optional[DeviceEnumerationDataModel]]: the task
            result and the collected counts, or None for the data model when
            no non-zero count could be collected
        """
        if self.system_info.os_family == OSFamily.LINUX:
            # Count CPU sockets
            cpu_count_res = self._run_sut_cmd(self.CMD_CPU_COUNT_LINUX)

            # lspci -d takes the vendor id as hex without a "0x" prefix
            vendor_id = format(self.system_info.vendorid_ep, "x")

            # Count all AMD GPUs
            gpu_count_res = self._run_sut_cmd(
                self.CMD_GPU_COUNT_LINUX.format(vendorid_ep=vendor_id)
            )

            # Count AMD Virtual Functions
            vf_count_res = self._run_sut_cmd(self.CMD_VF_COUNT_LINUX.format(vendorid_ep=vendor_id))
        else:
            # Every non-Linux OS family falls back to the Windows commands
            cpu_count_res = self._run_sut_cmd(self.CMD_CPU_COUNT_WINDOWS)
            gpu_count_res = self._run_sut_cmd(self.CMD_GPU_COUNT_WINDOWS)
            vf_count_res = self._run_sut_cmd(self.CMD_VF_COUNT_WINDOWS)

        device_enum = DeviceEnumerationDataModel()

        # (model field, command result, warning description, warning category)
        count_sources = (
            ("cpu_count", cpu_count_res, "Cannot determine CPU count", EventCategory.PLATFORM),
            ("gpu_count", gpu_count_res, "Cannot determine GPU count", EventCategory.PLATFORM),
            ("vf_count", vf_count_res, "Cannot determine VF count", EventCategory.SW_DRIVER),
        )
        for field_name, command, description, category in count_sources:
            count = self._parse_count(command, description, category)
            # Leave the model's default in place when nothing was parsed
            if count is not None:
                setattr(device_enum, field_name, count)

        # NOTE: truthiness check - a count of 0 is not treated as "found", so
        # a result where every collected count is 0 is reported as a failure.
        if device_enum.cpu_count or device_enum.gpu_count or device_enum.vf_count:
            self._log_event(
                category=EventCategory.PLATFORM,
                description=f"Counted {device_enum.cpu_count} CPUs, {device_enum.gpu_count} GPUs, {device_enum.vf_count} VFs",
                data=device_enum.model_dump(exclude_none=True),
                priority=EventPriority.INFO,
            )
            self.result.message = f"Device Enumeration: {device_enum.model_dump(exclude_none=True)}"
            self.result.status = ExecutionStatus.OK
            return self.result, device_enum

        self.result.message = "Device Enumeration info not found"
        self.result.status = ExecutionStatus.EXECUTION_FAILURE
        self._log_event(
            category=EventCategory.SW_DRIVER,
            description=self.result.message,
            priority=EventPriority.CRITICAL,
        )
        return self.result, None

0 commit comments

Comments
 (0)