Skip to content

Commit 95d6ac9

Browse files
Merge pull request #73 from amd/alex_devenum_update
DeviceEnumerationPlugin update
2 parents 720832b + baa75a5 commit 95d6ac9

File tree

4 files changed

+84 / -16 lines changed (84 additions, 16 deletions)

4 files changed

+84 / -16 lines changed (84 additions, 16 deletions)

nodescraper/interfaces/task.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,8 @@ def _build_event(
107107
data = {"task_name": self.__class__.__name__, "task_type": self.TASK_TYPE}
108108

109109
else:
110+
# Copy to avoid mutating the caller's dict
111+
data = copy.copy(data)
110112
data["task_name"] = self.__class__.__name__
111113
data["task_type"] = self.TASK_TYPE
112114

nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py

Lines changed: 54 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
from typing import Optional
2727

2828
from nodescraper.base import InBandDataCollector
29-
from nodescraper.connection.inband.inband import CommandArtifact
29+
from nodescraper.connection.inband.inband import CommandArtifact, TextFileArtifact
3030
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily
3131
from nodescraper.models import TaskResult
3232

@@ -38,9 +38,10 @@ class DeviceEnumerationCollector(InBandDataCollector[DeviceEnumerationDataModel,
3838

3939
DATA_MODEL = DeviceEnumerationDataModel
4040

41-
CMD_CPU_COUNT_LINUX = "lscpu | grep Socket | awk '{ print $2 }'"
4241
CMD_GPU_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'VGA\\|Display\\|3D' | wc -l"
4342
CMD_VF_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'Virtual Function' | wc -l"
43+
CMD_LSCPU_LINUX = "lscpu"
44+
CMD_LSHW_LINUX = "lshw"
4445

4546
CMD_CPU_COUNT_WINDOWS = (
4647
'powershell -Command "(Get-WmiObject -Class Win32_Processor | Measure-Object).Count"'
@@ -61,9 +62,8 @@ def _warning(
6162
description=description,
6263
data={
6364
"command": command.command,
64-
"stdout": command.stdout,
65-
"stderr": command.stderr,
6665
"exit_code": command.exit_code,
66+
"stderr": command.stderr,
6767
},
6868
priority=EventPriority.WARNING,
6969
)
@@ -75,8 +75,7 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio
7575
On Windows, use WMI and hyper-v cmdlets
7676
"""
7777
if self.system_info.os_family == OSFamily.LINUX:
78-
# Count CPU sockets
79-
cpu_count_res = self._run_sut_cmd(self.CMD_CPU_COUNT_LINUX)
78+
lscpu_res = self._run_sut_cmd(self.CMD_LSCPU_LINUX, log_artifact=False)
8079

8180
# Count all AMD GPUs
8281
vendor_id = format(self.system_info.vendorid_ep, "x")
@@ -86,17 +85,42 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio
8685

8786
# Count AMD Virtual Functions
8887
vf_count_res = self._run_sut_cmd(self.CMD_VF_COUNT_LINUX.format(vendorid_ep=vendor_id))
88+
89+
# Collect lshw output
90+
lshw_res = self._run_sut_cmd(self.CMD_LSHW_LINUX, sudo=True, log_artifact=False)
8991
else:
9092
cpu_count_res = self._run_sut_cmd(self.CMD_CPU_COUNT_WINDOWS)
9193
gpu_count_res = self._run_sut_cmd(self.CMD_GPU_COUNT_WINDOWS)
9294
vf_count_res = self._run_sut_cmd(self.CMD_VF_COUNT_WINDOWS)
9395

9496
device_enum = DeviceEnumerationDataModel()
9597

96-
if cpu_count_res.exit_code == 0:
97-
device_enum.cpu_count = int(cpu_count_res.stdout)
98+
if self.system_info.os_family == OSFamily.LINUX:
99+
if lscpu_res.exit_code == 0 and lscpu_res.stdout:
100+
# Extract socket count from lscpu output
101+
for line in lscpu_res.stdout.splitlines():
102+
if line.startswith("Socket(s):"):
103+
try:
104+
device_enum.cpu_count = int(line.split(":")[1].strip())
105+
break
106+
except (ValueError, IndexError):
107+
self._warning(
108+
description="Cannot parse CPU count from lscpu output",
109+
command=lscpu_res,
110+
)
111+
device_enum.lscpu_output = lscpu_res.stdout
112+
self._log_event(
113+
category=EventCategory.PLATFORM,
114+
description="Collected lscpu output",
115+
priority=EventPriority.INFO,
116+
)
117+
else:
118+
self._warning(description="Cannot collect lscpu output", command=lscpu_res)
98119
else:
99-
self._warning(description="Cannot determine CPU count", command=cpu_count_res)
120+
if cpu_count_res.exit_code == 0:
121+
device_enum.cpu_count = int(cpu_count_res.stdout)
122+
else:
123+
self._warning(description="Cannot determine CPU count", command=cpu_count_res)
100124

101125
if gpu_count_res.exit_code == 0:
102126
device_enum.gpu_count = int(gpu_count_res.stdout)
@@ -112,14 +136,33 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio
112136
category=EventCategory.SW_DRIVER,
113137
)
114138

139+
# Collect lshw output on Linux
140+
if self.system_info.os_family == OSFamily.LINUX:
141+
if lshw_res.exit_code == 0 and lshw_res.stdout:
142+
device_enum.lshw_output = lshw_res.stdout
143+
self.result.artifacts.append(
144+
TextFileArtifact(filename="lshw.txt", contents=lshw_res.stdout)
145+
)
146+
self._log_event(
147+
category=EventCategory.PLATFORM,
148+
description="Collected lshw output",
149+
priority=EventPriority.INFO,
150+
)
151+
else:
152+
self._warning(description="Cannot collect lshw output", command=lshw_res)
153+
115154
if device_enum.cpu_count or device_enum.gpu_count or device_enum.vf_count:
155+
log_data = device_enum.model_dump(
156+
exclude_none=True,
157+
exclude={"lscpu_output", "lshw_output", "task_name", "task_type", "parent"},
158+
)
116159
self._log_event(
117160
category=EventCategory.PLATFORM,
118161
description=f"Counted {device_enum.cpu_count} CPUs, {device_enum.gpu_count} GPUs, {device_enum.vf_count} VFs",
119-
data=device_enum.model_dump(exclude_none=True),
162+
data=log_data,
120163
priority=EventPriority.INFO,
121164
)
122-
self.result.message = f"Device Enumeration: {device_enum.model_dump(exclude_none=True)}"
165+
self.result.message = f"Device Enumeration: {log_data}"
123166
self.result.status = ExecutionStatus.OK
124167
return self.result, device_enum
125168
else:

nodescraper/plugins/inband/device_enumeration/deviceenumdata.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,3 +32,5 @@ class DeviceEnumerationDataModel(DataModel):
3232
cpu_count: Optional[int] = None
3333
gpu_count: Optional[int] = None
3434
vf_count: Optional[int] = None
35+
lscpu_output: Optional[str] = None
36+
lshw_output: Optional[str] = None

test/unit/plugin/test_device_enumeration_collector.py

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -51,13 +51,16 @@ def test_collect_linux(system_info, device_enumeration_collector):
5151
"""Test linux typical output"""
5252
system_info.os_family = OSFamily.LINUX
5353

54+
lscpu_output = "Architecture: x86_64\nCPU(s): 64\nSocket(s): 2"
55+
lshw_output = "*-cpu\n product: AMD EPYC 1234 64-Core Processor"
56+
5457
device_enumeration_collector._run_sut_cmd = MagicMock(
5558
side_effect=[
5659
MagicMock(
5760
exit_code=0,
58-
stdout="2",
61+
stdout=lscpu_output,
5962
stderr="",
60-
command="lscpu | grep Socket | awk '{ print $2 }'",
63+
command="lscpu",
6164
),
6265
MagicMock(
6366
exit_code=0,
@@ -71,12 +74,24 @@ def test_collect_linux(system_info, device_enumeration_collector):
7174
stderr="",
7275
command="lspci -d 1002: | grep -i 'Virtual Function' | wc -l",
7376
),
77+
MagicMock(
78+
exit_code=0,
79+
stdout=lshw_output,
80+
stderr="",
81+
command="lshw",
82+
),
7483
]
7584
)
7685

7786
result, data = device_enumeration_collector.collect_data()
7887
assert result.status == ExecutionStatus.OK
79-
assert data == DeviceEnumerationDataModel(cpu_count=2, gpu_count=8, vf_count=0)
88+
assert data == DeviceEnumerationDataModel(
89+
cpu_count=2, gpu_count=8, vf_count=0, lscpu_output=lscpu_output, lshw_output=lshw_output
90+
)
91+
assert (
92+
len([a for a in result.artifacts if hasattr(a, "filename") and a.filename == "lshw.txt"])
93+
== 1
94+
)
8095

8196

8297
def test_collect_windows(system_info, device_enumeration_collector):
@@ -119,9 +134,9 @@ def test_collect_error(system_info, device_enumeration_collector):
119134
side_effect=[
120135
MagicMock(
121136
exit_code=1,
122-
stdout="some output",
137+
stdout="",
123138
stderr="command failed",
124-
command="lscpu | grep Socket | awk '{ print $2 }'",
139+
command="lscpu",
125140
),
126141
MagicMock(
127142
exit_code=1,
@@ -135,6 +150,12 @@ def test_collect_error(system_info, device_enumeration_collector):
135150
stderr="command failed",
136151
command="lspci -d 1002: | grep -i 'Virtual Function' | wc -l",
137152
),
153+
MagicMock(
154+
exit_code=1,
155+
stdout="",
156+
stderr="command failed",
157+
command="lshw",
158+
),
138159
]
139160
)
140161

0 commit comments

Comments
 (0)