Skip to content

Commit 679605d

Browse files
Merge pull request #25 from amd/alex_nvme
Nvme collector
2 parents 669fc21 + 4e385b5 commit 679605d

File tree

5 files changed

+376
-0
lines changed

5 files changed

+376
-0
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from .nvme_plugin import NvmePlugin
27+
28+
__all__ = ["NvmePlugin"]
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
import os
27+
import re
28+
29+
from pydantic import ValidationError
30+
31+
from nodescraper.base import InBandDataCollector
32+
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily
33+
from nodescraper.models import TaskResult
34+
35+
from .nvmedata import NvmeDataModel
36+
37+
38+
class NvmeCollector(InBandDataCollector[NvmeDataModel, None]):
    """Collect NVMe details from the system.

    Controller nodes are discovered by listing ``/dev`` on the system under
    test. A fixed set of ``nvme`` CLI queries is then run against each
    controller, and the raw stdout of every successful query is stored.
    """

    DATA_MODEL = NvmeDataModel

    def collect_data(
        self,
        args=None,
    ) -> tuple[TaskResult, NvmeDataModel | None]:
        """Collect detailed NVMe information from all NVMe devices.

        Args:
            args: Not used by this collector.

        Returns:
            tuple[TaskResult, NvmeDataModel | None]: Task result and data model
            with NVMe command outputs. The model is None when running on
            Windows, when no NVMe device is found, when every command fails
            for every device, or when the collected data fails validation.
        """
        if self.system_info.os_family == OSFamily.WINDOWS:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="NVMe collection not supported on Windows",
                priority=EventPriority.WARNING,
            )
            self.result.message = "NVMe data collection skipped on Windows"
            self.result.status = ExecutionStatus.NOT_RAN
            return self.result, None

        nvme_devices = self._get_nvme_devices()
        if not nvme_devices:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="No NVMe devices found",
                priority=EventPriority.ERROR,
            )
            self.result.message = "No NVMe devices found"
            self.result.status = ExecutionStatus.ERROR
            return self.result, None

        # Keyed by the bare device name (e.g. "nvme0"); devices for which
        # every command failed are left out entirely.
        all_device_data = {}
        for dev in nvme_devices:
            device_data = self._collect_device_data(dev)
            if device_data:
                all_device_data[os.path.basename(dev)] = device_data

        if not all_device_data:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="Failed to collect any NVMe data",
                priority=EventPriority.ERROR,
            )
            self.result.message = "No NVMe data collected"
            self.result.status = ExecutionStatus.ERROR
            return self.result, None

        try:
            nvme_data = NvmeDataModel(devices=all_device_data)
        except ValidationError as exp:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="Validation error while building NvmeDataModel",
                data={"errors": exp.errors(include_url=False)},
                priority=EventPriority.ERROR,
            )
            self.result.message = "NVMe data invalid format"
            self.result.status = ExecutionStatus.ERROR
            return self.result, None

        self._log_event(
            category=EventCategory.SW_DRIVER,
            description="Collected NVMe data",
            data=nvme_data.model_dump(),
            priority=EventPriority.INFO,
        )
        self.result.message = "NVMe data successfully collected"
        self.result.status = ExecutionStatus.OK
        return self.result, nvme_data

    def _collect_device_data(self, dev: str) -> dict[str, str]:
        """Run every NVMe query against one controller and keep the successes.

        Args:
            dev: Controller device path, e.g. ``/dev/nvme0``.

        Returns:
            dict[str, str]: Command key mapped to the command's stdout. Commands
            that exit non-zero are logged as warnings and omitted, so the
            result may be empty.
        """
        commands = {
            "smart_log": f"nvme smart-log {dev}",
            "error_log": f"nvme error-log {dev} --log-entries=256",
            "id_ctrl": f"nvme id-ctrl {dev}",
            # NOTE(review): namespace 1 is hard-coded here; controllers whose
            # only namespaces use other IDs will fail this query - confirm
            # whether namespaces should be enumerated instead.
            "id_ns": f"nvme id-ns {dev}n1",
            "fw_log": f"nvme fw-log {dev}",
            "self_test_log": f"nvme self-test-log {dev}",
            "get_log": f"nvme get-log {dev} --log-id=6 --log-len=512",
        }

        device_data = {}
        for key, cmd in commands.items():
            res = self._run_sut_cmd(cmd, sudo=True)
            if res.exit_code != 0:
                self._log_event(
                    category=EventCategory.SW_DRIVER,
                    description=f"Failed to execute NVMe command: '{cmd}'",
                    # stderr is recorded as well, matching the /dev listing
                    # failure event, so the cause is visible in the event.
                    data={
                        "command": cmd,
                        "exit_code": res.exit_code,
                        "stderr": res.stderr,
                    },
                    priority=EventPriority.WARNING,
                    console_log=True,
                )
                continue
            device_data[key] = res.stdout

        return device_data

    def _get_nvme_devices(self) -> list[str]:
        """List NVMe controller device paths found under ``/dev``.

        Only entries of the exact form ``nvme<digits>`` are kept, so namespace
        nodes such as ``nvme0n1`` are excluded.

        Returns:
            list[str]: Paths such as ``/dev/nvme0`` in listing order; empty if
            the listing command fails or nothing matches.
        """
        res = self._run_sut_cmd("ls /dev", sudo=False)
        if res.exit_code != 0:
            self._log_event(
                category=EventCategory.SW_DRIVER,
                description="Failed to list /dev directory",
                data={"exit_code": res.exit_code, "stderr": res.stderr},
                priority=EventPriority.ERROR,
            )
            return []

        # fullmatch already anchors both ends, so no explicit "$" is needed.
        return [
            f"/dev/{entry}"
            for entry in res.stdout.strip().splitlines()
            if re.fullmatch(r"nvme\d+", entry)
        ]
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from nodescraper.base import InBandDataPlugin
27+
28+
from .nvme_collector import NvmeCollector
29+
from .nvmedata import NvmeDataModel
30+
31+
32+
class NvmePlugin(InBandDataPlugin[NvmeDataModel, None, None]):
    """In-band plugin that gathers NVMe data.

    Pairs NvmeCollector with NvmeDataModel. Only a collector and a data model
    are registered on this class.
    """

    # Collector class that produces the NVMe data.
    COLLECTOR = NvmeCollector

    # Model class describing the collected NVMe data.
    DATA_MODEL = NvmeDataModel
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from pydantic import BaseModel
27+
28+
from nodescraper.models import DataModel
29+
30+
31+
class DeviceNvmeData(BaseModel):
    """Per-device NVMe command outputs.

    Every field is optional and defaults to None, so a device for which only
    some commands succeeded can still be represented.
    """

    # Field names mirror the command keys used by the NVMe collector
    # ("smart_log", "error_log", ...).
    smart_log: str | None = None
    error_log: str | None = None
    id_ctrl: str | None = None
    id_ns: str | None = None
    fw_log: str | None = None
    self_test_log: str | None = None
    get_log: str | None = None
    # NOTE(review): no collector command with this key is visible alongside
    # this model - confirm whether telemetry collection is planned.
    telemetry_log: str | None = None
40+
41+
42+
class NvmeDataModel(DataModel):
    """Data model holding NVMe command outputs for every collected device."""

    # Maps a device name to that device's command outputs.
    devices: dict[str, DeviceNvmeData]
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from unittest.mock import MagicMock
27+
28+
import pytest
29+
30+
from nodescraper.enums import EventPriority, ExecutionStatus, OSFamily
31+
from nodescraper.enums.systeminteraction import SystemInteractionLevel
32+
from nodescraper.models import TaskResult
33+
from nodescraper.plugins.inband.nvme.nvme_collector import NvmeCollector
34+
from nodescraper.plugins.inband.nvme.nvmedata import NvmeDataModel
35+
36+
37+
@pytest.fixture
def collector(system_info, conn_mock):
    """Build an NvmeCollector whose logging and command execution are mocked."""
    nvme_collector = NvmeCollector(
        connection=conn_mock,
        system_interaction_level=SystemInteractionLevel.PASSIVE,
        system_info=system_info,
    )
    # Replace the side-effecting hooks so tests can inspect calls and never
    # touch a real system.
    nvme_collector._log_event = MagicMock()
    nvme_collector._run_sut_cmd = MagicMock()
    nvme_collector.result = TaskResult()
    return nvme_collector
48+
49+
50+
def test_skips_on_windows(collector):
    """Collection is skipped on Windows with a single logged event."""
    collector.system_info = MagicMock(os_family=OSFamily.WINDOWS)

    task_result, collected = collector.collect_data()

    assert task_result.status == ExecutionStatus.NOT_RAN
    assert collected is None
    collector._log_event.assert_called_once()
    logged_description = collector._log_event.call_args.kwargs["description"]
    assert "Windows" in logged_description
58+
59+
60+
def test_successful_collection(collector):
    """All NVMe commands succeeding yields an OK result and a data model."""
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)

    # Device discovery is stubbed so the test needs no NVMe hardware; without
    # this the blanket command mock below would be used as the `ls /dev`
    # output and no device would ever be found.
    collector._get_nvme_devices = MagicMock(return_value=["/dev/nvme0"])
    collector._run_sut_cmd.return_value = MagicMock(exit_code=0, stdout="output")

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.OK
    assert result.message == "NVMe data successfully collected"
    assert isinstance(data, NvmeDataModel)
    assert set(data.devices) == {"nvme0"}
    # One call per NVMe command for the single stubbed device.
    assert collector._run_sut_cmd.call_count == 7
    assert any(
        "Collected NVMe data" in call.kwargs["description"]
        for call in collector._log_event.call_args_list
    )
76+
77+
78+
def test_partial_failures(collector):
    """A single successful NVMe command is enough to produce a data model."""
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)

    def fake_cmd(cmd, sudo):
        # Discovery has to succeed, otherwise collect_data returns before any
        # nvme command is issued and the partial-failure path is never hit.
        if cmd == "ls /dev":
            return MagicMock(exit_code=0, stdout="nvme0\nnvme0n1\nsda\n")
        return MagicMock(exit_code=0 if "smart-log" in cmd else 1, stdout="out")

    collector._run_sut_cmd.side_effect = fake_cmd

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.OK
    assert isinstance(data, NvmeDataModel)
    assert set(data.devices) == {"nvme0"}
    assert data.devices["nvme0"].smart_log == "out"
    assert data.devices["nvme0"].error_log is None

    # Each of the six failing commands is reported as a warning.
    warning_calls = [
        call
        for call in collector._log_event.call_args_list
        if call.kwargs["priority"] == EventPriority.WARNING
    ]
    assert len(warning_calls) == 6
90+
91+
92+
def test_no_data_collected(collector):
    """Every NVMe command failing yields an ERROR result and no data."""
    collector.system_info = MagicMock(os_family=OSFamily.LINUX)

    # Device discovery is stubbed so the failure comes from the nvme commands
    # themselves rather than from an empty device list.
    collector._get_nvme_devices = MagicMock(return_value=["/dev/nvme0"])
    collector._run_sut_cmd.return_value = MagicMock(exit_code=1, stdout="")

    result, data = collector.collect_data()

    assert result.status == ExecutionStatus.ERROR
    assert data is None
    assert "No NVMe data collected" in result.message
    assert any(
        call.kwargs["priority"] == EventPriority.ERROR
        for call in collector._log_event.call_args_list
    )
107+
108+
109+
def test_get_nvme_devices_filters_partitions(collector):
    """Only ``nvme<N>`` entries from the /dev listing are returned."""
    dev_entries = ["nvme0", "nvme0n1", "nvme1", "nvme1n1", "sda", "loop0", "nvme2"]
    listing = MagicMock(exit_code=0, stdout="\n".join(dev_entries))
    collector._run_sut_cmd.return_value = listing

    found = collector._get_nvme_devices()

    assert found == ["/dev/nvme0", "/dev/nvme1", "/dev/nvme2"]

0 commit comments

Comments
 (0)