2323# SOFTWARE.
2424#
2525###############################################################################
26+ import glob
27+ import os
28+
2629from pydantic import ValidationError
2730
2831from nodescraper .base import InBandDataCollector
@@ -41,13 +44,11 @@ def collect_data(
4144 self ,
4245 args = None ,
4346 ) -> tuple [TaskResult , NvmeDataModel | None ]:
44- """Collect detailed NVMe information from the system .
47+ """Collect detailed NVMe information from all NVMe devices .
4548
4649 Returns:
4750 tuple[TaskResult, NvmeDataModel | None]: Task result and data model with NVMe command outputs.
4851 """
49- data = {}
50-
5152 if self .system_info .os_family == OSFamily .WINDOWS :
5253 self ._log_event (
5354 category = EventCategory .SW_DRIVER ,
@@ -58,32 +59,56 @@ def collect_data(
5859 self .result .status = ExecutionStatus .NOT_RAN
5960 return self .result , None
6061
61- commands = [
62- "nvme smart-log /dev/nvme0" ,
63- "nvme error-log /dev/nvme0 --log-entries=256" ,
64- "nvme id-ctrl /dev/nvme0" ,
65- "nvme id-ns /dev/nvme0n1" ,
66- "nvme fw-log /dev/nvme0" ,
67- "nvme self-test-log /dev/nvme0" ,
68- "nvme get-log /dev/nvme0 --log-id=6 --log-len=512" ,
69- ]
70-
71- for cmd in commands :
72- res = self ._run_sut_cmd (cmd , sudo = True )
73- if res .exit_code == 0 :
74- data [cmd ] = res .stdout
75- else :
76- self ._log_event (
77- category = EventCategory .SW_DRIVER ,
78- description = f"Failed to execute NVMe command: '{ cmd } '" ,
79- data = {"command" : cmd , "exit_code" : res .exit_code },
80- priority = EventPriority .ERROR ,
81- console_log = True ,
82- )
62+ nvme_devices = self ._get_nvme_devices ()
63+ if not nvme_devices :
64+ self ._log_event (
65+ category = EventCategory .SW_DRIVER ,
66+ description = "No NVMe devices found" ,
67+ priority = EventPriority .CRITICAL ,
68+ )
69+ self .result .message = "No NVMe devices found"
70+ self .result .status = ExecutionStatus .ERROR
71+ return self .result , None
72+
73+ all_device_data = {}
74+ telemetry_file = "telemetry_log"
75+
76+ for dev in nvme_devices :
77+ device_data = {}
78+ commands = {
79+ "smart_log" : f"nvme smart-log { dev } " ,
80+ "error_log" : f"nvme error-log { dev } --log-entries=256" ,
81+ "id_ctrl" : f"nvme id-ctrl { dev } " ,
82+ "id_ns" : f"nvme id-ns { dev } n1" ,
83+ "fw_log" : f"nvme fw-log { dev } " ,
84+ "self_test_log" : f"nvme self-test-log { dev } " ,
85+ "get_log" : f"nvme get-log { dev } --log-id=6 --log-len=512" ,
86+ "telemetry_log" : f"nvme telemetry-log { dev } --output-file={ telemetry_file } " ,
87+ }
8388
84- if data :
89+ for key , cmd in commands .items ():
90+ res = self ._run_sut_cmd (cmd , sudo = True )
91+ if "telemetry-log" in cmd and res .exit_code == 0 :
92+ file_artifact = self ._read_sut_file (filename = telemetry_file , encoding = None )
93+ self ._log_file_artifact (file_artifact .filename , file_artifact .contents )
94+
95+ if res .exit_code == 0 :
96+ device_data [key ] = res .stdout
97+ else :
98+ self ._log_event (
99+ category = EventCategory .SW_DRIVER ,
100+ description = f"Failed to execute NVMe command: '{ cmd } '" ,
101+ data = {"command" : cmd , "exit_code" : res .exit_code },
102+ priority = EventPriority .ERROR ,
103+ console_log = True ,
104+ )
105+
106+ if device_data :
107+ all_device_data [os .path .basename (dev )] = device_data
108+
109+ if all_device_data :
85110 try :
86- nvme_data = NvmeDataModel (nvme_data = data )
111+ nvme_data = NvmeDataModel (devices = all_device_data )
87112 except ValidationError as e :
88113 self ._log_event (
89114 category = EventCategory .SW_DRIVER ,
@@ -93,6 +118,7 @@ def collect_data(
93118 )
94119 self .result .message = "NVMe data invalid format"
95120 self .result .status = ExecutionStatus .ERROR
121+ return self .result , None
96122
97123 self ._log_event (
98124 category = EventCategory .SW_DRIVER ,
@@ -101,14 +127,24 @@ def collect_data(
101127 priority = EventPriority .INFO ,
102128 )
103129 self .result .message = "NVMe data successfully collected"
130+ self .result .status = ExecutionStatus .OK
131+ return self .result , nvme_data
104132 else :
105- nvme_data = None
106133 self ._log_event (
107134 category = EventCategory .SW_DRIVER ,
108135 description = "Failed to collect any NVMe data" ,
109136 priority = EventPriority .CRITICAL ,
110137 )
111138 self .result .message = "No NVMe data collected"
112139 self .result .status = ExecutionStatus .ERROR
140+ return self .result , None
113141
114- return self .result , nvme_data
def _get_nvme_devices(self, dev_dir: str = "/dev") -> list[str]:
    """Find NVMe controller device nodes (e.g., /dev/nvme0, /dev/nvme1).

    Only entries named exactly ``nvme<N>`` are returned. Namespace nodes
    (``nvme0n1``, ``nvme0n2``), partitions (``nvme0n1p1``) and auxiliary
    nodes such as ``nvme-fabrics`` are skipped, because callers append the
    namespace suffix themselves when building per-device commands.

    Args:
        dev_dir: Directory scanned for device nodes. Defaults to ``/dev``.

    Returns:
        list[str]: Controller paths ordered by numeric controller index,
        so ``nvme2`` sorts before ``nvme10``. Empty when none are found.
    """
    # NOTE(review): this inspects the filesystem of the host running the
    # collector, while the nvme commands are issued through _run_sut_cmd.
    # Confirm both always refer to the same machine.
    controllers = []
    for dev_path in glob.glob(os.path.join(dev_dir, "nvme*")):
        # The glob guarantees the "nvme" prefix; whatever follows must be a
        # plain decimal controller index for the entry to be a controller.
        index = os.path.basename(dev_path).removeprefix("nvme")
        if not (index.isascii() and index.isdigit()):
            continue
        # Ignore dangling links and regular files that merely share the name.
        if os.path.exists(dev_path) and not os.path.isfile(dev_path):
            controllers.append((int(index), dev_path))
    return [path for _, path in sorted(controllers)]
0 commit comments