Skip to content

Commit 975ca7a

Browse files
authored
Add nvme-cli 2.3 compat
Signed-off-by: Orsiris de Jong <[email protected]>
1 parent 4242258 commit 975ca7a

File tree

1 file changed

+36
-7
lines changed

1 file changed

+36
-7
lines changed

nvme_metrics.py

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,21 @@
131131
}
132132

133133

134+
def nvme_has_verbose():
    """
    Report whether the installed nvme-cli accepts --verbose for the smart-log command.

    Old nvme-cli versions like 2.3 on Debian 12 don't have --verbose for smart-log. The check
    runs `nvme smart-log --help` and looks for the flag in the usage text, which nvme-cli
    reports on stderr (stdout is searched as well, in case a version prints it there).

    Returns True when --verbose is listed in the usage text, False when it is not. If the
    probe itself cannot be executed (nvme missing from PATH, not executable, ...), a recent
    version is assumed and True is returned.
    """
    try:
        result = subprocess.run(
            ["nvme", "smart-log", "--help"], check=False, capture_output=True
        )
    except (OSError, subprocess.SubprocessError):
        # With check=False a non-zero exit status never raises; the realistic failure here is
        # the binary not being runnable at all. Assume a recent version in that case.
        return True

    # capture_output yields bytes; search them directly instead of their str() repr.
    return b"--verbose" in (result.stdout or b"") + (result.stderr or b"")
147+
148+
134149
def exec_nvme(*args):
135150
"""
136151
Execute nvme CLI tool with specified arguments and return captured stdout result. Set LC_ALL=C
@@ -141,15 +156,20 @@ def exec_nvme(*args):
141156
return subprocess.check_output(cmd, stderr=subprocess.PIPE, env=dict(os.environ, LC_ALL="C"))
142157

143158

144-
def exec_nvme_json(*args):
159+
def exec_nvme_json(*args, has_verbose=True):
    """
    Execute nvme CLI tool with specified arguments and return parsed JSON output.

    args are passed through to exec_nvme, followed by the JSON output flags.
    has_verbose is keyword-only and states whether the installed nvme-cli accepts --verbose
    for the smart-log command. It defaults to True so that callers which do not pass it keep
    the previous behaviour of always requesting verbose output.
    """
    # Note: nvme-cli v2.11 effectively introduced a breaking change by forcing JSON output to always
    # be verbose. Older versions of nvme-cli optionally produced verbose output if the --verbose
    # flag was specified. In order to avoid having to handle two different JSON schemas, always
    # add the --verbose flag.
    # Note2: nvme-cli 2.3 that ships with Debian 12 has no verbose parameter for the smart-log
    # command only, so the flag is dropped for smart-log when has_verbose is false.
    output_flags = ["--output-format", "json"]
    if has_verbose or "smart-log" not in args:
        output_flags.append("--verbose")

    output = exec_nvme(*args, *output_flags)
    return json.loads(output)
154174

155175

@@ -161,7 +181,8 @@ def main():
161181
cli_version = "unknown"
162182
metrics["nvmecli"].labels(cli_version).set(1)
163183

164-
device_list = exec_nvme_json("list")
184+
has_verbose = nvme_has_verbose()
185+
device_list = exec_nvme_json("list", has_verbose=has_verbose)
165186

166187
for device in device_list["Devices"]:
167188
for subsys in device["Subsystems"]:
@@ -187,7 +208,9 @@ def main():
187208
# FIXME: The smart-log should only need to be fetched once per controller, not
188209
# per namespace. However, in order to preserve legacy metric labels, fetch it
189210
# per namespace anyway. Most consumer grade SSDs will only have one namespace.
190-
smart_log = exec_nvme_json("smart-log", os.path.join("/dev", device_name))
211+
smart_log = exec_nvme_json(
212+
"smart-log", os.path.join("/dev", device_name), has_verbose=has_verbose
213+
)
191214

192215
# Various counters in the NVMe specification are 128-bit, which would have to
193216
# discard resolution if converted to a JSON number (i.e., float64_t). Instead,
@@ -208,9 +231,14 @@ def main():
208231
metrics["avail_spare"].labels(device_name).set(smart_log["avail_spare"] / 100)
209232
metrics["spare_thresh"].labels(device_name).set(smart_log["spare_thresh"] / 100)
210233
metrics["percent_used"].labels(device_name).set(smart_log["percent_used"] / 100)
211-
metrics["critical_warning"].labels(device_name).set(
212-
smart_log["critical_warning"]["value"]
213-
)
234+
if has_verbose:
235+
metrics["critical_warning"].labels(device_name).set(
236+
smart_log["critical_warning"]["value"]
237+
)
238+
else:
239+
metrics["critical_warning"].labels(device_name).set(
240+
smart_log["critical_warning"]
241+
)
214242
metrics["media_errors"].labels(device_name).inc(int(smart_log["media_errors"]))
215243
metrics["num_err_log_entries"].labels(device_name).inc(
216244
int(smart_log["num_err_log_entries"])
@@ -246,6 +274,7 @@ def main():
246274
main()
247275
except Exception as e:
248276
print("ERROR: {}".format(e), file=sys.stderr)
277+
raise
249278
sys.exit(1)
250279

251280
print(generate_latest(registry).decode(), end="")

0 commit comments

Comments
 (0)