Skip to content

Commit b19969e

Browse files
committed
tests: Fix NVMe device timing issues with polling mechanism
Fixes #1080

Replace immediate device lookup with intelligent polling to handle timing issues where NVMe namespace devices may not be immediately available after controller creation.

- Add exponential backoff polling (100ms-500ms, 5s timeout)
- Include udevadm settle for device readiness
- Maintain backward compatibility
- More reliable than a simple sleep() approach
1 parent a21347f commit b19969e

File tree

1 file changed

+81
-39
lines changed

1 file changed

+81
-39
lines changed

tests/utils.py

Lines changed: 81 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -274,11 +274,12 @@ def clean_scsi_debug(scsi_debug_dev):
274274
except:
275275
pass
276276

277-
def find_nvme_ctrl_devs_for_subnqn(subnqn):
277+
def find_nvme_ctrl_devs_for_subnqn(subnqn, timeout=5):
278278
"""
279279
Find NVMe controller devices for the specified subsystem nqn
280280
281281
:param str subnqn: subsystem nqn
282+
:param int timeout: timeout in seconds to wait for devices to appear
282283
"""
283284

284285
def _check_subsys(subsys, dev_paths):
@@ -295,33 +296,54 @@ def _check_subsys(subsys, dev_paths):
295296
except:
296297
pass
297298

298-
ret, out, err = run_command("nvme list --output-format=json --verbose")
299-
if ret != 0:
300-
raise RuntimeError("Error getting NVMe list: '%s %s'" % (out, err))
299+
def _find_devices():
300+
ret, out, err = run_command("nvme list --output-format=json --verbose")
301+
if ret != 0:
302+
return []
303+
304+
try:
305+
decoder = json.JSONDecoder()
306+
decoded = decoder.decode(out)
307+
if not decoded or 'Devices' not in decoded:
308+
return []
309+
310+
dev_paths = []
311+
for dev in decoded['Devices']:
312+
# nvme-cli 2.x
313+
if 'Subsystems' in dev:
314+
for subsys in dev['Subsystems']:
315+
_check_subsys(subsys, dev_paths)
316+
# nvme-cli 1.x
317+
if 'SubsystemNQN' in dev:
318+
_check_subsys(dev, dev_paths)
319+
320+
return dev_paths
321+
except:
322+
return []
301323

302-
decoder = json.JSONDecoder()
303-
decoded = decoder.decode(out)
304-
if not decoded or 'Devices' not in decoded:
305-
return []
324+
# Poll for devices with exponential backoff
325+
start_time = time.time()
326+
wait_time = 0.1 # Start with 100ms
327+
max_wait = 0.5 # Cap at 500ms
306328

307-
dev_paths = []
308-
for dev in decoded['Devices']:
309-
# nvme-cli 2.x
310-
if 'Subsystems' in dev:
311-
for subsys in dev['Subsystems']:
312-
_check_subsys(subsys, dev_paths)
313-
# nvme-cli 1.x
314-
if 'SubsystemNQN' in dev:
315-
_check_subsys(dev, dev_paths)
329+
while time.time() - start_time < timeout:
330+
dev_paths = _find_devices()
331+
if dev_paths:
332+
return dev_paths
333+
334+
time.sleep(wait_time)
335+
wait_time = min(wait_time * 1.2, max_wait) # Slower exponential backoff
336+
os.system("udevadm settle") # Ensure udev has processed any pending events
316337

317-
return dev_paths
338+
return _find_devices()
318339

319340

320-
def find_nvme_ns_devs_for_subnqn(subnqn):
341+
def find_nvme_ns_devs_for_subnqn(subnqn, timeout=5):
321342
"""
322343
Find NVMe namespace block devices for the specified subsystem nqn
323344
324345
:param str subnqn: subsystem nqn
346+
:param int timeout: timeout in seconds to wait for devices to appear
325347
"""
326348

327349
def _check_namespaces(node, ns_dev_paths):
@@ -344,26 +366,46 @@ def _check_subsys(subsys, ns_dev_paths):
344366
if 'Namespaces' in ctrl:
345367
_check_namespaces(ctrl, ns_dev_paths)
346368

347-
ret, out, err = run_command("nvme list --output-format=json --verbose")
348-
if ret != 0:
349-
raise RuntimeError("Error getting NVMe list: '%s %s'" % (out, err))
350-
351-
decoder = json.JSONDecoder()
352-
decoded = decoder.decode(out)
353-
if not decoded or 'Devices' not in decoded:
354-
return []
355-
356-
ns_dev_paths = []
357-
for dev in decoded['Devices']:
358-
# nvme-cli 2.x
359-
if 'Subsystems' in dev:
360-
for subsys in dev['Subsystems']:
361-
_check_subsys(subsys, ns_dev_paths)
362-
# nvme-cli 1.x
363-
if 'SubsystemNQN' in dev:
364-
_check_subsys(dev, ns_dev_paths)
365-
366-
return ns_dev_paths
369+
def _find_devices():
370+
ret, out, err = run_command("nvme list --output-format=json --verbose")
371+
if ret != 0:
372+
return []
373+
374+
try:
375+
decoder = json.JSONDecoder()
376+
decoded = decoder.decode(out)
377+
if not decoded or 'Devices' not in decoded:
378+
return []
379+
380+
ns_dev_paths = []
381+
for dev in decoded['Devices']:
382+
# nvme-cli 2.x
383+
if 'Subsystems' in dev:
384+
for subsys in dev['Subsystems']:
385+
_check_subsys(subsys, ns_dev_paths)
386+
# nvme-cli 1.x
387+
if 'SubsystemNQN' in dev:
388+
_check_subsys(dev, ns_dev_paths)
389+
390+
return ns_dev_paths
391+
except:
392+
return []
393+
394+
# Poll for devices with exponential backoff
395+
start_time = time.time()
396+
wait_time = 0.1 # Start with 100ms
397+
max_wait = 0.5 # Cap at 500ms
398+
399+
while time.time() - start_time < timeout:
400+
ns_dev_paths = _find_devices()
401+
if ns_dev_paths: # Return as soon as we find any namespace devices
402+
return ns_dev_paths
403+
404+
time.sleep(wait_time)
405+
wait_time = min(wait_time * 1.2, max_wait) # Slower exponential backoff
406+
os.system("udevadm settle") # Ensure udev has processed any pending events
407+
408+
return _find_devices()
367409

368410

369411
def get_nvme_hostnqn():

0 commit comments

Comments
 (0)