@@ -274,11 +274,12 @@ def clean_scsi_debug(scsi_debug_dev):
274274 except :
275275 pass
276276
277- def find_nvme_ctrl_devs_for_subnqn (subnqn ):
277+ def find_nvme_ctrl_devs_for_subnqn (subnqn , timeout = 5 ):
278278 """
279279 Find NVMe controller devices for the specified subsystem nqn
280280
281281 :param str subnqn: subsystem nqn
282+ :param int timeout: timeout in seconds to wait for devices to appear
282283 """
283284
284285 def _check_subsys (subsys , dev_paths ):
@@ -295,33 +296,54 @@ def _check_subsys(subsys, dev_paths):
295296 except :
296297 pass
297298
298- ret , out , err = run_command ("nvme list --output-format=json --verbose" )
299- if ret != 0 :
300- raise RuntimeError ("Error getting NVMe list: '%s %s'" % (out , err ))
def _find_devices():
    """
    Run ``nvme list`` once and gather device paths via ``_check_subsys``

    Any failure is reported as "nothing found" (an empty list) instead of
    an exception so that the caller can simply retry.

    :returns: paths collected by ``_check_subsys``; empty list when the
              command exits non-zero or its output cannot be processed
    :rtype: list
    """
    ret, out, _err = run_command("nvme list --output-format=json --verbose")
    if ret != 0:
        return []

    try:
        decoded = json.JSONDecoder().decode(out)
        if not decoded or 'Devices' not in decoded:
            return []

        dev_paths = []
        for dev in decoded['Devices']:
            # nvme-cli 2.x
            if 'Subsystems' in dev:
                for subsys in dev['Subsystems']:
                    _check_subsys(subsys, dev_paths)
            # nvme-cli 1.x
            if 'SubsystemNQN' in dev:
                _check_subsys(dev, dev_paths)

        return dev_paths
    except Exception:
        # Best effort: malformed or unexpected output counts as "no devices
        # yet". Only Exception is caught so that KeyboardInterrupt and
        # SystemExit still propagate while the caller is retrying.
        return []
301323
302- decoder = json . JSONDecoder ()
303- decoded = decoder . decode ( out )
304- if not decoded or 'Devices' not in decoded :
305- return []
324+ # Poll for devices with exponential backoff
325+ start_time = time . time ( )
326+ wait_time = 0.1 # Start with 100ms
327+ max_wait = 0.5 # Cap at 500ms
306328
307- dev_paths = []
308- for dev in decoded ['Devices' ]:
309- # nvme-cli 2.x
310- if 'Subsystems' in dev :
311- for subsys in dev ['Subsystems' ]:
312- _check_subsys (subsys , dev_paths )
313- # nvme-cli 1.x
314- if 'SubsystemNQN' in dev :
315- _check_subsys (dev , dev_paths )
329+ while time .time () - start_time < timeout :
330+ dev_paths = _find_devices ()
331+ if dev_paths :
332+ return dev_paths
333+
334+ time .sleep (wait_time )
335+ wait_time = min (wait_time * 1.2 , max_wait ) # Slower exponential backoff
336+ os .system ("udevadm settle" ) # Ensure udev has processed any pending events
316337
317- return dev_paths
338+ return _find_devices ()
318339
319340
320- def find_nvme_ns_devs_for_subnqn (subnqn ):
341+ def find_nvme_ns_devs_for_subnqn (subnqn , timeout = 5 ):
321342 """
322343 Find NVMe namespace block devices for the specified subsystem nqn
323344
324345 :param str subnqn: subsystem nqn
346+ :param int timeout: timeout in seconds to wait for devices to appear
325347 """
326348
327349 def _check_namespaces (node , ns_dev_paths ):
@@ -344,26 +366,46 @@ def _check_subsys(subsys, ns_dev_paths):
344366 if 'Namespaces' in ctrl :
345367 _check_namespaces (ctrl , ns_dev_paths )
346368
347- ret , out , err = run_command ("nvme list --output-format=json --verbose" )
348- if ret != 0 :
349- raise RuntimeError ("Error getting NVMe list: '%s %s'" % (out , err ))
350-
351- decoder = json .JSONDecoder ()
352- decoded = decoder .decode (out )
353- if not decoded or 'Devices' not in decoded :
354- return []
355-
356- ns_dev_paths = []
357- for dev in decoded ['Devices' ]:
358- # nvme-cli 2.x
359- if 'Subsystems' in dev :
360- for subsys in dev ['Subsystems' ]:
361- _check_subsys (subsys , ns_dev_paths )
362- # nvme-cli 1.x
363- if 'SubsystemNQN' in dev :
364- _check_subsys (dev , ns_dev_paths )
365-
366- return ns_dev_paths
def _find_devices():
    """
    Run ``nvme list`` once and gather namespace paths via ``_check_subsys``

    Any failure is reported as "nothing found" (an empty list) instead of
    an exception so that the caller can simply retry.

    :returns: paths collected by ``_check_subsys``; empty list when the
              command exits non-zero or its output cannot be processed
    :rtype: list
    """
    ret, out, _err = run_command("nvme list --output-format=json --verbose")
    if ret != 0:
        return []

    try:
        decoded = json.JSONDecoder().decode(out)
        if not decoded or 'Devices' not in decoded:
            return []

        ns_dev_paths = []
        for dev in decoded['Devices']:
            # nvme-cli 2.x
            if 'Subsystems' in dev:
                for subsys in dev['Subsystems']:
                    _check_subsys(subsys, ns_dev_paths)
            # nvme-cli 1.x
            if 'SubsystemNQN' in dev:
                _check_subsys(dev, ns_dev_paths)

        return ns_dev_paths
    except Exception:
        # Best effort: malformed or unexpected output counts as "no devices
        # yet". Only Exception is caught so that KeyboardInterrupt and
        # SystemExit still propagate while the caller is retrying.
        return []
393+
394+ # Poll for devices with exponential backoff
395+ start_time = time .time ()
396+ wait_time = 0.1 # Start with 100ms
397+ max_wait = 0.5 # Cap at 500ms
398+
399+ while time .time () - start_time < timeout :
400+ ns_dev_paths = _find_devices ()
401+ if ns_dev_paths : # Return as soon as we find any namespace devices
402+ return ns_dev_paths
403+
404+ time .sleep (wait_time )
405+ wait_time = min (wait_time * 1.2 , max_wait ) # Slower exponential backoff
406+ os .system ("udevadm settle" ) # Ensure udev has processed any pending events
407+
408+ return _find_devices ()
367409
368410
369411def get_nvme_hostnqn ():
0 commit comments