1313from types import FunctionType
1414from warnings import filterwarnings , warn
1515from distutils .util import strtobool
16+ from time import sleep
1617
1718import click
1819import pynwb
2829)
2930from .register_checks import InspectorMessage , Importance
3031from .tools import get_s3_urls_and_dandi_paths
31- from .utils import FilePathType , PathType , OptionalListOfStrings , calculate_number_of_cpu
32+ from .utils import FilePathType , PathType , OptionalListOfStrings , robust_s3_read , calculate_number_of_cpu
3233
3334INTERNAL_CONFIGS = dict (dandi = Path (__file__ ).parent / "internal_configs" / "dandi.inspector_config.yaml" )
3435
@@ -417,8 +418,9 @@ def inspect_nwb(
417418 ignore : OptionalListOfStrings = None ,
418419 select : OptionalListOfStrings = None ,
419420 importance_threshold : Union [str , Importance ] = Importance .BEST_PRACTICE_SUGGESTION ,
420- driver : str = None ,
421+ driver : Optional [ str ] = None ,
421422 skip_validate : bool = False ,
423+ max_retries : int = 10 ,
422424) -> List [InspectorMessage ]:
423425 """
424426 Inspect a NWBFile object and return suggestions for improvements according to best practices.
@@ -452,6 +454,11 @@ def inspect_nwb(
452454 skip_validate : bool
453455 Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
454456 The default is False, which is also recommended.
457+ max_retries : int, optional
458+ When using the ros3 driver to stream data from an s3 path, occasional curl issues can result.
459+ AWS suggests using iterative retry with an exponential backoff of 0.1 * 2^retries.
460+ This sets a hard bound on the number of times to attempt to retry the collection of messages.
461+ Defaults to 10 (corresponds to 102.4s maximum delay on final attempt).
455462 """
456463 importance_threshold = (
457464 Importance [importance_threshold ] if isinstance (importance_threshold , str ) else importance_threshold
@@ -463,6 +470,7 @@ def inspect_nwb(
463470 nwbfile_path = str (nwbfile_path )
464471 filterwarnings (action = "ignore" , message = "No cached namespaces found in .*" )
465472 filterwarnings (action = "ignore" , message = "Ignoring cached namespace .*" )
473+
466474 with pynwb .NWBHDF5IO (path = nwbfile_path , mode = "r" , load_namespaces = True , driver = driver ) as io :
467475 if not skip_validate :
468476 validation_errors = pynwb .validate (io = io )
@@ -476,7 +484,7 @@ def inspect_nwb(
476484 )
477485
478486 try :
479- nwbfile = io .read ( )
487+ nwbfile = robust_s3_read ( command = io .read , max_retries = max_retries )
480488 for inspector_message in run_checks (nwbfile = nwbfile , checks = checks ):
481489 inspector_message .file_path = nwbfile_path
482490 yield inspector_message
@@ -502,7 +510,7 @@ def run_checks(nwbfile: pynwb.NWBFile, checks: list):
502510 for nwbfile_object in nwbfile .objects .values ():
503511 if check_function .neurodata_type is None or issubclass (type (nwbfile_object ), check_function .neurodata_type ):
504512 try :
505- output = check_function ( nwbfile_object )
513+ output = robust_s3_read ( command = check_function , command_args = [ nwbfile_object ] )
506514 # if an individual check fails, include it in the report and continue with the inspection
507515 except Exception :
508516 output = InspectorMessage (
0 commit comments