88from pathlib import Path
99from collections .abc import Iterable
1010from enum import Enum
11- from typing import Optional , List
11+ from typing import Union , Optional , List
1212from concurrent .futures import ProcessPoolExecutor , as_completed
1313from types import FunctionType
1414from warnings import filterwarnings , warn
1515from distutils .util import strtobool
16+ from time import sleep
1617
1718import click
1819import pynwb
2829)
2930from .register_checks import InspectorMessage , Importance
3031from .tools import get_s3_urls_and_dandi_paths
31- from .utils import FilePathType , PathType , OptionalListOfStrings
32+ from .utils import FilePathType , PathType , OptionalListOfStrings , robust_s3_read , calculate_number_of_cpu
3233
3334INTERNAL_CONFIGS = dict (dandi = Path (__file__ ).parent / "internal_configs" / "dandi.inspector_config.yaml" )
3435
@@ -278,7 +279,7 @@ def inspect_all(
278279 config : Optional [dict ] = None ,
279280 ignore : OptionalListOfStrings = None ,
280281 select : OptionalListOfStrings = None ,
281- importance_threshold : Importance = Importance .BEST_PRACTICE_SUGGESTION ,
282+ importance_threshold : Union [ str , Importance ] = Importance .BEST_PRACTICE_SUGGESTION ,
282283 n_jobs : int = 1 ,
283284 skip_validate : bool = False ,
284285 progress_bar : bool = True ,
@@ -305,7 +306,7 @@ def inspect_all(
305306 Names of functions to skip.
306307 select: list of strings, optional
307308 Names of functions to pick out of available checks.
308- importance_threshold : string, optional
309+ importance_threshold : string or Importance , optional
309310 Ignores tests with an assigned importance below this threshold.
310311 Importance has three levels:
311312 CRITICAL
@@ -317,6 +318,8 @@ def inspect_all(
317318 The default is the lowest level, BEST_PRACTICE_SUGGESTION.
318319 n_jobs : int
319320 Number of jobs to use in parallel. Set to -1 to use all available resources.
321+ This may also be a negative integer x from -2 to -(number_of_cpus - 1) which acts like negative slicing by using
322+ all available CPUs minus x.
320323 Set to 1 (also the default) to disable.
321324 skip_validate : bool, optional
322325 Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
@@ -336,7 +339,11 @@ def inspect_all(
336339 Common options are 'draft' or 'published'.
337340 Defaults to the most recent published version, or if not published then the most recent draft version.
338341 """
342+ importance_threshold = (
343+ Importance [importance_threshold ] if isinstance (importance_threshold , str ) else importance_threshold
344+ )
339345 modules = modules or []
346+ n_jobs = calculate_number_of_cpu (requested_cpu = n_jobs )
340347 if progress_bar_options is None :
341348 progress_bar_options = dict (position = 0 , leave = False )
342349 if stream :
@@ -410,9 +417,10 @@ def inspect_nwb(
410417 config : dict = None ,
411418 ignore : OptionalListOfStrings = None ,
412419 select : OptionalListOfStrings = None ,
413- importance_threshold : Importance = Importance .BEST_PRACTICE_SUGGESTION ,
414- driver : str = None ,
420+ importance_threshold : Union [ str , Importance ] = Importance .BEST_PRACTICE_SUGGESTION ,
421+ driver : Optional [ str ] = None ,
415422 skip_validate : bool = False ,
423+ max_retries : int = 10 ,
416424) -> List [InspectorMessage ]:
417425 """
418426 Inspect a NWBFile object and return suggestions for improvements according to best practices.
@@ -431,7 +439,7 @@ def inspect_nwb(
431439 Names of functions to skip.
432440 select: list, optional
433441 Names of functions to pick out of available checks.
434- importance_threshold : string, optional
442+ importance_threshold : string or Importance , optional
435443 Ignores tests with an assigned importance below this threshold.
436444 Importance has three levels:
437445 CRITICAL
@@ -446,14 +454,23 @@ def inspect_nwb(
446454 skip_validate : bool
447455 Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
448456 The default is False, which is also recommended.
457+ max_retries : int, optional
458+ When using the ros3 driver to stream data from an S3 path, curl errors can occasionally occur.
459+ AWS suggests retrying iteratively with an exponential backoff of 0.1 * 2^retries seconds.
460+ This sets a hard bound on the number of times to attempt to retry the collection of messages.
461+ Defaults to 10 (corresponds to 102.4s maximum delay on final attempt).
449462 """
463+ importance_threshold = (
464+ Importance [importance_threshold ] if isinstance (importance_threshold , str ) else importance_threshold
465+ )
450466 if any (x is not None for x in [config , ignore , select , importance_threshold ]):
451467 checks = configure_checks (
452468 checks = checks , config = config , ignore = ignore , select = select , importance_threshold = importance_threshold
453469 )
454470 nwbfile_path = str (nwbfile_path )
455471 filterwarnings (action = "ignore" , message = "No cached namespaces found in .*" )
456472 filterwarnings (action = "ignore" , message = "Ignoring cached namespace .*" )
473+
457474 with pynwb .NWBHDF5IO (path = nwbfile_path , mode = "r" , load_namespaces = True , driver = driver ) as io :
458475 if not skip_validate :
459476 validation_errors = pynwb .validate (io = io )
@@ -467,7 +484,7 @@ def inspect_nwb(
467484 )
468485
469486 try :
470- nwbfile = io .read ( )
487+ nwbfile = robust_s3_read ( command = io .read , max_retries = max_retries )
471488 for inspector_message in run_checks (nwbfile = nwbfile , checks = checks ):
472489 inspector_message .file_path = nwbfile_path
473490 yield inspector_message
@@ -493,7 +510,7 @@ def run_checks(nwbfile: pynwb.NWBFile, checks: list):
493510 for nwbfile_object in nwbfile .objects .values ():
494511 if check_function .neurodata_type is None or issubclass (type (nwbfile_object ), check_function .neurodata_type ):
495512 try :
496- output = check_function ( nwbfile_object )
513+ output = robust_s3_read ( command = check_function , command_args = [ nwbfile_object ] )
497514 # if an individual check fails, include it in the report and continue with the inspection
498515 except Exception :
499516 output = InspectorMessage (
0 commit comments