88from pathlib import Path
99from collections .abc import Iterable
1010from enum import Enum
11- from typing import Optional , List
11+ from typing import Union , Optional , List
1212from concurrent .futures import ProcessPoolExecutor , as_completed
1313from types import FunctionType
1414from warnings import filterwarnings , warn
1515from distutils .util import strtobool
16+ from time import sleep
1617
1718import click
1819import pynwb
2829)
2930from .register_checks import InspectorMessage , Importance
3031from .tools import get_s3_urls_and_dandi_paths
31- from .utils import FilePathType , PathType , OptionalListOfStrings
32+ from .utils import FilePathType , PathType , OptionalListOfStrings , robust_s3_read , calculate_number_of_cpu
3233
3334INTERNAL_CONFIGS = dict (dandi = Path (__file__ ).parent / "internal_configs" / "dandi.inspector_config.yaml" )
3435
@@ -294,7 +295,7 @@ def inspect_all(
294295 config : Optional [dict ] = None ,
295296 ignore : OptionalListOfStrings = None ,
296297 select : OptionalListOfStrings = None ,
297- importance_threshold : Importance = Importance .BEST_PRACTICE_SUGGESTION ,
298+ importance_threshold : Union [ str , Importance ] = Importance .BEST_PRACTICE_SUGGESTION ,
298299 n_jobs : int = 1 ,
299300 skip_validate : bool = False ,
300301 progress_bar : bool = True ,
@@ -321,7 +322,7 @@ def inspect_all(
321322 Names of functions to skip.
322323 select: list of strings, optional
323324 Names of functions to pick out of available checks.
324- importance_threshold : string, optional
325+ importance_threshold : string or Importance , optional
325326 Ignores tests with an assigned importance below this threshold.
326327 Importance has three levels:
327328 CRITICAL
@@ -333,6 +334,8 @@ def inspect_all(
333334 The default is the lowest level, BEST_PRACTICE_SUGGESTION.
334335 n_jobs : int
335336 Number of jobs to use in parallel. Set to -1 to use all available resources.
337+ This may also be a negative integer x from -2 to -(number_of_cpus - 1), which acts like negative indexing:
338+ -1 uses all available CPUs, -2 uses all but one, and so on.
336339 Set to 1 (also the default) to disable.
337340 skip_validate : bool, optional
338341 Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
@@ -352,7 +355,11 @@ def inspect_all(
352355 Common options are 'draft' or 'published'.
353356 Defaults to the most recent published version, or if not published then the most recent draft version.
354357 """
358+ importance_threshold = (
359+ Importance [importance_threshold ] if isinstance (importance_threshold , str ) else importance_threshold
360+ )
355361 modules = modules or []
362+ n_jobs = calculate_number_of_cpu (requested_cpu = n_jobs )
356363 if progress_bar_options is None :
357364 progress_bar_options = dict (position = 0 , leave = False )
358365 if stream :
@@ -426,9 +433,10 @@ def inspect_nwb(
426433 config : dict = None ,
427434 ignore : OptionalListOfStrings = None ,
428435 select : OptionalListOfStrings = None ,
429- importance_threshold : Importance = Importance .BEST_PRACTICE_SUGGESTION ,
430- driver : str = None ,
436+ importance_threshold : Union [ str , Importance ] = Importance .BEST_PRACTICE_SUGGESTION ,
437+ driver : Optional [ str ] = None ,
431438 skip_validate : bool = False ,
439+ max_retries : int = 10 ,
432440) -> List [InspectorMessage ]:
433441 """
434442 Inspect a NWBFile object and return suggestions for improvements according to best practices.
@@ -447,7 +455,7 @@ def inspect_nwb(
447455 Names of functions to skip.
448456 select: list, optional
449457 Names of functions to pick out of available checks.
450- importance_threshold : string, optional
458+ importance_threshold : string or Importance , optional
451459 Ignores tests with an assigned importance below this threshold.
452460 Importance has three levels:
453461 CRITICAL
@@ -462,14 +470,23 @@ def inspect_nwb(
462470 skip_validate : bool
463471 Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
464472 The default is False, which is also recommended.
473+ max_retries : int, optional
474+ When using the ros3 driver to stream data from an s3 path, occasional curl issues can result.
475+ AWS suggests using iterative retry with an exponential backoff of 0.1 * 2^retries.
476+ This sets a hard bound on the number of times to attempt to retry the collection of messages.
477+ Defaults to 10 (corresponds to 102.4s maximum delay on final attempt).
465478 """
479+ importance_threshold = (
480+ Importance [importance_threshold ] if isinstance (importance_threshold , str ) else importance_threshold
481+ )
466482 if any (x is not None for x in [config , ignore , select , importance_threshold ]):
467483 checks = configure_checks (
468484 checks = checks , config = config , ignore = ignore , select = select , importance_threshold = importance_threshold
469485 )
470486 nwbfile_path = str (nwbfile_path )
471487 filterwarnings (action = "ignore" , message = "No cached namespaces found in .*" )
472488 filterwarnings (action = "ignore" , message = "Ignoring cached namespace .*" )
489+
473490 with pynwb .NWBHDF5IO (path = nwbfile_path , mode = "r" , load_namespaces = True , driver = driver ) as io :
474491 if not skip_validate :
475492 validation_errors = pynwb .validate (io = io )
@@ -483,7 +500,7 @@ def inspect_nwb(
483500 )
484501
485502 try :
486- nwbfile = io .read ( )
503+ nwbfile = robust_s3_read ( command = io .read , max_retries = max_retries )
487504 for inspector_message in run_checks (nwbfile = nwbfile , checks = checks ):
488505 inspector_message .file_path = nwbfile_path
489506 yield inspector_message
@@ -509,7 +526,7 @@ def run_checks(nwbfile: pynwb.NWBFile, checks: list):
509526 for nwbfile_object in nwbfile .objects .values ():
510527 if check_function .neurodata_type is None or issubclass (type (nwbfile_object ), check_function .neurodata_type ):
511528 try :
512- output = check_function ( nwbfile_object )
529+ output = robust_s3_read ( command = check_function , command_args = [ nwbfile_object ] )
513530 # if an individual check fails, include it in the report and continue with the inspection
514531 except Exception :
515532 output = InspectorMessage (
0 commit comments