Skip to content

Commit e5f0ef5

Browse files
Merge branch 'dev' into negative_job_slicing
2 parents 4ccdf14 + 1f4010c commit e5f0ef5

File tree

3 files changed

+38
-9
lines changed

3 files changed

+38
-9
lines changed

nwbinspector/nwbinspector.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from types import FunctionType
1414
from warnings import filterwarnings, warn
1515
from distutils.util import strtobool
16+
from time import sleep
1617

1718
import click
1819
import pynwb
@@ -28,7 +29,7 @@
2829
)
2930
from .register_checks import InspectorMessage, Importance
3031
from .tools import get_s3_urls_and_dandi_paths
31-
from .utils import FilePathType, PathType, OptionalListOfStrings, calculate_number_of_cpu
32+
from .utils import FilePathType, PathType, OptionalListOfStrings, robust_s3_read, calculate_number_of_cpu
3233

3334
INTERNAL_CONFIGS = dict(dandi=Path(__file__).parent / "internal_configs" / "dandi.inspector_config.yaml")
3435

@@ -417,8 +418,9 @@ def inspect_nwb(
417418
ignore: OptionalListOfStrings = None,
418419
select: OptionalListOfStrings = None,
419420
importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
420-
driver: str = None,
421+
driver: Optional[str] = None,
421422
skip_validate: bool = False,
423+
max_retries: int = 10,
422424
) -> List[InspectorMessage]:
423425
"""
424426
Inspect a NWBFile object and return suggestions for improvements according to best practices.
@@ -452,6 +454,11 @@ def inspect_nwb(
452454
skip_validate : bool
453455
Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
454456
The default is False, which is also recommended.
457+
max_retries : int, optional
458+
When using the ros3 driver to stream data from an s3 path, occasional curl issues can result.
459+
AWS suggests using iterative retry with an exponential backoff of 0.1 * 2^retries.
460+
This sets a hard bound on the number of times to attempt to retry the collection of messages.
461+
Defaults to 10 (backoff delays of up to 51.2s each, and at most about 102.3s of waiting in total).
455462
"""
456463
importance_threshold = (
457464
Importance[importance_threshold] if isinstance(importance_threshold, str) else importance_threshold
@@ -463,6 +470,7 @@ def inspect_nwb(
463470
nwbfile_path = str(nwbfile_path)
464471
filterwarnings(action="ignore", message="No cached namespaces found in .*")
465472
filterwarnings(action="ignore", message="Ignoring cached namespace .*")
473+
466474
with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io:
467475
if not skip_validate:
468476
validation_errors = pynwb.validate(io=io)
@@ -476,7 +484,7 @@ def inspect_nwb(
476484
)
477485

478486
try:
479-
nwbfile = io.read()
487+
nwbfile = robust_s3_read(command=io.read, max_retries=max_retries)
480488
for inspector_message in run_checks(nwbfile=nwbfile, checks=checks):
481489
inspector_message.file_path = nwbfile_path
482490
yield inspector_message
@@ -502,7 +510,7 @@ def run_checks(nwbfile: pynwb.NWBFile, checks: list):
502510
for nwbfile_object in nwbfile.objects.values():
503511
if check_function.neurodata_type is None or issubclass(type(nwbfile_object), check_function.neurodata_type):
504512
try:
505-
output = check_function(nwbfile_object)
513+
output = robust_s3_read(command=check_function, command_args=[nwbfile_object])
506514
# if an individual check fails, include it in the report and continue with the inspection
507515
except Exception:
508516
output = InspectorMessage(

nwbinspector/utils.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
import re
44
import json
55
import numpy as np
6-
from typing import TypeVar, Optional, List
6+
from typing import TypeVar, Optional, List, Dict, Callable
77
from pathlib import Path
88
from importlib import import_module
99
from packaging import version
10+
from time import sleep
1011

1112
PathType = TypeVar("PathType", str, Path) # For types that can be either files or folders
1213
FilePathType = TypeVar("FilePathType", str, Path)
@@ -116,6 +117,22 @@ def get_package_version(name: str) -> version.Version:
116117
return version.parse(package_version)
117118

118119

120+
def robust_s3_read(
    command: Callable, max_retries: int = 10, command_args: Optional[list] = None, command_kwargs: Optional[Dict] = None
):
    """
    Attempt the command (usually acting on an S3 IO) up to max_retries times using exponential backoff.

    Parameters
    ----------
    command : Callable
        The callable to invoke.
    max_retries : int
        The maximum number of times to attempt the command.
        A failed attempt number `retry` (counting from zero) is followed by a delay of 0.1 * 2**retry seconds,
        unless it was the last allowed attempt.
        The default is 10.
    command_args : list, optional
        Positional arguments forwarded to the command.
    command_kwargs : dict, optional
        Keyword arguments forwarded to the command.

    Returns
    -------
    The return value of the first attempt of the command that does not raise an OSError.

    Raises
    ------
    TimeoutError
        If every attempt raised an OSError (or if max_retries allows no attempts at all).
        The last OSError encountered, if any, is attached as the cause.
    Exception
        Any exception from the command that is not an OSError propagates immediately, without any retry.
    """
    command_args = command_args or []
    command_kwargs = command_kwargs or dict()
    last_error = None
    for retry in range(max_retries):
        try:
            return command(*command_args, **command_kwargs)
        except OSError as error:  # cannot curl request
            last_error = error
            # Only back off when another attempt will follow; sleeping after the final failure just delays the error
            if retry < max_retries - 1:
                sleep(0.1 * 2**retry)
    # Not every callable has a __name__ (e.g., functools.partial objects or callable instances)
    command_name = getattr(command, "__name__", repr(command))
    raise TimeoutError(
        f"Unable to complete the command ({command_name}) after {max_retries} attempts!"
    ) from last_error
134+
135+
119136
def calculate_number_of_cpu(requested_cpu: int = 1) -> int:
120137
"""
121138
Calculate the number CPUs to use with respect to negative slicing and check against maximal available resources.

tests/unit_tests/test_time_series.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from packaging import version
2+
from time import sleep
23

34
import numpy as np
45
import pynwb
@@ -14,7 +15,7 @@
1415
check_missing_unit,
1516
check_resolution,
1617
)
17-
from nwbinspector.utils import get_package_version
18+
from nwbinspector.utils import get_package_version, robust_s3_read
1819

1920
try:
2021
# Test ros3 on sub-YutaMouse54/sub-YutaMouse54_ses-YutaMouse54-160630_behavior+ecephys.nwb from #3
@@ -195,9 +196,12 @@ def test_check_none_matnwb_resolution_pass():
195196
load_namespaces=True,
196197
driver="ros3",
197198
) as io:
198-
nwbfile = io.read()
199-
time_series = nwbfile.processing["video_files"]["video"].time_series["20170203_KIB_01_s1.1.h264"]
200-
assert check_resolution(time_series) is None
199+
nwbfile = robust_s3_read(command=io.read)
200+
time_series = robust_s3_read(
201+
"20170203_KIB_01_s1.1.h264",
202+
command=nwbfile.processing["video_files"]["video"].time_series.get,
203+
)
204+
assert check_resolution(time_series) is None
201205

202206

203207
def test_check_resolution_fail():

0 commit comments

Comments
 (0)