Skip to content

Commit df1a0b0

Browse files
Merge branch 'dev' into add_strain_details
2 parents 243d1e0 + 1d35700 commit df1a0b0

File tree

6 files changed

+99
-12
lines changed

6 files changed

+99
-12
lines changed

nwbinspector/nwbinspector.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from types import FunctionType
1414
from warnings import filterwarnings, warn
1515
from distutils.util import strtobool
16+
from time import sleep
1617

1718
import click
1819
import pynwb
@@ -28,7 +29,7 @@
2829
)
2930
from .register_checks import InspectorMessage, Importance
3031
from .tools import get_s3_urls_and_dandi_paths
31-
from .utils import FilePathType, PathType, OptionalListOfStrings
32+
from .utils import FilePathType, PathType, OptionalListOfStrings, robust_s3_read, calculate_number_of_cpu
3233

3334
INTERNAL_CONFIGS = dict(dandi=Path(__file__).parent / "internal_configs" / "dandi.inspector_config.yaml")
3435

@@ -317,6 +318,8 @@ def inspect_all(
317318
The default is the lowest level, BEST_PRACTICE_SUGGESTION.
318319
n_jobs : int
319320
Number of jobs to use in parallel. Set to -1 to use all available resources.
321+
This may also be a negative integer x from -2 to -(number_of_cpus - 1) which acts like negative slicing by using
322+
all available CPUs minus x.
320323
Set to 1 (also the default) to disable.
321324
skip_validate : bool, optional
322325
Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
@@ -340,6 +343,7 @@ def inspect_all(
340343
Importance[importance_threshold] if isinstance(importance_threshold, str) else importance_threshold
341344
)
342345
modules = modules or []
346+
n_jobs = calculate_number_of_cpu(requested_cpu=n_jobs)
343347
if progress_bar_options is None:
344348
progress_bar_options = dict(position=0, leave=False)
345349
if stream:
@@ -414,8 +418,9 @@ def inspect_nwb(
414418
ignore: OptionalListOfStrings = None,
415419
select: OptionalListOfStrings = None,
416420
importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
417-
driver: str = None,
421+
driver: Optional[str] = None,
418422
skip_validate: bool = False,
423+
max_retries: int = 10,
419424
) -> List[InspectorMessage]:
420425
"""
421426
Inspect a NWBFile object and return suggestions for improvements according to best practices.
@@ -449,6 +454,11 @@ def inspect_nwb(
449454
skip_validate : bool
450455
Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
451456
The default is False, which is also recommended.
457+
max_retries : int, optional
458+
When using the ros3 driver to stream data from an s3 path, occasional curl issues can result.
459+
AWS suggests using iterative retry with an exponential backoff of 0.1 * 2^retries.
460+
This sets a hard bound on the number of times to attempt to retry the collection of messages.
461+
Defaults to 10 (corresponds to 102.4s maximum delay on final attempt).
452462
"""
453463
importance_threshold = (
454464
Importance[importance_threshold] if isinstance(importance_threshold, str) else importance_threshold
@@ -460,6 +470,7 @@ def inspect_nwb(
460470
nwbfile_path = str(nwbfile_path)
461471
filterwarnings(action="ignore", message="No cached namespaces found in .*")
462472
filterwarnings(action="ignore", message="Ignoring cached namespace .*")
473+
463474
with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io:
464475
if not skip_validate:
465476
validation_errors = pynwb.validate(io=io)
@@ -473,7 +484,7 @@ def inspect_nwb(
473484
)
474485

475486
try:
476-
nwbfile = io.read()
487+
nwbfile = robust_s3_read(command=io.read, max_retries=max_retries)
477488
for inspector_message in run_checks(nwbfile=nwbfile, checks=checks):
478489
inspector_message.file_path = nwbfile_path
479490
yield inspector_message
@@ -499,7 +510,7 @@ def run_checks(nwbfile: pynwb.NWBFile, checks: list):
499510
for nwbfile_object in nwbfile.objects.values():
500511
if check_function.neurodata_type is None or issubclass(type(nwbfile_object), check_function.neurodata_type):
501512
try:
502-
output = check_function(nwbfile_object)
513+
output = robust_s3_read(command=check_function, command_args=[nwbfile_object])
503514
# if an individual check fails, include it in the report and continue with the inspection
504515
except Exception:
505516
output = InspectorMessage(

nwbinspector/tools.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from pynwb import NWBFile
99

10-
from .utils import is_module_installed
10+
from .utils import is_module_installed, calculate_number_of_cpu
1111

1212

1313
def make_minimal_nwbfile():
@@ -43,6 +43,7 @@ def get_s3_urls_and_dandi_paths(dandiset_id: str, version_id: Optional[str] = No
4343
), "The specified 'path' is not a proper DANDISet ID. It should be a six-digit numeric identifier."
4444

4545
s3_urls_to_dandi_paths = dict()
46+
n_jobs = calculate_number_of_cpu(requested_cpu=n_jobs)
4647
if n_jobs != 1:
4748
with DandiAPIClient() as client:
4849
dandiset = client.get_dandiset(dandiset_id=dandiset_id, version_id=version_id)

nwbinspector/utils.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""Commonly reused logic for evaluating conditions; must not have external dependencies."""
2+
import os
23
import re
34
import json
45
import numpy as np
5-
from typing import TypeVar, Optional, List
6+
from typing import TypeVar, Optional, List, Dict, Callable
67
from pathlib import Path
78
from importlib import import_module
89
from packaging import version
10+
from time import sleep
911

1012
PathType = TypeVar("PathType", str, Path) # For types that can be either files or folders
1113
FilePathType = TypeVar("FilePathType", str, Path)
@@ -113,3 +115,41 @@ def get_package_version(name: str) -> version.Version:
113115

114116
package_version = get_distribution(name).version
115117
return version.parse(package_version)
118+
119+
120+
def robust_s3_read(
121+
command: Callable, max_retries: int = 10, command_args: Optional[list] = None, command_kwargs: Optional[Dict] = None
122+
):
123+
"""Attempt the command (usually acting on an S3 IO) up to the number of max_retries using exponential backoff."""
124+
command_args = command_args or []
125+
command_kwargs = command_kwargs or dict()
126+
for retry in range(max_retries):
127+
try:
128+
return command(*command_args, **command_kwargs)
129+
except OSError: # cannot curl request
130+
sleep(0.1 * 2**retry)
131+
except Exception as exc:
132+
raise exc
133+
raise TimeoutError(f"Unable to complete the command ({command.__name__}) after {max_retries} attempts!")
134+
135+
136+
def calculate_number_of_cpu(requested_cpu: int = 1) -> int:
137+
"""
138+
Calculate the number CPUs to use with respect to negative slicing and check against maximal available resources.
139+
140+
Parameters
141+
----------
142+
requested_cpu : int, optional
143+
The desired number of CPUs to use.
144+
145+
The default is 1.
146+
"""
147+
total_cpu = os.cpu_count()
148+
assert requested_cpu <= total_cpu, f"Requested more CPUs ({requested_cpu}) than are available ({total_cpu})!"
149+
assert requested_cpu >= -(
150+
total_cpu - 1
151+
), f"Requested fewer CPUs ({requested_cpu}) than are available ({total_cpu})!"
152+
if requested_cpu > 0:
153+
return requested_cpu
154+
else:
155+
return total_cpu + requested_cpu

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
long_description = f.read()
66
setup(
77
name="nwbinspector",
8-
version="0.4.6",
8+
version="0.4.7",
99
description="Tool to inspect NWB files for best practices compliance.",
1010
long_description=long_description,
1111
long_description_content_type="text/markdown",

tests/test_utils.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1+
import os
12
from packaging import version
23

34
from hdmf.testing import TestCase
45

56
from nwbinspector import Importance
6-
from nwbinspector.utils import format_byte_size, check_regular_series, is_dict_in_string, get_package_version
7+
from nwbinspector.utils import (
8+
format_byte_size,
9+
check_regular_series,
10+
is_dict_in_string,
11+
get_package_version,
12+
calculate_number_of_cpu,
13+
)
714

815

916
def test_format_byte_size():
@@ -104,3 +111,27 @@ def test_get_package_version_type():
104111

105112
def test_get_package_version_value():
106113
assert get_package_version("hdmf") >= version.parse("3.1.1") # minimum supported PyNWB version
114+
115+
116+
class TestCalulcateNumberOfCPU(TestCase):
117+
total_cpu = os.cpu_count()
118+
119+
def test_request_more_than_available_assert(self):
120+
requested_cpu = 2500
121+
with self.assertRaisesWith(
122+
exc_type=AssertionError,
123+
exc_msg=f"Requested more CPUs ({requested_cpu}) than are available ({self.total_cpu})!",
124+
):
125+
calculate_number_of_cpu(requested_cpu=requested_cpu)
126+
127+
def test_request_fewer_than_available_assert(self):
128+
requested_cpu = -2500
129+
with self.assertRaisesWith(
130+
exc_type=AssertionError,
131+
exc_msg=f"Requested fewer CPUs ({requested_cpu}) than are available ({self.total_cpu})!",
132+
):
133+
calculate_number_of_cpu(requested_cpu=requested_cpu)
134+
135+
def test_calculate_number_of_cpu_negative_value(self):
136+
requested_cpu = -1 # CI only has 2 jobs available
137+
assert calculate_number_of_cpu(requested_cpu=requested_cpu) == requested_cpu % self.total_cpu

tests/unit_tests/test_time_series.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from packaging import version
2+
from time import sleep
23

34
import numpy as np
45
import pynwb
@@ -14,7 +15,7 @@
1415
check_missing_unit,
1516
check_resolution,
1617
)
17-
from nwbinspector.utils import get_package_version
18+
from nwbinspector.utils import get_package_version, robust_s3_read
1819

1920
try:
2021
# Test ros3 on sub-YutaMouse54/sub-YutaMouse54_ses-YutaMouse54-160630_behavior+ecephys.nwb from #3
@@ -195,9 +196,12 @@ def test_check_none_matnwb_resolution_pass():
195196
load_namespaces=True,
196197
driver="ros3",
197198
) as io:
198-
nwbfile = io.read()
199-
time_series = nwbfile.processing["video_files"]["video"].time_series["20170203_KIB_01_s1.1.h264"]
200-
assert check_resolution(time_series) is None
199+
nwbfile = robust_s3_read(command=io.read)
200+
time_series = robust_s3_read(
201+
"20170203_KIB_01_s1.1.h264",
202+
command=nwbfile.processing["video_files"]["video"].time_series.get,
203+
)
204+
assert check_resolution(time_series) is None
201205

202206

203207
def test_check_resolution_fail():

0 commit comments

Comments
 (0)