Skip to content

Commit 5a340e6

Browse files
Merge branch 'dev' into debug_none_names
2 parents 0e90faa + 1d35700 commit 5a340e6

File tree

8 files changed

+142
-20
lines changed

8 files changed

+142
-20
lines changed

nwbinspector/nwbinspector.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@
88
from pathlib import Path
99
from collections.abc import Iterable
1010
from enum import Enum
11-
from typing import Optional, List
11+
from typing import Union, Optional, List
1212
from concurrent.futures import ProcessPoolExecutor, as_completed
1313
from types import FunctionType
1414
from warnings import filterwarnings, warn
1515
from distutils.util import strtobool
16+
from time import sleep
1617

1718
import click
1819
import pynwb
@@ -28,7 +29,7 @@
2829
)
2930
from .register_checks import InspectorMessage, Importance
3031
from .tools import get_s3_urls_and_dandi_paths
31-
from .utils import FilePathType, PathType, OptionalListOfStrings
32+
from .utils import FilePathType, PathType, OptionalListOfStrings, robust_s3_read, calculate_number_of_cpu
3233

3334
INTERNAL_CONFIGS = dict(dandi=Path(__file__).parent / "internal_configs" / "dandi.inspector_config.yaml")
3435

@@ -294,7 +295,7 @@ def inspect_all(
294295
config: Optional[dict] = None,
295296
ignore: OptionalListOfStrings = None,
296297
select: OptionalListOfStrings = None,
297-
importance_threshold: Importance = Importance.BEST_PRACTICE_SUGGESTION,
298+
importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
298299
n_jobs: int = 1,
299300
skip_validate: bool = False,
300301
progress_bar: bool = True,
@@ -321,7 +322,7 @@ def inspect_all(
321322
Names of functions to skip.
322323
select: list of strings, optional
323324
Names of functions to pick out of available checks.
324-
importance_threshold : string, optional
325+
importance_threshold : string or Importance, optional
325326
Ignores tests with an assigned importance below this threshold.
326327
Importance has three levels:
327328
CRITICAL
@@ -333,6 +334,8 @@ def inspect_all(
333334
The default is the lowest level, BEST_PRACTICE_SUGGESTION.
334335
n_jobs : int
335336
Number of jobs to use in parallel. Set to -1 to use all available resources.
337+
This may also be a negative integer x from -2 to -(number_of_cpus - 1) which acts like negative slicing by using
338+
all available CPUs minus x.
336339
Set to 1 (also the default) to disable.
337340
skip_validate : bool, optional
338341
Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
@@ -352,7 +355,11 @@ def inspect_all(
352355
Common options are 'draft' or 'published'.
353356
Defaults to the most recent published version, or if not published then the most recent draft version.
354357
"""
358+
importance_threshold = (
359+
Importance[importance_threshold] if isinstance(importance_threshold, str) else importance_threshold
360+
)
355361
modules = modules or []
362+
n_jobs = calculate_number_of_cpu(requested_cpu=n_jobs)
356363
if progress_bar_options is None:
357364
progress_bar_options = dict(position=0, leave=False)
358365
if stream:
@@ -426,9 +433,10 @@ def inspect_nwb(
426433
config: dict = None,
427434
ignore: OptionalListOfStrings = None,
428435
select: OptionalListOfStrings = None,
429-
importance_threshold: Importance = Importance.BEST_PRACTICE_SUGGESTION,
430-
driver: str = None,
436+
importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
437+
driver: Optional[str] = None,
431438
skip_validate: bool = False,
439+
max_retries: int = 10,
432440
) -> List[InspectorMessage]:
433441
"""
434442
Inspect a NWBFile object and return suggestions for improvements according to best practices.
@@ -447,7 +455,7 @@ def inspect_nwb(
447455
Names of functions to skip.
448456
select: list, optional
449457
Names of functions to pick out of available checks.
450-
importance_threshold : string, optional
458+
importance_threshold : string or Importance, optional
451459
Ignores tests with an assigned importance below this threshold.
452460
Importance has three levels:
453461
CRITICAL
@@ -462,14 +470,23 @@ def inspect_nwb(
462470
skip_validate : bool
463471
Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
464472
The default is False, which is also recommended.
473+
max_retries : int, optional
474+
When using the ros3 driver to stream data from an s3 path, occasional curl issues can result.
475+
AWS suggests using iterative retry with an exponential backoff of 0.1 * 2^retries.
476+
This sets a hard bound on the number of times to attempt to retry the collection of messages.
477+
Defaults to 10 (corresponds to 102.4s maximum delay on final attempt).
465478
"""
479+
importance_threshold = (
480+
Importance[importance_threshold] if isinstance(importance_threshold, str) else importance_threshold
481+
)
466482
if any(x is not None for x in [config, ignore, select, importance_threshold]):
467483
checks = configure_checks(
468484
checks=checks, config=config, ignore=ignore, select=select, importance_threshold=importance_threshold
469485
)
470486
nwbfile_path = str(nwbfile_path)
471487
filterwarnings(action="ignore", message="No cached namespaces found in .*")
472488
filterwarnings(action="ignore", message="Ignoring cached namespace .*")
489+
473490
with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io:
474491
if not skip_validate:
475492
validation_errors = pynwb.validate(io=io)
@@ -483,7 +500,7 @@ def inspect_nwb(
483500
)
484501

485502
try:
486-
nwbfile = io.read()
503+
nwbfile = robust_s3_read(command=io.read, max_retries=max_retries)
487504
for inspector_message in run_checks(nwbfile=nwbfile, checks=checks):
488505
inspector_message.file_path = nwbfile_path
489506
yield inspector_message
@@ -509,7 +526,7 @@ def run_checks(nwbfile: pynwb.NWBFile, checks: list):
509526
for nwbfile_object in nwbfile.objects.values():
510527
if check_function.neurodata_type is None or issubclass(type(nwbfile_object), check_function.neurodata_type):
511528
try:
512-
output = check_function(nwbfile_object)
529+
output = robust_s3_read(command=check_function, command_args=[nwbfile_object])
513530
# if an individual check fails, include it in the report and continue with the inspection
514531
except Exception:
515532
output = InspectorMessage(

nwbinspector/tools.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from pynwb import NWBFile
99

10-
from .utils import is_module_installed
10+
from .utils import is_module_installed, calculate_number_of_cpu
1111

1212

1313
def make_minimal_nwbfile():
@@ -43,6 +43,7 @@ def get_s3_urls_and_dandi_paths(dandiset_id: str, version_id: Optional[str] = No
4343
), "The specified 'path' is not a proper DANDISet ID. It should be a six-digit numeric identifier."
4444

4545
s3_urls_to_dandi_paths = dict()
46+
n_jobs = calculate_number_of_cpu(requested_cpu=n_jobs)
4647
if n_jobs != 1:
4748
with DandiAPIClient() as client:
4849
dandiset = client.get_dandiset(dandiset_id=dandiset_id, version_id=version_id)

nwbinspector/utils.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""Commonly reused logic for evaluating conditions; must not have external dependencies."""
2+
import os
23
import re
34
import json
45
import numpy as np
5-
from typing import TypeVar, Optional, List
6+
from typing import TypeVar, Optional, List, Dict, Callable
67
from pathlib import Path
78
from importlib import import_module
89
from packaging import version
10+
from time import sleep
911

1012
PathType = TypeVar("PathType", str, Path) # For types that can be either files or folders
1113
FilePathType = TypeVar("FilePathType", str, Path)
@@ -113,3 +115,41 @@ def get_package_version(name: str) -> version.Version:
113115

114116
package_version = get_distribution(name).version
115117
return version.parse(package_version)
118+
119+
120+
def robust_s3_read(
121+
command: Callable, max_retries: int = 10, command_args: Optional[list] = None, command_kwargs: Optional[Dict] = None
122+
):
123+
"""Attempt the command (usually acting on an S3 IO) up to the number of max_retries using exponential backoff."""
124+
command_args = command_args or []
125+
command_kwargs = command_kwargs or dict()
126+
for retry in range(max_retries):
127+
try:
128+
return command(*command_args, **command_kwargs)
129+
except OSError: # cannot curl request
130+
sleep(0.1 * 2**retry)
131+
except Exception as exc:
132+
raise exc
133+
raise TimeoutError(f"Unable to complete the command ({command.__name__}) after {max_retries} attempts!")
134+
135+
136+
def calculate_number_of_cpu(requested_cpu: int = 1) -> int:
137+
"""
138+
Calculate the number CPUs to use with respect to negative slicing and check against maximal available resources.
139+
140+
Parameters
141+
----------
142+
requested_cpu : int, optional
143+
The desired number of CPUs to use.
144+
145+
The default is 1.
146+
"""
147+
total_cpu = os.cpu_count()
148+
assert requested_cpu <= total_cpu, f"Requested more CPUs ({requested_cpu}) than are available ({total_cpu})!"
149+
assert requested_cpu >= -(
150+
total_cpu - 1
151+
), f"Requested fewer CPUs ({requested_cpu}) than are available ({total_cpu})!"
152+
if requested_cpu > 0:
153+
return requested_cpu
154+
else:
155+
return total_cpu + requested_cpu

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
install_requires = f.readlines()
99
setup(
1010
name="nwbinspector",
11-
version="0.4.6",
11+
version="0.4.7",
1212
description="Tool to inspect NWB files for best practices compliance.",
1313
long_description=long_description,
1414
long_description_content_type="text/markdown",

tests/test_inspector.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ def test_inspect_nwb(self):
332332
]
333333
self.assertCountEqual(first=test_results, second=true_results)
334334

335-
def test_inspect_nwb_importance_threshold(self):
335+
def test_inspect_nwb_importance_threshold_as_importance(self):
336336
test_results = list(
337337
inspect_nwb(
338338
nwbfile_path=self.nwbfile_paths[0], checks=self.checks, importance_threshold=Importance.CRITICAL
@@ -363,6 +363,35 @@ def test_inspect_nwb_importance_threshold(self):
363363
]
364364
self.assertCountEqual(first=test_results, second=true_results)
365365

366+
def test_inspect_nwb_importance_threshold_as_string(self):
367+
test_results = list(
368+
inspect_nwb(nwbfile_path=self.nwbfile_paths[0], checks=self.checks, importance_threshold="CRITICAL")
369+
)
370+
true_results = [
371+
InspectorMessage(
372+
message=(
373+
"Data may be in the wrong orientation. Time should be in the first dimension, and is "
374+
"usually the longest dimension. Here, another dimension is longer."
375+
),
376+
importance=Importance.CRITICAL,
377+
check_function_name="check_data_orientation",
378+
object_type="SpatialSeries",
379+
object_name="my_spatial_series",
380+
location="/processing/behavior/Position/my_spatial_series",
381+
file_path=self.nwbfile_paths[0],
382+
),
383+
InspectorMessage(
384+
message="The length of the first dimension of data does not match the length of timestamps.",
385+
importance=Importance.CRITICAL,
386+
check_function_name="check_timestamps_match_first_dimension",
387+
object_type="TimeSeries",
388+
object_name="test_time_series_3",
389+
location="/acquisition/test_time_series_3",
390+
file_path=self.nwbfile_paths[0],
391+
),
392+
]
393+
self.assertCountEqual(first=test_results, second=true_results)
394+
366395
def test_command_line_runs_cli_only(self):
367396
console_output_file = self.tempdir / "test_console_output.txt"
368397
os.system(

tests/test_utils.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1+
import os
12
from packaging import version
23

34
from hdmf.testing import TestCase
45

56
from nwbinspector import Importance
6-
from nwbinspector.utils import format_byte_size, check_regular_series, is_dict_in_string, get_package_version
7+
from nwbinspector.utils import (
8+
format_byte_size,
9+
check_regular_series,
10+
is_dict_in_string,
11+
get_package_version,
12+
calculate_number_of_cpu,
13+
)
714

815

916
def test_format_byte_size():
@@ -104,3 +111,27 @@ def test_get_package_version_type():
104111

105112
def test_get_package_version_value():
106113
assert get_package_version("hdmf") >= version.parse("3.1.1") # minimum supported PyNWB version
114+
115+
116+
class TestCalulcateNumberOfCPU(TestCase):
117+
total_cpu = os.cpu_count()
118+
119+
def test_request_more_than_available_assert(self):
120+
requested_cpu = 2500
121+
with self.assertRaisesWith(
122+
exc_type=AssertionError,
123+
exc_msg=f"Requested more CPUs ({requested_cpu}) than are available ({self.total_cpu})!",
124+
):
125+
calculate_number_of_cpu(requested_cpu=requested_cpu)
126+
127+
def test_request_fewer_than_available_assert(self):
128+
requested_cpu = -2500
129+
with self.assertRaisesWith(
130+
exc_type=AssertionError,
131+
exc_msg=f"Requested fewer CPUs ({requested_cpu}) than are available ({self.total_cpu})!",
132+
):
133+
calculate_number_of_cpu(requested_cpu=requested_cpu)
134+
135+
def test_calculate_number_of_cpu_negative_value(self):
136+
requested_cpu = -1 # CI only has 2 jobs available
137+
assert calculate_number_of_cpu(requested_cpu=requested_cpu) == requested_cpu % self.total_cpu

tests/unit_tests/test_tables.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,8 @@ def test_check_single_row_ignore_units():
237237

238238
def test_check_single_row_ignore_electrodes():
239239
table = ElectrodeTable(
240-
name="electrodes",
241-
) # default name when building through nwbfile
240+
name="electrodes", # default name when building through nwbfile
241+
)
242242
if get_package_version(name="pynwb") >= version.Version("2.1.0"):
243243
table.add_row(
244244
location="unknown",

tests/unit_tests/test_time_series.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from packaging import version
2+
from time import sleep
23

34
import numpy as np
45
import pynwb
@@ -14,7 +15,7 @@
1415
check_missing_unit,
1516
check_resolution,
1617
)
17-
from nwbinspector.utils import get_package_version
18+
from nwbinspector.utils import get_package_version, robust_s3_read
1819

1920
try:
2021
# Test ros3 on sub-YutaMouse54/sub-YutaMouse54_ses-YutaMouse54-160630_behavior+ecephys.nwb from #3
@@ -195,9 +196,12 @@ def test_check_none_matnwb_resolution_pass():
195196
load_namespaces=True,
196197
driver="ros3",
197198
) as io:
198-
nwbfile = io.read()
199-
time_series = nwbfile.processing["video_files"]["video"].time_series["20170203_KIB_01_s1.1.h264"]
200-
assert check_resolution(time_series) is None
199+
nwbfile = robust_s3_read(command=io.read)
200+
time_series = robust_s3_read(
201+
"20170203_KIB_01_s1.1.h264",
202+
command=nwbfile.processing["video_files"]["video"].time_series.get,
203+
)
204+
assert check_resolution(time_series) is None
201205

202206

203207
def test_check_resolution_fail():

0 commit comments

Comments
 (0)