Skip to content

Commit e5bf5a6

Browse files
Merge branch 'dev' into document_formatter
2 parents 674fdc7 + 1d35700 commit e5bf5a6

21 files changed

+395
-84
lines changed

.github/ISSUE_TEMPLATE/bug_report.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ body:
6161
- 3.7
6262
- 3.8
6363
- 3.9
64+
- 3.10
6465
validations:
6566
required: true
6667
- type: dropdown

.github/workflows/testing.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
name: Testing
2-
on: pull_request
2+
on:
3+
schedule:
4+
- cron: "0 0 * * *" # daily
5+
pull_request:
6+
37
jobs:
48
build-and-test:
59
name: Testing using ${{ matrix.os }} with ${{ matrix.python-version }}
@@ -22,7 +26,9 @@ jobs:
2226
pip install pytest
2327
pip install pytest-cov
2428
- name: Install package
25-
run: pip install -e .
29+
run: |
30+
pip install -e .
31+
pip install dandi
2632
- name: Uninstall h5py
2733
run: pip uninstall -y h5py
2834
- name: Install ROS3

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
repos:
22
- repo: https://github.com/pre-commit/pre-commit-hooks
3-
rev: v4.2.0
3+
rev: v4.3.0
44
hooks:
55
- id: check-yaml
66
- id: end-of-file-fixer
77
- id: trailing-whitespace
88
- repo: https://github.com/psf/black
9-
rev: 22.3.0
9+
rev: 22.6.0
1010
hooks:
1111
- id: black
1212
exclude: ^docs/

docs/index.rst

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,8 @@ Welcome to the documentation for the NWBInspector!
77
:scale: 100 %
88
:align: right
99
10-
NWBInspector is a Python-based package designed to asses the quality of Neurodata Without Borders
11-
files (NWBFiles) and to suggest improvements for any Best Practice violations that are found.
12-
13-
14-
.. note::
15-
16-
This package is in alpha development; as such, we make every effort towards
17-
a stable environment but bugs are known to occur. If you use this software
18-
for your own quality assurance purposes and discover any issues throughout
19-
the process, please let us know by filing a ticket on our :nwbinspector-issues: page.
10+
NWBInspector is a Python-based package designed to asses the quality of Neurodata Without Borders (NWB)
11+
files and based on compliance with Best Practice. This tool is meant as a companion to the PyNWB validator, which checks for strict schema compliance. In contrast, this tool attempts to apply some commonsense rules and heuristics to find data components of a file that pass validation, but are probably incorrect, or suboptimal, or deviate from best practices. In other words, while the PyNWB validator focuses on compliance of the structure of a file with the schema, the inspector focuses on compliance of the actual data with best practices. The NWB Inspector is meant simply as a data review aid. It does not catch all best practice violations, and any warnings it does produce should be checked by a knowledgeable reviewer.
2012

2113
.. toctree::
2214
:maxdepth: 2

nwbinspector/checks/behavior.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""Checks for types belonging to the pynwb.behavior module."""
2-
from pynwb.behavior import SpatialSeries
2+
from pynwb.behavior import SpatialSeries, CompassDirection
33

44
from ..register_checks import register_check, Importance, InspectorMessage
55

@@ -11,3 +11,13 @@ def check_spatial_series_dims(spatial_series: SpatialSeries):
1111
return InspectorMessage(
1212
message="SpatialSeries should have 1 column (x), 2 columns (x, y), or 3 columns (x, y, z)."
1313
)
14+
15+
16+
@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=CompassDirection)
17+
def check_compass_direction_unit(compass_direction: CompassDirection):
18+
for spatial_series in compass_direction.spatial_series.values():
19+
if spatial_series.unit not in ("degrees", "radians"):
20+
yield InspectorMessage(
21+
message=f"SpatialSeries objects inside a CompassDirection object should be angular and should have a "
22+
f"unit of 'degrees' or 'radians', but '{spatial_series.name}' has units '{spatial_series.unit}'."
23+
)

nwbinspector/checks/image_series.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ def check_image_series_external_file_valid(image_series: ImageSeries):
1414
return
1515
nwbfile_path = Path(get_nwbfile_path_from_internal_object(obj=image_series))
1616
for file_path in image_series.external_file:
17+
file_path = file_path.decode() if isinstance(file_path, bytes) else file_path
1718
if not Path(file_path).is_absolute() and not (nwbfile_path.parent / file_path).exists():
1819
yield InspectorMessage(
1920
message=(
@@ -29,6 +30,7 @@ def check_image_series_external_file_relative(image_series: ImageSeries):
2930
if image_series.external_file is None:
3031
return
3132
for file_path in image_series.external_file:
33+
file_path = file_path.decode() if isinstance(file_path, bytes) else file_path
3234
if Path(file_path).is_absolute():
3335
yield InspectorMessage(
3436
message=(

nwbinspector/checks/nwbfile_metadata.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,7 @@ def check_doi_publications(nwbfile: NWBFile):
8080
if not nwbfile.related_publications:
8181
return
8282
for publication in nwbfile.related_publications:
83-
if isinstance(publication, bytes):
84-
publication = publication.decode()
83+
publication = publication.decode() if isinstance(publication, bytes) else publication
8584
if not any((publication.startswith(valid_start) for valid_start in valid_starts)):
8685
yield InspectorMessage(
8786
message=(

nwbinspector/checks/time_series.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ def check_missing_unit(time_series: TimeSeries):
8686
@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=TimeSeries)
8787
def check_resolution(time_series: TimeSeries):
8888
"""Check the resolution value of a TimeSeries for proper format (-1.0 or NaN for unknown)."""
89-
if time_series.resolution != -1.0 and time_series.resolution <= 0:
89+
if time_series.resolution is None or time_series.resolution == -1.0:
90+
return
91+
if time_series.resolution <= 0:
9092
return InspectorMessage(
9193
message=f"'resolution' should use -1.0 or NaN for unknown instead of {time_series.resolution}."
9294
)

nwbinspector/inspector_tools.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,16 @@
1313
import numpy as np
1414

1515
from .register_checks import InspectorMessage, Importance
16-
from .utils import FilePathType
17-
18-
try:
19-
from importlib.metadata import version
20-
21-
inspector_version = version("nwbinspector")
22-
except ModuleNotFoundError: # Remove the except clause when minimal supported version becomes 3.8
23-
from pkg_resources import get_distribution
24-
25-
inspector_version = get_distribution("nwbinspector").version
16+
from .utils import FilePathType, get_package_version
2617

2718

2819
def get_report_header():
2920
"""Grab basic information from system at time of report generation."""
30-
return dict(Timestamp=str(datetime.now().astimezone()), Platform=platform(), NWBInspector_version=inspector_version)
21+
return dict(
22+
Timestamp=str(datetime.now().astimezone()),
23+
Platform=platform(),
24+
NWBInspector_version=get_package_version("nwbinspector"),
25+
)
3126

3227

3328
def _sort_unique_values(unique_values: list, reverse: bool = False):
@@ -82,10 +77,7 @@ class FormatterOptions:
8277
"""Class structure for defining all free attributes for the design of a report format."""
8378

8479
def __init__(
85-
self,
86-
indent_size: int = 2,
87-
indent: Optional[str] = None,
88-
section_headers: List[str] = ["=", "-", "~"],
80+
self, indent_size: int = 2, indent: Optional[str] = None, section_headers: List[str] = ["=", "-", "~"]
8981
):
9082
# TODO
9183
# Future custom options could include section break sizes, section-specific indents, etc.

nwbinspector/nwbinspector.py

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@
88
from pathlib import Path
99
from collections.abc import Iterable
1010
from enum import Enum
11-
from typing import Optional, List
11+
from typing import Union, Optional, List
1212
from concurrent.futures import ProcessPoolExecutor, as_completed
1313
from types import FunctionType
1414
from warnings import filterwarnings, warn
1515
from distutils.util import strtobool
16+
from time import sleep
1617

1718
import click
1819
import pynwb
@@ -28,7 +29,7 @@
2829
)
2930
from .register_checks import InspectorMessage, Importance
3031
from .tools import get_s3_urls_and_dandi_paths
31-
from .utils import FilePathType, PathType, OptionalListOfStrings
32+
from .utils import FilePathType, PathType, OptionalListOfStrings, robust_s3_read, calculate_number_of_cpu
3233

3334
INTERNAL_CONFIGS = dict(dandi=Path(__file__).parent / "internal_configs" / "dandi.inspector_config.yaml")
3435

@@ -278,7 +279,7 @@ def inspect_all(
278279
config: Optional[dict] = None,
279280
ignore: OptionalListOfStrings = None,
280281
select: OptionalListOfStrings = None,
281-
importance_threshold: Importance = Importance.BEST_PRACTICE_SUGGESTION,
282+
importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
282283
n_jobs: int = 1,
283284
skip_validate: bool = False,
284285
progress_bar: bool = True,
@@ -305,7 +306,7 @@ def inspect_all(
305306
Names of functions to skip.
306307
select: list of strings, optional
307308
Names of functions to pick out of available checks.
308-
importance_threshold : string, optional
309+
importance_threshold : string or Importance, optional
309310
Ignores tests with an assigned importance below this threshold.
310311
Importance has three levels:
311312
CRITICAL
@@ -317,6 +318,8 @@ def inspect_all(
317318
The default is the lowest level, BEST_PRACTICE_SUGGESTION.
318319
n_jobs : int
319320
Number of jobs to use in parallel. Set to -1 to use all available resources.
321+
This may also be a negative integer x from -2 to -(number_of_cpus - 1) which acts like negative slicing by using
322+
all available CPUs minus x.
320323
Set to 1 (also the default) to disable.
321324
skip_validate : bool, optional
322325
Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
@@ -336,7 +339,11 @@ def inspect_all(
336339
Common options are 'draft' or 'published'.
337340
Defaults to the most recent published version, or if not published then the most recent draft version.
338341
"""
342+
importance_threshold = (
343+
Importance[importance_threshold] if isinstance(importance_threshold, str) else importance_threshold
344+
)
339345
modules = modules or []
346+
n_jobs = calculate_number_of_cpu(requested_cpu=n_jobs)
340347
if progress_bar_options is None:
341348
progress_bar_options = dict(position=0, leave=False)
342349
if stream:
@@ -410,9 +417,10 @@ def inspect_nwb(
410417
config: dict = None,
411418
ignore: OptionalListOfStrings = None,
412419
select: OptionalListOfStrings = None,
413-
importance_threshold: Importance = Importance.BEST_PRACTICE_SUGGESTION,
414-
driver: str = None,
420+
importance_threshold: Union[str, Importance] = Importance.BEST_PRACTICE_SUGGESTION,
421+
driver: Optional[str] = None,
415422
skip_validate: bool = False,
423+
max_retries: int = 10,
416424
) -> List[InspectorMessage]:
417425
"""
418426
Inspect a NWBFile object and return suggestions for improvements according to best practices.
@@ -431,7 +439,7 @@ def inspect_nwb(
431439
Names of functions to skip.
432440
select: list, optional
433441
Names of functions to pick out of available checks.
434-
importance_threshold : string, optional
442+
importance_threshold : string or Importance, optional
435443
Ignores tests with an assigned importance below this threshold.
436444
Importance has three levels:
437445
CRITICAL
@@ -446,38 +454,47 @@ def inspect_nwb(
446454
skip_validate : bool
447455
Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
448456
The default is False, which is also recommended.
457+
max_retries : int, optional
458+
When using the ros3 driver to stream data from an s3 path, occasional curl issues can result.
459+
AWS suggests using iterative retry with an exponential backoff of 0.1 * 2^retries.
460+
This sets a hard bound on the number of times to attempt to retry the collection of messages.
461+
Defaults to 10 (corresponds to 102.4s maximum delay on final attempt).
449462
"""
463+
importance_threshold = (
464+
Importance[importance_threshold] if isinstance(importance_threshold, str) else importance_threshold
465+
)
450466
if any(x is not None for x in [config, ignore, select, importance_threshold]):
451467
checks = configure_checks(
452468
checks=checks, config=config, ignore=ignore, select=select, importance_threshold=importance_threshold
453469
)
454470
nwbfile_path = str(nwbfile_path)
455471
filterwarnings(action="ignore", message="No cached namespaces found in .*")
456472
filterwarnings(action="ignore", message="Ignoring cached namespace .*")
473+
457474
with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True, driver=driver) as io:
458-
if skip_validate:
475+
if not skip_validate:
459476
validation_errors = pynwb.validate(io=io)
460-
if any(validation_errors):
461-
for validation_error in validation_errors:
462-
yield InspectorMessage(
463-
message=validation_error.reason,
464-
importance=Importance.PYNWB_VALIDATION,
465-
check_function_name=validation_error.name,
466-
location=validation_error.location,
467-
file_path=nwbfile_path,
468-
)
477+
for validation_error in validation_errors:
478+
yield InspectorMessage(
479+
message=validation_error.reason,
480+
importance=Importance.PYNWB_VALIDATION,
481+
check_function_name=validation_error.name,
482+
location=validation_error.location,
483+
file_path=nwbfile_path,
484+
)
485+
469486
try:
470-
nwbfile = io.read()
487+
nwbfile = robust_s3_read(command=io.read, max_retries=max_retries)
488+
for inspector_message in run_checks(nwbfile=nwbfile, checks=checks):
489+
inspector_message.file_path = nwbfile_path
490+
yield inspector_message
471491
except Exception as ex:
472492
yield InspectorMessage(
473493
message=traceback.format_exc(),
474494
importance=Importance.ERROR,
475-
check_function_name=f"{type(ex)}: {str(ex)}",
495+
check_function_name=f"During io.read() - {type(ex)}: {str(ex)}",
476496
file_path=nwbfile_path,
477497
)
478-
for inspector_message in run_checks(nwbfile, checks=checks):
479-
inspector_message.file_path = nwbfile_path
480-
yield inspector_message
481498

482499

483500
def run_checks(nwbfile: pynwb.NWBFile, checks: list):
@@ -493,7 +510,7 @@ def run_checks(nwbfile: pynwb.NWBFile, checks: list):
493510
for nwbfile_object in nwbfile.objects.values():
494511
if check_function.neurodata_type is None or issubclass(type(nwbfile_object), check_function.neurodata_type):
495512
try:
496-
output = check_function(nwbfile_object)
513+
output = robust_s3_read(command=check_function, command_args=[nwbfile_object])
497514
# if an individual check fails, include it in the report and continue with the inspection
498515
except Exception:
499516
output = InspectorMessage(

0 commit comments

Comments
 (0)