Skip to content

Commit b787ce8

Browse files
Merge branch 'dev' into better_fix_curl
2 parents 393a990 + 67a2e29 commit b787ce8

File tree

7 files changed

+211
-24
lines changed

7 files changed

+211
-24
lines changed

nwbinspector/internal_configs/dandi.inspector_config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ CRITICAL: # All the fields under CRITICAL will be required for dandi validate t
22
- check_subject_exists
33
- check_subject_id_exists
44
BEST_PRACTICE_VIOLATION:
5-
- check_subject_sex # these are planned to be elevated to CRITICAL when requried for DANDI validate
6-
- check_subject_species # these are planned to be elevated to CRITICAL when requried for DANDI validate
7-
- check_subject_age # these are planned to be elevated to CRITICAL when requried for DANDI validate
5+
- check_subject_sex # these are planned to be elevated to CRITICAL when required for DANDI validate
6+
- check_subject_species # these are planned to be elevated to CRITICAL when required for DANDI validate
7+
- check_subject_age # these are planned to be elevated to CRITICAL when required for DANDI validate
88
- check_data_orientation # not 100% accurate, so need to deelevate from CRITICAL to skip it in dandi validate

nwbinspector/nwbinspector.py

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
from types import FunctionType
1414
from warnings import filterwarnings, warn
1515
from distutils.util import strtobool
16-
from time import sleep
16+
from collections import defaultdict
1717

1818
import click
1919
import pynwb
2020
import yaml
2121
from tqdm import tqdm
22+
from natsort import natsorted
2223

2324
from . import available_checks
2425
from .inspector_tools import (
@@ -37,7 +38,7 @@
3738
class InspectorOutputJSONEncoder(json.JSONEncoder):
3839
"""Custom JSONEncoder for the NWBInspector."""
3940

40-
def default(self, o):
41+
def default(self, o): # noqa D102
4142
if isinstance(o, InspectorMessage):
4243
return o.__dict__
4344
if isinstance(o, Enum):
@@ -53,25 +54,41 @@ def validate_config(config: dict):
5354
jsonschema.validate(instance=config, schema=schema)
5455

5556

56-
def copy_function(function):
57+
def _copy_function(function):
5758
"""
58-
Return a copy of a function so that internal attributes can be adjusted without changing the original function.
59+
Copy the core parts of a given function, excluding wrappers, then return a new function.
5960
60-
Required to ensure our configuration of functions in the registry does not effect the registry itself.
61-
62-
Taken from
61+
Based on
6362
https://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python/30714299#30714299
6463
"""
65-
if getattr(function, "__wrapped__", False):
66-
function = function.__wrapped__
6764
copied_function = FunctionType(
6865
function.__code__, function.__globals__, function.__name__, function.__defaults__, function.__closure__
6966
)
70-
# in case f was given attrs (note this dict is a shallow copy):
67+
68+
# in case f was given attrs (note this dict is a shallow copy)
7169
copied_function.__dict__.update(function.__dict__)
7270
return copied_function
7371

7472

73+
def copy_check(function):
74+
"""
75+
Copy a check function so that internal attributes can be adjusted without changing the original function.
76+
77+
Required to ensure our configuration of functions in the registry does not affect the registry itself.
78+
79+
Also copies the wrapper for auto-parsing results,
80+
see https://github.com/NeurodataWithoutBorders/nwbinspector/pull/218 for explanation.
81+
82+
Taken from
83+
https://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python/30714299#30714299
84+
"""
85+
if getattr(function, "__wrapped__", False):
86+
check_function = function.__wrapped__
87+
copied_function = _copy_function(function)
88+
copied_function.__wrapped__ = _copy_function(check_function)
89+
return copied_function
90+
91+
7592
def load_config(filepath_or_keyword: PathType) -> dict:
7693
"""
7794
Load a config dictionary either via keyword search of the internal configs, or an explicit filepath.
@@ -131,7 +148,7 @@ def configure_checks(
131148
checks_out = []
132149
ignore = ignore or []
133150
for check in checks:
134-
mapped_check = copy_function(check)
151+
mapped_check = copy_check(check)
135152
for importance_name, func_names in config.items():
136153
if check.__name__ in func_names:
137154
if importance_name == "SKIP":
@@ -370,6 +387,30 @@ def inspect_all(
370387
# Filtering of checks should apply after external modules are imported, in case those modules have their own checks
371388
checks = configure_checks(config=config, ignore=ignore, select=select, importance_threshold=importance_threshold)
372389

390+
# Manual identifier check over all files in the folder path
391+
identifiers = defaultdict(list)
392+
for nwbfile_path in nwbfiles:
393+
with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", driver=driver) as io:
394+
nwbfile = robust_s3_read(io.read)
395+
identifiers[nwbfile.identifier].append(nwbfile_path)
396+
if len(identifiers) != len(nwbfiles):
397+
for identifier, nwbfiles_with_identifier in identifiers.items():
398+
if len(nwbfiles_with_identifier) > 1:
399+
yield InspectorMessage(
400+
message=(
401+
f"The identifier '{identifier}' is used across the .nwb files: "
402+
f"{natsorted([x.name for x in nwbfiles_with_identifier])}. "
403+
"The identifier of any NWBFile should be a completely unique value - "
404+
"we recommend using uuid4 to achieve this."
405+
),
406+
importance=Importance.CRITICAL,
407+
check_function_name="check_unique_identifiers",
408+
object_type="NWBFile",
409+
object_name="root",
410+
location="/",
411+
file_path=str(path),
412+
)
413+
373414
nwbfiles_iterable = nwbfiles
374415
if progress_bar:
375416
nwbfiles_iterable = tqdm(nwbfiles_iterable, **progress_bar_options)

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@ pynwb
22
PyYAML
33
jsonschema
44
packaging
5+
natsort
6+
click
7+
tqdm

setup.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from setuptools import setup, find_packages
2+
from pathlib import Path
23

3-
# Get the long description from the README file
4-
with open("README.md", "r") as f:
4+
root = Path(__file__).parent
5+
with open(root / "README.md", "r") as f:
56
long_description = f.read()
7+
with open(root / "requirements.txt") as f:
8+
install_requires = f.readlines()
69
setup(
710
name="nwbinspector",
8-
version="0.4.9",
11+
version="0.4.10",
912
description="Tool to inspect NWB files for best practices compliance.",
1013
long_description=long_description,
1114
long_description_content_type="text/markdown",
@@ -14,7 +17,7 @@
1417
packages=find_packages(),
1518
include_package_data=True,
1619
url="https://github.com/NeurodataWithoutBorders/nwbinspector",
17-
install_requires=["pynwb", "natsort", "click", "PyYAML", "jsonschema", "tqdm"],
20+
install_requires=install_requires,
1821
extras_require=dict(dandi=["dandi>=0.39.2"]),
1922
entry_points={"console_scripts": ["nwbinspector=nwbinspector.nwbinspector:inspect_all_cli"]},
2023
license="BSD-3-Clause",

tests/test_check_configuration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
check_timestamps_match_first_dimension,
1010
available_checks,
1111
)
12-
from nwbinspector.nwbinspector import validate_config, configure_checks, copy_function, load_config
12+
from nwbinspector.nwbinspector import validate_config, configure_checks, _copy_function, load_config
1313

1414

1515
class TestCheckConfiguration(TestCase):
@@ -24,7 +24,7 @@ def setUpClass(cls):
2424

2525
def test_safe_check_copy(self):
2626
initial_importance = available_checks[0].importance
27-
changed_check = copy_function(function=available_checks[0])
27+
changed_check = _copy_function(function=available_checks[0])
2828
if initial_importance is Importance.CRITICAL:
2929
changed_importance = Importance.BEST_PRACTICE_SUGGESTION
3030
else:

tests/test_inspector.py

Lines changed: 142 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,25 @@
44
from tempfile import mkdtemp
55
from pathlib import Path
66
from unittest import TestCase
7+
from datetime import datetime
78

89
import numpy as np
910
from pynwb import NWBFile, NWBHDF5IO, TimeSeries
1011
from pynwb.file import TimeIntervals
1112
from pynwb.behavior import SpatialSeries, Position
1213
from hdmf.common import DynamicTable
14+
from natsort import natsorted
1315

1416
from nwbinspector import (
1517
Importance,
1618
check_small_dataset_compression,
1719
check_regular_timestamps,
1820
check_data_orientation,
1921
check_timestamps_match_first_dimension,
22+
check_subject_exists,
23+
load_config,
2024
)
21-
from nwbinspector.nwbinspector import inspect_all, inspect_nwb
25+
from nwbinspector import inspect_all, inspect_nwb
2226
from nwbinspector.register_checks import Severity, InspectorMessage, register_check
2327
from nwbinspector.utils import FilePathType, is_module_installed
2428
from nwbinspector.tools import make_minimal_nwbfile
@@ -144,7 +148,7 @@ def assertLogFileContentsEqual(
144148
if ".nwb" in test_line:
145149
# Transform temporary testing path and formatted to hardcoded fake path
146150
str_loc = test_line.find(".nwb")
147-
correction_str = test_line.replace(test_line[5 : str_loc - 8], "./")
151+
correction_str = test_line.replace(test_line[5 : str_loc - 8], "./") # noqa: E203 (black)
148152
test_file_lines[line_number] = correction_str
149153
self.assertEqual(first=test_file_lines[skip_first_n_lines:-1], second=true_file_lines)
150154

@@ -501,6 +505,71 @@ def test_inspect_nwb_manual_iteration_stop(self):
501505
with self.assertRaises(expected_exception=StopIteration):
502506
next(generator)
503507

508+
def test_inspect_nwb_dandi_config(self):
509+
config_checks = [check_subject_exists] + self.checks
510+
test_results = list(
511+
inspect_nwb(
512+
nwbfile_path=self.nwbfile_paths[0],
513+
checks=config_checks,
514+
config=load_config(filepath_or_keyword="dandi"),
515+
)
516+
)
517+
true_results = [
518+
InspectorMessage(
519+
message="Subject is missing.",
520+
importance=Importance.BEST_PRACTICE_SUGGESTION,
521+
check_function_name="check_subject_exists",
522+
object_type="NWBFile",
523+
object_name="root",
524+
location="/",
525+
file_path=self.nwbfile_paths[0],
526+
),
527+
InspectorMessage(
528+
message="data is not compressed. Consider enabling compression when writing a dataset.",
529+
importance=Importance.BEST_PRACTICE_SUGGESTION,
530+
check_function_name="check_small_dataset_compression",
531+
object_type="TimeSeries",
532+
object_name="test_time_series_1",
533+
location="/acquisition/test_time_series_1",
534+
file_path=self.nwbfile_paths[0],
535+
),
536+
InspectorMessage(
537+
message=(
538+
"TimeSeries appears to have a constant sampling rate. "
539+
"Consider specifying starting_time=1.2 and rate=2.0 instead of timestamps."
540+
),
541+
importance=Importance.BEST_PRACTICE_VIOLATION,
542+
check_function_name="check_regular_timestamps",
543+
object_type="TimeSeries",
544+
object_name="test_time_series_2",
545+
location="/acquisition/test_time_series_2",
546+
file_path=self.nwbfile_paths[0],
547+
),
548+
InspectorMessage(
549+
message=(
550+
"Data may be in the wrong orientation. "
551+
"Time should be in the first dimension, and is usually the longest dimension. "
552+
"Here, another dimension is longer."
553+
),
554+
importance=Importance.CRITICAL,
555+
check_function_name="check_data_orientation",
556+
object_type="SpatialSeries",
557+
object_name="my_spatial_series",
558+
location="/processing/behavior/Position/my_spatial_series",
559+
file_path=self.nwbfile_paths[0],
560+
),
561+
InspectorMessage(
562+
message="The length of the first dimension of data does not match the length of timestamps.",
563+
importance=Importance.CRITICAL,
564+
check_function_name="check_timestamps_match_first_dimension",
565+
object_type="TimeSeries",
566+
object_name="test_time_series_3",
567+
location="/acquisition/test_time_series_3",
568+
file_path=self.nwbfile_paths[0],
569+
),
570+
]
571+
self.assertCountEqual(first=test_results, second=true_results)
572+
504573

505574
@pytest.mark.skipif(not HAVE_ROS3 or not HAVE_DANDI, reason="Needs h5py setup with ROS3.")
506575
def test_dandiset_streaming():
@@ -561,3 +630,74 @@ def test_dandiset_streaming_cli_parallel(self):
561630
f"> {console_output_file}"
562631
)
563632
self.assertFileExists(path=self.tempdir / "test_nwbinspector_streaming_report_7.txt")
633+
634+
635+
class TestCheckUniqueIdentifiersPass(TestCase):
636+
maxDiff = None
637+
638+
@classmethod
639+
def setUpClass(cls):
640+
cls.tempdir = Path(mkdtemp())
641+
num_nwbfiles = 3
642+
unique_id_nwbfiles = list()
643+
for j in range(num_nwbfiles):
644+
unique_id_nwbfiles.append(make_minimal_nwbfile())
645+
646+
cls.unique_id_nwbfile_paths = [str(cls.tempdir / f"unique_id_testing{j}.nwb") for j in range(num_nwbfiles)]
647+
for nwbfile_path, nwbfile in zip(cls.unique_id_nwbfile_paths, unique_id_nwbfiles):
648+
with NWBHDF5IO(path=nwbfile_path, mode="w") as io:
649+
io.write(nwbfile)
650+
651+
@classmethod
652+
def tearDownClass(cls):
653+
rmtree(cls.tempdir)
654+
655+
def test_check_unique_identifiers_pass(self):
656+
assert list(inspect_all(path=self.tempdir, select=["check_data_orientation"])) == []
657+
658+
659+
class TestCheckUniqueIdentifiersFail(TestCase):
660+
maxDiff = None
661+
662+
@classmethod
663+
def setUpClass(cls):
664+
cls.tempdir = Path(mkdtemp())
665+
num_nwbfiles = 3
666+
non_unique_id_nwbfiles = list()
667+
for j in range(num_nwbfiles):
668+
non_unique_id_nwbfiles.append(
669+
NWBFile(
670+
session_description="",
671+
identifier="not a unique identifier!",
672+
session_start_time=datetime.now().astimezone(),
673+
)
674+
)
675+
676+
cls.non_unique_id_nwbfile_paths = [
677+
str(cls.tempdir / f"non_unique_id_testing{j}.nwb") for j in range(num_nwbfiles)
678+
]
679+
for nwbfile_path, nwbfile in zip(cls.non_unique_id_nwbfile_paths, non_unique_id_nwbfiles):
680+
with NWBHDF5IO(path=nwbfile_path, mode="w") as io:
681+
io.write(nwbfile)
682+
683+
@classmethod
684+
def tearDownClass(cls):
685+
rmtree(cls.tempdir)
686+
687+
def test_check_unique_identifiers_fail(self):
688+
assert list(inspect_all(path=self.tempdir, select=["check_data_orientation"])) == [
689+
InspectorMessage(
690+
message=(
691+
"The identifier 'not a unique identifier!' is used across the .nwb files: "
692+
f"{natsorted([Path(x).name for x in self.non_unique_id_nwbfile_paths])}. "
693+
"The identifier of any NWBFile should be a completely unique value - "
694+
"we recommend using uuid4 to achieve this."
695+
),
696+
importance=Importance.CRITICAL,
697+
check_function_name="check_unique_identifiers",
698+
object_type="NWBFile",
699+
object_name="root",
700+
location="/",
701+
file_path=str(self.tempdir),
702+
)
703+
]

tests/unit_tests/test_tables.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,8 @@ def test_check_single_row_pass():
229229

230230
def test_check_single_row_ignore_units():
231231
table = Units(
232-
name="Units", # default name when building through nwbfile
233-
)
232+
name="Units",
233+
) # default name when building through nwbfile
234234
table.add_unit(spike_times=[1, 2, 3])
235235
assert check_single_row(table=table) is None
236236

0 commit comments

Comments
 (0)