Skip to content

Commit e1c189d

Browse files
Merge pull request #232 from NeurodataWithoutBorders/add_identifier_check_over_files
[New Check] Check if identifiers are unique over a folder path
2 parents bd81d40 + 2082f9e commit e1c189d

File tree

2 files changed

+99
-1
lines changed

2 files changed

+99
-1
lines changed

nwbinspector/nwbinspector.py

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,13 @@
1313
from types import FunctionType
1414
from warnings import filterwarnings, warn
1515
from distutils.util import strtobool
16-
from time import sleep
16+
from collections import defaultdict
1717

1818
import click
1919
import pynwb
2020
import yaml
2121
from tqdm import tqdm
22+
from natsort import natsorted
2223

2324
from . import available_checks
2425
from .inspector_tools import (
@@ -386,6 +387,30 @@ def inspect_all(
386387
# Filtering of checks should apply after external modules are imported, in case those modules have their own checks
387388
checks = configure_checks(config=config, ignore=ignore, select=select, importance_threshold=importance_threshold)
388389

390+
# Manual identifier check over all files in the folder path
391+
identifiers = defaultdict(list)
392+
for nwbfile_path in nwbfiles:
393+
with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", driver=driver) as io:
394+
nwbfile = robust_s3_read(io.read)
395+
identifiers[nwbfile.identifier].append(nwbfile_path)
396+
if len(identifiers) != len(nwbfiles):
397+
for identifier, nwbfiles_with_identifier in identifiers.items():
398+
if len(nwbfiles_with_identifier) > 1:
399+
yield InspectorMessage(
400+
message=(
401+
f"The identifier '{identifier}' is used across the .nwb files: "
402+
f"{natsorted([x.name for x in nwbfiles_with_identifier])}. "
403+
"The identifier of any NWBFile should be a completely unique value - "
404+
"we recommend using uuid4 to achieve this."
405+
),
406+
importance=Importance.CRITICAL,
407+
check_function_name="check_unique_identifiers",
408+
object_type="NWBFile",
409+
object_name="root",
410+
location="/",
411+
file_path=str(path),
412+
)
413+
389414
nwbfiles_iterable = nwbfiles
390415
if progress_bar:
391416
nwbfiles_iterable = tqdm(nwbfiles_iterable, **progress_bar_options)

tests/test_inspector.py

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,14 @@
44
from tempfile import mkdtemp
55
from pathlib import Path
66
from unittest import TestCase
7+
from datetime import datetime
78

89
import numpy as np
910
from pynwb import NWBFile, NWBHDF5IO, TimeSeries
1011
from pynwb.file import TimeIntervals
1112
from pynwb.behavior import SpatialSeries, Position
1213
from hdmf.common import DynamicTable
14+
from natsort import natsorted
1315

1416
from nwbinspector import (
1517
Importance,
@@ -628,3 +630,74 @@ def test_dandiset_streaming_cli_parallel(self):
628630
f"> {console_output_file}"
629631
)
630632
self.assertFileExists(path=self.tempdir / "test_nwbinspector_streaming_report_7.txt")
633+
634+
635+
class TestCheckUniqueIdentifiersPass(TestCase):
    """Run ``inspect_all`` on a folder of minimal NWB files and expect no messages at all."""

    # Only honoured by unittest's own assert* methods; show full diffs on failure.
    maxDiff = None

    @classmethod
    def setUpClass(cls):
        """Write three files produced by ``make_minimal_nwbfile`` into a fresh temporary folder."""
        cls.tempdir = Path(mkdtemp())
        num_nwbfiles = 3
        # NOTE(review): presumably make_minimal_nwbfile gives each file its own identifier
        # (the helper is defined outside this block) - the expected empty result relies on it.
        unique_id_nwbfiles = [make_minimal_nwbfile() for _ in range(num_nwbfiles)]

        cls.unique_id_nwbfile_paths = [str(cls.tempdir / f"unique_id_testing{j}.nwb") for j in range(num_nwbfiles)]
        for nwbfile_path, nwbfile in zip(cls.unique_id_nwbfile_paths, unique_id_nwbfiles):
            with NWBHDF5IO(path=nwbfile_path, mode="w") as io:
                io.write(nwbfile)

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary folder together with the files written in ``setUpClass``."""
        rmtree(cls.tempdir)

    def test_check_unique_identifiers_pass(self):
        """Inspecting the folder with only ``check_data_orientation`` selected yields an empty list."""
        # assertListEqual (rather than a bare assert) so that maxDiff = None takes effect.
        self.assertListEqual(list(inspect_all(path=self.tempdir, select=["check_data_orientation"])), [])
657+
658+
659+
class TestCheckUniqueIdentifiersFail(TestCase):
    """Run ``inspect_all`` on a folder whose files share one identifier and expect one CRITICAL message."""

    # Only honoured by unittest's own assert* methods; show full diffs on failure.
    maxDiff = None

    @classmethod
    def setUpClass(cls):
        """Write three NWB files that all carry the same ``identifier`` into a fresh temporary folder."""
        cls.tempdir = Path(mkdtemp())
        num_nwbfiles = 3
        non_unique_id_nwbfiles = [
            NWBFile(
                session_description="",
                identifier="not a unique identifier!",
                session_start_time=datetime.now().astimezone(),
            )
            for _ in range(num_nwbfiles)
        ]

        cls.non_unique_id_nwbfile_paths = [
            str(cls.tempdir / f"non_unique_id_testing{j}.nwb") for j in range(num_nwbfiles)
        ]
        for nwbfile_path, nwbfile in zip(cls.non_unique_id_nwbfile_paths, non_unique_id_nwbfiles):
            with NWBHDF5IO(path=nwbfile_path, mode="w") as io:
                io.write(nwbfile)

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary folder together with the files written in ``setUpClass``."""
        rmtree(cls.tempdir)

    def test_check_unique_identifiers_fail(self):
        """Expect exactly one message that names every file sharing the identifier, in natural sort order."""
        expected_messages = [
            InspectorMessage(
                message=(
                    "The identifier 'not a unique identifier!' is used across the .nwb files: "
                    f"{natsorted([Path(x).name for x in self.non_unique_id_nwbfile_paths])}. "
                    "The identifier of any NWBFile should be a completely unique value - "
                    "we recommend using uuid4 to achieve this."
                ),
                importance=Importance.CRITICAL,
                check_function_name="check_unique_identifiers",
                object_type="NWBFile",
                object_name="root",
                location="/",
                # The message is attributed to the inspected folder, not to an individual file.
                file_path=str(self.tempdir),
            )
        ]
        # assertListEqual (rather than a bare assert) so that maxDiff = None takes effect.
        self.assertListEqual(
            list(inspect_all(path=self.tempdir, select=["check_data_orientation"])),
            expected_messages,
        )

0 commit comments

Comments
 (0)