Skip to content

Commit 8a777a1

Browse files
stephprince and rly authored
Add best practices documentation and check for NWB file extension (#625)
* add file extension check * add test for file extension check * expose check to API and add docs * update CHANGELOG * update file documentation, changelog * Update src/nwbinspector/checks/_nwbfile_metadata.py Co-authored-by: Ryan Ly <[email protected]> * Update src/nwbinspector/checks/_nwbfile_metadata.py Co-authored-by: Ryan Ly <[email protected]> * check file extension ends with valid extension * Update src/nwbinspector/checks/_nwbfile_metadata.py Co-authored-by: Ryan Ly <[email protected]> --------- Co-authored-by: Ryan Ly <[email protected]>
1 parent b5d5cb1 commit 8a777a1

File tree

5 files changed

+113
-2
lines changed

5 files changed

+113
-2
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# v0.6.6 (Upcoming)
22

3+
### New Checks
4+
* Added `check_file_extension` for NWB file extension best practice recommendations (`.nwb`, `.nwb.h5`, or `.nwb.zarr`) [#625](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/625)
5+
36
### Improvements
47
* Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624)
58

docs/best_practices/nwbfile_metadata.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,21 @@ NWBFile Metadata
44
An :ref:`nwb-schema:sec-NWBFile` object generally contains data from a single experimental session.
55

66

7+
.. _best_practice_file_extension:
8+
9+
File Extensions
10+
---------------
11+
12+
NWB file paths should contain ``.nwb`` in their file extension to indicate that they are NWB files.
13+
To further help tools and users quickly identify the underlying backend type, an additional option is to attach the backend as a second suffix.
14+
Recommended file extensions are:
15+
16+
1. ``.nwb`` (minimum recommendation)
17+
2. ``.nwb.h5`` (also acceptable for NWB HDF5 files)
18+
3. ``.nwb.zarr`` (also acceptable for NWB Zarr stores)
19+
20+
Check function: :py:meth:`~nwbinspector.checks._nwbfile_metadata.check_file_extension`
21+
722

823
File Organization
924
-----------------

src/nwbinspector/checks/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
check_experiment_description,
4040
check_experimenter_exists,
4141
check_experimenter_form,
42+
check_file_extension,
4243
check_institution,
4344
check_keywords,
4445
check_processing_module_name,
@@ -120,6 +121,7 @@
120121
"check_subject_species_exists",
121122
"check_subject_species_form",
122123
"check_subject_proper_age_range",
124+
"check_file_extension",
123125
"check_session_id_no_slashes",
124126
"check_session_start_time_future_date",
125127
"check_processing_module_name",

src/nwbinspector/checks/_nwbfile_metadata.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22

33
import re
44
from datetime import datetime
5+
from pathlib import Path
56
from typing import Iterable, Optional
67

8+
from hdmf_zarr import NWBZarrIO
79
from isodate import Duration, parse_duration
8-
from pynwb import NWBFile, ProcessingModule
10+
from pynwb import NWBHDF5IO, NWBFile, ProcessingModule
911
from pynwb.file import Subject
1012

1113
from .._registration import Importance, InspectorMessage, register_check
14+
from ..tools import get_nwbfile_path_from_internal_object
1215
from ..utils import is_module_installed
1316

1417
duration_regex = (
@@ -352,3 +355,48 @@ def check_subject_id_no_slashes(subject: Subject) -> Optional[InspectorMessage]:
352355
)
353356

354357
return None
358+
359+
360+
@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=NWBFile)
def check_file_extension(nwbfile: NWBFile) -> Optional[InspectorMessage]:
    """
    Check that the file extension ends with a recommended NWB extension for its storage backend.

    The extension is built from all suffixes of the file name, so multi-part extensions
    such as ``.nwb.h5`` are compared as a whole. Accepted endings are:

    - ``.nwb`` or ``.nwb.h5`` when the file was read with ``NWBHDF5IO``
    - ``.nwb`` or ``.nwb.zarr`` when the file was read with ``NWBZarrIO``
    - any of ``.nwb``, ``.nwb.h5``, ``.nwb.zarr`` when the read IO is neither of those

    The check is skipped (returns ``None``) when no file path can be determined for ``nwbfile``.

    Best Practice: :ref:`best_practice_file_extension`
    """
    file_path = get_nwbfile_path_from_internal_object(nwbfile)

    # Only perform the check if we can determine the file path
    if file_path is None:
        return None

    # Concatenate all suffixes so multi-part extensions (e.g. ".nwb.h5") are seen as one unit
    file_extension = "".join(Path(file_path).suffixes)

    read_io = nwbfile.get_read_io()
    if isinstance(read_io, NWBHDF5IO):
        backend = "HDF5"
        valid_extensions = (".nwb", ".nwb.h5")
    elif isinstance(read_io, NWBZarrIO):
        backend = "Zarr"
        valid_extensions = (".nwb", ".nwb.zarr")
    else:
        backend = ""
        valid_extensions = (".nwb", ".nwb.h5", ".nwb.zarr")

    # A single `endswith` test is sufficient: an extension ending in the *other* backend's
    # suffix (e.g. ".nwb.zarr" for an HDF5 file) can never end with one of this backend's
    # valid suffixes, so backend mismatches are rejected here as well.
    if file_extension.endswith(valid_extensions):
        return None

    # Avoid a stray leading space in the message when the backend could not be identified
    file_kind = f"{backend} NWB files" if backend else "NWB files"
    return InspectorMessage(
        message=(
            f"The file extension '{file_extension}' does not follow the recommended naming convention. "
            f"{file_kind} should use one of the following file name extensions: {', '.join(valid_extensions)}."
        )
    )

tests/unit_tests/test_nwbfile_metadata.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import tempfile
12
from datetime import datetime, timezone
23
from uuid import uuid4
34

4-
from pynwb import NWBFile, ProcessingModule
5+
from hdmf_zarr import NWBZarrIO
6+
from pynwb import NWBHDF5IO, NWBFile, ProcessingModule
57
from pynwb.file import Subject
68

79
from nwbinspector import Importance, InspectorMessage
@@ -10,6 +12,7 @@
1012
check_experiment_description,
1113
check_experimenter_exists,
1214
check_experimenter_form,
15+
check_file_extension,
1316
check_institution,
1417
check_keywords,
1518
check_processing_module_name,
@@ -619,3 +622,43 @@ def test_check_subject_id_with_slashes():
619622
object_name="subject",
620623
location="/general/subject",
621624
)
625+
626+
627+
def test_check_file_extension_pass():
    """Test that recommended extensions pass the check for both the HDF5 and Zarr backends."""
    from pathlib import Path

    extension_dict = {".nwb": NWBHDF5IO, ".nwb.h5": NWBHDF5IO, ".nwb.zarr": NWBZarrIO}

    for ext, io_class in extension_dict.items():
        # Write inside a managed temporary directory: the path works for both HDF5 files and
        # Zarr directory stores, and everything is cleaned up when the context exits.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = str(Path(tmp_dir) / f"nwbfile{ext}")

            nwbfile = make_minimal_nwbfile()
            with io_class(tmp_path, mode="w") as io:
                io.write(nwbfile)

            with io_class(tmp_path, mode="r") as io:
                read_nwbfile = io.read()
                assert check_file_extension(read_nwbfile) is None
644+
645+
646+
def test_check_file_extension_fail():
    """Test that unrecognized or backend-mismatched extensions fail the check."""
    from pathlib import Path

    # ".txt" is not an NWB extension at all; the other two pair an extension with the wrong backend
    invalid_extension_dict = {".txt": NWBHDF5IO, ".nwb.zarr": NWBHDF5IO, ".nwb.h5": NWBZarrIO}

    for ext, io_class in invalid_extension_dict.items():
        # Write inside a managed temporary directory: the path works for both HDF5 files and
        # Zarr directory stores, and everything is cleaned up when the context exits.
        with tempfile.TemporaryDirectory() as tmp_dir:
            # The stem has no dots, so the detected extension is exactly `ext`
            tmp_path = str(Path(tmp_dir) / f"nwbfile{ext}")

            nwbfile = make_minimal_nwbfile()
            with io_class(tmp_path, mode="w") as io:
                io.write(nwbfile)

            with io_class(tmp_path, mode="r") as io:
                read_nwbfile = io.read()
                result = check_file_extension(read_nwbfile)
                msg = f"The file extension '{ext}' does not follow the recommended naming convention."
                # Fail with a clear assertion rather than an AttributeError if no message is returned
                assert result is not None
                assert msg in result.message

0 commit comments

Comments
 (0)