|
2 | 2 |
|
3 | 3 | import re |
4 | 4 | from datetime import datetime |
| 5 | +from pathlib import Path |
5 | 6 | from typing import Iterable, Optional |
6 | 7 |
|
| 8 | +from hdmf_zarr import NWBZarrIO |
7 | 9 | from isodate import Duration, parse_duration |
8 | | -from pynwb import NWBFile, ProcessingModule |
| 10 | +from pynwb import NWBHDF5IO, NWBFile, ProcessingModule |
9 | 11 | from pynwb.file import Subject |
10 | 12 |
|
11 | 13 | from .._registration import Importance, InspectorMessage, register_check |
| 14 | +from ..tools import get_nwbfile_path_from_internal_object |
12 | 15 | from ..utils import is_module_installed |
13 | 16 |
|
14 | 17 | duration_regex = ( |
@@ -352,3 +355,48 @@ def check_subject_id_no_slashes(subject: Subject) -> Optional[InspectorMessage]: |
352 | 355 | ) |
353 | 356 |
|
354 | 357 | return None |
| 358 | + |
| 359 | + |
@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=NWBFile)
def check_file_extension(nwbfile: NWBFile) -> Optional[InspectorMessage]:
    """
    Check that the file name ends with an extension appropriate for its storage backend.

    The full (possibly multi-part) extension of the file path must end with one of:
      - ".nwb" or ".nwb.h5" when the file was read through ``NWBHDF5IO``
      - ".nwb" or ".nwb.zarr" when the file was read through ``NWBZarrIO``
      - ".nwb", ".nwb.h5", or ".nwb.zarr" when the read IO is neither of those

    The comparison is case-sensitive. The check is skipped when the file path cannot be determined.

    Best Practice: :ref:`best_practice_file_extension`

    Parameters
    ----------
    nwbfile : NWBFile
        The in-memory NWB file object to inspect.

    Returns
    -------
    InspectorMessage or None
        A message describing the recommended extensions if the file extension does not comply,
        otherwise None.
    """
    file_path = get_nwbfile_path_from_internal_object(nwbfile)

    # Only perform the check if we can determine the file path
    if file_path is None:
        return None

    # Concatenate all suffixes so multi-part extensions such as ".nwb.h5" are compared as a whole
    file_extension = "".join(Path(file_path).suffixes)

    read_io = nwbfile.get_read_io()
    if isinstance(read_io, NWBHDF5IO):
        valid_extensions = [".nwb", ".nwb.h5"]
        backend = "HDF5"
    elif isinstance(read_io, NWBZarrIO):
        valid_extensions = [".nwb", ".nwb.zarr"]
        backend = "Zarr"
    else:
        # Unknown backend: accept any of the recognized extensions
        valid_extensions = [".nwb", ".nwb.h5", ".nwb.zarr"]
        backend = ""

    # A single suffix test is sufficient: an extension belonging to the *other* backend
    # (".nwb.zarr" for HDF5, ".nwb.h5" for Zarr) can never end with one of the accepted
    # extensions, so a backend mismatch is already rejected here.
    if file_extension.endswith(tuple(valid_extensions)):
        return None

    return InspectorMessage(
        message=(
            f"The file extension '{file_extension}' does not follow the recommended naming convention. "
            f"{backend} NWB files should use one of the following file name extensions: {', '.join(valid_extensions)}."
        )
    )
0 commit comments