diff --git a/CHANGELOG.md b/CHANGELOG.md index 880ab649..1e8bb263 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ ### Improvements * Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624) +* Updated report summary to include number of files detected and indicate when no issues are found. [#629](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/629) + +### Fixes +* Fixed file count error when checking for non-unique identifiers in a folder [#629](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/629) # v0.6.5 (July 25, 2025) diff --git a/src/nwbinspector/_dandi_inspection.py b/src/nwbinspector/_dandi_inspection.py index 084724ec..36f3270e 100644 --- a/src/nwbinspector/_dandi_inspection.py +++ b/src/nwbinspector/_dandi_inspection.py @@ -8,6 +8,7 @@ from ._configuration import load_config, validate_config from ._nwb_inspection import inspect_nwbfile_object from ._types import Importance, InspectorMessage +from .tools import get_nwb_assets_from_dandiset def inspect_dandiset( @@ -66,14 +67,7 @@ def inspect_dandiset( """ config = config or "dandi" - if client is None: - import dandi.dandiapi - - client = dandi.dandiapi.DandiAPIClient() - - dandiset = client.get_dandiset(dandiset_id=dandiset_id, version_id=dandiset_version) - - nwb_assets = [asset for asset in dandiset.get_assets() if ".nwb" in pathlib.Path(asset.path).suffixes] + nwb_assets = get_nwb_assets_from_dandiset(dandiset_id=dandiset_id, dandiset_version=dandiset_version, client=client) nwb_assets_iterator = nwb_assets if show_progress_bar: diff --git a/src/nwbinspector/_formatting.py b/src/nwbinspector/_formatting.py index baf14701..bacaa449 100644 --- a/src/nwbinspector/_formatting.py +++ b/src/nwbinspector/_formatting.py @@ -81,9 +81,11 @@ def __init__( reverse: Optional[list[bool]] = None, detailed: bool = False, formatter_options: Optional[FormatterOptions] = None, + nfiles_detected: Optional[int] = None, ) -> None: self.nmessages = len(messages) - self.nfiles = len(set(message.file_path for message in messages)) # type: ignore + self.nfiles_with_issues = len(set(message.file_path for message in messages)) # type: ignore + self.nfiles_detected = nfiles_detected if nfiles_detected is not None else self.nfiles_with_issues self.message_count_by_importance = self._count_messages_by_importance(messages=messages) self.initial_organized_messages = organize_messages(messages=messages, levels=levels, reverse=reverse) self.detailed = detailed @@ -206,9 +208,16 @@ def format_messages(self) -> list[str]: f"Platform: {report_header['Platform']}", f"NWBInspector version: {report_header['NWBInspector_version']}", "", - f"Found {self.nmessages} issues over {self.nfiles} files:", ] ) + + if self.nmessages == 0: + self.formatted_messages.append(f"Scanned {self.nfiles_detected} file(s) - no issues found!") + else: + self.formatted_messages.append( + f"Scanned {self.nfiles_detected} file(s) and found {self.nmessages} issues across {self.nfiles_with_issues} file(s):" + ) + for importance_level, number_of_results in self.message_count_by_importance.items(): increment = " " * (8 - len(str(number_of_results))) self.formatted_messages.append(f"{increment}{number_of_results} - {importance_level}") @@ -222,11 +231,14 @@ def format_messages( levels: Optional[list[str]] = None, reverse: Optional[list[bool]] = None, detailed: bool = False, + nfiles_detected: Optional[int] = None, ) -> list[str]: """Print InspectorMessages in order specified by the organization structure.""" levels = levels or ["file_path", "importance"] - message_formatter = MessageFormatter(messages=messages, levels=levels, reverse=reverse, detailed=detailed) + message_formatter = MessageFormatter( + messages=messages, levels=levels, reverse=reverse, detailed=detailed, nfiles_detected=nfiles_detected + ) formatted_messages = message_formatter.format_messages() return formatted_messages diff --git a/src/nwbinspector/_nwb_inspection.py b/src/nwbinspector/_nwb_inspection.py index 14fdd7ab..8096d211 100644 --- a/src/nwbinspector/_nwb_inspection.py +++ b/src/nwbinspector/_nwb_inspection.py @@ -9,7 +9,6 @@ from warnings import filterwarnings, warn import pynwb -from hdmf_zarr import ZarrIO from natsort import natsorted from tqdm import tqdm @@ -20,6 +19,7 @@ OptionalListOfStrings, PathType, calculate_number_of_cpu, + get_nwbfiles_from_path, ) @@ -86,7 +86,6 @@ def inspect_all( List of external module names to load; examples would be namespace extensions. These modules may also contain their own custom checks for their extensions. """ - in_path = Path(path) importance_threshold = ( Importance[importance_threshold] if isinstance(importance_threshold, str) else importance_threshold ) @@ -127,17 +126,8 @@ def inspect_all( if progress_bar_options is None: progress_bar_options = dict(position=0, leave=False) - if in_path.is_dir() and (in_path.match("*.nwb*")) and ZarrIO.can_read(in_path): - nwbfiles = [in_path] # if it is a zarr directory - elif in_path.is_dir(): - nwbfiles = list(in_path.rglob("*.nwb*")) + nwbfiles = get_nwbfiles_from_path(path=path) - # Remove any macOS sidecar files - nwbfiles = [nwbfile for nwbfile in nwbfiles if not nwbfile.name.startswith("._")] - elif in_path.is_file(): - nwbfiles = [in_path] - else: - raise ValueError(f"{in_path} should be a directory or an NWB file.") # Filtering of checks should apply after external modules are imported, in case those modules have their own checks checks = configure_checks(config=config, ignore=ignore, select=select, importance_threshold=importance_threshold) @@ -153,10 +143,11 @@ def inspect_all( if len(identifiers) != len(nwbfiles): for identifier, nwbfiles_with_identifier in identifiers.items(): if len(nwbfiles_with_identifier) > 1: + non_unique_files = natsorted([x.name for x in nwbfiles_with_identifier]) yield InspectorMessage( message=( f"The identifier '{identifier}' is used across the .nwb files: " - f"{natsorted([x.name for x in nwbfiles_with_identifier])}. " + f"{non_unique_files}. " "The identifier of any NWBFile should be a completely unique value - " "we recommend using uuid4 to achieve this." ), @@ -165,7 +156,7 @@ def inspect_all( object_type="NWBFile", object_name="root", location="/", - file_path=str(path), + file_path=str(non_unique_files[-1]), # report an example file_path with non-unique identifier ) nwbfiles_iterable = nwbfiles diff --git a/src/nwbinspector/_nwbinspector_cli.py b/src/nwbinspector/_nwbinspector_cli.py index 2b16ce57..307564ed 100644 --- a/src/nwbinspector/_nwbinspector_cli.py +++ b/src/nwbinspector/_nwbinspector_cli.py @@ -20,7 +20,8 @@ ) from ._nwb_inspection import inspect_all from ._types import Importance -from .utils import strtobool +from .tools import get_nwb_assets_from_dandiset +from .utils import get_nwbfiles_from_path, strtobool @click.command() @@ -159,6 +160,8 @@ def _nwbinspector_cli( skip_validate=skip_validate, show_progress_bar=show_progress_bar, ) + nwb_assets = get_nwb_assets_from_dandiset(dandiset_id=dandiset_id, dandiset_version=dandiset_version) + nfiles_detected = len(nwb_assets) # Scan a single NWB file in a Dandiset elif stream and ":" in path and not path_is_url: dandiset_id, dandi_file_path = path.split(":") @@ -174,6 +177,7 @@ def _nwbinspector_cli( importance_threshold=handled_importance_threshold, skip_validate=skip_validate, ) + nfiles_detected = 1 # Scan single NWB file at URL elif stream and path_is_url: dandi_s3_url = path @@ -186,6 +190,7 @@ def _nwbinspector_cli( importance_threshold=handled_importance_threshold, skip_validate=skip_validate, ) + nfiles_detected = 1 # Scan local file/folder else: # stream is False messages_iterator = inspect_all( @@ -198,6 +203,7 @@ def _nwbinspector_cli( skip_validate=skip_validate, progress_bar=show_progress_bar, ) + nfiles_detected = len(get_nwbfiles_from_path(path=path)) messages = list(messages_iterator) if json_file_path is not None: @@ -209,7 +215,11 @@ def _nwbinspector_cli( print(f"{os.linesep*2}Report saved to {str(Path(json_file_path).absolute())}!{os.linesep}") formatted_messages = format_messages( - messages=messages, levels=handled_levels, reverse=handled_reverse, detailed=detailed + messages=messages, + levels=handled_levels, + reverse=handled_reverse, + detailed=detailed, + nfiles_detected=nfiles_detected, ) print_to_console(formatted_messages=formatted_messages) if report_file_path is not None: diff --git a/src/nwbinspector/tools/__init__.py b/src/nwbinspector/tools/__init__.py index 33612ae1..df94c2a3 100644 --- a/src/nwbinspector/tools/__init__.py +++ b/src/nwbinspector/tools/__init__.py @@ -1,4 +1,4 @@ -from ._dandi import get_s3_urls_and_dandi_paths +from ._dandi import get_nwb_assets_from_dandiset, get_s3_urls_and_dandi_paths from ._nwb import all_of_type, get_nwbfile_path_from_internal_object from ._read_nwbfile import BACKEND_IO_CLASSES, read_nwbfile, read_nwbfile_and_io @@ -9,4 +9,5 @@ "get_nwbfile_path_from_internal_object", "read_nwbfile", "read_nwbfile_and_io", + "get_nwb_assets_from_dandiset", ] diff --git a/src/nwbinspector/tools/_dandi.py b/src/nwbinspector/tools/_dandi.py index 87fac36e..8f3ec84a 100644 --- a/src/nwbinspector/tools/_dandi.py +++ b/src/nwbinspector/tools/_dandi.py @@ -1,8 +1,9 @@ """Helper functions related to DANDI for internal use that rely on external dependencies (i.e., dandi).""" +import pathlib import re from concurrent.futures import ProcessPoolExecutor, as_completed -from typing import Optional +from typing import Literal, Optional, Union from ..utils import calculate_number_of_cpu, is_module_installed @@ -57,3 +58,25 @@ def _get_content_url_and_path( Must be globally defined (not as a part of get_s3_urls..) in order to be pickled. """ return {asset.get_content_url(follow_redirects=1, strip_query=True): asset.path} + + +def get_nwb_assets_from_dandiset( + dandiset_id: str, + dandiset_version: Union[str, Literal["draft"], None] = None, + client: Union["dandi.dandiapi.DandiAPIClient", None] = None, # type: ignore +) -> list["dandi.dandiapi.BaseRemoteAsset"]: # type: ignore + """ + Collect NWB assets from a DANDISet ID. + + Returns list of NWB assets. + """ + if client is None: + import dandi.dandiapi + + client = dandi.dandiapi.DandiAPIClient() + + dandiset = client.get_dandiset(dandiset_id=dandiset_id, version_id=dandiset_version) + + nwb_assets = [asset for asset in dandiset.get_assets() if ".nwb" in pathlib.Path(asset.path).suffixes] + + return nwb_assets diff --git a/src/nwbinspector/utils/__init__.py b/src/nwbinspector/utils/__init__.py index 905622e9..d73eb3d5 100644 --- a/src/nwbinspector/utils/__init__.py +++ b/src/nwbinspector/utils/__init__.py @@ -11,6 +11,7 @@ get_package_version, calculate_number_of_cpu, get_data_shape, + get_nwbfiles_from_path, PathType, # TODO: deprecate in favor of explicit typing FilePathType, # TODO: deprecate in favor of explicit typing OptionalListOfStrings, # TODO: deprecate in favor of explicit typing @@ -29,4 +30,5 @@ "get_package_version", "calculate_number_of_cpu", "get_data_shape", + "get_nwbfiles_from_path", ] diff --git a/src/nwbinspector/utils/_utils.py b/src/nwbinspector/utils/_utils.py index 6111a188..d4f85ac4 100644 --- a/src/nwbinspector/utils/_utils.py +++ b/src/nwbinspector/utils/_utils.py @@ -12,6 +12,7 @@ import numpy as np import zarr from hdmf.backends.hdf5.h5_utils import H5Dataset +from hdmf_zarr import ZarrIO from numpy.typing import ArrayLike from packaging import version @@ -256,3 +257,26 @@ def strtobool(val: str) -> bool: return False else: raise ValueError(f"Invalid truth value {val!r}") + + +def get_nwbfiles_from_path(path: PathType) -> list[Path]: + """ + Given a path, return a list of NWB files. + + If the path is a directory, check whether it is a zarr directory or search for all NWB files. + If the path is a file, return a list containing that file. + """ + in_path = Path(path) + if in_path.is_dir() and (in_path.match("*.nwb*")) and ZarrIO.can_read(in_path): + nwbfiles = [in_path] # if it is a zarr directory + elif in_path.is_dir(): + nwbfiles = list(in_path.rglob("*.nwb*")) + + # Remove any macOS sidecar files + nwbfiles = [nwbfile for nwbfile in nwbfiles if not nwbfile.name.startswith("._")] + elif in_path.is_file(): + nwbfiles = [in_path] + else: + raise ValueError(f"{in_path} should be a directory or an NWB file.") + + return nwbfiles diff --git a/tests/expected_reports/000126_report.txt b/tests/expected_reports/000126_report.txt index c68eba52..a1b6065c 100644 --- a/tests/expected_reports/000126_report.txt +++ b/tests/expected_reports/000126_report.txt @@ -5,7 +5,7 @@ Timestamp: 2024-09-04 14:08:54.703671-04:00 Platform: macOS-14.6.1-arm64-arm-64bit NWBInspector version: 0.4.38 -Found 10 issues over 1 files: +Scanned 1 file(s) and found 10 issues across 1 file(s): 3 - CRITICAL 1 - BEST_PRACTICE_VIOLATION 6 - BEST_PRACTICE_SUGGESTION diff --git a/tests/expected_reports/true_nwbinspector_default_report_hdf5.txt b/tests/expected_reports/true_nwbinspector_default_report_hdf5.txt index 0b7a3896..96956ca7 100644 --- a/tests/expected_reports/true_nwbinspector_default_report_hdf5.txt +++ b/tests/expected_reports/true_nwbinspector_default_report_hdf5.txt @@ -5,7 +5,7 @@ Timestamp: 2022-04-01 13:32:13.756390-04:00 Platform: Windows-10-10.0.19043-SP0 NWBInspector version: 0.3.6 -Found 5 issues over 2 files: +Scanned 3 file(s) and found 5 issues across 2 file(s): 2 - CRITICAL 2 - BEST_PRACTICE_VIOLATION 1 - BEST_PRACTICE_SUGGESTION diff --git a/tests/expected_reports/true_nwbinspector_default_report_zarr.txt b/tests/expected_reports/true_nwbinspector_default_report_zarr.txt index 957e68ad..60d60634 100644 --- a/tests/expected_reports/true_nwbinspector_default_report_zarr.txt +++ b/tests/expected_reports/true_nwbinspector_default_report_zarr.txt @@ -5,7 +5,7 @@ Timestamp: 2022-04-01 13:32:13.756390-04:00 Platform: Windows-10-10.0.19043-SP0 NWBInspector version: 0.3.6 -Found 5 issues over 2 files: +Scanned 3 file(s) and found 5 issues across 2 file(s): 2 - CRITICAL 2 - BEST_PRACTICE_VIOLATION 1 - BEST_PRACTICE_SUGGESTION diff --git a/tests/expected_reports/true_nwbinspector_report_with_dandi_config_hdf5.txt b/tests/expected_reports/true_nwbinspector_report_with_dandi_config_hdf5.txt index ae39abe6..6321ec34 100644 --- a/tests/expected_reports/true_nwbinspector_report_with_dandi_config_hdf5.txt +++ b/tests/expected_reports/true_nwbinspector_report_with_dandi_config_hdf5.txt @@ -5,7 +5,7 @@ Timestamp: 2022-04-01 13:32:13.756390-04:00 Platform: Windows-10-10.0.19043-SP0 NWBInspector version: 0.3.6 -Found 10 issues over 2 files: +Scanned 2 file(s) and found 10 issues across 2 file(s): 3 - CRITICAL 7 - BEST_PRACTICE_VIOLATION ************************************************** diff --git a/tests/expected_reports/true_nwbinspector_report_with_dandi_config_zarr.txt b/tests/expected_reports/true_nwbinspector_report_with_dandi_config_zarr.txt index 934b7e5f..4b9993a9 100644 --- a/tests/expected_reports/true_nwbinspector_report_with_dandi_config_zarr.txt +++ b/tests/expected_reports/true_nwbinspector_report_with_dandi_config_zarr.txt @@ -5,7 +5,7 @@ Timestamp: 2022-04-01 13:32:13.756390-04:00 Platform: Windows-10-10.0.19043-SP0 NWBInspector version: 0.3.6 -Found 10 issues over 2 files: +Scanned 2 file(s) and found 10 issues across 2 file(s): 3 - CRITICAL 7 - BEST_PRACTICE_VIOLATION ************************************************** diff --git a/tests/test_formatting.py b/tests/test_formatting.py new file mode 100644 index 00000000..e324ea4f --- /dev/null +++ b/tests/test_formatting.py @@ -0,0 +1,67 @@ +"""Tests for the _formatting module.""" + +from unittest import TestCase + +from nwbinspector import Importance, InspectorMessage +from nwbinspector._formatting import MessageFormatter + + +class TestMessageFormatterSummary(TestCase): + """Test the summary message generation in MessageFormatter.format_messages().""" + + def test_format_messages_no_issues(self): + """Test that the correct summary is generated when no issues are found.""" + messages = [] + levels = ["file_path", "importance"] + nfiles_detected = 5 + + formatter = MessageFormatter( + messages=messages, + levels=levels, + nfiles_detected=nfiles_detected, + ) + formatted_messages = formatter.format_messages() + self.assertIn("Scanned 5 file(s) - no issues found!", formatted_messages) + + def test_format_messages_with_issues(self): + """Test that the correct summary is generated when issues are found.""" + messages = [ + InspectorMessage( + message="Test issue 1", + importance=Importance.CRITICAL, + check_function_name="test_check", + object_type="TestType", + object_name="test_object", + location="/test/location", + file_path="/path/to/file1.nwb", + ), + InspectorMessage( + message="Test issue 1", + importance=Importance.CRITICAL, + check_function_name="test_check", + object_type="TestType", + object_name="test_object", + location="/test/location", + file_path="/path/to/file1.nwb", + ), + InspectorMessage( + message="Test issue 2", + importance=Importance.BEST_PRACTICE_VIOLATION, + check_function_name="test_check_2", + object_type="TestType", + object_name="test_object_2", + location="/test/location2", + file_path="/path/to/file2.nwb", + ), + ] + levels = ["file_path", "importance"] + nfiles_detected = 4 + + formatter = MessageFormatter( + messages=messages, + levels=levels, + nfiles_detected=nfiles_detected, + ) + formatted_messages = formatter.format_messages() + + self.assertIn("Scanned 4 file(s) and found 3 issues across 2 file(s):", formatted_messages) diff --git a/tests/test_inspector.py b/tests/test_inspector.py index 9548d8fb..a8414c80 100644 --- a/tests/test_inspector.py +++ b/tests/test_inspector.py @@ -880,11 +880,12 @@ def test_check_unique_identifiers_fail(self): test_messages = list( inspect_all(path=self.tempdir, select=["check_data_orientation"], skip_validate=self.skip_validate) ) + non_unique_files = natsorted([Path(x).name for x in self.non_unique_id_nwbfile_paths]) expected_messages = [ InspectorMessage( message=( "The identifier 'not a unique identifier!' is used across the .nwb files: " - f"{natsorted([Path(x).name for x in self.non_unique_id_nwbfile_paths])}. " + f"{non_unique_files}. " "The identifier of any NWBFile should be a completely unique value - " "we recommend using uuid4 to achieve this." ), @@ -893,7 +894,7 @@ def test_check_unique_identifiers_fail(self): object_type="NWBFile", object_name="root", location="/", - file_path=str(self.tempdir), + file_path=str(non_unique_files[-1]), ) ]