11"""Primary functions for inspecting NWBFiles."""
22import os
3+ import re
34import importlib
45import traceback
56import json
1011from typing import Optional , List
1112from concurrent .futures import ProcessPoolExecutor , as_completed
1213from types import FunctionType
13- from warnings import filterwarnings
14+ from warnings import filterwarnings , warn
1415from distutils .util import strtobool
1516
1617import click
2627 save_report ,
2728)
2829from .register_checks import InspectorMessage , Importance
30+ from .tools import get_s3_urls_and_dandi_paths
2931from .utils import FilePathType , PathType , OptionalListOfStrings
3032
3133INTERNAL_CONFIGS = dict (dandi = Path (__file__ ).parent / "internal_configs" / "dandi.inspector_config.yaml" )
@@ -186,6 +188,21 @@ def configure_checks(
186188 is_flag = True ,
187189)
188190@click .option ("--progress-bar" , help = "Set this flag to False to disable display of the progress bar." )
191+ @click .option (
192+ "--stream" ,
193+ help = (
194+ "Stream data from the DANDI archive. If the 'path' is a local copy of the target DANDISet, specifying this "
195+ "flag will still force the data to be streamed instead of using the local copy. To use the local copy, simply "
196+ "remove this flag. Requires the Read Only S3 (ros3) driver to be installed with h5py."
197+ ),
198+ is_flag = True ,
199+ )
200+ @click .option (
201+ "--version-id" ,
202+ help = (
203+ "When 'path' is a six-digit DANDISet ID, this further specifies which version of " "the DANDISet to inspect."
204+ ),
205+ )
189206def inspect_all_cli (
190207 path : str ,
191208 modules : Optional [str ] = None ,
@@ -203,18 +220,34 @@ def inspect_all_cli(
203220 skip_validate : bool = False ,
204221 detailed : bool = False ,
205222 progress_bar : Optional [str ] = None ,
223+ stream : bool = False ,
224+ version_id : Optional [str ] = None ,
206225):
207226 """
208227 Run the NWBInspector via the command line.
209228
210229 path :
211- Path to either a local NWBFile, a local folder containing NWBFiles.
230+ Path to either a local NWBFile, a local folder containing NWBFiles, a link to a dataset on
231+ DANDI archive (i.e., https://dandiarchive.org/dandiset/{dandiset_id}/{version_id}), or a six-digit Dandiset ID.
212232 """
213233 levels = ["importance" , "file_path" ] if levels is None else levels .split ("," )
214234 reverse = [False ] * len (levels ) if reverse is None else [strtobool (x ) for x in reverse .split ("," )]
215235 progress_bar = strtobool (progress_bar ) if progress_bar is not None else True
216236 if config is not None :
217237 config = load_config (filepath_or_keyword = config )
238+ if stream :
239+ url_path = path if path .startswith ("https://" ) else None
240+ if url_path :
241+ dandiset_id , version_id = url_path .split ("/" )[- 2 :]
242+ path = dandiset_id
243+ assert url_path or re .fullmatch (
244+ pattern = "^[0-9]{6}$" , string = path
245+ ), "'--stream' flag was enabled, but 'path' is neither a full link to the DANDI archive nor a DANDISet ID."
246+ if Path (path ).is_dir ():
247+ warn (
248+ f"The local DANDISet '{ path } ' exists, but the '--stream' flag was used. "
249+ "NWBInspector will use S3 streaming from DANDI. To use local data, remove the '--stream' flag."
250+ )
218251 messages = list (
219252 inspect_all (
220253 path = path ,
@@ -226,6 +259,8 @@ def inspect_all_cli(
226259 n_jobs = n_jobs ,
227260 skip_validate = skip_validate ,
228261 progress_bar = progress_bar ,
262+ stream = stream ,
263+ version_id = version_id ,
229264 )
230265 )
231266 if json_file_path is not None :
@@ -254,14 +289,17 @@ def inspect_all(
254289 skip_validate : bool = False ,
255290 progress_bar : bool = True ,
256291 progress_bar_options : Optional [dict ] = None ,
292+ stream : bool = False ,
293+ version_id : Optional [str ] = None ,
257294):
258295 """
259296 Inspect a local NWBFile or folder of NWBFiles and return suggestions for improvements according to best practices.
260297
261298 Parameters
262299 ----------
263300 path : PathType
264- File path to an NWBFile, or folder path to iterate over recursively and scan all NWBFiles present.
301+ File path to an NWBFile, folder path to iterate over recursively and scan all NWBFiles present, or a
302+ six-digit identifier of the DANDISet.
265303 modules : list of strings, optional
266304 List of external module names to load; examples would be namespace extensions.
267305 These modules may also contain their own custom checks for their extensions.
@@ -294,22 +332,46 @@ def inspect_all(
294332 Defaults to True.
295333 progress_bar_options : dict, optional
296334 Dictionary of keyword arguments to pass directly to tqdm.
335+ stream : bool, optional
336+ Stream data from the DANDI archive. If the 'path' is a local copy of the target DANDISet, setting this
337+ argument to True will force the data to be streamed instead of using the local copy.
338+ Requires the Read Only S3 (ros3) driver to be installed with h5py.
339+ Defaults to False.
340+ version_id : str, optional
341+ If the path is a DANDISet ID, version_id additionally specifies which version of the dataset to read from.
342+ Common options are 'draft' or 'published'.
343+ Defaults to the most recent published version, or if not published then the most recent draft version.
297344 """
298345 modules = modules or []
299346 if progress_bar_options is None :
300- progress_bar_options = dict (desc = "Inspecting NWBFiles..." )
301- in_path = Path (path )
302- if in_path .is_dir ():
303- nwbfiles = list (in_path .rglob ("*.nwb" ))
304- elif in_path .is_file ():
305- nwbfiles = [in_path ]
347+ progress_bar_options = dict (position = 0 , leave = False )
348+ if stream :
349+ progress_bar_options .update (desc = "Inspecting NWBFiles with ROS3..." )
350+ else :
351+ progress_bar_options .update (desc = "Inspecting NWBFiles..." )
352+ if stream :
353+ assert (
354+ re .fullmatch (pattern = "^[0-9]{6}$" , string = str (path )) is not None
355+ ), "'--stream' flag was enabled, but 'path' is not a DANDISet ID."
356+ driver = "ros3"
357+ nwbfiles = get_s3_urls_and_dandi_paths (dandiset_id = path , version_id = version_id , n_jobs = n_jobs )
306358 else :
307- raise ValueError (f"{ in_path } should be a directory or an NWB file." )
359+ driver = None
360+ in_path = Path (path )
361+ if in_path .is_dir ():
362+ nwbfiles = list (in_path .rglob ("*.nwb" ))
363+ elif in_path .is_file ():
364+ nwbfiles = [in_path ]
365+ else :
366+ raise ValueError (f"{ in_path } should be a directory or an NWB file." )
308367 for module in modules :
309368 importlib .import_module (module )
310369 # Filtering of checks should apply after external modules are imported, in case those modules have their own checks
311370 checks = configure_checks (config = config , ignore = ignore , select = select , importance_threshold = importance_threshold )
312371
372+ nwbfiles_iterable = nwbfiles
373+ if progress_bar :
374+ nwbfiles_iterable = tqdm (nwbfiles_iterable , ** progress_bar_options )
313375 if n_jobs != 1 :
314376 progress_bar_options .update (total = len (nwbfiles ))
315377 futures = []
@@ -318,26 +380,34 @@ def inspect_all(
318380 for nwbfile_path in nwbfiles :
319381 futures .append (
320382 executor .submit (
321- _pickle_inspect_nwb , nwbfile_path = nwbfile_path , checks = checks , skip_validate = skip_validate
383+ _pickle_inspect_nwb ,
384+ nwbfile_path = nwbfile_path ,
385+ checks = checks ,
386+ skip_validate = skip_validate ,
387+ driver = driver ,
322388 )
323389 )
324- completed_futures = as_completed (futures )
390+ nwbfiles_iterable = as_completed (futures )
325391 if progress_bar :
326- completed_futures = tqdm (completed_futures , ** progress_bar_options )
327- for future in completed_futures :
392+ nwbfiles_iterable = tqdm (nwbfiles_iterable , ** progress_bar_options )
393+ for future in nwbfiles_iterable :
328394 for message in future .result ():
395+ if stream :
396+ message .file_path = nwbfiles [message .file_path ]
329397 yield message
330398 else :
331- if progress_bar :
332- nwbfiles = tqdm ( nwbfiles , ** progress_bar_options )
333- for nwbfile_path in nwbfiles :
334- for message in inspect_nwb ( nwbfile_path = nwbfile_path , checks = checks ):
399+ for nwbfile_path in nwbfiles_iterable :
400+ for message in inspect_nwb ( nwbfile_path = nwbfile_path , checks = checks , driver = driver ):
401+ if stream :
402+ message . file_path = nwbfiles [ message . file_path ]
335403 yield message
336404
337405
def _pickle_inspect_nwb(
    nwbfile_path: str, checks: list = available_checks, skip_validate: bool = False, driver: Optional[str] = None
):
    """
    Auxiliary function for inspect_all to run in parallel using the ProcessPoolExecutor.

    Forwards every argument unchanged to inspect_nwb and collects all of the results it produces
    into a list before returning.

    Parameters
    ----------
    nwbfile_path : str
        Passed through to inspect_nwb as 'nwbfile_path'.
    checks : list, optional
        Passed through to inspect_nwb as 'checks'.
        Defaults to the module-level available_checks.
    skip_validate : bool, optional
        Passed through to inspect_nwb as 'skip_validate'.
        Defaults to False.
    driver : str, optional
        Passed through to inspect_nwb as 'driver'.
        Defaults to None.

    Returns
    -------
    list
        Every item produced by inspect_nwb for this file, in the order produced.
    """
    forwarded_kwargs = dict(nwbfile_path=nwbfile_path, checks=checks, skip_validate=skip_validate, driver=driver)
    # NOTE(review): presumably materialized as a list so the result can be sent back from the worker
    # process (a lazy iterator could not be) - confirm against inspect_nwb's return type.
    return [message for message in inspect_nwb(**forwarded_kwargs)]
341411
342412
343413def inspect_nwb (
0 commit comments