@@ -354,33 +354,41 @@ def _rescale_epoch_duration(self, raw_duration, dtype):
354354
355355def explore_folder (dirname , experiment_names = None ):
356356 """
357- Exploring the OpenEphys folder structure and structure.oebin
358-
359- Returns nested dictionary structure:
360- [block_index][seg_index][stream_type][stream_information]
361- where
362- - node_name is the open ephys node id
363- - block_index is the neo Block index
364- - segment_index is the neo Segment index
365- - stream_type can be 'continuous'/'events'/'spikes'
366- - stream_information is a dictionary containing e.g. the sampling rate
357+ Exploring the OpenEphys folder structure, by looping through the
358+ folder to find recordings.
367359
368360 Parameters
369361 ----------
370362 dirname (str): Root folder of the dataset
371363
372364 Returns
373365 -------
374- nested dictionaries containing structure and stream information
366+ folder_structure: dict
367+ The folder_structure is a dictionary that describes the Open Ephys folder.
368+ Dictionary structure:
369+ [node_name]["experiments"][exp_id]["recordings"][rec_id][stream_type][stream_information]
370+ all_streams: dict
371+ From the folder_structure, another dictionary is reorganized with NEO-like
372+ indexing: block_index (experiments) and seg_index (recordings):
373+ Dictionary structure:
374+ [block_index][seg_index][stream_type][stream_information]
375+ where
376+ - node_name is the open ephys node id
377+ - block_index is the neo Block index
378+ - segment_index is the neo Segment index
379+ - stream_type can be 'continuous'/'events'/'spikes'
380+ - stream_information is a dictionary containing e.g. the sampling rate
381+ nb_block : int
382+ Number of blocks (experiments) loaded
383+ nb_segment_per_block : dict
384+ Dictionary with the number of segments per block.
385+ Keys are block indices, values are number of segments
386+ possible_experiment_names : list
387+ List of all available experiments in the Open Ephys folder
375388 """
376- nb_block = 0
377- nb_segment_per_block = {}
378- # nested dictionary: block_index > seg_index > data_type > stream_name
379- all_streams = {}
380- possible_experiment_names = []
381-
382389 # folder with nodes, experiments, setting files, recordings, and streams
383390 folder_structure = {}
391+ possible_experiment_names = []
384392
385393 for root , dirs , files in os .walk (dirname ):
386394 for file in files :
@@ -397,124 +405,138 @@ def explore_folder(dirname, experiment_names=None):
397405
398406 if node_name not in folder_structure :
399407 folder_structure [node_name ] = {}
400- folder_structure [node_name ]['experiments' ] = []
408+ folder_structure [node_name ]['experiments' ] = {}
401409
402410 # here we skip if experiment_names is not None
403411 experiment_folder = root .parents [0 ]
404412 experiment_name = experiment_folder .stem
405- possible_experiment_names .append (experiment_name )
413+ experiment_id = experiment_name .replace ('experiment' , '' )
414+ if experiment_name not in possible_experiment_names :
415+ possible_experiment_names .append (experiment_name )
406416 if experiment_names is not None and experiment_name not in experiment_names :
407417 continue
408- if experiment_name not in [ e [ 'name' ] for e in folder_structure [node_name ]['experiments' ] ]:
418+ if experiment_id not in folder_structure [node_name ]['experiments' ]:
409419 experiment = {}
410420 experiment ['name' ] = experiment_name
411421 if experiment_name == 'experiment1' :
412422 settings_file = node_folder / "settings.xml"
413423 else :
414- settings_file = node_folder / f"settings_{ experiment_folder . stem . replace ( 'experiment' , '' ) } .xml"
424+ settings_file = node_folder / f"settings_{ experiment_id } .xml"
415425 experiment ['settings_file' ] = settings_file
416- experiment ['recordings' ] = []
417- folder_structure [node_name ]['experiments' ]. append ( experiment )
426+ experiment ['recordings' ] = {}
427+ folder_structure [node_name ]['experiments' ][ experiment_id ] = experiment
418428
419429 recording_folder = root
420430 recording_name = root .stem
421- if recording_name not in [r ['name' ] for r in folder_structure [node_name ]['experiments' ][- 1 ]['recordings' ]]:
422- recording = {}
423- recording ['name' ] = recording_name
424- recording ['streams' ] = {}
425-
426- # metadata
427- with open (recording_folder / 'structure.oebin' , encoding = 'utf8' , mode = 'r' ) as f :
428- rec_structure = json .load (f )
429-
430- if (recording_folder / 'continuous' ).exists () and len (rec_structure ['continuous' ]) > 0 :
431- recording ['streams' ]['continuous' ] = {}
432- for d in rec_structure ['continuous' ]:
433- # when multi Record Node the stream name also contains
434- # the node name to make it unique
435- oe_stream_name = Path (d ["folder_name" ]).name # remove trailing slash
436- stream_name = node_name + '#' + oe_stream_name
437- raw_filename = recording_folder / 'continuous' / d ['folder_name' ] / 'continuous.dat'
438-
439- # Updates for OpenEphys v0.6:
440- # In new vesion (>=0.6) timestamps.npy is now called sample_numbers.npy
441- # see https://open-ephys.github.io/gui-docs/User-Manual/Recording-data/Binary-format.html#continuous
442- if (recording_folder / 'continuous' / d ['folder_name' ] / 'sample_numbers.npy' ).is_file ():
443- timestamp_file = recording_folder / 'continuous' / d ['folder_name' ] / \
444- 'sample_numbers.npy'
445- else :
446- timestamp_file = recording_folder / 'continuous' / d ['folder_name' ] / 'timestamps.npy'
447- timestamps = np .load (str (timestamp_file ), mmap_mode = 'r' )
448- timestamp0 = timestamps [0 ]
449- t_start = timestamp0 / d ['sample_rate' ]
450-
451- # TODO for later : gap checking
452- signal_stream = d .copy ()
453- signal_stream ['raw_filename' ] = str (raw_filename )
454- signal_stream ['dtype' ] = 'int16'
455- signal_stream ['timestamp0' ] = timestamp0
456- signal_stream ['t_start' ] = t_start
457-
458- recording ['streams' ]['continuous' ][stream_name ] = signal_stream
459-
460- if (root / 'events' ).exists () and len (rec_structure ['events' ]) > 0 :
461- recording ['streams' ]['events' ] = {}
462- for d in rec_structure ['events' ]:
463- oe_stream_name = Path (d ["folder_name" ]).name # remove trailing slash
431+ recording_id = recording_name .replace ('recording' , '' )
432+ # add recording
433+ recording = {}
434+ recording ['name' ] = recording_name
435+ recording ['streams' ] = {}
436+
437+ # metadata
438+ with open (recording_folder / 'structure.oebin' , encoding = 'utf8' , mode = 'r' ) as f :
439+ rec_structure = json .load (f )
440+
441+ if (recording_folder / 'continuous' ).exists () and len (rec_structure ['continuous' ]) > 0 :
442+ recording ['streams' ]['continuous' ] = {}
443+ for d in rec_structure ['continuous' ]:
444+ # when multi Record Node the stream name also contains
445+ # the node name to make it unique
446+ oe_stream_name = Path (d ["folder_name" ]).name # remove trailing slash
447+ if len (node_name ) > 0 :
464448 stream_name = node_name + '#' + oe_stream_name
465-
466- event_stream = d .copy ()
467- for name in _possible_event_stream_names :
468- npy_filename = root / 'events' / d ['folder_name' ] / f'{ name } .npy'
469- if npy_filename .is_file ():
470- event_stream [f'{ name } _npy' ] = str (npy_filename )
471-
472- recording ['streams' ]['events' ][stream_name ] = event_stream
473-
474- folder_structure [node_name ]['experiments' ][- 1 ]['recordings' ].append (recording )
449+ else :
450+ stream_name = oe_stream_name
451+ raw_filename = recording_folder / 'continuous' / d ['folder_name' ] / 'continuous.dat'
452+
453+ # Updates for OpenEphys v0.6:
454+ # In the new version (>=0.6) timestamps.npy is now called sample_numbers.npy
455+ # see https://open-ephys.github.io/gui-docs/User-Manual/Recording-data/Binary-format.html#continuous
456+ sample_numbers = recording_folder / 'continuous' / d ['folder_name' ] / \
457+ 'sample_numbers.npy'
458+ if sample_numbers .is_file ():
459+ timestamp_file = sample_numbers
460+ else :
461+ timestamp_file = recording_folder / 'continuous' / d ['folder_name' ] / \
462+ 'timestamps.npy'
463+ timestamps = np .load (str (timestamp_file ), mmap_mode = 'r' )
464+ timestamp0 = timestamps [0 ]
465+ t_start = timestamp0 / d ['sample_rate' ]
466+
467+ # TODO for later : gap checking
468+ signal_stream = d .copy ()
469+ signal_stream ['raw_filename' ] = str (raw_filename )
470+ signal_stream ['dtype' ] = 'int16'
471+ signal_stream ['timestamp0' ] = timestamp0
472+ signal_stream ['t_start' ] = t_start
473+
474+ recording ['streams' ]['continuous' ][stream_name ] = signal_stream
475+
476+ if (root / 'events' ).exists () and len (rec_structure ['events' ]) > 0 :
477+ recording ['streams' ]['events' ] = {}
478+ for d in rec_structure ['events' ]:
479+ oe_stream_name = Path (d ["folder_name" ]).name # remove trailing slash
480+ stream_name = node_name + '#' + oe_stream_name
481+
482+ event_stream = d .copy ()
483+ for name in _possible_event_stream_names :
484+ npy_filename = root / 'events' / d ['folder_name' ] / f'{ name } .npy'
485+ if npy_filename .is_file ():
486+ event_stream [f'{ name } _npy' ] = str (npy_filename )
487+
488+ recording ['streams' ]['events' ][stream_name ] = event_stream
489+
490+ folder_structure [node_name ]['experiments' ][experiment_id ]['recordings' ][recording_id ] \
491+ = recording
475492
476493 # now create all_streams, nb_block, nb_segment_per_block (from first recording Node)
494+ # nested dictionary: block_index > seg_index > data_type > stream_name
495+ all_streams = {}
496+ nb_segment_per_block = {}
477497 recording_node = folder_structure [list (folder_structure .keys ())[0 ]]
478498 nb_block = len (recording_node ['experiments' ])
479- # natural sort experiment names so that block_index sequentially indicate experiments
480- experiment_names = [e ['name' ] for e in recording_node ['experiments' ]]
481- experiment_order = np .argsort ([int (ename .replace ('experiment' , '' )) for ename in experiment_names ])
482- for block_index , exp_index in enumerate (experiment_order ):
483- experiment = recording_node ['experiments' ][exp_index ]
499+
500+ exp_ids_sorted = sorted (list (recording_node ['experiments' ].keys ()))
501+ for block_index , exp_id in enumerate (exp_ids_sorted ):
502+ experiment = recording_node ['experiments' ][exp_id ]
484503 nb_segment_per_block [block_index ] = len (experiment ['recordings' ])
485504 all_streams [block_index ] = {}
486- # natural sort recording names so that seg_index sequentially indicate recordings
487- recording_names = [r ['name' ] for r in experiment ['recordings' ]]
488- recording_order = np .argsort ([int (rname .replace ('recording' , '' )) for rname in recording_names ])
489- for seg_index , rec_index in enumerate (recording_order ):
490- recording = experiment ['recordings' ][rec_index ]
505+
506+ rec_ids_sorted = sorted (list (experiment ['recordings' ].keys ()))
507+ for seg_index , rec_id in enumerate (rec_ids_sorted ):
508+ recording = experiment ['recordings' ][rec_id ]
491509 all_streams [block_index ][seg_index ] = {}
492510 for stream_type in recording ['streams' ]:
493511 all_streams [block_index ][seg_index ][stream_type ] = {}
494512 for stream_name , signal_stream in recording ['streams' ][stream_type ].items ():
495513 all_streams [block_index ][seg_index ][stream_type ][stream_name ] = signal_stream
514+ # natural sort possible experiment names
515+ experiment_order = np .argsort ([int (exp .replace ('experiment' , '' ))
516+ for exp in possible_experiment_names ])
517+ possible_experiment_names = list (np .array (possible_experiment_names )[experiment_order ])
496518
497519 return folder_structure , all_streams , nb_block , nb_segment_per_block , possible_experiment_names
498520
499521
500522def check_folder_consistency (folder_structure , possible_experiment_names = None ):
501- # experiments across nodes
523+ # check that experiment names are the same for different record nodes
502524 if len (folder_structure ) > 1 :
503525 experiments = None
504526 for node in folder_structure .values ():
505527 experiments_node = node ['experiments' ]
506528 if experiments is None :
507529 experiments = experiments_node
508- experiment_names = [e ['name' ] for e in experiments ]
509- assert all (ename ['name' ] in experiment_names for ename in experiments_node ), \
530+ experiment_names = [e ['name' ] for e_id , e in experiments . items () ]
531+ assert all (ename ['name' ] in experiment_names for ename in experiments_node . values () ), \
510532 ("Inconsistent experiments across recording nodes!" )
511533
512- # "continuous" streams across segments
534+ # check that "continuous" streams are the same across multiple segments (recordings)
513535 experiments = folder_structure [list (folder_structure .keys ())[0 ]]['experiments' ]
514- for experiment in experiments :
536+ for exp_id , experiment in experiments . items () :
515537 segment_stream_names = None
516538 if len (experiment ['recordings' ]) > 1 :
517- for recording in experiment ['recordings' ]:
539+ for rec_id , recording in experiment ['recordings' ]. items () :
518540 stream_names = sorted (list (recording ['streams' ]['continuous' ].keys ()))
519541 if segment_stream_names is None :
520542 segment_stream_names = stream_names
@@ -523,12 +545,13 @@ def check_folder_consistency(folder_structure, possible_experiment_names=None):
523545 "segments in the same experiment must be the same. Check your open ephys "
524546 "folder." )
525547
526- # "continuous" streams across blocks
548+ # check that "continuous" streams are the same across blocks (experiments)
527549 block_stream_names = None
528550 if len (experiments ) > 1 :
529- for experiment in experiments :
551+ for exp_id , experiment in experiments . items () :
530552 # use 1st segment
531- stream_names = list (experiment ['recordings' ][0 ]['streams' ]['continuous' ].keys ())
553+ rec_ids = list (experiment ['recordings' ])
554+ stream_names = list (experiment ['recordings' ][rec_ids [0 ]]['streams' ]['continuous' ].keys ())
532555 stream_names = sorted (stream_names )
533556 if block_stream_names is None :
534557 block_stream_names = stream_names
0 commit comments