@@ -332,33 +332,41 @@ def _rescale_epoch_duration(self, raw_duration, dtype):
332332
333333def explore_folder (dirname , experiment_names = None ):
334334 """
335- Exploring the OpenEphys folder structure and structure.oebin
336-
337- Returns nested dictionary structure:
338- [block_index][seg_index][stream_type][stream_information]
339- where
340- - node_name is the open ephys node id
341- - block_index is the neo Block index
342- - segment_index is the neo Segment index
343- - stream_type can be 'continuous'/'events'/'spikes'
344- - stream_information is a dictionary containing e.g. the sampling rate
335+ Exploring the OpenEphys folder structure, by looping through the
336+ folder to find recordings.
345337
346338 Parameters
347339 ----------
348340 dirname (str): Root folder of the dataset
349341
350342 Returns
351343 -------
352- nested dictionaries containing structure and stream information
344+ folder_structure: dict
345+ The folder_structure is a dictionary that describes the Open Ephys folder.
346+ Dictionary structure:
347+ [node_name]["experiments"][exp_id]["recordings"][rec_id][stream_type][stream_information]
348+ all_streams: dict
349+ From the folder_structure, a second dictionary is built with NEO-like
350+ indexing: block_index (experiments) and seg_index (recordings):
351+ Dictionary structure:
352+ [block_index][seg_index][stream_type][stream_information]
353+ where
354+ - node_name is the open ephys node id
355+ - block_index is the neo Block index
356+ - seg_index is the neo Segment index
357+ - stream_type can be 'continuous'/'events'/'spikes'
358+ - stream_information is a dictionary containing e.g. the sampling rate
359+ nb_block : int
360+ Number of blocks (experiments) loaded
361+ nb_segment_per_block : dict
362+ Dictionary with the number of segments per block.
363+ Keys are block indices, values are number of segments
364+ possible_experiment_names : list
365+ List of all available experiments in the Open Ephys folder
353366 """
354- nb_block = 0
355- nb_segment_per_block = {}
356- # nested dictionary: block_index > seg_index > data_type > stream_name
357- all_streams = {}
358- possible_experiment_names = []
359-
360367 # folder with nodes, experiments, setting files, recordings, and streams
361368 folder_structure = {}
369+ possible_experiment_names = []
362370
363371 for root , dirs , files in os .walk (dirname ):
364372 for file in files :
@@ -375,124 +383,138 @@ def explore_folder(dirname, experiment_names=None):
375383
376384 if node_name not in folder_structure :
377385 folder_structure [node_name ] = {}
378- folder_structure [node_name ]['experiments' ] = []
386+ folder_structure [node_name ]['experiments' ] = {}
379387
380388 # here we skip if experiment_names is not None
381389 experiment_folder = root .parents [0 ]
382390 experiment_name = experiment_folder .stem
383- possible_experiment_names .append (experiment_name )
391+ experiment_id = experiment_name .replace ('experiment' , '' )
392+ if experiment_name not in possible_experiment_names :
393+ possible_experiment_names .append (experiment_name )
384394 if experiment_names is not None and experiment_name not in experiment_names :
385395 continue
386- if experiment_name not in [ e [ 'name' ] for e in folder_structure [node_name ]['experiments' ] ]:
396+ if experiment_id not in folder_structure [node_name ]['experiments' ]:
387397 experiment = {}
388398 experiment ['name' ] = experiment_name
389399 if experiment_name == 'experiment1' :
390400 settings_file = node_folder / "settings.xml"
391401 else :
392- settings_file = node_folder / f"settings_{ experiment_folder . stem . replace ( 'experiment' , '' ) } .xml"
402+ settings_file = node_folder / f"settings_{ experiment_id } .xml"
393403 experiment ['settings_file' ] = settings_file
394- experiment ['recordings' ] = []
395- folder_structure [node_name ]['experiments' ]. append ( experiment )
404+ experiment ['recordings' ] = {}
405+ folder_structure [node_name ]['experiments' ][ experiment_id ] = experiment
396406
397407 recording_folder = root
398408 recording_name = root .stem
399- if recording_name not in [r ['name' ] for r in folder_structure [node_name ]['experiments' ][- 1 ]['recordings' ]]:
400- recording = {}
401- recording ['name' ] = recording_name
402- recording ['streams' ] = {}
403-
404- # metadata
405- with open (recording_folder / 'structure.oebin' , encoding = 'utf8' , mode = 'r' ) as f :
406- rec_structure = json .load (f )
407-
408- if (recording_folder / 'continuous' ).exists () and len (rec_structure ['continuous' ]) > 0 :
409- recording ['streams' ]['continuous' ] = {}
410- for d in rec_structure ['continuous' ]:
411- # when multi Record Node the stream name also contains
412- # the node name to make it unique
413- oe_stream_name = Path (d ["folder_name" ]).name # remove trailing slash
414- stream_name = node_name + '#' + oe_stream_name
415- raw_filename = recording_folder / 'continuous' / d ['folder_name' ] / 'continuous.dat'
416-
417- # Updates for OpenEphys v0.6:
418- # In new vesion (>=0.6) timestamps.npy is now called sample_numbers.npy
419- # see https://open-ephys.github.io/gui-docs/User-Manual/Recording-data/Binary-format.html#continuous
420- if (recording_folder / 'continuous' / d ['folder_name' ] / 'sample_numbers.npy' ).is_file ():
421- timestamp_file = recording_folder / 'continuous' / d ['folder_name' ] / \
422- 'sample_numbers.npy'
423- else :
424- timestamp_file = recording_folder / 'continuous' / d ['folder_name' ] / 'timestamps.npy'
425- timestamps = np .load (str (timestamp_file ), mmap_mode = 'r' )
426- timestamp0 = timestamps [0 ]
427- t_start = timestamp0 / d ['sample_rate' ]
428-
429- # TODO for later : gap checking
430- signal_stream = d .copy ()
431- signal_stream ['raw_filename' ] = str (raw_filename )
432- signal_stream ['dtype' ] = 'int16'
433- signal_stream ['timestamp0' ] = timestamp0
434- signal_stream ['t_start' ] = t_start
435-
436- recording ['streams' ]['continuous' ][stream_name ] = signal_stream
437-
438- if (root / 'events' ).exists () and len (rec_structure ['events' ]) > 0 :
439- recording ['streams' ]['events' ] = {}
440- for d in rec_structure ['events' ]:
441- oe_stream_name = Path (d ["folder_name" ]).name # remove trailing slash
409+ recording_id = recording_name .replace ('recording' , '' )
410+ # add recording
411+ recording = {}
412+ recording ['name' ] = recording_name
413+ recording ['streams' ] = {}
414+
415+ # metadata
416+ with open (recording_folder / 'structure.oebin' , encoding = 'utf8' , mode = 'r' ) as f :
417+ rec_structure = json .load (f )
418+
419+ if (recording_folder / 'continuous' ).exists () and len (rec_structure ['continuous' ]) > 0 :
420+ recording ['streams' ]['continuous' ] = {}
421+ for d in rec_structure ['continuous' ]:
422+ # when multi Record Node the stream name also contains
423+ # the node name to make it unique
424+ oe_stream_name = Path (d ["folder_name" ]).name # remove trailing slash
425+ if len (node_name ) > 0 :
442426 stream_name = node_name + '#' + oe_stream_name
443-
444- event_stream = d .copy ()
445- for name in _possible_event_stream_names :
446- npy_filename = root / 'events' / d ['folder_name' ] / f'{ name } .npy'
447- if npy_filename .is_file ():
448- event_stream [f'{ name } _npy' ] = str (npy_filename )
449-
450- recording ['streams' ]['events' ][stream_name ] = event_stream
451-
452- folder_structure [node_name ]['experiments' ][- 1 ]['recordings' ].append (recording )
427+ else :
428+ stream_name = oe_stream_name
429+ raw_filename = recording_folder / 'continuous' / d ['folder_name' ] / 'continuous.dat'
430+
431+ # Updates for OpenEphys v0.6:
432+ # In the new version (>=0.6), timestamps.npy is called sample_numbers.npy
433+ # see https://open-ephys.github.io/gui-docs/User-Manual/Recording-data/Binary-format.html#continuous
434+ sample_numbers = recording_folder / 'continuous' / d ['folder_name' ] / \
435+ 'sample_numbers.npy'
436+ if sample_numbers .is_file ():
437+ timestamp_file = sample_numbers
438+ else :
439+ timestamp_file = recording_folder / 'continuous' / d ['folder_name' ] / \
440+ 'timestamps.npy'
441+ timestamps = np .load (str (timestamp_file ), mmap_mode = 'r' )
442+ timestamp0 = timestamps [0 ]
443+ t_start = timestamp0 / d ['sample_rate' ]
444+
445+ # TODO for later : gap checking
446+ signal_stream = d .copy ()
447+ signal_stream ['raw_filename' ] = str (raw_filename )
448+ signal_stream ['dtype' ] = 'int16'
449+ signal_stream ['timestamp0' ] = timestamp0
450+ signal_stream ['t_start' ] = t_start
451+
452+ recording ['streams' ]['continuous' ][stream_name ] = signal_stream
453+
454+ if (root / 'events' ).exists () and len (rec_structure ['events' ]) > 0 :
455+ recording ['streams' ]['events' ] = {}
456+ for d in rec_structure ['events' ]:
457+ oe_stream_name = Path (d ["folder_name" ]).name # remove trailing slash
458+ stream_name = node_name + '#' + oe_stream_name
459+
460+ event_stream = d .copy ()
461+ for name in _possible_event_stream_names :
462+ npy_filename = root / 'events' / d ['folder_name' ] / f'{ name } .npy'
463+ if npy_filename .is_file ():
464+ event_stream [f'{ name } _npy' ] = str (npy_filename )
465+
466+ recording ['streams' ]['events' ][stream_name ] = event_stream
467+
468+ folder_structure [node_name ]['experiments' ][experiment_id ]['recordings' ][recording_id ] \
469+ = recording
453470
454471 # now create all_streams, nb_block, nb_segment_per_block (from first recording Node)
472+ # nested dictionary: block_index > seg_index > data_type > stream_name
473+ all_streams = {}
474+ nb_segment_per_block = {}
455475 recording_node = folder_structure [list (folder_structure .keys ())[0 ]]
456476 nb_block = len (recording_node ['experiments' ])
457- # natural sort experiment names so that block_index sequentially indicate experiments
458- experiment_names = [e ['name' ] for e in recording_node ['experiments' ]]
459- experiment_order = np .argsort ([int (ename .replace ('experiment' , '' )) for ename in experiment_names ])
460- for block_index , exp_index in enumerate (experiment_order ):
461- experiment = recording_node ['experiments' ][exp_index ]
477+
478+ exp_ids_sorted = sorted (list (recording_node ['experiments' ].keys ()))
479+ for block_index , exp_id in enumerate (exp_ids_sorted ):
480+ experiment = recording_node ['experiments' ][exp_id ]
462481 nb_segment_per_block [block_index ] = len (experiment ['recordings' ])
463482 all_streams [block_index ] = {}
464- # natural sort recording names so that seg_index sequentially indicate recordings
465- recording_names = [r ['name' ] for r in experiment ['recordings' ]]
466- recording_order = np .argsort ([int (rname .replace ('recording' , '' )) for rname in recording_names ])
467- for seg_index , rec_index in enumerate (recording_order ):
468- recording = experiment ['recordings' ][rec_index ]
483+
484+ rec_ids_sorted = sorted (list (experiment ['recordings' ].keys ()))
485+ for seg_index , rec_id in enumerate (rec_ids_sorted ):
486+ recording = experiment ['recordings' ][rec_id ]
469487 all_streams [block_index ][seg_index ] = {}
470488 for stream_type in recording ['streams' ]:
471489 all_streams [block_index ][seg_index ][stream_type ] = {}
472490 for stream_name , signal_stream in recording ['streams' ][stream_type ].items ():
473491 all_streams [block_index ][seg_index ][stream_type ][stream_name ] = signal_stream
492+ # natural sort possible experiment names
493+ experiment_order = np .argsort ([int (exp .replace ('experiment' , '' ))
494+ for exp in possible_experiment_names ])
495+ possible_experiment_names = list (np .array (possible_experiment_names )[experiment_order ])
474496
475497 return folder_structure , all_streams , nb_block , nb_segment_per_block , possible_experiment_names
476498
477499
478500def check_folder_consistency (folder_structure , possible_experiment_names = None ):
479- # experiments across nodes
501+ # check that experiment names are the same for different record nodes
480502 if len (folder_structure ) > 1 :
481503 experiments = None
482504 for node in folder_structure .values ():
483505 experiments_node = node ['experiments' ]
484506 if experiments is None :
485507 experiments = experiments_node
486- experiment_names = [e ['name' ] for e in experiments ]
487- assert all (ename ['name' ] in experiment_names for ename in experiments_node ), \
508+ experiment_names = [e ['name' ] for e_id , e in experiments . items () ]
509+ assert all (ename ['name' ] in experiment_names for ename in experiments_node . values () ), \
488510 ("Inconsistent experiments across recording nodes!" )
489511
490- # "continuous" streams across segments
512+ # check that "continuous" streams are the same across multiple segments (recordings)
491513 experiments = folder_structure [list (folder_structure .keys ())[0 ]]['experiments' ]
492- for experiment in experiments :
514+ for exp_id , experiment in experiments . items () :
493515 segment_stream_names = None
494516 if len (experiment ['recordings' ]) > 1 :
495- for recording in experiment ['recordings' ]:
517+ for rec_id , recording in experiment ['recordings' ]. items () :
496518 stream_names = sorted (list (recording ['streams' ]['continuous' ].keys ()))
497519 if segment_stream_names is None :
498520 segment_stream_names = stream_names
@@ -501,12 +523,13 @@ def check_folder_consistency(folder_structure, possible_experiment_names=None):
501523 "segments in the same experiment must be the same. Check your open ephys "
502524 "folder." )
503525
504- # "continuous" streams across blocks
526+ # check that "continuous" streams are the same across blocks (experiments)
505527 block_stream_names = None
506528 if len (experiments ) > 1 :
507- for experiment in experiments :
529+ for exp_id , experiment in experiments . items () :
508530 # use 1st segment
509- stream_names = list (experiment ['recordings' ][0 ]['streams' ]['continuous' ].keys ())
531+ rec_ids = list (experiment ['recordings' ])
532+ stream_names = list (experiment ['recordings' ][rec_ids [0 ]]['streams' ]['continuous' ].keys ())
510533 stream_names = sorted (stream_names )
511534 if block_stream_names is None :
512535 block_stream_names = stream_names
0 commit comments