Skip to content

Commit 379f035

Browse files
committed
Suggested changes: use dicts instead of lists
1 parent f7ef91f commit 379f035

File tree

1 file changed

+118
-95
lines changed

1 file changed

+118
-95
lines changed

neo/rawio/openephysbinaryrawio.py

Lines changed: 118 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -332,33 +332,41 @@ def _rescale_epoch_duration(self, raw_duration, dtype):
332332

333333
def explore_folder(dirname, experiment_names=None):
334334
"""
335-
Exploring the OpenEphys folder structure and structure.oebin
336-
337-
Returns nested dictionary structure:
338-
[block_index][seg_index][stream_type][stream_information]
339-
where
340-
- node_name is the open ephys node id
341-
- block_index is the neo Block index
342-
- segment_index is the neo Segment index
343-
- stream_type can be 'continuous'/'events'/'spikes'
344-
- stream_information is a dictionary containing e.g. the sampling rate
335+
Exploring the OpenEphys folder structure, by looping through the
336+
folder to find recordings.
345337
346338
Parameters
347339
----------
348340
dirname (str): Root folder of the dataset
349341
350342
Returns
351343
-------
352-
nested dictionaries containing structure and stream information
344+
folder_structure: dict
345+
The folder_structure is a dictionary that describes the Open Ephys folder.
346+
Dictionary structure:
347+
[node_name]["experiments"][exp_id]["recordings"][rec_id][stream_type][stream_information]
348+
all_streams: dict
349+
From the folder_structure, another dictionary is reorganized with NEO-like
350+
indexing: block_index (experiments) and seg_index (recordings):
351+
Dictionary structure:
352+
[block_index][seg_index][stream_type][stream_information]
353+
where
354+
- node_name is the open ephys node id
355+
- block_index is the neo Block index
356+
- segment_index is the neo Segment index
357+
- stream_type can be 'continuous'/'events'/'spikes'
358+
- stream_information is a dictionary containing e.g. the sampling rate
359+
nb_block : int
360+
Number of blocks (experiments) loaded
361+
nb_segment_per_block : dict
362+
Dictionary with the number of segments per block.
363+
Keys are block indices, values are number of segments
364+
possible_experiment_names : list
365+
List of all available experiments in the Open Ephys folder
353366
"""
354-
nb_block = 0
355-
nb_segment_per_block = {}
356-
# nested dictionary: block_index > seg_index > data_type > stream_name
357-
all_streams = {}
358-
possible_experiment_names = []
359-
360367
# folder with nodes, experiments, setting files, recordings, and streams
361368
folder_structure = {}
369+
possible_experiment_names = []
362370

363371
for root, dirs, files in os.walk(dirname):
364372
for file in files:
@@ -375,124 +383,138 @@ def explore_folder(dirname, experiment_names=None):
375383

376384
if node_name not in folder_structure:
377385
folder_structure[node_name] = {}
378-
folder_structure[node_name]['experiments'] = []
386+
folder_structure[node_name]['experiments'] = {}
379387

380388
# here we skip if experiment_names is not None
381389
experiment_folder = root.parents[0]
382390
experiment_name = experiment_folder.stem
383-
possible_experiment_names.append(experiment_name)
391+
experiment_id = experiment_name.replace('experiment', '')
392+
if experiment_name not in possible_experiment_names:
393+
possible_experiment_names.append(experiment_name)
384394
if experiment_names is not None and experiment_name not in experiment_names:
385395
continue
386-
if experiment_name not in [e['name'] for e in folder_structure[node_name]['experiments']]:
396+
if experiment_id not in folder_structure[node_name]['experiments']:
387397
experiment = {}
388398
experiment['name'] = experiment_name
389399
if experiment_name == 'experiment1':
390400
settings_file = node_folder / "settings.xml"
391401
else:
392-
settings_file = node_folder / f"settings_{experiment_folder.stem.replace('experiment', '')}.xml"
402+
settings_file = node_folder / f"settings_{experiment_id}.xml"
393403
experiment['settings_file'] = settings_file
394-
experiment['recordings'] = []
395-
folder_structure[node_name]['experiments'].append(experiment)
404+
experiment['recordings'] = {}
405+
folder_structure[node_name]['experiments'][experiment_id] = experiment
396406

397407
recording_folder = root
398408
recording_name = root.stem
399-
if recording_name not in [r['name'] for r in folder_structure[node_name]['experiments'][-1]['recordings']]:
400-
recording = {}
401-
recording['name'] = recording_name
402-
recording['streams'] = {}
403-
404-
# metadata
405-
with open(recording_folder / 'structure.oebin', encoding='utf8', mode='r') as f:
406-
rec_structure = json.load(f)
407-
408-
if (recording_folder / 'continuous').exists() and len(rec_structure['continuous']) > 0:
409-
recording['streams']['continuous'] = {}
410-
for d in rec_structure['continuous']:
411-
# when multi Record Node the stream name also contains
412-
# the node name to make it unique
413-
oe_stream_name = Path(d["folder_name"]).name # remove trailing slash
414-
stream_name = node_name + '#' + oe_stream_name
415-
raw_filename = recording_folder / 'continuous' / d['folder_name'] / 'continuous.dat'
416-
417-
# Updates for OpenEphys v0.6:
418-
# In new vesion (>=0.6) timestamps.npy is now called sample_numbers.npy
419-
# see https://open-ephys.github.io/gui-docs/User-Manual/Recording-data/Binary-format.html#continuous
420-
if (recording_folder / 'continuous' / d['folder_name'] / 'sample_numbers.npy').is_file():
421-
timestamp_file = recording_folder / 'continuous' / d['folder_name'] / \
422-
'sample_numbers.npy'
423-
else:
424-
timestamp_file = recording_folder / 'continuous' / d['folder_name'] / 'timestamps.npy'
425-
timestamps = np.load(str(timestamp_file), mmap_mode='r')
426-
timestamp0 = timestamps[0]
427-
t_start = timestamp0 / d['sample_rate']
428-
429-
# TODO for later : gap checking
430-
signal_stream = d.copy()
431-
signal_stream['raw_filename'] = str(raw_filename)
432-
signal_stream['dtype'] = 'int16'
433-
signal_stream['timestamp0'] = timestamp0
434-
signal_stream['t_start'] = t_start
435-
436-
recording['streams']['continuous'][stream_name] = signal_stream
437-
438-
if (root / 'events').exists() and len(rec_structure['events']) > 0:
439-
recording['streams']['events'] = {}
440-
for d in rec_structure['events']:
441-
oe_stream_name = Path(d["folder_name"]).name # remove trailing slash
409+
recording_id = recording_name.replace('recording', '')
410+
# add recording
411+
recording = {}
412+
recording['name'] = recording_name
413+
recording['streams'] = {}
414+
415+
# metadata
416+
with open(recording_folder / 'structure.oebin', encoding='utf8', mode='r') as f:
417+
rec_structure = json.load(f)
418+
419+
if (recording_folder / 'continuous').exists() and len(rec_structure['continuous']) > 0:
420+
recording['streams']['continuous'] = {}
421+
for d in rec_structure['continuous']:
422+
# when multi Record Node the stream name also contains
423+
# the node name to make it unique
424+
oe_stream_name = Path(d["folder_name"]).name # remove trailing slash
425+
if len(node_name) > 0:
442426
stream_name = node_name + '#' + oe_stream_name
443-
444-
event_stream = d.copy()
445-
for name in _possible_event_stream_names:
446-
npy_filename = root / 'events' / d['folder_name'] / f'{name}.npy'
447-
if npy_filename.is_file():
448-
event_stream[f'{name}_npy'] = str(npy_filename)
449-
450-
recording['streams']['events'][stream_name] = event_stream
451-
452-
folder_structure[node_name]['experiments'][-1]['recordings'].append(recording)
427+
else:
428+
stream_name = oe_stream_name
429+
raw_filename = recording_folder / 'continuous' / d['folder_name'] / 'continuous.dat'
430+
431+
# Updates for OpenEphys v0.6:
432+
# In the new version (>=0.6) timestamps.npy is now called sample_numbers.npy
433+
# see https://open-ephys.github.io/gui-docs/User-Manual/Recording-data/Binary-format.html#continuous
434+
sample_numbers = recording_folder / 'continuous' / d['folder_name'] / \
435+
'sample_numbers.npy'
436+
if sample_numbers.is_file():
437+
timestamp_file = sample_numbers
438+
else:
439+
timestamp_file = recording_folder / 'continuous' / d['folder_name'] / \
440+
'timestamps.npy'
441+
timestamps = np.load(str(timestamp_file), mmap_mode='r')
442+
timestamp0 = timestamps[0]
443+
t_start = timestamp0 / d['sample_rate']
444+
445+
# TODO for later : gap checking
446+
signal_stream = d.copy()
447+
signal_stream['raw_filename'] = str(raw_filename)
448+
signal_stream['dtype'] = 'int16'
449+
signal_stream['timestamp0'] = timestamp0
450+
signal_stream['t_start'] = t_start
451+
452+
recording['streams']['continuous'][stream_name] = signal_stream
453+
454+
if (root / 'events').exists() and len(rec_structure['events']) > 0:
455+
recording['streams']['events'] = {}
456+
for d in rec_structure['events']:
457+
oe_stream_name = Path(d["folder_name"]).name # remove trailing slash
458+
stream_name = node_name + '#' + oe_stream_name
459+
460+
event_stream = d.copy()
461+
for name in _possible_event_stream_names:
462+
npy_filename = root / 'events' / d['folder_name'] / f'{name}.npy'
463+
if npy_filename.is_file():
464+
event_stream[f'{name}_npy'] = str(npy_filename)
465+
466+
recording['streams']['events'][stream_name] = event_stream
467+
468+
folder_structure[node_name]['experiments'][experiment_id]['recordings'][recording_id] \
469+
= recording
453470

454471
# now create all_streams, nb_block, nb_segment_per_block (from first recording Node)
472+
# nested dictionary: block_index > seg_index > data_type > stream_name
473+
all_streams = {}
474+
nb_segment_per_block = {}
455475
recording_node = folder_structure[list(folder_structure.keys())[0]]
456476
nb_block = len(recording_node['experiments'])
457-
# natural sort experiment names so that block_index sequentially indicate experiments
458-
experiment_names = [e['name'] for e in recording_node['experiments']]
459-
experiment_order = np.argsort([int(ename.replace('experiment', '')) for ename in experiment_names])
460-
for block_index, exp_index in enumerate(experiment_order):
461-
experiment = recording_node['experiments'][exp_index]
477+
478+
exp_ids_sorted = sorted(list(recording_node['experiments'].keys()))
479+
for block_index, exp_id in enumerate(exp_ids_sorted):
480+
experiment = recording_node['experiments'][exp_id]
462481
nb_segment_per_block[block_index] = len(experiment['recordings'])
463482
all_streams[block_index] = {}
464-
# natural sort recording names so that seg_index sequentially indicate recordings
465-
recording_names = [r['name'] for r in experiment['recordings']]
466-
recording_order = np.argsort([int(rname.replace('recording', '')) for rname in recording_names])
467-
for seg_index, rec_index in enumerate(recording_order):
468-
recording = experiment['recordings'][rec_index]
483+
484+
rec_ids_sorted = sorted(list(experiment['recordings'].keys()))
485+
for seg_index, rec_id in enumerate(rec_ids_sorted):
486+
recording = experiment['recordings'][rec_id]
469487
all_streams[block_index][seg_index] = {}
470488
for stream_type in recording['streams']:
471489
all_streams[block_index][seg_index][stream_type] = {}
472490
for stream_name, signal_stream in recording['streams'][stream_type].items():
473491
all_streams[block_index][seg_index][stream_type][stream_name] = signal_stream
492+
# natural sort possible experiment names
493+
experiment_order = np.argsort([int(exp.replace('experiment', ''))
494+
for exp in possible_experiment_names])
495+
possible_experiment_names = list(np.array(possible_experiment_names)[experiment_order])
474496

475497
return folder_structure, all_streams, nb_block, nb_segment_per_block, possible_experiment_names
476498

477499

478500
def check_folder_consistency(folder_structure, possible_experiment_names=None):
479-
# experiments across nodes
501+
# check that experiment names are the same for different record nodes
480502
if len(folder_structure) > 1:
481503
experiments = None
482504
for node in folder_structure.values():
483505
experiments_node = node['experiments']
484506
if experiments is None:
485507
experiments = experiments_node
486-
experiment_names = [e['name'] for e in experiments]
487-
assert all(ename['name'] in experiment_names for ename in experiments_node), \
508+
experiment_names = [e['name'] for e_id, e in experiments.items()]
509+
assert all(ename['name'] in experiment_names for ename in experiments_node.values()), \
488510
("Inconsistent experiments across recording nodes!")
489511

490-
# "continuous" streams across segments
512+
# check that "continuous" streams are the same across multiple segments (recordings)
491513
experiments = folder_structure[list(folder_structure.keys())[0]]['experiments']
492-
for experiment in experiments:
514+
for exp_id, experiment in experiments.items():
493515
segment_stream_names = None
494516
if len(experiment['recordings']) > 1:
495-
for recording in experiment['recordings']:
517+
for rec_id, recording in experiment['recordings'].items():
496518
stream_names = sorted(list(recording['streams']['continuous'].keys()))
497519
if segment_stream_names is None:
498520
segment_stream_names = stream_names
@@ -501,12 +523,13 @@ def check_folder_consistency(folder_structure, possible_experiment_names=None):
501523
"segments in the same experiment must be the same. Check your open ephys "
502524
"folder.")
503525

504-
# "continuous" streams across blocks
526+
# check that "continuous" streams are consistent across blocks (experiments)
505527
block_stream_names = None
506528
if len(experiments) > 1:
507-
for experiment in experiments:
529+
for exp_id, experiment in experiments.items():
508530
# use 1st segment
509-
stream_names = list(experiment['recordings'][0]['streams']['continuous'].keys())
531+
rec_ids = list(experiment['recordings'])
532+
stream_names = list(experiment['recordings'][rec_ids[0]]['streams']['continuous'].keys())
510533
stream_names = sorted(stream_names)
511534
if block_stream_names is None:
512535
block_stream_names = stream_names

0 commit comments

Comments
 (0)