 Author: Samuel Garcia
 """
 
-import os
 import re
+from pathlib import Path
 
 import numpy as np
 
@@ -73,8 +73,8 @@ class OpenEphysRawIO(BaseRawIO):
     * Works only if all continuous channels have the same sampling rate, which is a reasonable
       hypothesis.
     * A recording can contain gaps due to USB stream loss when high CPU load when recording.
-      Theses gaps are checked channel per channel which make the parse_header() slow.
-      If gaps are detected then they are filled with zeros but the the reading will be much slower for getting signals.
+      These gaps are checked channel per channel which makes the parse_header() slow.
+      If gaps are detected then they are filled with zeros but then the reading will be much slower for getting signals.
 
     """
 
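Side note for reviewers: the gap check described in the docstring boils down to comparing consecutive record timestamps on each memmapped channel. A minimal sketch, assuming the standard OpenEphys layout of 1024 samples per `.continuous` record; the `has_gaps` helper is hypothetical, not part of this PR:

```python
import numpy as np

def has_gaps(data_chan, samples_per_record=1024):
    """Detect discontinuities in one memmapped channel (hypothetical helper).

    Consecutive records should be exactly one record length apart in
    sample counts; any other difference indicates a dropped block.
    """
    diffs = np.diff(data_chan["timestamp"])
    return bool(np.any(diffs != samples_per_record))
```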
@@ -112,18 +112,24 @@ def _parse_header(self):
             all_last_timestamps = []
             all_samplerate = []
             for chan_index, continuous_filename in enumerate(info["continuous"][oe_index]):
-                fullname = os.path.join(self.dirname, continuous_filename)
-                chan_info = read_file_header(fullname)
+                chan_info = read_file_header(continuous_filename)
 
-                s = continuous_filename.replace(".continuous", "").split("_")
-                processor_id, ch_name = s[0], s[1]
-                chan_str = re.split(r"(\d+)", s[1])[0]
+                s = continuous_filename.stem.split("_")
+                # Formats are ['processor_id', 'ch_name'] or ['processor_id', 'name', 'ch_name']
+                if len(s) == 2:
+                    processor_id, ch_name = s[0], s[1]
+                    chan_str = re.split(r"(\d+)", s[1])[0]
+                else:
+                    processor_id, ch_name = s[0], s[2]
+                    chan_str = re.split(r"(\d+)", s[2])[0]
                 # note that chan_id is not unique in case of CH + AUX
                 chan_id = int(ch_name.replace(chan_str, ""))
 
-                filesize = os.stat(fullname).st_size
+                filesize = continuous_filename.stat().st_size
                 size = (filesize - HEADER_SIZE) // np.dtype(continuous_dtype).itemsize
-                data_chan = np.memmap(fullname, mode="r", offset=HEADER_SIZE, dtype=continuous_dtype, shape=(size,))
+                data_chan = np.memmap(
+                    continuous_filename, mode="r", offset=HEADER_SIZE, dtype=continuous_dtype, shape=(size,)
+                )
                 self._sigs_memmap[seg_index][chan_index] = data_chan
 
                 all_first_timestamps.append(data_chan[0]["timestamp"])
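For reference, this is how the two filename layouts behave under the new parsing branch (plain stdlib; the stems are illustrative):

```python
import re

# old layout: "100_CH1.continuous" -> stem "100_CH1"
s = "100_CH1".split("_")                    # ['100', 'CH1']
chan_str = re.split(r"(\d+)", s[1])[0]      # re.split keeps the captured digits: ['CH', '1', ''] -> 'CH'
chan_id = int(s[1].replace(chan_str, ""))   # 1

# new layout: "100_RhythmData-A_CH1.continuous" -> stem "100_RhythmData-A_CH1"
s = "100_RhythmData-A_CH1".split("_")       # ['100', 'RhythmData-A', 'CH1']
chan_str = re.split(r"(\d+)", s[2])[0]      # 'CH'
chan_id = int(s[2].replace(chan_str, ""))   # 1
```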
@@ -220,16 +226,15 @@ def _parse_header(self):
         for seg_index, oe_index in enumerate(oe_indices_spk):
             self._spikes_memmap[seg_index] = {}
             for spike_filename in info["spikes"][oe_index]:
-                fullname = os.path.join(self.dirname, spike_filename)
-                spike_info = read_file_header(fullname)
-                spikes_dtype = make_spikes_dtype(fullname)
+                spike_info = read_file_header(spike_filename)
+                spikes_dtype = make_spikes_dtype(spike_filename)
 
                 # "STp106.0n0_2.spikes" to "STp106.0n0"
-                name = spike_filename.replace(".spikes", "")
+                name = spike_filename.stem
                 if seg_index > 0:
                     name = name.replace("_" + str(seg_index + 1), "")
 
-                data_spike = np.memmap(fullname, mode="r", offset=HEADER_SIZE, dtype=spikes_dtype)
+                data_spike = np.memmap(spike_filename, mode="r", offset=HEADER_SIZE, dtype=spikes_dtype)
                 self._spikes_memmap[seg_index][name] = data_spike
 
                 self._first_spk_timestamps.append(data_spike[0]["timestamp"])
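The `.stem` switch keeps the old name-normalization behavior, since `.stem` only strips the final suffix; for example (illustrative filename):

```python
from pathlib import Path

spike_filename = Path("STp106.0n0_2.spikes")  # second segment of this electrode
name = spike_filename.stem                    # "STp106.0n0_2"
seg_index = 1
if seg_index > 0:
    name = name.replace("_" + str(seg_index + 1), "")  # "STp106.0n0"
```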
@@ -239,10 +244,9 @@ def _parse_header(self):
         # so need to scan file for all segment to get units
         self._spike_sampling_rate = None
         for spike_filename_seg0 in info["spikes"][0]:
-            name = spike_filename_seg0.replace(".spikes", "")
+            name = spike_filename_seg0.stem
 
-            fullname = os.path.join(self.dirname, spike_filename_seg0)
-            spike_info = read_file_header(fullname)
+            spike_info = read_file_header(spike_filename_seg0)
             if self._spike_sampling_rate is None:
                 self._spike_sampling_rate = spike_info["sampleRate"]
             else:
@@ -273,23 +277,39 @@ def _parse_header(self):
         spike_channels = np.array(spike_channels, dtype=_spike_channel_dtype)
 
         # event file are:
-        # * all_channel.events (header + binray) --> event 0
+        # * all_channel.events (header + binary) --> event 0
+        # * n_RhythmData-a.events (header + binary) --> event 0 (maybe a new naming convention?)
         # and message.events (text based) --> event 1 not implemented yet
         event_channels = []
         self._events_memmap = {}
-        for seg_index, oe_index in enumerate(oe_indices):
-            if oe_index == 0:
-                event_filename = "all_channels.events"
-            else:
-                event_filename = f"all_channels_{oe_index + 1}.events"
-
-            fullname = os.path.join(self.dirname, event_filename)
-            event_info = read_file_header(fullname)
-            self._event_sampling_rate = event_info["sampleRate"]
-            data_event = np.memmap(fullname, mode="r", offset=HEADER_SIZE, dtype=events_dtype)
-            self._events_memmap[seg_index] = data_event
-
-        event_channels.append(("all_channels", "", "event"))
+        event_files = [
+            event_file
+            for event_file in Path(self.dirname).glob("**/*.events")
+            if event_file.name != "messages.events"
+        ]
+        event_files.sort()  # sort should put the xx.events first followed by xx_x.events
+        # only run if we have actual potential event files
+        if len(event_files) > 0:
+            event_file_name_0 = event_files[0].stem  # this should always be the file without a '_n' appended
+            for seg_index, oe_index in enumerate(oe_indices):
+                if oe_index == 0:
+                    event_filename = Path(self.dirname) / (event_file_name_0 + ".events")
+                else:
+                    event_filename = Path(self.dirname) / (event_file_name_0 + f"_{oe_index + 1}.events")
+
+                event_info = read_file_header(event_filename)
+                # event files can exist, but just not have data
+                try:
+                    self._event_sampling_rate = event_info["sampleRate"]
+                except KeyError:
+                    break
+                data_event = np.memmap(event_filename, mode="r", offset=HEADER_SIZE, dtype=events_dtype)
+                self._events_memmap[seg_index] = data_event
+            # only append event channels if they actually exist & have data
+            if len(self._events_memmap) > 0:
+                event_channels.append((event_filename.stem, "", "event"))
         # event_channels.append(('message', '', 'event'))  # not implemented
         event_channels = np.array(event_channels, dtype=_event_channel_dtype)
 
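The sort-order assumption holds because `.` (0x2E) compares below `_` (0x5F) in ASCII, so the un-suffixed file always lands first. A quick check with illustrative names:

```python
from pathlib import Path

names = ["all_channels_2.events", "messages.events", "all_channels.events"]
event_files = sorted(p for p in map(Path, names) if p.name != "messages.events")
# "all_channels.events" sorts before "all_channels_2.events"
assert [p.name for p in event_files] == ["all_channels.events", "all_channels_2.events"]
```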
@@ -308,7 +328,7 @@ def _parse_header(self):
         for seg_index, oe_index in enumerate(oe_indices):
             seg_ann = bl_ann["segments"][seg_index]
             if len(info["continuous"]) > 0:
-                fullname = os.path.join(self.dirname, info["continuous"][oe_index][0])
+                fullname = info["continuous"][oe_index][0]
                 chan_info = read_file_header(fullname)
                 seg_ann["openephys_version"] = chan_info["version"]
                 bl_ann["openephys_version"] = chan_info["version"]
@@ -531,7 +551,7 @@ def make_spikes_dtype(filename):
 
     # so we need to read the very first spike
     # but it will fail when 0 spikes (too bad)
-    filesize = os.stat(filename).st_size
+    filesize = filename.stat().st_size
     if filesize >= (HEADER_SIZE + 23):
         with open(filename, mode="rb") as f:
             # M and N is at 1024 + 19 bytes
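For context on the magic numbers: per the comments here, each spike record carries a 23-byte prefix, and the two uint16 counts (M and N) used to size the dtype sit 19 bytes into the first record. A hedged sketch of that read, with a hypothetical file name:

```python
import struct

HEADER_SIZE = 1024  # text header size, as in this module
with open("STp106.0n0.spikes", "rb") as f:  # hypothetical file
    f.seek(HEADER_SIZE + 19)                # skip header + 19-byte record prefix
    m_and_n = struct.unpack("<HH", f.read(4))  # two little-endian uint16 counts
```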
@@ -561,28 +581,33 @@ def explore_folder(dirname):
561581 "100_CH0.continuous" ---> seg_index 0
562582 "100_CH0_2.continuous" ---> seg_index 1
563583 "100_CH0_N.continuous" ---> seg_index N-1
584+
585+ Newer formats follow similar rules but have an addition
586+ "100_RhythmData-A_CH0.continuous" ----> seg_index 0
564587 """
565- filenames = os . listdir (dirname )
588+ filenames = [ filename for filename in Path (dirname ). glob ( "**/*" ) if filename . is_file ()]
566589 filenames .sort ()
567590
568591 info = {}
569592 info ["nb_segment" ] = 0
570593 info ["continuous" ] = {}
571594 info ["spikes" ] = {}
572595 for filename in filenames :
573- if filename .endswith (".continuous" ):
574- s = filename .replace (".continuous" , "" ).split ("_" )
575- if len (s ) == 2 :
596+ if filename .suffix == ".continuous" :
597+ s = filename .stem .split ("_" )
598+ # For continuous files we check if the last value is an int indicating that a new segment should be
599+ # generated and if it is not an int then this must be same segment
600+ try :
601+ seg_index = int (s [- 1 ]) - 1
602+ except ValueError :
576603 seg_index = 0
577- else :
578- seg_index = int (s [2 ]) - 1
579604 if seg_index not in info ["continuous" ].keys ():
580605 info ["continuous" ][seg_index ] = []
581606 info ["continuous" ][seg_index ].append (filename )
582607 if (seg_index + 1 ) > info ["nb_segment" ]:
583608 info ["nb_segment" ] += 1
584- elif filename .endswith ( ".spikes" ) :
585- s = re .findall (r"(_\d+)$" , filename .replace ( ".spikes" , "" ) )
609+ elif filename .suffix == ".spikes" :
610+ s = re .findall (r"(_\d+)$" , filename .stem )
586611 if s :
587612 seg_index = int (s [0 ][1 :]) - 1
588613 else :
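The try/except replaces the old `len(s) == 2` test and also covers the new stream-name layout; on illustrative stems it behaves like this:

```python
from pathlib import Path

for name in ["100_CH0.continuous", "100_CH0_2.continuous", "100_RhythmData-A_CH0_3.continuous"]:
    s = Path(name).stem.split("_")
    try:
        seg_index = int(s[-1]) - 1  # trailing "_N" means segment N-1
    except ValueError:
        seg_index = 0               # no numeric suffix -> first segment
    print(name, "->", seg_index)    # prints 0, 1, 2
```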
@@ -599,9 +624,15 @@ def explore_folder(dirname):
         chan_ids_by_type = {}
         filenames_by_type = {}
         for continuous_filename in continuous_filenames:
-            s = continuous_filename.replace(".continuous", "").split("_")
-            processor_id, ch_name = s[0], s[1]
-            chan_type = re.split(r"(\d+)", s[1])[0]
+            s = continuous_filename.stem.split("_")
+            # the new format inserts a stream name in the middle, e.g. ['124', 'RhythmData', 'CH1'];
+            # the old format is just ['124', 'CH1']
+            if len(s) == 2:
+                processor_id, ch_name = s[0], s[1]
+                chan_type = re.split(r"(\d+)", s[1])[0]
+            else:
+                processor_id, ch_name = s[0], s[2]
+                chan_type = re.split(r"(\d+)", s[2])[0]
             chan_id = int(ch_name.replace(chan_type, ""))
             if chan_type in chan_ids_by_type.keys():
                 chan_ids_by_type[chan_type].append(chan_id)
@@ -627,7 +658,7 @@ def explore_folder(dirname):
     for seg_index, spike_filenames in info["spikes"].items():
         names = []
         for spike_filename in spike_filenames:
-            name = spike_filename.replace(".spikes", "")
+            name = spike_filename.stem
             if seg_index > 0:
                 name = name.replace("_" + str(seg_index + 1), "")
             names.append(name)