
Commit fe22ba5

Merge pull request #1453 from zm711/openephy-rythmdata
OpenEphys: Allow for new `stream` naming convention
2 parents 0023e51 + 90c93d5

File tree

2 files changed: +80 -46 lines

neo/rawio/openephysrawio.py

Lines changed: 77 additions & 46 deletions
@@ -8,8 +8,8 @@
 Author: Samuel Garcia
 """
 
-import os
 import re
+from pathlib import Path
 
 import numpy as np
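The rest of the diff follows from this import swap: every `os.path.join`/`os.stat`/string-suffix call is replaced by its `pathlib` counterpart. A minimal sketch of the correspondences, using a hypothetical filename:

```python
from pathlib import Path

p = Path("Record Node 120") / "100_RhythmData-A_CH1.continuous"  # hypothetical path

p.stem     # "100_RhythmData-A_CH1"  -- replaces filename.replace(".continuous", "")
p.suffix   # ".continuous"           -- replaces filename.endswith(".continuous")
p.name     # "100_RhythmData-A_CH1.continuous"
# p.stat().st_size replaces os.stat(fullname).st_size (requires the file to exist)
```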

@@ -73,8 +73,8 @@ class OpenEphysRawIO(BaseRawIO):
     * Works only if all continuous channels have the same sampling rate, which is a reasonable
       hypothesis.
     * A recording can contain gaps due to USB stream loss when high CPU load when recording.
-      Theses gaps are checked channel per channel which make the parse_header() slow.
-      If gaps are detected then they are filled with zeros but the the reading will be much slower for getting signals.
+      These gaps are checked channel per channel which makes the parse_header() slow.
+      If gaps are detected then they are filled with zeros but then the reading will be much slower for getting signals.
 
     """

@@ -112,18 +112,24 @@ def _parse_header(self):
             all_last_timestamps = []
             all_samplerate = []
             for chan_index, continuous_filename in enumerate(info["continuous"][oe_index]):
-                fullname = os.path.join(self.dirname, continuous_filename)
-                chan_info = read_file_header(fullname)
+                chan_info = read_file_header(continuous_filename)
 
-                s = continuous_filename.replace(".continuous", "").split("_")
-                processor_id, ch_name = s[0], s[1]
-                chan_str = re.split(r"(\d+)", s[1])[0]
+                s = continuous_filename.stem.split("_")
+                # Formats are ['processor_id', 'ch_name'] or ['processor_id', 'name', 'ch_name']
+                if len(s) == 2:
+                    processor_id, ch_name = s[0], s[1]
+                    chan_str = re.split(r"(\d+)", s[1])[0]
+                else:
+                    processor_id, ch_name = s[0], s[2]
+                    chan_str = re.split(r"(\d+)", s[2])[0]
                 # note that chan_id is not unique in case of CH + AUX
                 chan_id = int(ch_name.replace(chan_str, ""))
 
-                filesize = os.stat(fullname).st_size
+                filesize = continuous_filename.stat().st_size
                 size = (filesize - HEADER_SIZE) // np.dtype(continuous_dtype).itemsize
-                data_chan = np.memmap(fullname, mode="r", offset=HEADER_SIZE, dtype=continuous_dtype, shape=(size,))
+                data_chan = np.memmap(
+                    continuous_filename, mode="r", offset=HEADER_SIZE, dtype=continuous_dtype, shape=(size,)
+                )
                 self._sigs_memmap[seg_index][chan_index] = data_chan
 
                 all_first_timestamps.append(data_chan[0]["timestamp"])
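To see what the new branch handles, here is a hedged sketch of the parsing on both naming conventions; the helper name is ours, and the example filenames come from the comments and docstring in this diff:

```python
import re
from pathlib import Path

def parse_continuous_name(path):
    # Old names split into 2 fields, new ones into 3 (extra stream name in the middle).
    s = path.stem.split("_")
    if len(s) == 2:     # e.g. "100_CH1.continuous"
        processor_id, ch_name = s[0], s[1]
    else:               # e.g. "124_RhythmData_CH1.continuous"
        processor_id, ch_name = s[0], s[2]
    chan_str = re.split(r"(\d+)", ch_name)[0]      # channel type, e.g. "CH"
    chan_id = int(ch_name.replace(chan_str, ""))   # channel number, e.g. 1
    return processor_id, chan_str, chan_id

print(parse_continuous_name(Path("100_CH1.continuous")))             # ('100', 'CH', 1)
print(parse_continuous_name(Path("124_RhythmData_CH1.continuous")))  # ('124', 'CH', 1)
```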
@@ -220,16 +226,15 @@ def _parse_header(self):
         for seg_index, oe_index in enumerate(oe_indices_spk):
             self._spikes_memmap[seg_index] = {}
             for spike_filename in info["spikes"][oe_index]:
-                fullname = os.path.join(self.dirname, spike_filename)
-                spike_info = read_file_header(fullname)
-                spikes_dtype = make_spikes_dtype(fullname)
+                spike_info = read_file_header(spike_filename)
+                spikes_dtype = make_spikes_dtype(spike_filename)
 
                 # "STp106.0n0_2.spikes" to "STp106.0n0"
-                name = spike_filename.replace(".spikes", "")
+                name = spike_filename.stem
                 if seg_index > 0:
                     name = name.replace("_" + str(seg_index + 1), "")
 
-                data_spike = np.memmap(fullname, mode="r", offset=HEADER_SIZE, dtype=spikes_dtype)
+                data_spike = np.memmap(spike_filename, mode="r", offset=HEADER_SIZE, dtype=spikes_dtype)
                 self._spikes_memmap[seg_index][name] = data_spike
 
                 self._first_spk_timestamps.append(data_spike[0]["timestamp"])
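A quick check of the renaming step, using the example filename from the comment above (`Path.stem` drops only the extension, so the `_2` segment suffix still has to be stripped by hand):

```python
from pathlib import Path

seg_index = 1  # second segment
name = Path("STp106.0n0_2.spikes").stem      # "STp106.0n0_2"
if seg_index > 0:
    name = name.replace("_" + str(seg_index + 1), "")
print(name)                                  # "STp106.0n0"
```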
@@ -239,10 +244,9 @@ def _parse_header(self):
         # so need to scan file for all segment to get units
         self._spike_sampling_rate = None
         for spike_filename_seg0 in info["spikes"][0]:
-            name = spike_filename_seg0.replace(".spikes", "")
+            name = spike_filename_seg0.stem
 
-            fullname = os.path.join(self.dirname, spike_filename_seg0)
-            spike_info = read_file_header(fullname)
+            spike_info = read_file_header(spike_filename_seg0)
             if self._spike_sampling_rate is None:
                 self._spike_sampling_rate = spike_info["sampleRate"]
             else:
@@ -273,23 +277,39 @@ def _parse_header(self):
         spike_channels = np.array(spike_channels, dtype=_spike_channel_dtype)
 
         # event file are:
-        # * all_channel.events (header + binray) --> event 0
+        # * all_channel.events (header + binary) --> event 0
+        # * n_RhythmData-a.events (header + binary) --> event 0 (maybe a new naming convention? )
         # and message.events (text based) --> event 1 not implemented yet
         event_channels = []
         self._events_memmap = {}
-        for seg_index, oe_index in enumerate(oe_indices):
-            if oe_index == 0:
-                event_filename = "all_channels.events"
-            else:
-                event_filename = f"all_channels_{oe_index + 1}.events"
-
-            fullname = os.path.join(self.dirname, event_filename)
-            event_info = read_file_header(fullname)
-            self._event_sampling_rate = event_info["sampleRate"]
-            data_event = np.memmap(fullname, mode="r", offset=HEADER_SIZE, dtype=events_dtype)
-            self._events_memmap[seg_index] = data_event
-
-            event_channels.append(("all_channels", "", "event"))
+        event_files = list(
+            [
+                event_file
+                for event_file in Path(self.dirname).glob("**/*.events")
+                if event_file.name != "messages.events"
+            ]
+        )
+        event_files.sort()  # sort should put the xx.events first followed by xx_x.events
+        # only run if we have actual potential event files
+        if len(event_files) > 0:
+            event_file_name_0 = event_files[0].stem  # this should always be the file without a '_n' appended
+            for seg_index, oe_index in enumerate(oe_indices):
+                if oe_index == 0:
+                    event_filename = Path(self.dirname) / (event_file_name_0 + ".events")
+                else:
+                    event_filename = Path(self.dirname) / (event_file_name_0 + f"_{oe_index + 1}.events")
+
+                event_info = read_file_header(event_filename)
+                # event files can exist, but just not have data
+                try:
+                    self._event_sampling_rate = event_info["sampleRate"]
+                except KeyError:
+                    break
+                data_event = np.memmap(event_filename, mode="r", offset=HEADER_SIZE, dtype=events_dtype)
+                self._events_memmap[seg_index] = data_event
+                # only append event channels if they actually exist & have data
+                if len(self._events_memmap.keys()) > 0:
+                    event_channels.append((event_filename.stem, "", "event"))
         # event_channels.append(('message', '', 'event'))  # not implemented
         event_channels = np.array(event_channels, dtype=_event_channel_dtype)
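The hard-coded `all_channels.events` is gone; event files are now discovered by globbing. A sketch of just the discovery step, under an assumed folder layout (directory name and segment count are hypothetical):

```python
from pathlib import Path

dirname = Path("Record Node 120")  # hypothetical recording folder

# Every .events file except the text-based messages.events; a lexical sort
# puts "xx.events" before "xx_2.events", "xx_3.events", ...
event_files = sorted(
    f for f in dirname.glob("**/*.events") if f.name != "messages.events"
)
if event_files:
    base = event_files[0].stem  # "all_channels" or e.g. "n_RhythmData-a"
    for oe_index in range(3):   # pretend there are 3 segments
        suffix = "" if oe_index == 0 else f"_{oe_index + 1}"
        print(dirname / f"{base}{suffix}.events")
```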

@@ -308,7 +328,7 @@ def _parse_header(self):
         for seg_index, oe_index in enumerate(oe_indices):
             seg_ann = bl_ann["segments"][seg_index]
             if len(info["continuous"]) > 0:
-                fullname = os.path.join(self.dirname, info["continuous"][oe_index][0])
+                fullname = info["continuous"][oe_index][0]
                 chan_info = read_file_header(fullname)
                 seg_ann["openephys_version"] = chan_info["version"]
                 bl_ann["openephys_version"] = chan_info["version"]
@@ -531,7 +551,7 @@ def make_spikes_dtype(filename):
 
     # so we need to read the very first spike
     # but it will fail when 0 spikes (too bad)
-    filesize = os.stat(filename).st_size
+    filesize = filename.stat().st_size
     if filesize >= (HEADER_SIZE + 23):
         with open(filename, mode="rb") as f:
             # M and N is at 1024 + 19 bytes
@@ -561,28 +581,33 @@ def explore_folder(dirname):
     "100_CH0.continuous" ---> seg_index 0
     "100_CH0_2.continuous" ---> seg_index 1
     "100_CH0_N.continuous" ---> seg_index N-1
+
+    Newer formats follow similar rules but have an addition
+    "100_RhythmData-A_CH0.continuous" ----> seg_index 0
     """
-    filenames = os.listdir(dirname)
+    filenames = [filename for filename in Path(dirname).glob("**/*") if filename.is_file()]
     filenames.sort()
 
     info = {}
     info["nb_segment"] = 0
     info["continuous"] = {}
     info["spikes"] = {}
     for filename in filenames:
-        if filename.endswith(".continuous"):
-            s = filename.replace(".continuous", "").split("_")
-            if len(s) == 2:
+        if filename.suffix == ".continuous":
+            s = filename.stem.split("_")
+            # For continuous files we check if the last value is an int indicating that a new segment should be
+            # generated and if it is not an int then this must be same segment
+            try:
+                seg_index = int(s[-1]) - 1
+            except ValueError:
                 seg_index = 0
-            else:
-                seg_index = int(s[2]) - 1
             if seg_index not in info["continuous"].keys():
                 info["continuous"][seg_index] = []
             info["continuous"][seg_index].append(filename)
             if (seg_index + 1) > info["nb_segment"]:
                 info["nb_segment"] += 1
-        elif filename.endswith(".spikes"):
-            s = re.findall(r"(_\d+)$", filename.replace(".spikes", ""))
+        elif filename.suffix == ".spikes":
+            s = re.findall(r"(_\d+)$", filename.stem)
             if s:
                 seg_index = int(s[0][1:]) - 1
             else:
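The `try`/`except` replaces the old `len(s) == 2` test, which breaks on the three-part new-style names. A sketch of the rule on both conventions:

```python
def seg_index_of(stem):
    # The last "_"-separated field is an integer only for segments after the first.
    try:
        return int(stem.split("_")[-1]) - 1
    except ValueError:
        return 0

print(seg_index_of("100_CH0"))               # 0  (old name, first segment)
print(seg_index_of("100_CH0_2"))             # 1  (old name, second segment)
print(seg_index_of("100_RhythmData-A_CH0"))  # 0  ("CH0" is not an integer)
```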
@@ -599,9 +624,15 @@ def explore_folder(dirname):
         chan_ids_by_type = {}
         filenames_by_type = {}
         for continuous_filename in continuous_filenames:
-            s = continuous_filename.replace(".continuous", "").split("_")
-            processor_id, ch_name = s[0], s[1]
-            chan_type = re.split(r"(\d+)", s[1])[0]
+            s = continuous_filename.stem.split("_")
+            # new format includes putting a name between e.g. ['124', 'RhythmData', 'CH1']
+            # old format would just be ['124', 'CH1']
+            if len(s) == 2:
+                processor_id, ch_name = s[0], s[1]
+                chan_type = re.split(r"(\d+)", s[1])[0]
+            else:
+                processor_id, ch_name = s[0], s[2]
+                chan_type = re.split(r"(\d+)", s[2])[0]
             chan_id = int(ch_name.replace(chan_type, ""))
             if chan_type in chan_ids_by_type.keys():
                 chan_ids_by_type[chan_type].append(chan_id)
@@ -627,7 +658,7 @@ def explore_folder(dirname):
     for seg_index, spike_filenames in info["spikes"].items():
         names = []
         for spike_filename in spike_filenames:
-            name = spike_filename.replace(".spikes", "")
+            name = spike_filename.stem
             if seg_index > 0:
                 name = name.replace("_" + str(seg_index + 1), "")
             names.append(name)

neo/test/rawiotest/test_openephysrawio.py

Lines changed: 3 additions & 0 deletions
@@ -15,6 +15,9 @@ class TestOpenEphysRawIO(
         # this file has gaps and this is now handle corretly
         "openephys/OpenEphys_SampleData_2_(multiple_starts)",
         # 'openephys/OpenEphys_SampleData_3',
+        # two nodes with the new naming convention for openephys
+        "openephys/openephys_rhythmdata_test_nodes/Record Node 120",
+        "openephys/openephys_rhythmdata_test_nodes/Record Node 121",
     ]
 
     def test_raise_error_if_strange_timestamps(self):
