
Commit c250c09

Author: sprenger (committed)
[EDF] improve memory performance and stream handling
- use pyedflib cython methods for data retrieval
- read signal only when required
- use multiple streams for signals with different sampling rates
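In practice the stream split means a consumer reads each sampling-rate group separately, and samples are only pulled from the file at that point. A minimal usage sketch, assuming the reader is exposed as EDFRawIO in neo.rawio; the class name, file name and chunk size below are illustrative, not part of this commit:

from neo.rawio import EDFRawIO  # assumed export; adjust to the actual class name

reader = EDFRawIO(filename='recording.edf')  # hypothetical file
reader.parse_header()  # reads headers only; no signal data is loaded here

# one stream per sampling rate, e.g. EEG at 256 Hz and respiration at 32 Hz
for stream_index, stream in enumerate(reader.header['signal_streams']):
    size = reader.get_signal_size(block_index=0, seg_index=0, stream_index=stream_index)
    # samples are fetched from disk only for this request
    raw = reader.get_analogsignal_chunk(block_index=0, seg_index=0,
                                        i_start=0, i_stop=min(1024, size),
                                        stream_index=stream_index,
                                        channel_indexes=None)
    print(stream['name'], raw.shape, raw.dtype)  # (time, channel), int16

Channels that share a sampling rate land in the same stream, so every returned chunk has a single, consistent time axis.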
1 parent 52edc71 commit c250c09

1 file changed: +59 -19 lines


neo/rawio/edfrawio.py

Lines changed: 59 additions & 19 deletions
@@ -16,7 +16,7 @@
 import numpy as np
 
 try:
-    from pyedflib import highlevel
+    from pyedflib import EdfReader
     HAS_PYEDF = True
 except ImportError:
     HAS_PYEDF = False
@@ -49,7 +49,6 @@ def __init__(self, filename=''):
         # note that this filename is used in self._source_name
         self.filename = filename
 
-        self.signals = None
         self.signal_headers = []
         self.edf_header = {}
 
@@ -67,29 +66,48 @@ def _parse_header(self):
         if ('EDF+' in file_version_header) and ('EDF+C' not in file_version_header):
             raise ValueError('Only continuous EDF+ files are currently supported.')
 
-        # read a edf file content using pyedflib
-        self.signals, self.signal_headers, self.edf_header = highlevel.read_edf(self.filename)
+        self.edf_reader = EdfReader(self.filename)
+        # load headers, signal information and annotations
+        self.edf_header = self.edf_reader.getHeader()
+        self.signal_headers = self.edf_reader.getSignalHeaders()
 
-        # 1 edf file = 1 stream
-        signal_streams = [('edf stream', 0)]
-        signal_streams = np.array(signal_streams, dtype=_signal_stream_dtype)
+        # add annotations to header
+        annotations = self.edf_reader.readAnnotations()
+        self.signal_annotations = [[s, d, a] for s, d, a in zip(*annotations)]
+
+        # 1 stream = 1 sampling rate
+        stream_characteristics = []
+        self.stream_idx_to_chidx = {}
 
         signal_channels = []
         for ch_idx, sig_dict in enumerate(self.signal_headers):
             ch_name = sig_dict['label']
             chan_id = ch_idx
             sr = sig_dict['sample_rate']  # Hz
-            dtype = self.signals.dtype.str
+            dtype = np.int16  # assume general int16 based on edf documentation
             units = sig_dict['dimension']
             physical_range = sig_dict['physical_max'] - sig_dict['physical_min']
             digital_range = sig_dict['digital_max'] - sig_dict['digital_min']
             gain = physical_range / digital_range
             offset = -1 * sig_dict['digital_min'] * gain + sig_dict['physical_min']
-            stream_id = 0  # file contains only a single stream
+
+            # identify corresponding stream based on sampling rate
+            if (sr,) not in stream_characteristics:
+                stream_characteristics += [(sr,)]
+
+            stream_id = stream_characteristics.index((sr,))
+            self.stream_idx_to_chidx.setdefault(stream_id, []).append(ch_idx)
+
             signal_channels.append((ch_name, chan_id, sr, dtype, units, gain, offset, stream_id))
 
+        # convert channel index lists to arrays for indexing
+        self.stream_idx_to_chidx = {k: np.array(v) for k, v in self.stream_idx_to_chidx.items()}
+
         signal_channels = np.array(signal_channels, dtype=_signal_channel_dtype)
 
+        signal_streams = [(f'stream ({sr} Hz)', i) for i, sr in enumerate(stream_characteristics)]
+        signal_streams = np.array(signal_streams, dtype=_signal_stream_dtype)
+
         # no unit/epoch information contained in edf
         spike_channels = []
         spike_channels = np.array(spike_channels, dtype=_spike_channel_dtype)
@@ -126,37 +144,59 @@ def _parse_header(self):
                 array_anno = {array_key: [h[array_key] for h in self.signal_headers]}
             seg_ann['signals'].append({'__array_annotations__': array_anno})
 
+    def _get_stream_channels(self, stream_index):
+        return self.header['signal_channels'][self.stream_idx_to_chidx[stream_index]]
+
     def _segment_t_start(self, block_index, seg_index):
         # no time offset provided by EDF format
         return 0  # in seconds
 
     def _segment_t_stop(self, block_index, seg_index):
-        t_stop = self.signals.shape[1] / self.signal_headers[0]['sample_rate']
+        t_stop = self.edf_reader.datarecord_duration * self.edf_reader.datarecords_in_file
         # this must return an float scale in second
         return t_stop
 
     def _get_signal_size(self, block_index, seg_index, stream_index):
-        return self.signals.shape[1]
+        chidx = self.stream_idx_to_chidx[stream_index][0]
+        # use sample count of first signal in stream
+        return self.edf_reader.getNSamples()[chidx]
 
     def _get_signal_t_start(self, block_index, seg_index, stream_index):
         return 0  # EDF does not provide temporal offset information
 
     def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop,
                                 stream_index, channel_indexes):
-        # only dealing with single segment, single stream edf files
-        assert (block_index, seg_index, stream_index) == (0, 0, 0)
+        # only dealing with single block and segment edf files
+        assert (block_index, seg_index) == (0, 0)
+
+        stream_channel_idxs = self.stream_idx_to_chidx[stream_index]
+
+        # keep all channels of the stream if none are selected
+        if channel_indexes is None:
+            channel_indexes = slice(None)
 
         if i_start is None:
             i_start = 0
         if i_stop is None:
-            i_stop = self.signals.shape[1]
+            i_stop = self.get_signal_size(block_index=block_index, seg_index=seg_index,
+                                          stream_index=stream_index)
+        n = i_stop - i_start
 
-        # keep all channels if none are selected
-        if channel_indexes is None:
-            channel_indexes = slice(None)
+        # raw_signals = self.edf_reader. am[channel_indexes, i_start:i_stop]
+        selected_channel_idxs = stream_channel_idxs[channel_indexes]
+
+        # load data into numpy array buffer
+        data = []
+        for i, channel_idx in enumerate(selected_channel_idxs):
+            # use int32 for compatibility with pyedflib
+            buffer = np.empty(n, dtype=np.int32)
+            self.edf_reader.read_digital_signal(channel_idx, i_start, n, buffer)
+            data.append(buffer)
+
+        # downgrade to int16 as this is what is used in the edf file format
+        data = np.asarray(data, dtype=np.int16)
 
-        raw_signals = self.signals[channel_indexes, i_start:i_stop]
-        return raw_signals.T
+        return data.T  # use dimensions (time, channel)
 
     def _spike_count(self, block_index, seg_index, spike_channel_index):
         return None
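The per-channel gain and offset computed in _parse_header map raw digital samples to physical units via physical = raw * gain + offset. A small worked example with invented header values (typical of an EEG channel, not taken from this commit):

import numpy as np

# invented EDF signal header values for one EEG channel
physical_min, physical_max = -3200.0, 3200.0   # uV
digital_min, digital_max = -32768, 32767

gain = (physical_max - physical_min) / (digital_max - digital_min)   # ~0.0977 uV per bit
offset = -1 * digital_min * gain + physical_min                      # ~0.0488 uV

raw = np.array([-32768, 0, 32767], dtype=np.int16)
physical = raw.astype(np.float64) * gain + offset
# -> approximately [-3200.0, 0.05, 3200.0] uV

This is why the reader can return plain int16 chunks: the scaling back to physical units is deferred to whoever consumes the raw data.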
