
Commit 9a1962b

Merge branch 'master' into numpy-2-0
2 parents 5209596 + 4d079ef commit 9a1962b

23 files changed, +849 −364 lines changed

doc/source/rawio.rst

Lines changed: 26 additions & 0 deletions
@@ -281,6 +281,32 @@ Read event timestamps and times
     In [42]: print(ev_times)
     [ 0.0317]
 
+Signal streams and signal buffers
+---------------------------------
+
+For reading analog signals **neo.rawio** has two important concepts:
+
+1. The **signal_stream**: a group of channels that can be read together using :func:`get_analogsignal_chunk()`.
+   This group of channels is guaranteed to have the same sampling rate and the same duration per segment.
+   Most of the time, this is a "logical" group of channels, in short channels coming from the same headstage
+   or from the same auxiliary board.
+   Optionally, depending on the format, a **signal_stream** can be a slice of, or an entire, **signal_buffer**.
+
+2. The **signal_buffer**: a group of channels that share the same data layout in a file. The simplest example
+   is a set of channels that can be read by a single :func:`signals = np.memmap(file, shape=..., dtype=..., offset=...)`.
+   A **signal_buffer** can contain one or several **signal_stream**'s (very often only one).
+   There are two kinds of formats that handle this concept:
+
+    * formats which use :func:`np.memmap()` internally
+    * formats based on hdf5
+
+   There are many formats that do not handle this concept:
+
+    * the ones that use an external python package for reading data (edf, ced, plexon2, ...)
+    * the ones with a complicated data layout (e.g. those where the data blocks are split without structure)
+
+To check whether a format makes use of the buffer API, check the class attribute flag `has_buffer_description_api` of the
+rawio class.
 
 

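As a rough illustration of how these concepts surface from the user side, the sketch below uses the names introduced in this documentation and in the rest of this commit (`has_buffer_description_api`, `get_analogsignal_buffer_description`). It is a minimal, hedged example: the file path is a placeholder, and only readers whose buffer API flag is set expose a buffer description.

# Minimal usage sketch; "my_file.abf" is a placeholder path.
from neo.rawio import AxonRawIO

reader = AxonRawIO(filename="my_file.abf")
reader.parse_header()

# Every signal stream belongs to a signal buffer (the "buffer_id" column of the header).
print(reader.header["signal_streams"])

# Readers implementing the buffer API can describe the raw layout of a buffer on disk.
if reader.has_buffer_description_api():
    descr = reader.get_analogsignal_buffer_description(block_index=0, seg_index=0, buffer_id="0")
    print(descr["type"], descr["dtype"], descr["shape"])

# Reading a chunk of a stream works the same way with or without the buffer API.
raw_chunk = reader.get_analogsignal_chunk(block_index=0, seg_index=0, i_start=0, i_stop=1024, stream_index=0)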
neo/rawio/axonrawio.py

Lines changed: 26 additions & 22 deletions
@@ -53,7 +53,7 @@
 import numpy as np
 
 from .baserawio import (
-    BaseRawIO,
+    BaseRawWithBufferApiIO,
     _signal_channel_dtype,
     _signal_stream_dtype,
     _signal_buffer_dtype,
@@ -63,7 +63,7 @@
 from neo.core import NeoReadWriteError
 
 
-class AxonRawIO(BaseRawIO):
+class AxonRawIO(BaseRawWithBufferApiIO):
     """
     Class for Class for reading data from pCLAMP and AxoScope files (.abf version 1 and 2)
 
@@ -92,7 +92,7 @@ class AxonRawIO(BaseRawIO):
     rawmode = "one-file"
 
     def __init__(self, filename=""):
-        BaseRawIO.__init__(self)
+        BaseRawWithBufferApiIO.__init__(self)
        self.filename = filename
 
    def _parse_header(self):
@@ -115,8 +115,6 @@ def _parse_header(self):
        head_offset = info["sections"]["DataSection"]["uBlockIndex"] * BLOCKSIZE
        totalsize = info["sections"]["DataSection"]["llNumEntries"]
 
-        self._raw_data = np.memmap(self.filename, dtype=sig_dtype, mode="r", shape=(totalsize,), offset=head_offset)
-
        # 3 possible modes
        if version < 2.0:
            mode = info["nOperationMode"]
@@ -142,7 +140,7 @@
            )
        else:
            episode_array = np.empty(1, [("offset", "i4"), ("len", "i4")])
-            episode_array[0]["len"] = self._raw_data.size
+            episode_array[0]["len"] = totalsize
            episode_array[0]["offset"] = 0
 
        # sampling_rate
@@ -154,9 +152,14 @@
        # one sweep = one segment
        nb_segment = episode_array.size
 
+        stream_id = "0"
+        buffer_id = "0"
+
        # Get raw data by segment
-        self._raw_signals = {}
+        # self._raw_signals = {}
        self._t_starts = {}
+        self._buffer_descriptions = {0: {}}
+        self._stream_buffer_slice = {stream_id: None}
        pos = 0
        for seg_index in range(nb_segment):
            length = episode_array[seg_index]["len"]
@@ -169,7 +172,15 @@ def _parse_header(self):
            if (fSynchTimeUnit != 0) and (mode == 1):
                length /= fSynchTimeUnit
 
-            self._raw_signals[seg_index] = self._raw_data[pos : pos + length].reshape(-1, nbchannel)
+            self._buffer_descriptions[0][seg_index] = {}
+            self._buffer_descriptions[0][seg_index][buffer_id] = {
+                "type": "raw",
+                "file_path": str(self.filename),
+                "dtype": str(sig_dtype),
+                "order": "C",
+                "file_offset": head_offset + pos * sig_dtype.itemsize,
+                "shape": (int(length // nbchannel), int(nbchannel)),
+            }
            pos += length
 
            t_start = float(episode_array[seg_index]["offset"])
@@ -227,17 +238,16 @@ def _parse_header(self):
                offset -= info["listADCInfo"][chan_id]["fSignalOffset"]
            else:
                gain, offset = 1.0, 0.0
-            stream_id = "0"
-            buffer_id = "0"
+
            signal_channels.append(
                (name, str(chan_id), self._sampling_rate, sig_dtype, units, gain, offset, stream_id, buffer_id)
            )
 
        signal_channels = np.array(signal_channels, dtype=_signal_channel_dtype)
 
        # one unique signal stream and buffer
-        signal_buffers = np.array([("Signals", "0")], dtype=_signal_buffer_dtype)
-        signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype)
+        signal_buffers = np.array([("Signals", buffer_id)], dtype=_signal_buffer_dtype)
+        signal_streams = np.array([("Signals", stream_id, buffer_id)], dtype=_signal_stream_dtype)
 
        # only one events channel : tag
        # In ABF timstamps are not attached too any particular segment
@@ -295,21 +305,15 @@ def _segment_t_start(self, block_index, seg_index):
        return self._t_starts[seg_index]
 
    def _segment_t_stop(self, block_index, seg_index):
-        t_stop = self._t_starts[seg_index] + self._raw_signals[seg_index].shape[0] / self._sampling_rate
+        sig_size = self.get_signal_size(block_index, seg_index, 0)
+        t_stop = self._t_starts[seg_index] + sig_size / self._sampling_rate
        return t_stop
 
-    def _get_signal_size(self, block_index, seg_index, stream_index):
-        shape = self._raw_signals[seg_index].shape
-        return shape[0]
-
    def _get_signal_t_start(self, block_index, seg_index, stream_index):
        return self._t_starts[seg_index]
 
-    def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes):
-        if channel_indexes is None:
-            channel_indexes = slice(None)
-        raw_signals = self._raw_signals[seg_index][slice(i_start, i_stop), channel_indexes]
-        return raw_signals
+    def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id):
+        return self._buffer_descriptions[block_index][seg_index][buffer_id]
 
    def _event_count(self, block_index, seg_index, event_channel_index):
        return self._raw_ev_timestamps.size

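To make the refactor above concrete: the dictionary built per segment in `_parse_header` is a declarative description of where the samples live in the .abf file, replacing the eagerly created `self._raw_data` / `self._raw_signals` memmaps. The sketch below shows how such a description could be turned back into an array with plain NumPy; the numeric values and file path are made up for illustration, and in practice the generic machinery in `BaseRawWithBufferApiIO` does this work.

import numpy as np

# Example buffer description, shaped like the ones built in _parse_header above
# (illustrative values only).
buffer_desc = {
    "type": "raw",
    "file_path": "my_file.abf",   # placeholder path
    "dtype": "int16",
    "order": "C",
    "file_offset": 6144,          # head_offset + pos * itemsize for this segment
    "shape": (100_000, 16),       # (num_samples, num_channels)
}

# Rough equivalent of the old per-segment slice of self._raw_data:
raw_signals = np.memmap(
    buffer_desc["file_path"],
    dtype=buffer_desc["dtype"],
    mode="r",
    order=buffer_desc["order"],
    shape=buffer_desc["shape"],
    offset=buffer_desc["file_offset"],
)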
neo/rawio/baserawio.py

Lines changed: 159 additions & 1 deletion
@@ -77,6 +77,8 @@
 
 from neo import logging_handler
 
+from .utils import get_memmap_chunk_from_opened_file
+
 
 possible_raw_modes = [
     "one-file",
@@ -182,6 +184,15 @@ def __init__(self, use_cache: bool = False, cache_path: str = "same_as_resource"
        self.header = None
        self.is_header_parsed = False
 
+        self._has_buffer_description_api = False
+
+    def has_buffer_description_api(self) -> bool:
+        """
+        Return whether the reader handles the buffer API.
+        If True then the reader supports `get_analogsignal_buffer_description()` internally.
+        """
+        return self._has_buffer_description_api
+
    def parse_header(self):
        """
        Parses the header of the file(s) to allow for faster computations
@@ -191,6 +202,7 @@ def parse_header(self):
        # this must create
        # self.header['nb_block']
        # self.header['nb_segment']
+        # self.header['signal_buffers']
        # self.header['signal_streams']
        # self.header['signal_channels']
        # self.header['spike_channels']
@@ -663,6 +675,7 @@ def get_signal_size(self, block_index: int, seg_index: int, stream_index: int |
 
        """
        stream_index = self._get_stream_index_from_arg(stream_index)
+
        return self._get_signal_size(block_index, seg_index, stream_index)
 
    def get_signal_t_start(self, block_index: int, seg_index: int, stream_index: int | None = None):
@@ -1311,7 +1324,6 @@ def _get_analogsignal_chunk(
        -------
        array of samples, with each requested channel in a column
        """
-
        raise (NotImplementedError)
 
    ###
@@ -1350,6 +1362,152 @@ def _rescale_event_timestamp(self, event_timestamps: np.ndarray, dtype: np.dtype
    def _rescale_epoch_duration(self, raw_duration: np.ndarray, dtype: np.dtype):
        raise (NotImplementedError)
 
+    ###
+    # buffer api zone
+    # must be implemented if has_buffer_description_api=True
+    def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: int = 0, buffer_id: str = None):
+        if not self.has_buffer_description_api():
+            raise ValueError("This reader does not support the buffer_description API")
+        descr = self._get_analogsignal_buffer_description(block_index, seg_index, buffer_id)
+        return descr
+
+    def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id):
+        raise (NotImplementedError)
+
+
+class BaseRawWithBufferApiIO(BaseRawIO):
+    """
+    Generic class for readers that support the "buffer api".
+
+    In short, readers that are internally based on:
+
+    * np.memmap
+    * hdf5
+
+    In these cases _get_signal_size and _get_analogsignal_chunk are totally generic and do not need to be implemented in the class.
+
+    For this class, subclasses must implement these two dicts:
+    * self._buffer_descriptions[block_index][seg_index] = buffer_description
+    * self._stream_buffer_slice[buffer_id] = None or a slice or indices
+
+    """
+
+    def __init__(self, *arg, **kwargs):
+        super().__init__(*arg, **kwargs)
+        self._has_buffer_description_api = True
+
+    def _get_signal_size(self, block_index, seg_index, stream_index):
+        buffer_id = self.header["signal_streams"][stream_index]["buffer_id"]
+        buffer_desc = self.get_analogsignal_buffer_description(block_index, seg_index, buffer_id)
+        # some hdf5 formats store the buffer transposed (time on axis 1)
+        time_axis = buffer_desc.get("time_axis", 0)
+        return buffer_desc["shape"][time_axis]
+
+    def _get_analogsignal_chunk(
+        self,
+        block_index: int,
+        seg_index: int,
+        i_start: int | None,
+        i_stop: int | None,
+        stream_index: int,
+        channel_indexes: list[int] | None,
+    ):
+
+        stream_id = self.header["signal_streams"][stream_index]["id"]
+        buffer_id = self.header["signal_streams"][stream_index]["buffer_id"]
+
+        buffer_slice = self._stream_buffer_slice[stream_id]
+
+        buffer_desc = self.get_analogsignal_buffer_description(block_index, seg_index, buffer_id)
+
+        i_start = i_start or 0
+        i_stop = i_stop or buffer_desc["shape"][0]
+
+        if buffer_desc["type"] == "raw":
+
+            # open files on demand and keep reference to opened file
+            if not hasattr(self, "_memmap_analogsignal_buffers"):
+                self._memmap_analogsignal_buffers = {}
+            if block_index not in self._memmap_analogsignal_buffers:
+                self._memmap_analogsignal_buffers[block_index] = {}
+            if seg_index not in self._memmap_analogsignal_buffers[block_index]:
+                self._memmap_analogsignal_buffers[block_index][seg_index] = {}
+            if buffer_id not in self._memmap_analogsignal_buffers[block_index][seg_index]:
+                fid = open(buffer_desc["file_path"], mode="rb")
+                self._memmap_analogsignal_buffers[block_index][seg_index][buffer_id] = fid
+            else:
+                fid = self._memmap_analogsignal_buffers[block_index][seg_index][buffer_id]
+
+            num_channels = buffer_desc["shape"][1]
+
+            raw_sigs = get_memmap_chunk_from_opened_file(
+                fid,
+                num_channels,
+                i_start,
+                i_stop,
+                np.dtype(buffer_desc["dtype"]),
+                file_offset=buffer_desc["file_offset"],
+            )
+
+        elif buffer_desc["type"] == "hdf5":
+
+            # open files on demand and keep reference to opened file
+            if not hasattr(self, "_hdf5_analogsignal_buffers"):
+                self._hdf5_analogsignal_buffers = {}
+            if block_index not in self._hdf5_analogsignal_buffers:
+                self._hdf5_analogsignal_buffers[block_index] = {}
+            if seg_index not in self._hdf5_analogsignal_buffers[block_index]:
+                self._hdf5_analogsignal_buffers[block_index][seg_index] = {}
+            if buffer_id not in self._hdf5_analogsignal_buffers[block_index][seg_index]:
+                import h5py
+
+                h5file = h5py.File(buffer_desc["file_path"], mode="r")
+                self._hdf5_analogsignal_buffers[block_index][seg_index][buffer_id] = h5file
+            else:
+                h5file = self._hdf5_analogsignal_buffers[block_index][seg_index][buffer_id]
+
+            hdf5_path = buffer_desc["hdf5_path"]
+            full_raw_sigs = h5file[hdf5_path]
+
+            time_axis = buffer_desc.get("time_axis", 0)
+            if time_axis == 0:
+                raw_sigs = full_raw_sigs[i_start:i_stop, :]
+            elif time_axis == 1:
+                raw_sigs = full_raw_sigs[:, i_start:i_stop].T
+            else:
+                raise RuntimeError("Should never happen")
+
+            if buffer_slice is not None:
+                raw_sigs = raw_sigs[:, buffer_slice]
+
+        else:
+            raise NotImplementedError()
+
+        # this is a pre-slicing when the stream does not contain all channels (for instance spikeglx when load_sync_channel=False)
+        if buffer_slice is not None:
+            raw_sigs = raw_sigs[:, buffer_slice]
+
+        # channel slice requested
+        if channel_indexes is not None:
+            raw_sigs = raw_sigs[:, channel_indexes]
+
+        return raw_sigs
+
+    def __del__(self):
+        if hasattr(self, "_memmap_analogsignal_buffers"):
+            for block_index in self._memmap_analogsignal_buffers.keys():
+                for seg_index in self._memmap_analogsignal_buffers[block_index].keys():
+                    for buffer_id, fid in self._memmap_analogsignal_buffers[block_index][seg_index].items():
+                        fid.close()
+            del self._memmap_analogsignal_buffers
+
+        if hasattr(self, "_hdf5_analogsignal_buffers"):
+            for block_index in self._hdf5_analogsignal_buffers.keys():
+                for seg_index in self._hdf5_analogsignal_buffers[block_index].keys():
+                    for buffer_id, h5_file in self._hdf5_analogsignal_buffers[block_index][seg_index].items():
+                        h5_file.close()
+            del self._hdf5_analogsignal_buffers
+
 
 def pprint_vector(vector, lim: int = 8):
     vector = np.asarray(vector)

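The helper `get_memmap_chunk_from_opened_file` imported at the top of baserawio.py is not shown in this excerpt of the diff. The sketch below only illustrates the general idea behind it for the "raw" branch above: read just the requested slice of frames from an already opened binary file instead of mapping the whole buffer. It assumes a C-ordered (num_samples, num_channels) interleaved layout and is not the actual implementation from neo/rawio/utils.py.

import numpy as np

def memmap_chunk_sketch(fid, num_channels, i_start, i_stop, dtype, file_offset=0):
    """Read frames i_start:i_stop from an already opened binary file.

    Illustrative only: assumes channels are interleaved frame by frame in C order.
    """
    bytes_per_frame = num_channels * dtype.itemsize
    num_frames = i_stop - i_start
    # jump to the first requested frame within the buffer
    fid.seek(file_offset + i_start * bytes_per_frame)
    data = fid.read(num_frames * bytes_per_frame)
    # reinterpret the bytes as a (num_frames, num_channels) array
    return np.frombuffer(data, dtype=dtype).reshape(num_frames, num_channels)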
neo/rawio/bci2000rawio.py

Lines changed: 8 additions & 5 deletions
@@ -1,6 +1,9 @@
 """
 BCI2000RawIO is a class to read BCI2000 .dat files.
 https://www.bci2000.org/mediawiki/index.php/Technical_Reference:BCI2000_File_Format
+
+Note: BCI2000RawIO cannot be implemented with has_buffer_description_api because the buffer
+of signals is not compact (it has some interleaved state uints in between channels).
 """
 
 import numpy as np
@@ -50,9 +53,11 @@ def _parse_header(self):
        self.header["nb_block"] = 1
        self.header["nb_segment"] = [1]
 
-        # one unique stream and buffer
-        signal_buffers = np.array(("Signals", "0"), dtype=_signal_buffer_dtype)
-        signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype)
+        # one unique stream but no buffer because channels are not compact
+        stream_id = "0"
+        buffer_id = ""
+        signal_buffers = np.array([], dtype=_signal_buffer_dtype)
+        signal_streams = np.array([("Signals", stream_id, buffer_id)], dtype=_signal_stream_dtype)
        self.header["signal_buffers"] = signal_buffers
        self.header["signal_streams"] = signal_streams
 
@@ -80,8 +85,6 @@ def _parse_header(self):
            if isinstance(offset, str):
                offset = float(offset)
 
-            stream_id = "0"
-            buffer_id = "0"
            sig_channels.append((ch_name, chan_id, sr, dtype, units, gain, offset, stream_id, buffer_id))
        self.header["signal_channels"] = np.array(sig_channels, dtype=_signal_channel_dtype)
 