Skip to content

Commit 35d4a41

Browse files
Kkuntal990claude
andcommitted
Add support for VECTORIZED orientation in BrainVisionRawIO
Fixes KAN-43: https://eeglab.atlassian.net/browse/KAN-43

Previously, BrainVisionRawIO only supported the MULTIPLEXED data orientation (interleaved channels), so attempting to read a file with VECTORIZED orientation (sequential channel data) raised a NeoReadWriteError.

Changes:
- Modified the data orientation check to accept both MULTIPLEXED and VECTORIZED
- Added a custom _get_analogsignal_chunk() method to handle VECTORIZED reading
- For VECTORIZED files, reads each channel's data from its sequential location in the binary file
- Maintains backward compatibility with MULTIPLEXED files (uses the parent class implementation)

Testing:
- Validated against MNE-Python on a real-world VECTORIZED dataset (ds004621) with 127 channels × 740,360 samples; results match exactly (correlation=1.0)
- Tested both MULTIPLEXED and VECTORIZED orientations with synthetic data
- All existing functionality preserved for MULTIPLEXED files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent a376b4a commit 35d4a41

File tree

1 file changed

+78
-3
lines changed

1 file changed

+78
-3
lines changed

neo/rawio/brainvisionrawio.py

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,12 @@ def _parse_header(self):
6262
raise NeoReadWriteError(
6363
f"Only `BINARY` format has been implemented. Current Data Format is {vhdr_header['Common Infos']['DataFormat']}"
6464
)
65-
if vhdr_header["Common Infos"]["DataOrientation"] != "MULTIPLEXED":
65+
66+
# Store the data orientation for later use in reading
67+
self._data_orientation = vhdr_header["Common Infos"]["DataOrientation"]
68+
if self._data_orientation not in ("MULTIPLEXED", "VECTORIZED"):
6669
raise NeoReadWriteError(
67-
f"Only `MULTIPLEXED` is implemented. Current Orientation is {vhdr_header['Common Infos']['DataOrientation']}"
70+
f"Data orientation must be either `MULTIPLEXED` or `VECTORIZED`. Current Orientation is {self._data_orientation}"
6871
)
6972

7073
nb_channel = int(vhdr_header["Common Infos"]["NumberOfChannels"])
@@ -87,7 +90,15 @@ def _parse_header(self):
8790
buffer_id = "0"
8891
self._buffer_descriptions = {0: {0: {}}}
8992
self._stream_buffer_slice = {}
90-
shape = get_memmap_shape(binary_filename, sig_dtype, num_channels=nb_channel, offset=0)
93+
94+
# Calculate the shape based on orientation
95+
if self._data_orientation == "MULTIPLEXED":
96+
shape = get_memmap_shape(binary_filename, sig_dtype, num_channels=nb_channel, offset=0)
97+
else: # VECTORIZED
98+
# For VECTORIZED, data is stored as [all_samples_ch1, all_samples_ch2, ...]
99+
# We still report shape as (num_samples, num_channels) for compatibility
100+
shape = get_memmap_shape(binary_filename, sig_dtype, num_channels=nb_channel, offset=0)
101+
91102
self._buffer_descriptions[0][0][buffer_id] = {
92103
"type": "raw",
93104
"file_path": binary_filename,
@@ -98,6 +109,9 @@ def _parse_header(self):
98109
}
99110
self._stream_buffer_slice[stream_id] = None
100111

112+
# Store number of channels for VECTORIZED reading
113+
self._nb_channel = nb_channel
114+
101115
signal_buffers = np.array([("Signals", "0")], dtype=_signal_buffer_dtype)
102116
signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype)
103117

@@ -239,6 +253,67 @@ def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index)
239253
def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id):
    """
    Return the stored description of one analog-signal buffer.

    The description is looked up in ``self._buffer_descriptions``, which is
    keyed first by block index, then by segment index, then by buffer id.
    The stored object itself is returned (no copy is made).
    """
    segment_descriptions = self._buffer_descriptions[block_index][seg_index]
    return segment_descriptions[buffer_id]
241255

256+
def _get_analogsignal_chunk(
    self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes
):
    """
    Read a chunk of raw (unscaled) samples for either data orientation.

    MULTIPLEXED data: ch1_s1, ch2_s1, ..., chN_s1, ch1_s2, ch2_s2, ...
        Delegated unchanged to the parent class implementation.
    VECTORIZED data: ch1_s1, ch1_s2, ..., ch1_sM, ch2_s1, ch2_s2, ..., ch2_sM, ...
        Each requested channel is read from its own contiguous region of
        the binary file.

    Parameters
    ----------
    block_index, seg_index : int
        Block and segment whose buffer is read.
    i_start, i_stop : int | None
        Half-open sample range ``[i_start, i_stop)``. ``None`` means the
        first sample / one past the last sample respectively.
    stream_index : int
        Index into ``self.header["signal_streams"]``.
    channel_indexes : None | slice | int | sequence of int
        Channels to return, in the given order. ``None`` selects every
        channel. Negative indexes count from the last channel.

    Returns
    -------
    np.ndarray
        Array of shape ``(i_stop - i_start, n_selected_channels)`` in the
        buffer's on-disk dtype.

    Raises
    ------
    ValueError
        If the sample range is not within ``0 <= i_start <= i_stop <=
        samples_per_channel``, or if the file holds fewer samples than
        the buffer description promises.
    IndexError
        If a channel index is outside ``[-nb_channel, nb_channel)``.
    """
    if self._data_orientation == "MULTIPLEXED":
        # Interleaved layout is what the parent implementation handles.
        return super()._get_analogsignal_chunk(
            block_index, seg_index, i_start, i_stop, stream_index, channel_indexes
        )

    # ---- VECTORIZED implementation ----
    buffer_id = self.header["signal_streams"][stream_index]["buffer_id"]
    buffer_desc = self.get_analogsignal_buffer_description(block_index, seg_index, buffer_id)
    dtype = np.dtype(buffer_desc["dtype"])
    total_samples_per_channel = int(buffer_desc["shape"][0])

    # Compare against None explicitly: `i_stop or total` would turn a
    # legitimate i_stop == 0 (empty request) into "read everything".
    i_start = 0 if i_start is None else int(i_start)
    i_stop = total_samples_per_channel if i_stop is None else int(i_stop)

    # Every channel occupies exactly `total_samples_per_channel` samples on
    # disk, so a range that leaves [0, total] would silently read samples
    # belonging to a neighbouring channel. Refuse it instead.
    if not 0 <= i_start <= i_stop <= total_samples_per_channel:
        raise ValueError(
            f"Invalid sample range [{i_start}, {i_stop}) for a buffer with "
            f"{total_samples_per_channel} samples per channel"
        )
    num_samples = i_stop - i_start

    # Resolve the channel selection to a flat array of non-negative indexes.
    nb_channel = int(self._nb_channel)
    if channel_indexes is None:
        selected_channels = np.arange(nb_channel)
    elif isinstance(channel_indexes, slice):
        selected_channels = np.arange(nb_channel)[channel_indexes]
    else:
        selected_channels = np.asarray(channel_indexes, dtype=np.int64).reshape(-1)
        out_of_range = (selected_channels < -nb_channel) | (selected_channels >= nb_channel)
        if out_of_range.any():
            raise IndexError(
                f"Channel index out of range for {nb_channel} channels: "
                f"{selected_channels[out_of_range].tolist()}"
            )
        # Map negative indexes onto their positive equivalents.
        selected_channels = np.where(selected_channels < 0, selected_channels + nb_channel, selected_channels)

    raw_sigs = np.empty((num_samples, selected_channels.size), dtype=dtype)
    if num_samples == 0 or selected_channels.size == 0:
        # Nothing to read; avoid touching the file at all.
        return raw_sigs

    # Open the file on first use and cache the handle for later reads.
    # NOTE(review): nothing in this method closes the handle - confirm the
    # base class releases the entries of `_memmap_analogsignal_buffers`.
    if not hasattr(self, "_memmap_analogsignal_buffers"):
        self._memmap_analogsignal_buffers = {}
    segment_files = self._memmap_analogsignal_buffers.setdefault(block_index, {}).setdefault(seg_index, {})
    fid = segment_files.get(buffer_id)
    if fid is None:
        fid = open(buffer_desc["file_path"], mode="rb")
        segment_files[buffer_id] = fid

    # Loop-invariant byte arithmetic, done with plain Python ints so that
    # offsets into very large files cannot overflow a fixed-width integer.
    itemsize = dtype.itemsize
    channel_stride = total_samples_per_channel * itemsize
    first_sample_offset = int(buffer_desc["file_offset"]) + i_start * itemsize

    # `.tolist()` yields Python ints (see overflow note above).
    for column, chan_idx in enumerate(selected_channels.tolist()):
        fid.seek(first_sample_offset + chan_idx * channel_stride)
        values = np.fromfile(fid, dtype=dtype, count=num_samples)
        if values.size != num_samples:
            # Truncated file: report it instead of failing on a shape mismatch.
            raise ValueError(
                f"Expected {num_samples} samples for channel {chan_idx} but "
                f"only {values.size} could be read from {buffer_desc['file_path']}"
            )
        raw_sigs[:, column] = values

    return raw_sigs
316+
242317
def _ensure_filename(self, filename, kind, entry_name):
243318
if not os.path.exists(filename):
244319
# file not found, subsequent import stage would fail

0 commit comments

Comments
 (0)