Merge branch 'master' into spikeglx_sync_separate

h-mayorquin · web-flow · commit f894e9ab9c2c · 2025-04-10T08:57:35.000-06:00
diff --git a/.github/workflows/core-test.yml b/.github/workflows/core-test.yml
@@ -26,17 +26,13 @@ jobs:
       matrix:
         os: ["ubuntu-latest", "windows-latest", "macos-latest"]
         python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
-        numpy-version: ['1.22.4', '1.23.5', '1.24.4', '1.25.1', '1.26.4', '2.0.2','2.1']
-        # numpy 1.22: 3.10, 1.23: 3.11, 1.24: 3.11, 1.25: 3.11, 1.26: 3.12
+        numpy-version: ['1.24.4', '1.25.1', '1.26.4', '2.0.2','2.1.3', '2.2.4']
+        # latest Python supported by each numpy release -> 1.24: 3.11, 1.25: 3.11, 1.26: 3.12
         exclude:
            - python-version: '3.9'
-             numpy-version: '2.1'
-           - python-version: '3.11'
-             numpy-version: '1.22.4'
-           - python-version: '3.12'
-             numpy-version: '1.22.4'
-           - python-version: '3.12'
-             numpy-version: '1.23.5'
+             numpy-version: '2.1.3'
+           - python-version: '3.9'
+             numpy-version: '2.2.4'
            - python-version: '3.12'
              numpy-version: '1.24.4'
            - python-version: '3.12'
diff --git a/neo/io/biocamio.py b/neo/io/biocamio.py
@@ -6,6 +6,7 @@ class BiocamIO(BiocamRawIO, BaseFromRaw):
     __doc__ = BiocamRawIO.__doc__
     mode = "file"
 
-    def __init__(self, filename):
-        BiocamRawIO.__init__(self, filename=filename)
+    def __init__(self, filename, fill_gaps_strategy=None):
+        # fill_gaps_strategy is forwarded as-is to BiocamRawIO. With the None default used here,
+        # reading signals from an event-based compressed file raises a ValueError until
+        # "zeros" or "synthetic_noise" is chosen explicitly.
+        BiocamRawIO.__init__(self, filename=filename,
+                             fill_gaps_strategy=fill_gaps_strategy)
         BaseFromRaw.__init__(self, filename)
diff --git a/neo/rawio/biocamrawio.py b/neo/rawio/biocamrawio.py
@@ -18,6 +18,10 @@
     _spike_channel_dtype,
     _event_channel_dtype,
 )
+
+import numpy as np
+import json
+import warnings
 from neo.core import NeoReadWriteError
 
 
@@ -29,6 +33,14 @@ class BiocamRawIO(BaseRawIO):
     ----------
     filename: str, default: ''
         The *.h5 file to be read
+    fill_gaps_strategy: "zeros" | "synthetic_noise" | None, default: "zeros"
+        The strategy used to fill the gaps in the data when the file uses event-based
+        compression. If None and the file is event-based compressed, reading the signal
+        raises a ValueError asking you to choose one of the following strategies:
+
+        * "zeros": the gaps are filled with unsigned 0s (2048). This value is the "0" of the unsigned 12 bits
+                   representation of the data.
+        * "synthetic_noise": the gaps are filled with synthetic noise.
 
     Examples
     --------
@@ -49,9 +61,10 @@ class BiocamRawIO(BaseRawIO):
     extensions = ["h5", "brw"]
     rawmode = "one-file"
 
-    def __init__(self, filename=""):
+    def __init__(self, filename="", fill_gaps_strategy="zeros"):
         BaseRawIO.__init__(self)
         self.filename = filename
+        # Only consulted in _get_analogsignal_chunk when the file is read through the
+        # event-based (sparse) reader; validated there, not here.
+        self._fill_gaps_strategy = fill_gaps_strategy
 
     def _source_name(self):
         return self.filename
@@ -130,7 +143,24 @@ def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, strea
             i_stop = self._num_frames
 
         # read functions are different based on the version of biocam
-        data = self._read_function(self._filehandle, i_start, i_stop, self._num_channels)
+        if self._read_function is readHDF5t_brw4_sparse:
+            if self._fill_gaps_strategy is None:
+                raise ValueError(
+                    "Please set `fill_gaps_strategy` to 'zeros' or 'synthetic_noise'."
+                )
+            if self._fill_gaps_strategy == "synthetic_noise":
+                warnings.warn("Event-based compression : gaps will be filled with synthetic noise. "
+                              "Set `fill_gaps_strategy` to 'zeros' to fill gaps with 0s.")
+                use_synthetic_noise = True
+            elif self._fill_gaps_strategy == "zeros":
+                use_synthetic_noise = False
+            else:
+                raise ValueError("`fill_gaps_strategy` must be 'zeros' or 'synthetic_noise'")
+
+            data = self._read_function(self._filehandle, i_start, i_stop, self._num_channels,
+                                       use_synthetic_noise=use_synthetic_noise)
+        else:
+            data = self._read_function(self._filehandle, i_start, i_stop, self._num_channels)
 
         # older style data returns array of (n_samples, n_channels), should be a view
         # but if memory issues come up we should doublecheck out how the file is being stored
@@ -243,15 +273,21 @@ def open_biocam_file_header(filename) -> dict:
         min_digital = experiment_settings["ValueConverter"]["MinDigitalValue"]
         scale_factor = experiment_settings["ValueConverter"]["ScaleFactor"]
         sampling_rate = experiment_settings["TimeConverter"]["FrameRate"]
+        num_frames = rf['TOC'][-1,-1]
 
         num_channels = None
-        for key in rf:
-            if key[:5] == "Well_":
-                num_channels = len(rf[key]["StoredChIdxs"])
-                if len(rf[key]["Raw"]) % num_channels:
-                    raise NeoReadWriteError(f"Length of raw data array is not multiple of channel number in {key}")
-                num_frames = len(rf[key]["Raw"]) // num_channels
-                break
+        well_ID = None
+        for well_ID in rf:
+            if well_ID.startswith("Well_"):
+                num_channels = len(rf[well_ID]["StoredChIdxs"])
+                if "Raw" in rf[well_ID]:
+                    if len(rf[well_ID]["Raw"]) % num_channels:
+                        raise NeoReadWriteError(f"Length of raw data array is not multiple of channel number in {well_ID}")
+                    num_frames = len(rf[well_ID]["Raw"]) // num_channels
+                    break
+                elif "EventsBasedSparseRaw" in rf[well_ID]:
+                    # Not sure how to check for this with sparse data
+                    pass
 
         if num_channels is not None:
             num_channels_x = num_channels_y = int(np.sqrt(num_channels))
@@ -264,7 +300,10 @@ def open_biocam_file_header(filename) -> dict:
 
         gain = scale_factor * (max_uv - min_uv) / (max_digital - min_digital)
         offset = min_uv
-        read_function = readHDF5t_brw4
+        if "Raw" in rf[well_ID]:
+            read_function = readHDF5t_brw4
+        elif "EventsBasedSparseRaw" in rf[well_ID]:
+            read_function = readHDF5t_brw4_sparse
 
         return dict(
             file_handle=rf,
@@ -302,5 +341,120 @@ def readHDF5t_101_i(rf, t0, t1, nch):
 
 def readHDF5t_brw4(rf, t0, t1, nch):
+    """
+    Return the flat "Raw" samples for frames [t0, t1) of the first "Well_*" group found in `rf`.
+
+    The slice runs from nch * t0 to nch * t1, i.e. `nch` consecutive values are taken per frame.
+    Returns None implicitly when `rf` holds no key starting with "Well_".
+    """
     for key in rf:
-        if key[:5] == "Well_":
+        if key.startswith("Well_"):
             return rf[key]["Raw"][nch * t0 : nch * t1]
+
+
+def readHDF5t_brw4_sparse(rf, t0, t1, nch, use_synthetic_noise=False):
+    """
+    Read frames [t0, t1) from a brw 4.x file stored with event-based (sparse) compression.
+
+    Parameters
+    ----------
+    rf : mapping-like file handle
+        Opened file; the first key starting with "Well_" is used as the well to read.
+    t0 : int
+        First frame to read (inclusive).
+    t1 : int
+        Last frame to read (exclusive).
+    nch : int
+        Number of channels, i.e. number of rows of the intermediate buffer.
+    use_synthetic_noise : bool, default: False
+        If True the gaps between events are pre-filled with synthetic Gaussian noise,
+        otherwise with the constant 2048.
+
+    Returns
+    -------
+    data : np.ndarray
+        Array of shape (t1 - t0, nch), dtype uint16.
+
+    Raises
+    ------
+    NeoReadWriteError
+        If `rf` contains no "Well_*" group.
+    """
+    num_frames = t1 - t0
+
+    well_ID = next((key for key in rf if key.startswith("Well_")), None)
+    if well_ID is None:
+        raise NeoReadWriteError("No 'Well_*' group found in the file")
+
+    if use_synthetic_noise:
+        # every channel row is overwritten by the noise generator
+        data = np.zeros((nch, num_frames), dtype=np.uint16)
+        data = generate_synthetic_noise(rf, data, well_ID, t0, num_frames)
+    else:
+        # 2048 is the "0" of the unsigned 12 bits representation: it reads as 0 after signed conversion
+        data = np.full((nch, num_frames), 2048, dtype=np.uint16)
+
+    # overwrite the gaps filler with the decoded event based sparse raw data
+    data = decode_event_based_raw_data(rf, data, well_ID, t0, num_frames)
+
+    return data.T
+
+
+def decode_event_based_raw_data(rf, data, well_ID, start_frame, num_frames):
+    """
+    Decode the event-based sparse raw data of one well into `data`, restricted to the requested frames.
+
+    The byte stream is parsed as a sequence of channel records: a 4-byte little-endian channel
+    index, a 4-byte little-endian payload length in bytes, then one or more ranges. Each range
+    is two 8-byte little-endian frame bounds [from, to) followed by (to - from) 2-byte
+    little-endian samples.
+
+    Parameters
+    ----------
+    rf : mapping-like file handle
+        Must expose "TOC" and, under `well_ID`, "EventsBasedSparseRawTOC" and "EventsBasedSparseRaw".
+    data : np.ndarray
+        Buffer indexed as data[channel][frame - start_frame]; decoded samples are written in place.
+    well_ID : str
+        Key of the well group inside `rf`.
+    start_frame : int
+        First requested frame (inclusive).
+    num_frames : int
+        Number of requested frames.
+
+    Returns
+    -------
+    data : np.ndarray
+        The input buffer, with the decoded samples written over the gaps filler.
+    """
+    # Source: Documentation by 3Brain
+    # https://gin.g-node.org/NeuralEnsemble/ephy_testing_data/src/master/biocam/documentation_brw_4.x_bxr_3.x_bcmp_1.x_in_brainwave_5.x_v1.1.3.pdf
+    # collect the TOCs
+    toc = np.array(rf["TOC"])
+    events_toc = np.array(rf[well_ID]["EventsBasedSparseRawTOC"])
+    stop_frame = start_frame + num_frames
+    # from the given start position and duration in frames, localize the corresponding event positions
+    # using the TOC
+    # NOTE(review): when the window reaches the last TOC chunk the end index is clamped to
+    # len(toc) - 1, so the slice stops at the *start* of that chunk's events — confirm whether
+    # this drops the events of the last chunk.
+    toc_start_idx = np.searchsorted(toc[:, 1], start_frame)
+    toc_end_idx = min(np.searchsorted(toc[:, 1], stop_frame, side="right") + 1, len(toc) - 1)
+    events_start_pos = events_toc[toc_start_idx]
+    events_end_pos = events_toc[toc_end_idx]
+    # read all the bytes for the given well ID and time interval once, as an immutable buffer
+    binary_data = bytes(rf[well_ID]["EventsBasedSparseRaw"][events_start_pos:events_end_pos])
+    binary_data_length = len(binary_data)
+    pos = 0
+    while pos < binary_data_length:
+        ch_idx = int.from_bytes(binary_data[pos : pos + 4], byteorder="little")
+        ch_data_length = int.from_bytes(binary_data[pos + 4 : pos + 8], byteorder="little")
+        pos += 8
+        ch_data_end = pos + ch_data_length
+        while pos < ch_data_end:
+            from_inclusive = int.from_bytes(binary_data[pos : pos + 8], byteorder="little")
+            to_exclusive = int.from_bytes(binary_data[pos + 8 : pos + 16], byteorder="little")
+            pos += 16
+            # keep only the part of the range that overlaps the requested window and copy it
+            # in one vectorized step instead of decoding sample by sample
+            first = max(from_inclusive, int(start_frame))
+            last = min(to_exclusive, int(stop_frame))
+            if first < last:
+                samples = np.frombuffer(
+                    binary_data,
+                    dtype="<u2",
+                    count=last - first,
+                    offset=pos + 2 * (first - from_inclusive),
+                )
+                data[ch_idx][first - start_frame : last - start_frame] = samples
+            # always skip the whole range payload, whether or not it was used
+            pos += (to_exclusive - from_inclusive) * 2
+
+    return data
+
+def generate_synthetic_noise(rf, data, well_ID, start_frame, num_frames):
+    """
+    Overwrite every channel of `data` with Gaussian noise drawn from the noise statistics stored in the file.
+
+    Channels listed in the selected noise block use their own mean and standard deviation;
+    all other channels use the median mean and median standard deviation of that block.
+
+    Parameters
+    ----------
+    rf : mapping-like file handle
+        Must expose "TOC" and, under `well_ID`, "NoiseTOC", "NoiseChIdxs", "NoiseMean" and "NoiseStdDev".
+    data : np.ndarray
+        Buffer indexed as data[channel]; each row is replaced by `num_frames` samples cast to uint16.
+    well_ID : str
+        Key of the well group inside `rf`.
+    start_frame : int
+        First requested frame, used to locate the matching noise block through the TOC.
+    num_frames : int
+        Number of samples generated per channel.
+
+    Returns
+    -------
+    data : np.ndarray
+        The input buffer, modified in place.
+    """
+    # Source: Documentation by 3Brain
+    # https://gin.g-node.org/NeuralEnsemble/ephy_testing_data/src/master/biocam/documentation_brw_4.x_bxr_3.x_bcmp_1.x_in_brainwave_5.x_v1.1.3.pdf
+    # collect the TOCs
+    toc = np.array(rf["TOC"])
+    noise_toc = np.array(rf[well_ID]["NoiseTOC"])
+    # from the given start position in frames, localize the corresponding noise positions
+    # using the TOC
+    toc_start_idx = np.searchsorted(toc[:, 1], start_frame)
+    noise_start_pos = noise_toc[toc_start_idx]
+    noise_end_pos = noise_start_pos
+    # scan forward until a TOC entry points past the start position: it marks the end of the block.
+    # The break must sit inside the test, otherwise only the very next entry is ever inspected.
+    for next_pos in noise_toc[toc_start_idx + 1 :]:
+        if next_pos > noise_start_pos:
+            noise_end_pos = next_pos
+            break
+    if noise_end_pos == noise_start_pos:
+        # nothing found forward: fall back to the closest earlier block, entry 0 included
+        for i in range(toc_start_idx - 1, -1, -1):
+            previous_pos = noise_toc[i]
+            if previous_pos < noise_start_pos:
+                noise_end_pos = noise_start_pos
+                noise_start_pos = previous_pos
+                break
+    # obtain the noise info at the start position
+    noise_ch_idx = rf[well_ID]["NoiseChIdxs"][noise_start_pos:noise_end_pos]
+    noise_mean = rf[well_ID]["NoiseMean"][noise_start_pos:noise_end_pos]
+    noise_std = rf[well_ID]["NoiseStdDev"][noise_start_pos:noise_end_pos]
+
+    noise_length = noise_end_pos - noise_start_pos
+    noise_info = {}
+    mean_collection = []
+    std_collection = []
+    # NOTE(review): entry 0 of the noise block is skipped here — confirm this is intended
+    # and not an off-by-one.
+    for i in range(1, noise_length):
+        noise_info[noise_ch_idx[i]] = [noise_mean[i], noise_std[i]]
+        mean_collection.append(noise_mean[i])
+        std_collection.append(noise_std[i])
+    # calculate the median mean and standard deviation of all channels to be used for
+    # invalid channels
+    median_mean = np.median(mean_collection)
+    median_std = np.median(std_collection)
+    # fill with Gaussian noise
+    for ch_idx in range(len(data)):
+        if ch_idx in noise_info:
+            ch_mean, ch_std = noise_info[ch_idx]
+        else:
+            ch_mean, ch_std = median_mean, median_std
+        data[ch_idx] = np.array(np.random.normal(ch_mean, ch_std, num_frames), dtype=np.uint16)
+
+    return data
diff --git a/neo/rawio/blackrockrawio.py b/neo/rawio/blackrockrawio.py
@@ -130,6 +130,10 @@ class BlackrockRawIO(BaseRawIO):
     extensions.extend(["nev", "sif", "ccf"])  # 'sif', 'ccf' not yet supported
     rawmode = "multi-file"
 
+    # We need to document the origin of this value
+    main_sampling_rate = 30000.0
+
+
     def __init__(
         self, filename=None, nsx_override=None, nev_override=None, nsx_to_load=None, load_nev=True, verbose=False
     ):
@@ -250,7 +254,6 @@ def __init__(
 
     def _parse_header(self):
 
-        main_sampling_rate = 30000.0
 
         event_channels = []
         spike_channels = []
@@ -298,7 +301,7 @@ def _parse_header(self):
                     # TODO: Double check if this is the correct assumption (10 samples)
                     # default value: threshold crossing after 10 samples of waveform
                     wf_left_sweep = 10
-                    wf_sampling_rate = main_sampling_rate
+                    wf_sampling_rate = self.main_sampling_rate
                     spike_channels.append((name, _id, wf_units, wf_gain, wf_offset, wf_left_sweep, wf_sampling_rate))
 
             # scan events
@@ -392,7 +395,7 @@ def _parse_header(self):
                     _data_reader_fun = self.__nsx_data_reader[spec]
                 self.nsx_datas[nsx_nb] = _data_reader_fun(nsx_nb)
 
-                sr = float(main_sampling_rate / self.__nsx_basic_header[nsx_nb]["period"])
+                sr = float(self.main_sampling_rate / self.__nsx_basic_header[nsx_nb]["period"])
                 self.sig_sampling_rates[nsx_nb] = sr
 
                 if spec in ["2.2", "2.3", "3.0"]:
diff --git a/neo/rawio/neuralynxrawio/nlxheader.py b/neo/rawio/neuralynxrawio/nlxheader.py
@@ -98,7 +98,8 @@ def _to_bool(txt):
         ),
         # Cheetah version 5.6.0, some range of versions in between
         "v5.6.0": dict(
-            datetime1_regex=r"## Time Opened: \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
+            datetime1_regex=r"## Time Opened \(m/d/y\): (?P<date>\S+)" r"  \(h:m:s.ms\) (?P<time>\S+)",
+            datetime2_regex=r"## Time Closed \(m/d/y\): (?P<date>\S+)" r"  \(h:m:s.ms\) (?P<time>\S+)",
             filename_regex=r"## File Name: (?P<filename>\S+)",
             datetimeformat="%m/%d/%Y %H:%M:%S.%f",
         ),
diff --git a/neo/rawio/openephysbinaryrawio.py b/neo/rawio/openephysbinaryrawio.py
@@ -130,6 +130,7 @@ def _parse_header(self):
         # create signals channel map: several channel per stream
         signal_channels = []
         sync_stream_id_to_buffer_id = {}
+        normal_stream_id_to_sync_stream_id = {}
         for stream_index, stream_name in enumerate(sig_stream_names):
             # stream_index is the index in vector stream names
             stream_id = str(stream_index)
@@ -140,6 +141,7 @@ def _parse_header(self):
                 chan_id = chan_info["channel_name"]
 
                 units = chan_info["units"]
+                channel_stream_id = stream_id
                 if units == "":
                     # When units are not provided they are microvolts for neural channels and volts for ADC channels
                     # See https://open-ephys.github.io/gui-docs/User-Manual/Recording-data/Binary-format.html#continuous
@@ -148,14 +150,19 @@ def _parse_header(self):
                 # Special cases for stream
                 if "SYNC" in chan_id and not self.load_sync_channel:
                     # Every stream sync channel is added as its own stream
-                    stream_id = f"{chan_id}-{str(stream_index)}"
-                    sync_stream_id_to_buffer_id[stream_id] = buffer_id
+                    sync_stream_id = f"{stream_name}SYNC"
+                    sync_stream_id_to_buffer_id[sync_stream_id] = buffer_id
+                    
+                    # We save this mapping for the buffer description protocol
+                    normal_stream_id_to_sync_stream_id[stream_id] = sync_stream_id
+                    # We then set the stream_id to the sync stream id
+                    channel_stream_id = sync_stream_id
 
                 if "ADC" in chan_id:
                     # These are non-neural channels and their stream should be separated
                     # We defined their stream_id as the stream_index of neural data plus the number of neural streams
                     # This is to not break backwards compatbility with the stream_id numbering
-                    stream_id = str(stream_index + len(sig_stream_names))
+                    channel_stream_id = str(stream_index + len(sig_stream_names))
 
                 gain = float(chan_info["bit_volts"])
                 sampling_rate = float(info["sample_rate"])
@@ -169,7 +176,7 @@ def _parse_header(self):
                         units,
                         gain,
                         offset,
-                        stream_id,
+                        channel_stream_id,
                         buffer_id,
                     )
                 )
@@ -274,9 +281,8 @@ def _parse_header(self):
                             self._stream_buffer_slice[stream_id] = slice(None, -1)
                             
                             # Add a buffer slice for the sync channel
-                            sync_channel_name = info["channels"][-1]["channel_name"]
-                            stream_name = f"{sync_channel_name}-{str(stream_id)}"
-                            self._stream_buffer_slice[stream_name] = slice(-1, None)
+                            sync_stream_id = normal_stream_id_to_sync_stream_id[stream_id]                            
+                            self._stream_buffer_slice[sync_stream_id] = slice(-1, None)
                         else:
                             self._stream_buffer_slice[stream_id] = None
                     else:
@@ -290,8 +296,8 @@ def _parse_header(self):
                             self._stream_buffer_slice[stream_id_non_neural] = slice(num_neural_channels, -1)
                             
                             # Add a buffer slice for the sync channel
-                            sync_channel_name = info["channels"][-1]["channel_name"]
-                            self._stream_buffer_slice[sync_channel_name] = slice(-1, None)
+                            sync_stream_id = normal_stream_id_to_sync_stream_id[stream_id]                            
+                            self._stream_buffer_slice[sync_stream_id] = slice(-1, None)
                         else:
                             self._stream_buffer_slice[stream_id_non_neural] = slice(num_neural_channels, None)
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,7 +24,7 @@ classifiers = [
 
 dependencies = [
     "packaging",
-    "numpy>=1.22.4",
+    "numpy>=1.24.4",
     "quantities>=0.16.1"
 ]
 

Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ classifiers = [`
`24`	`24`
`25`	`25`	`dependencies = [`
`26`	`26`	`"packaging",`
`27`		`- "numpy>=1.22.4",`
	`27`	`+ "numpy>=1.24.4",`
`28`	`28`	`"quantities>=0.16.1"`
`29`	`29`	`]`
`30`	`30`