
Commit 613e151 ("fix")

2 parents: 72907f8 + 01f5577

7 files changed: +826 / -242 lines

neo/rawio/blackrockrawio.py (94 additions, 35 deletions)
@@ -290,12 +290,23 @@ def _parse_header(self):
         self._nsx_basic_header = {}
         self._nsx_ext_header = {}
         self._nsx_data_header = {}
+        self._nsx_sampling_frequency = {}

+        # Read headers
         for nsx_nb in self._avail_nsx:
             spec_version = self._nsx_spec[nsx_nb] = self._extract_nsx_file_spec(nsx_nb)
             # read nsx headers
             self._nsx_basic_header[nsx_nb], self._nsx_ext_header[nsx_nb] = self._read_nsx_header(spec_version, nsx_nb)

+            # Blackrock defines the period as the number of 1/30_000 seconds between data points,
+            # e.g. it is 1 for 30_000 Hz, 3 for 10_000 Hz, etc.
+            nsx_period = self._nsx_basic_header[nsx_nb]["period"]
+            sampling_rate = 30_000.0 / nsx_period
+            self._nsx_sampling_frequency[nsx_nb] = float(sampling_rate)
+
+        # Parse data packets
+        for nsx_nb in self._avail_nsx:
+
             # The only way to know if it is the Precision Time Protocol of file spec 3.0
             # is to check for nanosecond timestamp resolution.
             is_ptp_variant = (
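
The cached rate above follows directly from the header's `period` field. A minimal sketch of that relationship (the header values here are hypothetical, not read from any file):

```python
# Blackrock NSX headers store "period" as the number of 1/30_000 s ticks
# between samples, so the sampling rate is 30_000 / period.
NSX_CLOCK_HZ = 30_000.0

for period in (1, 3, 15):  # hypothetical header values
    print(f"period={period} -> {NSX_CLOCK_HZ / period:.0f} Hz")
# period=1 -> 30000 Hz, period=3 -> 10000 Hz, period=15 -> 2000 Hz
```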
@@ -352,7 +363,10 @@ def _parse_header(self):
             self._match_nsx_and_nev_segment_ids(nsx_nb)

         self.nsx_datas = {}
-        self.sig_sampling_rates = {}
+        # Keep the public attribute for backward compatibility, but use the private one
+        # internally; this may be deprecated at some point.
+        self.sig_sampling_rates = {
+            nsx_number: self._nsx_sampling_frequency[nsx_number] for nsx_number in self.nsx_to_load
+        }
         if len(self.nsx_to_load) > 0:
             for nsx_nb in self.nsx_to_load:
                 basic_header = self._nsx_basic_header[nsx_nb]
@@ -369,8 +383,7 @@ def _parse_header(self):
                     data_spec = spec_version
                 self.nsx_datas[nsx_nb] = self._read_nsx_data(data_spec, nsx_nb)

-                sr = float(self.main_sampling_rate / basic_header["period"])
-                self.sig_sampling_rates[nsx_nb] = sr
+                sr = self._nsx_sampling_frequency[nsx_nb]

                 if spec_version in ["2.2", "2.3", "3.0"]:
                     ext_header = self._nsx_ext_header[nsx_nb]
@@ -439,7 +452,7 @@ def _parse_header(self):
                 length = self.nsx_datas[nsx_nb][data_bl].shape[0]
                 if self._nsx_data_header[nsx_nb] is None:
                     t_start = 0.0
-                    t_stop = max(t_stop, length / self.sig_sampling_rates[nsx_nb])
+                    t_stop = max(t_stop, length / self._nsx_sampling_frequency[nsx_nb])
                 else:
                     timestamps = self._nsx_data_header[nsx_nb][data_bl]["timestamp"]
                     if hasattr(timestamps, "size") and timestamps.size == length:
@@ -448,7 +461,7 @@ def _parse_header(self):
                         t_stop = max(t_stop, timestamps[-1] / ts_res + sec_per_samp)
                     else:
                         t_start = timestamps / ts_res
-                        t_stop = max(t_stop, t_start + length / self._nsx_sampling_frequency[nsx_nb])
+                        t_stop = max(t_stop, t_start + length / self._nsx_sampling_frequency[nsx_nb])
                 self._sigs_t_starts[nsx_nb].append(t_start)

         if self._avail_files["nev"]:
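
As a worked example of the `t_stop` arithmetic in these two hunks (all values hypothetical): a block of 90_000 samples at 10 kHz that starts 2.5 s into the recording ends at 11.5 s.

```python
# Hypothetical values illustrating the t_stop computation above
length = 90_000                # samples in the data block
sampling_frequency = 10_000.0  # Hz, i.e. period == 3
t_start = 2.5                  # seconds, derived from the data-header timestamp
t_stop = t_start + length / sampling_frequency
assert t_stop == 11.5          # the block spans 9 s starting at 2.5 s
```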
@@ -964,36 +977,82 @@ def _read_nsx_dataheader_ptp(self, nsx_nb, filesize=None, offset=None):

         if offset is None:
             # This is read as a uint32 numpy scalar from the header, so we transform it to a python int
-            offset = int(self._nsx_basic_header[nsx_nb]["bytes_in_headers"])
+            header_size = int(self._nsx_basic_header[nsx_nb]["bytes_in_headers"])
+        else:
+            header_size = offset

         # Use the dictionary for PTP data type
         channel_count = int(self._nsx_basic_header[nsx_nb]["channel_count"])
         ptp_dt = NSX_DATA_HEADER_TYPES["3.0-ptp"](channel_count)
-        npackets = int((filesize - offset) / np.dtype(ptp_dt).itemsize)
-        struct_arr = np.memmap(filename, dtype=ptp_dt, shape=npackets, offset=offset, mode="r")
+        npackets = int((filesize - header_size) / np.dtype(ptp_dt).itemsize)
+        struct_arr = np.memmap(filename, dtype=ptp_dt, shape=npackets, offset=header_size, mode="r")

         if not np.all(struct_arr["num_data_points"] == 1):
             # some packets have more than 1 sample. Not actually ptp. Revert to non-ptp variant.
-            return self._read_nsx_dataheader_standard("3.0", nsx_nb, filesize=filesize, offset=offset)
-
-        # It is still possible there was a data break and the file has multiple segments.
-        # We can no longer rely on the presence of a header indicating a new segment,
-        # so we look for timestamp differences greater than double the expected interval.
-        _period = self._nsx_basic_header[nsx_nb]["period"]  # 30_000 ^-1 s per sample
-        _nominal_rate = 30_000 / _period  # samples per sec; maybe 30_000 should be ["sample_resolution"]
-        _clock_rate = self._nsx_basic_header[nsx_nb]["timestamp_resolution"]  # clocks per sec
-        clk_per_samp = _clock_rate / _nominal_rate  # clk/sec / smp/sec = clk/smp
-        seg_thresh_clk = int(2 * clk_per_samp)
-        seg_starts = np.hstack((0, 1 + np.argwhere(np.diff(struct_arr["timestamps"]) > seg_thresh_clk).flatten()))
-        for seg_ix, seg_start_idx in enumerate(seg_starts):
-            if seg_ix < (len(seg_starts) - 1):
-                seg_stop_idx = seg_starts[seg_ix + 1]
-            else:
-                seg_stop_idx = len(struct_arr) - 1
-            seg_offset = offset + seg_start_idx * struct_arr.dtype.itemsize
-            num_data_pts = seg_stop_idx - seg_start_idx
+            return self._read_nsx_dataheader_standard("3.0", nsx_nb, filesize=filesize, offset=header_size)
+
+        # Segment the data: at the moment we split wherever consecutive samples
+        # are farther apart than twice the sampling period.
+        sampling_rate = self._nsx_sampling_frequency[nsx_nb]
+        segmentation_threshold = 2.0 / sampling_rate
+
+        # The raw timestamps are the indices of an ideal clock that ticks at `timestamp_resolution` times per second.
+        # We convert these indices to actual timestamps in seconds.
+        raw_timestamps = struct_arr["timestamps"]
+        timestamps_sampling_rate = self._nsx_basic_header[nsx_nb][
+            "timestamp_resolution"
+        ]  # clocks per sec, uint64 or uint32
+        timestamps_in_seconds = raw_timestamps / timestamps_sampling_rate
+
+        time_differences = np.diff(timestamps_in_seconds)
+        gap_indices = np.argwhere(time_differences > segmentation_threshold).flatten()
+        segment_starts = np.hstack((0, 1 + gap_indices))
+
+        # Report gaps if any are found
+        if len(gap_indices) > 0:
+            import warnings
+
+            threshold_ms = segmentation_threshold * 1000
+
+            # Calculate all gap details in vectorized operations
+            gap_durations_seconds = time_differences[gap_indices]
+            gap_durations_ms = gap_durations_seconds * 1000
+            gap_positions_seconds = timestamps_in_seconds[gap_indices] - timestamps_in_seconds[0]
+
+            # Build gap detail lines all at once
+            gap_detail_lines = [
+                f"| {index:>15,} | {pos:>21.6f} | {dur:>21.3f} |\n"
+                for index, pos, dur in zip(gap_indices, gap_positions_seconds, gap_durations_ms)
+            ]
+
+            segmentation_report_message = (
+                f"\nFound {len(gap_indices)} gaps for nsx {nsx_nb} where samples are farther apart than {threshold_ms:.3f} ms.\n"
+                f"Data will be segmented at these locations to create {len(segment_starts)} segments.\n\n"
+                "Gap Details:\n"
+                "+-----------------+-----------------------+-----------------------+\n"
+                "|    Sample Index |             Sample at |              Gap Jump |\n"
+                "|                 |             (Seconds) |        (Milliseconds) |\n"
+                "+-----------------+-----------------------+-----------------------+\n"
+                + "".join(gap_detail_lines)
+                + "+-----------------+-----------------------+-----------------------+\n"
+            )
+            warnings.warn(segmentation_report_message)
+
+        # Calculate all segment boundaries and derived values in one operation
+        segment_boundaries = list(segment_starts) + [len(struct_arr) - 1]
+        segment_num_data_points = [
+            segment_boundaries[i + 1] - segment_boundaries[i] for i in range(len(segment_starts))
+        ]
+
+        size_of_data_block = struct_arr.dtype.itemsize
+        segment_offsets = [header_size + pos * size_of_data_block for pos in segment_starts]
+
+        num_segments = len(segment_starts)
+        for segment_index in range(num_segments):
+            seg_offset = segment_offsets[segment_index]
+            num_data_pts = segment_num_data_points[segment_index]
             seg_struct_arr = np.memmap(filename, dtype=ptp_dt, shape=num_data_pts, offset=seg_offset, mode="r")
-            data_header[seg_ix] = {
+            data_header[segment_index] = {
                 "header": None,
                 "timestamp": seg_struct_arr["timestamps"],  # Note, this is an array, not a scalar
                 "nb_data_points": num_data_pts,
@@ -1028,7 +1087,7 @@ def _read_nsx_data_v21(self, nsx_nb):
         """
         Extract nsx data from a 2.1 .nsx file
         """
-        filename = ".".join([self._filenames["nsx"], f"ns{nsx_nb}"])
+        filename = f"{self._filenames['nsx']}.ns{nsx_nb}"

         # get shape of data
         shape = (
@@ -1071,7 +1130,7 @@ def _read_nsx_data_ptp(self, nsx_nb):
         yielding a timestamp per sample. Blocks can arise
         if the recording was paused by the user.
         """
-        filename = ".".join([self._filenames["nsx"], f"ns{nsx_nb}"])
+        filename = f"{self._filenames['nsx']}.ns{nsx_nb}"

         # Use the dictionary for PTP data type
         channel_count = int(self._nsx_basic_header[nsx_nb]["channel_count"])
@@ -1146,10 +1205,6 @@ def _read_nev_header(self, spec, filename):

         nev_basic_header = np.fromfile(filename, count=1, dtype=dt0)[0]

-        # Get extended header types for this spec
-        header_types = NEV_EXT_HEADER_TYPES_BY_SPEC[spec]
-
-        # extended header reading
         shape = nev_basic_header["nb_ext_headers"]
         offset_dt0 = np.dtype(dt0).itemsize

@@ -1158,6 +1213,10 @@ def _read_nev_header(self, spec, filename):

         raw_ext_header = np.memmap(filename, offset=offset_dt0, dtype=dt1, shape=shape, mode="r")

+
+        # Get extended header types for this spec
+        header_types = NEV_EXT_HEADER_TYPES_BY_SPEC[spec]
+
         # Parse extended headers by packet type
         # Strategy: view() entire array first, then mask for efficiency
         # Since all NEV extended header packets are fixed-width (32 bytes), temporarily
23992458
# PTP variant has a completely different structure with samples embedded
24002459
"3.0-ptp": lambda channel_count: [
24012460
("reserved", "uint8"),
2402-
("timestamps", "uint64"),
2461+
("timestamps", "uint64"),
24032462
("num_data_points", "uint32"),
2404-
("samples", "int16", channel_count)
2463+
("samples", "int16", (channel_count,))
24052464
]
24062465
}
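
On the `(channel_count,)` change: for counts greater than one, a bare integer and a 1-tuple are equivalent in a numpy dtype spec, but numpy treats a bare count of 1 as a deprecated synonym for a scalar field, so the tuple form is the safe way to always get a subarray. A quick check (assumes only numpy):

```python
import numpy as np

# The tuple form always yields a subarray field, even for a single channel.
dt_one = np.dtype([("samples", "int16", (1,))])
print(dt_one["samples"].shape)   # (1,) -> one value per packet, kept as an array

dt_many = np.dtype([("samples", "int16", (4,))])
print(dt_many["samples"].shape)  # (4,)
```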

neo/rawio/brainvisionrawio.py (48 additions, 0 deletions)
@@ -55,6 +55,9 @@ def _parse_header(self):
         marker_filename = self.filename.replace(bname, vhdr_header["Common Infos"]["MarkerFile"])
         binary_filename = self.filename.replace(bname, vhdr_header["Common Infos"]["DataFile"])

+        marker_filename = self._ensure_filename(marker_filename, "marker", "MarkerFile")
+        binary_filename = self._ensure_filename(binary_filename, "data", "DataFile")
+
         if vhdr_header["Common Infos"]["DataFormat"] != "BINARY":
             raise NeoReadWriteError(
                 f"Only `BINARY` format has been implemented. Current Data Format is {vhdr_header['Common Infos']['DataFormat']}"
@@ -236,6 +239,51 @@ def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index)
     def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id):
         return self._buffer_descriptions[block_index][seg_index][buffer_id]

+    def _ensure_filename(self, filename, kind, entry_name):
+        if not os.path.exists(filename):
+            # file not found; the subsequent import stage would fail
+            ext = os.path.splitext(filename)[1]
+            # Check if we can fall back to a file with the same prefix as the .vhdr.
+            # This can happen when users rename their files but forget to edit the
+            # .vhdr file to fix the path reference to the binary and marker files,
+            # in which case the import will fail. These files come in triples, like
+            # myfile.vhdr, myfile.eeg, and myfile.vmrk; this code will thus pick
+            # the next best alternative.
+            alt_name = self.filename.replace(".vhdr", ext)
+            if os.path.exists(alt_name):
+                self.logger.warning(
+                    f"The {kind} file {filename} was not found, but found a file whose "
+                    f"prefix matched the .vhdr ({os.path.basename(alt_name)}). Using "
+                    f"this file instead."
+                )
+                filename = alt_name
+            else:
+                # we neither found the file referenced in the .vhdr file nor a file with
+                # the same name as the header and the desired extension; most likely a
+                # file went missing or was renamed in an inconsistent fashion; generate
+                # a useful error message
+                header_dname = os.path.dirname(self.filename)
+                header_bname = os.path.basename(self.filename)
+                referenced_bname = os.path.basename(filename)
+                alt_bname = os.path.basename(alt_name)
+                if alt_bname != referenced_bname:
+                    # this is only needed when the two candidate file names differ
+                    detail = f" is named either as per the {entry_name}={referenced_bname} line in the .vhdr file, or"
+                else:
+                    # we omit it when we can, to make the message less confusing
+                    detail = ""
+                self.logger.error(
+                    f"Did not find the {kind} file associated with .vhdr (header) "
+                    f"file {header_bname!r} in folder {header_dname!r}.\n Please make "
+                    f"sure the file{detail} is named the same way as the .vhdr file, but "
+                    f"ending in {ext} (i.e. {alt_bname}).\n The import will likely fail, "
+                    f"but if it goes through, you can ignore this message (the check "
+                    f"can misfire on networked file systems)."
+                )
+        return filename
+

 def read_brainvsion_soup(filename):
     with open(filename, "r", encoding="utf8") as f:
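
The fallback rule `_ensure_filename` implements can also be shown standalone. A minimal sketch under the assumption that a renamed triple kept the .vhdr prefix; the helper name and paths are hypothetical:

```python
import os

# If the file the .vhdr references is missing, try a sibling that shares
# the .vhdr prefix but carries the referenced file's extension.
def pick_candidate(vhdr_path, referenced_path):
    if os.path.exists(referenced_path):
        return referenced_path
    ext = os.path.splitext(referenced_path)[1]  # e.g. ".eeg" or ".vmrk"
    alt = vhdr_path.replace(".vhdr", ext)       # myfile.vhdr -> myfile.eeg
    return alt if os.path.exists(alt) else referenced_path

# e.g. pick_candidate("/data/myfile.vhdr", "/data/old_name.eeg")
# returns "/data/myfile.eeg" when that sibling exists on disk.
```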
