
Commit 60101db

h-mayorquin and zm711 authored
Improve intan reader error message for discontinuities (#1484)
* neo error msg
* indentation
* indentation 2
* abstract into a function, better word for indexing, minor documentation additions
* make method private
* space issue on the error msg
* fix tabbing

---------

Co-authored-by: Zach McKenzie <[email protected]>
1 parent 7d111f4 commit 60101db
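For context, this check fires when a recording's sample-index timestamps are not strictly sequential. Below is a minimal sketch of how a user would opt out of the check and inspect the result afterwards, assuming the `ignore_integrity_checks` flag named in the error message is accepted by the `IntanRawIO` constructor; the file path is a placeholder.

from neo.rawio import IntanRawIO

# Placeholder path; point this at a real Intan recording (.rhd/.rhs file or binary folder).
reader = IntanRawIO(filename="recording.rhd", ignore_integrity_checks=True)

# With the flag set, parse_header() records the problem instead of raising
# NeoReadWriteError; per this commit the reader marks it on the
# discontinuous_timestamps attribute (only set when a gap is found, hence getattr).
reader.parse_header()
if getattr(reader, "discontinuous_timestamps", False):
    print("Timestamp discontinuities detected; the file may be corrupted or badly merged.")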

File tree

1 file changed: +68 -26 lines changed


neo/rawio/intanrawio.py

Lines changed: 68 additions & 26 deletions
@@ -195,30 +195,9 @@ def _parse_header(self):
                     channel_memmap = np.memmap(file_path, dtype=stream_datatype, mode="r")
                     self._raw_data[stream_index].append(channel_memmap)

-        # check timestamp continuity
-        if self.file_format == "header-attached":
-            timestamp = self._raw_data["timestamp"].flatten()
-
-        # timestamps are always last stream for headerless binary files
-        elif self.file_format == "one-file-per-signal":
-            time_stream_index = max(self._raw_data.keys())
-            timestamp = self._raw_data[time_stream_index]
-        elif self.file_format == "one-file-per-channel":
-            time_stream_index = max(self._raw_data.keys())
-            timestamp = self._raw_data[time_stream_index][0]
-
-        discontinuous_timestamps = np.diff(timestamp) != 1
-        timestamps_are_not_contiguous = np.any(discontinuous_timestamps)
-        if timestamps_are_not_contiguous:
-            self.discontinuous_timestamps = True
-            if not self.ignore_integrity_checks:
-                error_msg = (
-                    "Timestamps are not continuous, this could be due to a corrupted file or an inappropriate file merge. "
-                    "Initialize the reader with `ignore_integrity_checks=True` to ignore this error and open the file. \n"
-                    f"Timestamps around discontinuities: {timestamp[discontinuous_timestamps]}"
-                )
-                raise NeoReadWriteError(error_msg)
-
+        # Data Integrity checks
+        self._assert_timestamp_continuity()
+
         # signals
         signal_channels = []
         for c, chan_info in enumerate(self._ordered_channel_info):
@@ -243,7 +222,7 @@ def _parse_header(self):
         stream_ids = np.unique(signal_channels["stream_id"])
         signal_streams = np.zeros(stream_ids.size, dtype=_signal_stream_dtype)

-        # we need to sort the data because the string of 10 is mis-sorted.
+        # we need to sort the data because the string of stream_index 10 is mis-sorted.
         stream_ids_sorted = sorted([int(stream_id) for stream_id in stream_ids])
         signal_streams["id"] = [str(stream_id) for stream_id in stream_ids_sorted]

@@ -284,8 +263,8 @@ def _parse_header(self):
         self.header["spike_channels"] = spike_channels
         self.header["event_channels"] = event_channels

+        # Extract annotations from the format
         self._generate_minimal_annotations()
-
         bl_annotations = self.raw_annotations["blocks"][0]
         seg_annotations = bl_annotations["segments"][0]

@@ -465,7 +444,70 @@ def _get_analogsignal_chunk_one_file_per_signal(self, i_start, i_stop, stream_in
         signal_data_memmap = self._raw_data[stream_index]

         return signal_data_memmap[i_start:i_stop, channel_indexes]
+
+    def _assert_timestamp_continuity(self):
+        """
+        Asserts the continuity of timestamps in the data.
+
+        This method verifies that the timestamps in the raw data are sequential,
+        indicating a continuous recording. If discontinuities are found, a flag
+        is set to indicate potential data integrity issues, and an error is raised
+        unless `ignore_integrity_checks` is True.
+
+        Raises
+        ------
+        NeoReadWriteError
+            If timestamps are not continuous and `ignore_integrity_checks` is False.
+            The error message includes a table detailing the discontinuities found.
+
+        Notes
+        -----
+        The method extracts timestamps from the raw data based on the file format:
+
+        * **header-attached:** Timestamps are extracted from a 'timestamp' field in the raw data.
+        * **one-file-per-signal:** Timestamps are taken from the last stream.
+        * **one-file-per-channel:** Timestamps are retrieved from the first channel of the last stream.
+        """
+        # check timestamp continuity
+        if self.file_format == "header-attached":
+            timestamp = self._raw_data["timestamp"].flatten()
+
+        # timestamps are always last stream for headerless binary files
+        elif self.file_format == "one-file-per-signal":
+            time_stream_index = max(self._raw_data.keys())
+            timestamp = self._raw_data[time_stream_index]
+        elif self.file_format == "one-file-per-channel":
+            time_stream_index = max(self._raw_data.keys())
+            timestamp = self._raw_data[time_stream_index][0]
+
+        discontinuous_timestamps = np.diff(timestamp) != 1
+        timestamps_are_not_contiguous = np.any(discontinuous_timestamps)
+        if timestamps_are_not_contiguous:
+            # Mark a flag that can be checked after parsing the header to see if the timestamps are continuous or not
+            self.discontinuous_timestamps = True
+            if not self.ignore_integrity_checks:
+                error_msg = (
+                    "\nTimestamps are not continuous, likely due to a corrupted file or inappropriate file merge.\n"
+                    "To open the file anyway, initialize the reader with `ignore_integrity_checks=True`.\n\n"
+                    "Discontinuities Found:\n"
+                    "+-----------------+-----------------+-----------------+-----------------------+\n"
+                    "| Discontinuity   | Previous        | Next            | Time Difference       |\n"
+                    "| Index           | (Frames)        | (Frames)        | (Seconds)             |\n"
+                    "+-----------------+-----------------+-----------------+-----------------------+\n"
+                )
+
+                amplifier_sampling_rate = self._global_info["sampling_rate"]
+                for discontinuity_index in np.where(discontinuous_timestamps)[0]:
+                    prev_ts = timestamp[discontinuity_index]
+                    next_ts = timestamp[discontinuity_index + 1]
+                    time_diff = (next_ts - prev_ts) / amplifier_sampling_rate
+
+                    error_msg += f"| {discontinuity_index + 1:>15,} | {prev_ts:>15,} | {next_ts:>15,} | {time_diff:>21.6f} |\n"

+                error_msg += "+-----------------+-----------------+-----------------+-----------------------+\n"
+
+                raise NeoReadWriteError(error_msg)
+

 def read_qstring(f):
     length = np.fromfile(f, dtype="uint32", count=1)[0]
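As a quick illustration of what the new error table reports, here is a standalone sketch of the same `np.diff(timestamp) != 1` test and row formatting applied to a toy timestamp array; the sampling rate and the gap are invented for the example, whereas the real method reads them from the file header.

import numpy as np

# Toy sample-index timestamps with one dropped chunk: frame 5 jumps straight to frame 50.
timestamp = np.array([0, 1, 2, 3, 4, 5, 50, 51, 52])
sampling_rate = 30_000.0  # Hz, invented for the example

# Same test as the reader: a contiguous recording increments by exactly 1 frame per sample.
discontinuous_timestamps = np.diff(timestamp) != 1

for discontinuity_index in np.where(discontinuous_timestamps)[0]:
    row = int(discontinuity_index) + 1
    prev_ts = int(timestamp[discontinuity_index])
    next_ts = int(timestamp[discontinuity_index + 1])
    time_diff = (next_ts - prev_ts) / sampling_rate
    # Same column widths as the error-message table (15/15/15/21).
    print(f"| {row:>15,} | {prev_ts:>15,} | {next_ts:>15,} | {time_diff:>21.6f} |")

For the toy data this prints a single row: discontinuity 6, previous frame 5, next frame 50, a 0.001500 s jump.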
