Skip to content

Commit 694fcaf

Browse files
larsonerArnav Kumar
authored and committed
BUG: Raise error for EDF+D/BDF+D files with acquisition gaps
Detect and raise NotImplementedError when loading EDF+D or BDF+D files that contain actual gaps between data records, instead of silently treating them as continuous data. This prevents incorrect time alignment. Fixes #13429
1 parent 29435fb commit 694fcaf

File tree

4 files changed

+361
-1
lines changed

4 files changed

+361
-1
lines changed

doc/changes/dev/13583.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Raise a :class:`NotImplementedError` when reading EDF+D or BDF+D files with acquisition gaps instead of silently loading them as continuous data, by `Arnav Kumar`_ (:gh:`13583`).

doc/changes/names.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
.. _Antti Rantala: https://github.com/Odingod
3030
.. _Apoorva Karekal: https://github.com/apoorva6262
3131
.. _Archit Singhal: https://github.com/architsinghal-mriirs
32+
.. _Arnav Kumar: https://github.com/Arnav1709
3233
.. _Arne Pelzer: https://github.com/aplzr
3334
.. _Ashley Drew: https://github.com/ashdrew
3435
.. _Asish Panda: https://github.com/kaichogami

mne/io/edf/edf.py

Lines changed: 158 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,30 @@ def __init__(
235235
np.ones((len(idx), 1)),
236236
None,
237237
)
238+
239+
# Check for discontinuous EDF+D files with actual gaps
240+
if edf_info.get("discontinuous", False):
241+
record_times = _get_tal_record_times(tal_data[0], encoding=encoding)
242+
record_length = edf_info["record_length"][0]
243+
n_records = edf_info["n_records"]
244+
has_gaps, gaps = _check_edf_discontinuity(
245+
record_times, record_length, n_records
246+
)
247+
if has_gaps:
248+
gap_info = ", ".join(
249+
[f"{onset:.3f}s (duration: {dur:.3f}s)" for onset, dur in gaps]
250+
)
251+
raise NotImplementedError(
252+
"This EDF+D file contains discontinuous data with gaps "
253+
f"between records. Gaps found at: {gap_info}. "
254+
"MNE-Python does not currently support reading EDF+D files "
255+
"with acquisition gaps. The data would be incorrectly "
256+
"treated as continuous, leading to incorrect time alignment. "
257+
"Consider using specialized tools like luna/lunapi for "
258+
"discontinuous EDF+ files, or convert the file to EDF+C "
259+
"format if the gaps are not significant for your analysis."
260+
)
261+
238262
annotations = _read_annotations_edf(
239263
tal_data[0],
240264
ch_names=info["ch_names"],
@@ -447,6 +471,30 @@ def __init__(
447471
np.ones((len(idx), 1)),
448472
None,
449473
)
474+
475+
# Check for discontinuous BDF+D files with actual gaps
476+
if edf_info.get("discontinuous", False):
477+
record_times = _get_tal_record_times(tal_data[0], encoding=encoding)
478+
record_length = edf_info["record_length"][0]
479+
n_records = edf_info["n_records"]
480+
has_gaps, gaps = _check_edf_discontinuity(
481+
record_times, record_length, n_records
482+
)
483+
if has_gaps:
484+
gap_info = ", ".join(
485+
[f"{onset:.3f}s (duration: {dur:.3f}s)" for onset, dur in gaps]
486+
)
487+
raise NotImplementedError(
488+
"This BDF+D file contains discontinuous data with gaps "
489+
f"between records. Gaps found at: {gap_info}. "
490+
"MNE-Python does not currently support reading BDF+D files "
491+
"with acquisition gaps. The data would be incorrectly "
492+
"treated as continuous, leading to incorrect time alignment. "
493+
"Consider using specialized tools like luna/lunapi for "
494+
"discontinuous BDF+ files, or convert the file to BDF+C "
495+
"format if the gaps are not significant for your analysis."
496+
)
497+
450498
annotations = _read_annotations_edf(
451499
tal_data[0],
452500
ch_names=info["ch_names"],
@@ -1159,9 +1207,16 @@ def _read_edf_header(
11591207
# to determine the subtype (EDF or BDF, which differ in the
11601208
# number of bytes they use for the data records; EDF uses 2 bytes
11611209
# whereas BDF uses 3 bytes).
1162-
fid.read(44)
1210+
# However, we still need to check for EDF+D/BDF+D (discontinuous) files.
1211+
reserved = fid.read(44).decode("latin-1").rstrip()
11631212
subtype = file_type
11641213

1214+
# Check for discontinuous EDF+D/BDF+D files
1215+
if reserved in ("EDF+D", "BDF+D"):
1216+
edf_info["discontinuous"] = True
1217+
else:
1218+
edf_info["discontinuous"] = False
1219+
11651220
n_records = int(_edf_str(fid.read(8)))
11661221
record_length = float(_edf_str(fid.read(8)))
11671222
record_length = np.array([record_length, 1.0]) # in seconds
@@ -2005,6 +2060,11 @@ def read_raw_edf(
20052060
20062061
The EDF specification allows storage of subseconds in measurement date.
20072062
However, this reader currently sets subseconds to 0 by default.
2063+
2064+
EDF+D (discontinuous) files with actual gaps between data records are not
2065+
supported and will raise a :class:`NotImplementedError`. EDF+D files that
2066+
are marked as discontinuous but have no actual gaps (e.g., from some
2067+
Nihon Kohden systems) will load normally.
20082068
"""
20092069
_check_args(input_fname, preload, "edf")
20102070

@@ -2144,6 +2204,10 @@ def read_raw_bdf(
21442204
If channels named 'status' or 'trigger' are present, they are considered as
21452205
STIM channels by default. Use :func:`mne.find_events` to parse events
21462206
encoded in such analog stim channels.
2207+
2208+
BDF+D (discontinuous) files with actual gaps between data records are not
2209+
supported and will raise a :class:`NotImplementedError`. BDF+D files that
2210+
are marked as discontinuous but have no actual gaps will load normally.
21472211
"""
21482212
_check_args(input_fname, preload, "bdf")
21492213

@@ -2355,3 +2419,96 @@ def _get_annotations_gdf(edf_info, sfreq):
23552419
desc = events[2]
23562420

23572421
return onset, duration, desc
2422+
2423+
2424+
def _get_tal_record_times(annotations, encoding="utf8"):
2425+
"""Extract TAL record onset times from EDF+ annotation data.
2426+
2427+
In EDF+ files, each data record contains a Time-stamped Annotation List (TAL)
2428+
that starts with the onset time of that data record. This function extracts
2429+
these onset times to detect gaps between records in EDF+D (discontinuous) files.
2430+
2431+
Parameters
2432+
----------
2433+
annotations : ndarray (n_chans, n_samples) | str
2434+
Channel data in EDF+ TAL format or path to annotation file.
2435+
encoding : str
2436+
Encoding to use when decoding the TAL data.
2437+
2438+
Returns
2439+
-------
2440+
record_times : list of float
2441+
List of onset times for each data record, in seconds.
2442+
"""
2443+
pat = "([+-]\\d+\\.?\\d*)(\x15(\\d+\\.?\\d*))?(\x14.*?)\x14\x00"
2444+
if isinstance(annotations, str | Path):
2445+
with open(annotations, "rb") as annot_file:
2446+
triggers = re.findall(pat.encode(), annot_file.read())
2447+
triggers = [tuple(map(lambda x: x.decode(encoding), t)) for t in triggers]
2448+
else:
2449+
tals = bytearray()
2450+
annotations = np.atleast_2d(annotations)
2451+
for chan in annotations:
2452+
this_chan = chan.ravel()
2453+
if this_chan.dtype == INT32: # BDF
2454+
this_chan = this_chan.view(dtype=UINT8)
2455+
this_chan = this_chan.reshape(-1, 4)
2456+
this_chan = this_chan[:, :3].ravel()
2457+
tals.extend(this_chan)
2458+
else:
2459+
this_chan = chan.astype(np.int64)
2460+
tals.extend(np.uint8([this_chan % 256, this_chan // 256]).flatten("F"))
2461+
try:
2462+
triggers = re.findall(pat, tals.decode(encoding))
2463+
except UnicodeDecodeError:
2464+
return []
2465+
2466+
# Extract record onset times (first TAL entry of each record has empty description)
2467+
record_times = []
2468+
for ev in triggers:
2469+
onset = float(ev[0])
2470+
# Check if this is a record timestamp (empty description after \x14)
2471+
descriptions = ev[3].split("\x14")[1:]
2472+
# The first TAL in each record has the record onset time
2473+
# If there's no description, it's the record timestamp
2474+
if not any(descriptions):
2475+
record_times.append(onset)
2476+
2477+
return record_times
2478+
2479+
2480+
def _check_edf_discontinuity(record_times, record_length, n_records, tolerance=1e-6):
2481+
"""Check if an EDF+D file has actual gaps between records.
2482+
2483+
Parameters
2484+
----------
2485+
record_times : list of float
2486+
List of onset times for each data record, extracted from TAL annotations.
2487+
record_length : float
2488+
Duration of each data record in seconds.
2489+
n_records : int
2490+
Expected number of data records.
2491+
tolerance : float
2492+
Tolerance for comparing times (in seconds).
2493+
2494+
Returns
2495+
-------
2496+
has_gaps : bool
2497+
True if gaps exist between records.
2498+
gaps : list of tuple
2499+
List of (onset, duration) tuples for each gap.
2500+
"""
2501+
if len(record_times) < 2:
2502+
return False, []
2503+
2504+
gaps = []
2505+
for i in range(len(record_times) - 1):
2506+
expected_next = record_times[i] + record_length
2507+
actual_next = record_times[i + 1]
2508+
gap = actual_next - expected_next
2509+
2510+
if gap > tolerance:
2511+
# Found a gap
2512+
gaps.append((expected_next, gap))
2513+
2514+
return len(gaps) > 0, gaps

0 commit comments

Comments
 (0)