Skip to content

Commit b1340eb

Browse files
authored
Fix Annotations time formats (mne-tools#13109)
1 parent 6f42037 commit b1340eb

File tree

8 files changed

+77
-9
lines changed

8 files changed

+77
-9
lines changed

doc/changes/dev/13109.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix reading of annotations with :func:`mne.read_annotations` from .csv files whose times contain nanoseconds, and drop nanoseconds from the times returned by :meth:`mne.Annotations.to_data_frame` and from the times saved to .csv files by :meth:`mne.Annotations.save`, by `Thomas Binns`_.

mne/annotations.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ class Annotations:
242242
the annotations with raw data if their acquisition is started at the
243243
same time. If it is a string, it should conform to the ISO8601 format.
244244
More precisely to this '%%Y-%%m-%%d %%H:%%M:%%S.%%f' particular case of
245-
the ISO8601 format where the delimiter between date and time is ' '.
245+
the ISO8601 format where the delimiter between date and time is ' ' and the
246+
precision is at most microseconds (nanoseconds are not supported).
246247
%(ch_names_annot)s
247248
248249
.. versionadded:: 0.23
@@ -390,6 +391,20 @@ def __init__(
390391
extras=None,
391392
):
392393
self._orig_time = _handle_meas_date(orig_time)
394+
if isinstance(orig_time, str) and self._orig_time is None:
395+
try: # only warn if `orig_time` is not the default '1970-01-01 00:00:00'
396+
if _handle_meas_date(0) == datetime.strptime(
397+
orig_time, "%Y-%m-%d %H:%M:%S"
398+
).replace(tzinfo=timezone.utc):
399+
pass
400+
except ValueError: # error if incorrect datetime format AND not the default
401+
warn(
402+
"The format of the `orig_time` string is not recognised. It "
403+
"must conform to the ISO8601 format with at most microsecond "
404+
"precision and where the delimiter between date and time is "
405+
f"' '. Got: {orig_time}. Defaulting `orig_time` to None.",
406+
RuntimeWarning,
407+
)
393408
self.onset, self.duration, self.description, self.ch_names, self._extras = (
394409
_check_o_d_s_c_e(onset, duration, description, ch_names, extras)
395410
)
@@ -615,7 +630,7 @@ def to_data_frame(self, time_format="datetime"):
615630
dt = _handle_meas_date(0)
616631
time_format = _check_time_format(time_format, valid_time_formats, dt)
617632
dt = dt.replace(tzinfo=None)
618-
times = _convert_times(self.onset, time_format, dt)
633+
times = _convert_times(self.onset, time_format, meas_date=dt, drop_nano=True)
619634
df = dict(onset=times, duration=self.duration, description=self.description)
620635
if self._any_ch_names():
621636
df.update(ch_names=self.ch_names)
@@ -1486,7 +1501,13 @@ def _read_annotations_csv(fname):
14861501
"onsets in seconds."
14871502
)
14881503
except ValueError:
1489-
pass
1504+
# remove nanoseconds for ISO8601 (microsecond) compliance
1505+
timestamp = pd.Timestamp(orig_time)
1506+
timespec = "microseconds"
1507+
if timestamp == pd.Timestamp(_handle_meas_date(0)).astimezone(None):
1508+
timespec = "auto" # use default timespec for `orig_time=None`
1509+
orig_time = timestamp.isoformat(sep=" ", timespec=timespec)
1510+
14901511
onset_dt = pd.to_datetime(df["onset"])
14911512
onset = (onset_dt - onset_dt[0]).dt.total_seconds()
14921513
duration = df["duration"].values.astype(float)

mne/epochs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2861,7 +2861,7 @@ def to_data_frame(
28612861
# prepare extra columns / multiindex
28622862
mindex = list()
28632863
times = np.tile(times, n_epochs)
2864-
times = _convert_times(times, time_format, self.info["meas_date"])
2864+
times = _convert_times(times, time_format, meas_date=self.info["meas_date"])
28652865
mindex.append(("time", times))
28662866
rev_event_id = {v: k for k, v in self.event_id.items()}
28672867
conditions = [rev_event_id[k] for k in self.events[:, 2]]

mne/evoked.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1392,7 +1392,7 @@ def to_data_frame(
13921392
data = _scale_dataframe_data(self, data, picks, scalings)
13931393
# prepare extra columns / multiindex
13941394
mindex = list()
1395-
times = _convert_times(times, time_format, self.info["meas_date"])
1395+
times = _convert_times(times, time_format, meas_date=self.info["meas_date"])
13961396
mindex.append(("time", times))
13971397
# build DataFrame
13981398
df = _build_data_frame(

mne/io/base.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2472,7 +2472,10 @@ def to_data_frame(
24722472
# prepare extra columns / multiindex
24732473
mindex = list()
24742474
times = _convert_times(
2475-
times, time_format, self.info["meas_date"], self.first_time
2475+
times,
2476+
time_format,
2477+
meas_date=self.info["meas_date"],
2478+
first_time=self.first_time,
24762479
)
24772480
mindex.append(("time", times))
24782481
# build DataFrame

mne/tests/test_annotations.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1138,6 +1138,35 @@ def test_broken_csv(tmp_path):
11381138
read_annotations(fname)
11391139

11401140

1141+
def test_nanosecond_in_times(tmp_path):
1142+
"""Test onsets with ns read correctly for csv and caught as init argument."""
1143+
pd = pytest.importorskip("pandas")
1144+
1145+
# Test bad format onset sanitised when loading from csv
1146+
onset = (
1147+
pd.Timestamp(_ORIG_TIME)
1148+
.astimezone(None)
1149+
.isoformat(sep=" ", timespec="nanoseconds")
1150+
)
1151+
content = f"onset,duration,description\n{onset},1.0,AA"
1152+
fname = tmp_path / "annotations_broken.csv"
1153+
with open(fname, "w") as f:
1154+
f.write(content)
1155+
annot = read_annotations(fname)
1156+
assert annot.orig_time == _ORIG_TIME
1157+
1158+
# Test bad format `orig_time` str -> `None` raises warning in `Annotation` init
1159+
with pytest.warns(
1160+
RuntimeWarning, match="The format of the `orig_time` string is not recognised."
1161+
):
1162+
bad_orig_time = (
1163+
pd.Timestamp(_ORIG_TIME)
1164+
.astimezone(None)
1165+
.isoformat(sep=" ", timespec="nanoseconds")
1166+
)
1167+
Annotations([0], [1], ["test"], bad_orig_time)
1168+
1169+
11411170
# Test for IO with .txt files
11421171

11431172

@@ -1564,7 +1593,8 @@ def test_repr():
15641593
@pytest.mark.parametrize("time_format", (None, "ms", "datetime", "timedelta"))
15651594
def test_annotation_to_data_frame(time_format):
15661595
"""Test annotation class to data frame conversion."""
1567-
pytest.importorskip("pandas")
1596+
pd = pytest.importorskip("pandas")
1597+
15681598
onset = np.arange(1, 10)
15691599
durations = np.full_like(onset, [4, 5, 6, 4, 5, 6, 4, 5, 6])
15701600
description = ["yy"] * onset.shape[0]
@@ -1584,6 +1614,12 @@ def test_annotation_to_data_frame(time_format):
15841614
assert want == got
15851615
assert df.groupby("description").count().onset["yy"] == 9
15861616

1617+
# Check nanoseconds omitted from onset times
1618+
if time_format == "datetime":
1619+
a.onset += 1e-7 # >6 decimals to trigger nanosecond component
1620+
df = a.to_data_frame(time_format=time_format)
1621+
assert pd.Timestamp(df.onset[0]).nanosecond == 0
1622+
15871623

15881624
def test_annotation_ch_names():
15891625
"""Test annotation ch_names updating and pruning."""

mne/time_frequency/tfr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2753,7 +2753,7 @@ def to_data_frame(
27532753
# prepare extra columns / multiindex
27542754
mindex = list()
27552755
default_index = list()
2756-
times = _convert_times(times, time_format, self.info["meas_date"])
2756+
times = _convert_times(times, time_format, meas_date=self.info["meas_date"])
27572757
times = np.tile(times, n_epochs * n_freqs * n_tapers)
27582758
freqs = np.tile(np.repeat(freqs, n_times), n_epochs * n_tapers)
27592759
mindex.append(("time", times))

mne/utils/dataframe.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ def _scale_dataframe_data(inst, data, picks, scalings):
3636
return data
3737

3838

39-
def _convert_times(times, time_format, meas_date=None, first_time=0):
39+
def _convert_times(
40+
times, time_format, *, meas_date=None, first_time=0, drop_nano=False
41+
):
4042
"""Convert vector of time in seconds to ms, datetime, or timedelta."""
4143
# private function; pandas already checked in calling function
4244
from pandas import to_timedelta
@@ -47,6 +49,11 @@ def _convert_times(times, time_format, meas_date=None, first_time=0):
4749
times = to_timedelta(times, unit="s")
4850
elif time_format == "datetime":
4951
times = to_timedelta(times + first_time, unit="s") + meas_date
52+
if drop_nano:
53+
tz_name = ""
54+
if meas_date is not None and meas_date.tzinfo is not None:
55+
tz_name = f", {meas_date.tzinfo.tzname(meas_date)}" # timezone as str
56+
times = times.astype(f"datetime64[us{tz_name}]") # cap at microseconds
5057
return times
5158

5259

0 commit comments

Comments
 (0)