diff --git a/doc/changes/dev/13109.bugfix.rst b/doc/changes/dev/13109.bugfix.rst new file mode 100644 index 00000000000..00dc5c4dfb9 --- /dev/null +++ b/doc/changes/dev/13109.bugfix.rst @@ -0,0 +1 @@ +Fix reading annotations with :func:`mne.read_annotations` from .csv files containing nanoseconds in times, and drop nanoseconds from times returned from :meth:`mne.Annotations.to_data_frame` and saved in .csv files by :meth:`mne.Annotations.save`, by `Thomas Binns`_. \ No newline at end of file diff --git a/mne/annotations.py b/mne/annotations.py index 74a5878dca2..e298e80918c 100644 --- a/mne/annotations.py +++ b/mne/annotations.py @@ -242,7 +242,8 @@ class Annotations: the annotations with raw data if their acquisition is started at the same time. If it is a string, it should conform to the ISO8601 format. More precisely to this '%%Y-%%m-%%d %%H:%%M:%%S.%%f' particular case of - the ISO8601 format where the delimiter between date and time is ' '. + the ISO8601 format where the delimiter between date and time is ' ' and at most + microsecond precision (nanoseconds are not supported). %(ch_names_annot)s .. versionadded:: 0.23 @@ -390,6 +391,20 @@ def __init__( extras=None, ): self._orig_time = _handle_meas_date(orig_time) + if isinstance(orig_time, str) and self._orig_time is None: + try: # only warn if `orig_time` is not the default '1970-01-01 00:00:00' + if _handle_meas_date(0) == datetime.strptime( + orig_time, "%Y-%m-%d %H:%M:%S" + ).replace(tzinfo=timezone.utc): + pass + except ValueError: # error if incorrect datetime format AND not the default + warn( + "The format of the `orig_time` string is not recognised. It " + "must conform to the ISO8601 format with at most microsecond " + "precision and where the delimiter between date and time is " + f"' '. Got: {orig_time}. 
Defaulting `orig_time` to None.", + RuntimeWarning, + ) self.onset, self.duration, self.description, self.ch_names, self._extras = ( _check_o_d_s_c_e(onset, duration, description, ch_names, extras) ) @@ -615,7 +630,7 @@ def to_data_frame(self, time_format="datetime"): dt = _handle_meas_date(0) time_format = _check_time_format(time_format, valid_time_formats, dt) dt = dt.replace(tzinfo=None) - times = _convert_times(self.onset, time_format, dt) + times = _convert_times(self.onset, time_format, meas_date=dt, drop_nano=True) df = dict(onset=times, duration=self.duration, description=self.description) if self._any_ch_names(): df.update(ch_names=self.ch_names) @@ -1486,7 +1501,13 @@ def _read_annotations_csv(fname): "onsets in seconds." ) except ValueError: - pass + # remove nanoseconds for ISO8601 (microsecond) compliance + timestamp = pd.Timestamp(orig_time) + timespec = "microseconds" + if timestamp == pd.Timestamp(_handle_meas_date(0)).astimezone(None): + timespec = "auto" # use default timespec for `orig_time=None` + orig_time = timestamp.isoformat(sep=" ", timespec=timespec) + onset_dt = pd.to_datetime(df["onset"]) onset = (onset_dt - onset_dt[0]).dt.total_seconds() duration = df["duration"].values.astype(float) diff --git a/mne/epochs.py b/mne/epochs.py index d6159f49583..6c3935097bf 100644 --- a/mne/epochs.py +++ b/mne/epochs.py @@ -2861,7 +2861,7 @@ def to_data_frame( # prepare extra columns / multiindex mindex = list() times = np.tile(times, n_epochs) - times = _convert_times(times, time_format, self.info["meas_date"]) + times = _convert_times(times, time_format, meas_date=self.info["meas_date"]) mindex.append(("time", times)) rev_event_id = {v: k for k, v in self.event_id.items()} conditions = [rev_event_id[k] for k in self.events[:, 2]] diff --git a/mne/evoked.py b/mne/evoked.py index 7bd2355e4ee..9a13f5229f6 100644 --- a/mne/evoked.py +++ b/mne/evoked.py @@ -1392,7 +1392,7 @@ def to_data_frame( data = _scale_dataframe_data(self, data, picks, scalings) # 
prepare extra columns / multiindex mindex = list() - times = _convert_times(times, time_format, self.info["meas_date"]) + times = _convert_times(times, time_format, meas_date=self.info["meas_date"]) mindex.append(("time", times)) # build DataFrame df = _build_data_frame( diff --git a/mne/io/base.py b/mne/io/base.py index eb2b5bc49f3..314f981dcaf 100644 --- a/mne/io/base.py +++ b/mne/io/base.py @@ -2472,7 +2472,10 @@ def to_data_frame( # prepare extra columns / multiindex mindex = list() times = _convert_times( - times, time_format, self.info["meas_date"], self.first_time + times, + time_format, + meas_date=self.info["meas_date"], + first_time=self.first_time, ) mindex.append(("time", times)) # build DataFrame diff --git a/mne/tests/test_annotations.py b/mne/tests/test_annotations.py index 7a9a0faea43..596e37d5ce3 100644 --- a/mne/tests/test_annotations.py +++ b/mne/tests/test_annotations.py @@ -1138,6 +1138,35 @@ def test_broken_csv(tmp_path): read_annotations(fname) +def test_nanosecond_in_times(tmp_path): + """Test onsets with ns read correctly for csv and caught as init argument.""" + pd = pytest.importorskip("pandas") + + # Test bad format onset sanitised when loading from csv + onset = ( + pd.Timestamp(_ORIG_TIME) + .astimezone(None) + .isoformat(sep=" ", timespec="nanoseconds") + ) + content = f"onset,duration,description\n{onset},1.0,AA" + fname = tmp_path / "annotations_broken.csv" + with open(fname, "w") as f: + f.write(content) + annot = read_annotations(fname) + assert annot.orig_time == _ORIG_TIME + + # Test bad format `orig_time` str -> `None` raises warning in `Annotations` init + with pytest.warns( + RuntimeWarning, match="The format of the `orig_time` string is not recognised." 
+ ): + bad_orig_time = ( + pd.Timestamp(_ORIG_TIME) + .astimezone(None) + .isoformat(sep=" ", timespec="nanoseconds") + ) + Annotations([0], [1], ["test"], bad_orig_time) + + # Test for IO with .txt files @@ -1564,7 +1593,8 @@ def test_repr(): @pytest.mark.parametrize("time_format", (None, "ms", "datetime", "timedelta")) def test_annotation_to_data_frame(time_format): """Test annotation class to data frame conversion.""" - pytest.importorskip("pandas") + pd = pytest.importorskip("pandas") + onset = np.arange(1, 10) durations = np.full_like(onset, [4, 5, 6, 4, 5, 6, 4, 5, 6]) description = ["yy"] * onset.shape[0] @@ -1584,6 +1614,12 @@ def test_annotation_to_data_frame(time_format): assert want == got assert df.groupby("description").count().onset["yy"] == 9 + # Check nanoseconds omitted from onset times + if time_format == "datetime": + a.onset += 1e-7 # >6 decimals to trigger nanosecond component + df = a.to_data_frame(time_format=time_format) + assert pd.Timestamp(df.onset[0]).nanosecond == 0 + def test_annotation_ch_names(): """Test annotation ch_names updating and pruning.""" diff --git a/mne/time_frequency/tfr.py b/mne/time_frequency/tfr.py index 7f5632644be..3a69c93bc8d 100644 --- a/mne/time_frequency/tfr.py +++ b/mne/time_frequency/tfr.py @@ -2753,7 +2753,7 @@ def to_data_frame( # prepare extra columns / multiindex mindex = list() default_index = list() - times = _convert_times(times, time_format, self.info["meas_date"]) + times = _convert_times(times, time_format, meas_date=self.info["meas_date"]) times = np.tile(times, n_epochs * n_freqs * n_tapers) freqs = np.tile(np.repeat(freqs, n_times), n_epochs * n_tapers) mindex.append(("time", times)) diff --git a/mne/utils/dataframe.py b/mne/utils/dataframe.py index 4b3bdb312bf..332caa5baeb 100644 --- a/mne/utils/dataframe.py +++ b/mne/utils/dataframe.py @@ -36,7 +36,9 @@ def _scale_dataframe_data(inst, data, picks, scalings): return data -def _convert_times(times, time_format, meas_date=None, first_time=0): +def 
_convert_times( + times, time_format, *, meas_date=None, first_time=0, drop_nano=False +): """Convert vector of time in seconds to ms, datetime, or timedelta.""" # private function; pandas already checked in calling function from pandas import to_timedelta @@ -47,6 +49,11 @@ def _convert_times(times, time_format, meas_date=None, first_time=0): times = to_timedelta(times, unit="s") elif time_format == "datetime": times = to_timedelta(times + first_time, unit="s") + meas_date + if drop_nano: + tz_name = "" + if meas_date is not None and meas_date.tzinfo is not None: + tz_name = f", {meas_date.tzinfo.tzname(meas_date)}" # timezone as str + times = times.astype(f"datetime64[us{tz_name}]") # cap at microseconds return times