Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

### New Checks
* Added `check_file_extension` for NWB file extension best practice recommendations (`.nwb`, `.nwb.h5`, or `.nwb.zarr`) [#625](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/625)
* Added `check_time_series_duration` to detect unusually long TimeSeries durations (default threshold: 1 year). [#627](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/627)
* Added `check_rate_not_below_threshold` to detect suspiciously low sampling rates, which may indicate that the period was provided instead of the rate. [#627](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/627)

### Improvements
* Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624)
Expand Down
4 changes: 4 additions & 0 deletions src/nwbinspector/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,10 @@
check_missing_unit,
check_rate_is_not_zero,
check_rate_is_positive,
check_rate_not_below_threshold,
check_regular_timestamps,
check_resolution,
check_time_series_duration,
check_timestamp_of_the_first_sample_is_not_negative,
check_timestamps_ascending,
check_timestamps_match_first_dimension,
Expand Down Expand Up @@ -151,6 +153,8 @@
"check_timestamps_match_first_dimension",
"check_timestamp_of_the_first_sample_is_not_negative",
"check_rate_is_not_zero",
"check_rate_not_below_threshold",
"check_time_series_duration",
"check_intracellular_electrode_cell_id_exists",
"check_compass_direction_unit",
"check_spatial_series_radians_magnitude",
Expand Down
73 changes: 73 additions & 0 deletions src/nwbinspector/checks/_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,76 @@ def check_rate_is_positive(time_series: TimeSeries) -> Optional[InspectorMessage
)

return None


@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=TimeSeries)
def check_time_series_duration(
    time_series: TimeSeries, duration_threshold: float = 31557600.0
) -> Optional[InspectorMessage]:
    """
    Flag a TimeSeries whose total time span is greater than ``duration_threshold`` seconds.

    When timestamps are present, the span is the last timestamp minus the first one.
    Otherwise, when a positive rate is set, the span is ``(num_samples - 1) / rate`` where
    ``num_samples`` is the length of the first data axis.

    Nothing is reported when the series has no data, when the first data axis holds at most
    one sample, or when no span can be computed.

    The default threshold is 31,557,600 seconds (365.25 days, i.e. one year).
    """
    # NOTE(review): a reviewer on the originating pull request questioned the one-year default,
    # suggesting that anything longer than a month or two could already be worth flagging
    # since this is only a best practice suggestion.
    seconds_per_year = 31557600.0

    data = time_series.data
    if data is None:
        return None

    data_shape = get_data_shape(data)
    if data_shape is None or data_shape[0] <= 1:
        return None

    timestamps = time_series.timestamps
    if timestamps is not None:
        # Timestamps take precedence; the rate is never consulted on this path.
        timestamps_shape = get_data_shape(timestamps)
        if timestamps_shape is None or not timestamps_shape[0] > 1:
            return None
        first_timestamp = timestamps[0]
        last_timestamp = timestamps[-1]
        duration = float(last_timestamp - first_timestamp)
    else:
        rate = time_series.rate
        if rate is None or not rate > 0:
            return None
        # N regularly spaced samples cover N - 1 sampling intervals.
        duration = (data_shape[0] - 1) / rate

    if not duration > duration_threshold:
        return None

    # The message also reports the span in years for readability.
    duration_years = duration / seconds_per_year
    message = (
        f"TimeSeries '{time_series.name}' has an unusually long duration of {duration:.2f} seconds ({duration_years:.2f} years), "
        f"which may indicate an error in the timestamps or rate data. "
        "Please verify that this is correct."
    )
    return InspectorMessage(message=message)


@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=TimeSeries)
def check_rate_not_below_threshold(
    time_series: TimeSeries, low_rate_threshold: float = 0.01
) -> Optional[InspectorMessage]:
    """
    Flag a TimeSeries whose sampling rate is positive but below ``low_rate_threshold`` (default 0.01 Hz).

    Such a small rate suggests that the period (seconds between samples) was stored where the
    frequency was expected. The default of 0.01 Hz is equivalent to a 100 second period.

    A missing rate, a rate of ``None``, and rates that are zero or negative are not reported here.
    """
    rate = getattr(time_series, "rate", None)
    if rate is None:
        return None

    # Only the open interval (0, low_rate_threshold) is considered suspicious.
    if not 0 < rate < low_rate_threshold:
        return None

    period = 1.0 / rate
    message = (
        f"TimeSeries '{time_series.name}' has a sampling rate of {time_series.rate}Hz (period of {period:.2f} seconds). "
        "This low sampling rate may indicate that the period was specified instead of the rate. "
        f"If the intended period is {time_series.rate} seconds, the rate should be {1.0 / time_series.rate}Hz."
    )
    return InspectorMessage(message=message)
192 changes: 192 additions & 0 deletions tests/unit_tests/test_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
check_missing_unit,
check_rate_is_not_zero,
check_rate_is_positive,
check_rate_not_below_threshold,
check_regular_timestamps,
check_resolution,
check_time_series_duration,
check_timestamp_of_the_first_sample_is_not_negative,
check_timestamps_ascending,
check_timestamps_match_first_dimension,
Expand Down Expand Up @@ -413,3 +415,193 @@ def test_check_rate_is_positive_fail():
object_name="TimeSeriesTest",
location="/",
)


def test_check_time_series_duration_pass_short_duration_with_timestamps():
    """A timestamped series spanning only 100 seconds must not be flagged."""
    # 100 seconds in total, far below the one-year default threshold.
    short_timestamps = np.linspace(0, 100, 100)
    series = pynwb.TimeSeries(
        name="test_time_series",
        unit="test_units",
        data=np.zeros(shape=100),
        timestamps=short_timestamps,
    )

    outcome = check_time_series_duration(series)

    assert outcome is None


def test_check_time_series_duration_pass_short_duration_with_rate():
    """A rate-based series of 1000 samples at 10 Hz (about 100 seconds) must not be flagged."""
    series_kwargs = dict(
        name="test_time_series",
        unit="test_units",
        data=np.zeros(shape=1000),
        starting_time=0.0,
        rate=10.0,
    )
    series = pynwb.TimeSeries(**series_kwargs)

    outcome = check_time_series_duration(series)

    assert outcome is None


def test_check_time_series_duration_fail_with_timestamps():
    """A timestamped series spanning more than one year must be reported."""
    one_year = 31557600.0  # seconds in 365.25 days
    duration = one_year + 1000
    duration_years = duration / one_year

    # Timestamps run from 0 to just past the one-year mark.
    series = pynwb.TimeSeries(
        name="long_time_series",
        unit="test_units",
        data=np.zeros(shape=100),
        timestamps=np.linspace(0, duration, 100),
    )
    outcome = check_time_series_duration(series)

    expected_message = (
        f"TimeSeries 'long_time_series' has an unusually long duration of {duration:.2f} seconds ({duration_years:.2f} years), "
        f"which may indicate an error in the timestamps or rate data. "
        "Please verify that this is correct."
    )
    expected = InspectorMessage(
        message=expected_message,
        importance=Importance.BEST_PRACTICE_SUGGESTION,
        check_function_name="check_time_series_duration",
        object_type="TimeSeries",
        object_name="long_time_series",
        location="/",
    )
    assert outcome == expected


def test_check_time_series_duration_fail_with_rate():
    """A rate-based series covering more than one year must be reported."""
    one_year = 31557600.0
    # A slow rate (one sample every 100 seconds) keeps the data array small.
    rate = 0.01
    num_samples = int((one_year + 1000) * rate) + 1

    series = pynwb.TimeSeries(
        name="long_time_series",
        unit="test_units",
        data=np.zeros(shape=num_samples),
        starting_time=0.0,
        rate=rate,
    )
    outcome = check_time_series_duration(series)

    duration = (num_samples - 1) / rate
    duration_years = duration / one_year
    expected_message = (
        f"TimeSeries 'long_time_series' has an unusually long duration of {duration:.2f} seconds ({duration_years:.2f} years), "
        f"which may indicate an error in the timestamps or rate data. "
        "Please verify that this is correct."
    )
    expected = InspectorMessage(
        message=expected_message,
        importance=Importance.BEST_PRACTICE_SUGGESTION,
        check_function_name="check_time_series_duration",
        object_type="TimeSeries",
        object_name="long_time_series",
        location="/",
    )
    assert outcome == expected


def test_check_time_series_duration_pass_custom_threshold():
    """The ``duration_threshold`` argument decides whether a 200 second series is flagged."""
    series = pynwb.TimeSeries(
        name="test_time_series",
        unit="test_units",
        data=np.zeros(shape=100),
        timestamps=np.linspace(0, 200, 100),  # spans 200 seconds
    )

    # 200 seconds exceeds a 100 second threshold, so a message is expected.
    flagged = check_time_series_duration(series, duration_threshold=100.0)
    assert flagged is not None

    # 200 seconds stays under a 300 second threshold, so nothing is reported.
    not_flagged = check_time_series_duration(series, duration_threshold=300.0)
    assert not_flagged is None


def test_check_time_series_duration_pass_single_sample():
    """A series holding exactly one sample must not be flagged."""
    single_sample_series = pynwb.TimeSeries(
        name="test_time_series",
        unit="test_units",
        data=np.zeros(shape=1),
        timestamps=[0],
    )

    outcome = check_time_series_duration(single_sample_series)

    assert outcome is None


def test_check_rate_not_below_threshold_pass_normal_rate():
    """An ordinary 30 Hz sampling rate must not be flagged."""
    series_kwargs = dict(
        name="test_time_series",
        unit="test_units",
        data=np.zeros(shape=100),
        starting_time=0.0,
        rate=30.0,
    )
    series = pynwb.TimeSeries(**series_kwargs)

    outcome = check_rate_not_below_threshold(series)

    assert outcome is None


def test_check_rate_not_below_threshold_fail_very_low_rate():
    """A rate of 0.001 Hz (a 1000 second period) must be reported."""
    low_rate = 0.001
    period = 1.0 / low_rate

    series = pynwb.TimeSeries(
        name="test_time_series",
        unit="test_units",
        data=np.zeros(shape=100),
        starting_time=0.0,
        rate=low_rate,
    )
    outcome = check_rate_not_below_threshold(series)

    expected_message = (
        f"TimeSeries 'test_time_series' has a sampling rate of {low_rate}Hz (period of {period:.2f} seconds). "
        "This low sampling rate may indicate that the period was specified instead of the rate. "
        f"If the intended period is {low_rate} seconds, the rate should be {1.0 / low_rate}Hz."
    )
    expected = InspectorMessage(
        message=expected_message,
        importance=Importance.BEST_PRACTICE_VIOLATION,
        check_function_name="check_rate_not_below_threshold",
        object_type="TimeSeries",
        object_name="test_time_series",
        location="/",
    )
    assert outcome == expected


def test_check_rate_not_below_threshold_pass_custom_threshold():
    """The ``low_rate_threshold`` argument decides whether a 0.005 Hz series is flagged."""
    series = pynwb.TimeSeries(
        name="test_time_series",
        unit="test_units",
        data=np.zeros(shape=100),
        starting_time=0.0,
        rate=0.005,  # under the 0.01 Hz default, above the 0.001 Hz custom value
    )

    # With the default threshold the rate counts as too low.
    flagged = check_rate_not_below_threshold(series)
    assert flagged is not None

    # Lowering the threshold to 0.001 Hz makes the same rate acceptable.
    not_flagged = check_rate_not_below_threshold(series, low_rate_threshold=0.001)
    assert not_flagged is None


def test_check_rate_not_below_threshold_pass_no_rate():
    """A series built from timestamps, with no rate supplied, must not be flagged."""
    timestamped_series = pynwb.TimeSeries(
        name="test_time_series",
        unit="test_units",
        data=np.zeros(shape=100),
        timestamps=np.linspace(0, 100, 100),
    )

    outcome = check_rate_not_below_threshold(timestamped_series)

    assert outcome is None


def test_check_rate_not_below_threshold_pass_zero_rate():
    """A rate of exactly zero must not be flagged by this check."""
    zero_rate_series = pynwb.TimeSeries(
        name="test_time_series",
        unit="test_units",
        data=np.zeros(shape=1),
        starting_time=0.0,
        rate=0.0,
    )

    # A zero rate is the responsibility of check_rate_is_not_zero, so this check stays silent.
    outcome = check_rate_not_below_threshold(zero_rate_series)

    assert outcome is None
Loading