Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/nwbinspector/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,10 @@
check_missing_unit,
check_rate_is_not_zero,
check_rate_is_positive,
check_rate_not_below_threshold,
check_regular_timestamps,
check_resolution,
check_time_series_duration,
check_timestamp_of_the_first_sample_is_not_negative,
check_timestamps_ascending,
check_timestamps_match_first_dimension,
Expand Down Expand Up @@ -149,6 +151,8 @@
"check_timestamps_match_first_dimension",
"check_timestamp_of_the_first_sample_is_not_negative",
"check_rate_is_not_zero",
"check_rate_not_below_threshold",
"check_time_series_duration",
"check_intracellular_electrode_cell_id_exists",
"check_compass_direction_unit",
"check_spatial_series_radians_magnitude",
Expand Down
75 changes: 75 additions & 0 deletions src/nwbinspector/checks/_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,78 @@ def check_rate_is_positive(time_series: TimeSeries) -> Optional[InspectorMessage
)

return None


@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=TimeSeries)
def check_time_series_duration(
time_series: TimeSeries, duration_threshold: float = 31557600.0
) -> Optional[InspectorMessage]:
"""
Check if the TimeSeries duration is longer than the specified threshold.

The default threshold is 1 year (31,557,600 seconds = 365.25 days).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason for the year long threshold? I imagine anything longer than a month or two could be worth flagging since it is a best practice suggestion.

Duration is calculated from either timestamps or starting_time + rate + data length.
"""
if time_series.data is None:
return None

data_shape = get_data_shape(time_series.data)
if data_shape is None or data_shape[0] <= 1:
return None

duration = None

# Calculate duration from timestamps if available
if time_series.timestamps is not None:
timestamps_shape = get_data_shape(time_series.timestamps)
if timestamps_shape is not None and timestamps_shape[0] > 1:
first_timestamp = time_series.timestamps[0]
last_timestamp = time_series.timestamps[-1]
duration = float(last_timestamp - first_timestamp)

# Calculate duration from starting_time and rate if timestamps not available
elif time_series.starting_time is not None and time_series.rate is not None and time_series.rate > 0:
num_samples = data_shape[0]
duration = (num_samples - 1) / time_series.rate

# If we have a duration, check if it exceeds the threshold
if duration is not None and duration > duration_threshold:
# Convert threshold to years for the message (assuming 1 year = 365.25 days)
threshold_years = duration_threshold / 31557600.0
duration_years = duration / 31557600.0
return InspectorMessage(
message=(
f"TimeSeries '{time_series.name}' has a duration of {duration:.2f} seconds ({duration_years:.2f} years), "
f"which exceeds the threshold of {duration_threshold:.2f} seconds ({threshold_years:.2f} years). "
"Please verify that this is correct."
)
)

return None


@register_check(importance=Importance.CRITICAL, neurodata_type=TimeSeries)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be critical or a best practice violation?

def check_rate_not_below_threshold(
time_series: TimeSeries, low_rate_threshold: float = 0.01
) -> Optional[InspectorMessage]:
"""
Check if the sampling rate is suspiciously low (below threshold, default 0.01 Hz).

A very low rate likely indicates the period (time between samples) was provided instead of the frequency.
The default threshold of 0.01 Hz corresponds to a period of 100 seconds.
"""
if not hasattr(time_series, "rate"):
return None

if time_series.rate is not None and 0 < time_series.rate < low_rate_threshold:
period = 1.0 / time_series.rate
return InspectorMessage(
message=(
f"TimeSeries '{time_series.name}' has a sampling rate of {time_series.rate}Hz (period of {period:.2f} seconds), "
f"which is below the expected threshold of {low_rate_threshold}Hz. "
"This may indicate that the period was specified instead of the rate. "
f"If the intended period is {time_series.rate} seconds, the rate should be {1.0 / time_series.rate}Hz."
)
)

return None
207 changes: 207 additions & 0 deletions tests/unit_tests/test_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
check_missing_unit,
check_rate_is_not_zero,
check_rate_is_positive,
check_rate_not_below_threshold,
check_regular_timestamps,
check_resolution,
check_time_series_duration,
check_timestamp_of_the_first_sample_is_not_negative,
check_timestamps_ascending,
check_timestamps_match_first_dimension,
Expand Down Expand Up @@ -413,3 +415,208 @@ def test_check_rate_is_positive_fail():
object_name="TimeSeriesTest",
location="/",
)


def test_check_time_series_duration_pass_short_duration_with_timestamps():
"""Test that a short duration TimeSeries with timestamps passes."""
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=100),
timestamps=np.linspace(0, 100, 100), # 100 seconds, much less than 1 year
)
assert check_time_series_duration(time_series) is None


def test_check_time_series_duration_pass_short_duration_with_rate():
"""Test that a short duration TimeSeries with rate passes."""
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=1000),
starting_time=0.0,
rate=10.0, # 1000 samples at 10Hz = 100 seconds
)
assert check_time_series_duration(time_series) is None


def test_check_time_series_duration_fail_with_timestamps():
"""Test that a TimeSeries exceeding 1 year duration with timestamps fails."""
# Create timestamps spanning more than 1 year (31557600 seconds)
one_year = 31557600.0
time_series = pynwb.TimeSeries(
name="long_time_series",
unit="test_units",
data=np.zeros(shape=100),
timestamps=np.linspace(0, one_year + 1000, 100), # Exceeds 1 year
)
result = check_time_series_duration(time_series)
assert result is not None
assert "long_time_series" in result.message
assert "exceeds the threshold" in result.message
assert result.importance == Importance.BEST_PRACTICE_SUGGESTION
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you update these to be checked against the full inspector message? Similarly in the other tests.

e.g. check_time_series_duration(time_series) == InspectorMessage(...



def test_check_time_series_duration_fail_with_rate():
"""Test that a TimeSeries exceeding 1 year duration with rate fails."""
# Create a time series with more than 1 year of data
one_year = 31557600.0
rate = 1.0 # 1 Hz
num_samples = int(one_year + 1000) # More than 1 year worth of samples
time_series = pynwb.TimeSeries(
name="long_time_series",
unit="test_units",
data=np.zeros(shape=num_samples),
starting_time=0.0,
rate=rate,
)
Comment on lines 474 to 483
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to avoid creating a large array here just for tests, could lower the rate instead to minimize the number of samples

result = check_time_series_duration(time_series)
assert result is not None
assert "long_time_series" in result.message
assert "exceeds the threshold" in result.message
assert result.importance == Importance.BEST_PRACTICE_SUGGESTION


def test_check_time_series_duration_pass_custom_threshold():
"""Test that the custom duration threshold works correctly."""
# Create a TimeSeries with 200 seconds duration
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=100),
timestamps=np.linspace(0, 200, 100),
)
# Should fail with a threshold of 100 seconds
result = check_time_series_duration(time_series, duration_threshold=100.0)
assert result is not None

# Should pass with a threshold of 300 seconds
result = check_time_series_duration(time_series, duration_threshold=300.0)
assert result is None


def test_check_time_series_duration_pass_single_sample():
"""Test that TimeSeries with a single sample passes."""
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=1),
timestamps=[0],
)
assert check_time_series_duration(time_series) is None


def test_check_time_series_duration_pass_two_samples():
"""Test that TimeSeries with only two samples passes (duration cannot be calculated reliably)."""
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=2),
timestamps=[0, 1],
)
# Should pass - even though there's a duration, with only 2 samples we skip
assert check_time_series_duration(time_series) is None


def test_check_rate_not_below_threshold_pass_normal_rate():
"""Test that a normal sampling rate passes."""
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=100),
starting_time=0.0,
rate=30.0, # 30 Hz is a normal rate
)
assert check_rate_not_below_threshold(time_series) is None


def test_check_rate_not_below_threshold_pass_at_threshold():
"""Test that a rate exactly at the threshold passes."""
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=100),
starting_time=0.0,
rate=0.01, # Exactly at the default threshold
)
assert check_rate_not_below_threshold(time_series) is None


def test_check_rate_not_below_threshold_fail_very_low_rate():
"""Test that a very low sampling rate fails."""
low_rate = 0.001 # 0.001 Hz = period of 1000 seconds
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=100),
starting_time=0.0,
rate=low_rate,
)
result = check_rate_not_below_threshold(time_series)
assert result is not None
assert "test_time_series" in result.message
assert f"{low_rate}Hz" in result.message
assert "period" in result.message
assert result.importance == Importance.CRITICAL


def test_check_rate_not_below_threshold_fail_period_like_value():
"""Test detection when period was likely used instead of rate."""
# If someone uses 2.0 thinking it's a 2 second period, the rate should be 0.5 Hz
period_value = 2.0
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=100),
starting_time=0.0,
rate=period_value,
)
result = check_rate_not_below_threshold(time_series)
# Should pass with default threshold since 2.0 > 0.01
assert result is None

# But should fail with a custom threshold
result = check_rate_not_below_threshold(time_series, low_rate_threshold=5.0)
assert result is not None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def test_check_rate_not_below_threshold_fail_period_like_value():
"""Test detection when period was likely used instead of rate."""
# If someone uses 2.0 thinking it's a 2 second period, the rate should be 0.5 Hz
period_value = 2.0
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=100),
starting_time=0.0,
rate=period_value,
)
result = check_rate_not_below_threshold(time_series)
# Should pass with default threshold since 2.0 > 0.01
assert result is None
# But should fail with a custom threshold
result = check_rate_not_below_threshold(time_series, low_rate_threshold=5.0)
assert result is not None

I think this test covers the same behavior as the test above and below?



def test_check_rate_not_below_threshold_pass_custom_threshold():
"""Test that custom threshold works correctly."""
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=100),
starting_time=0.0,
rate=0.005, # Below default threshold of 0.01
)
# Should fail with default threshold
result = check_rate_not_below_threshold(time_series)
assert result is not None

# Should pass with lower custom threshold
result = check_rate_not_below_threshold(time_series, low_rate_threshold=0.001)
assert result is None


def test_check_rate_not_below_threshold_pass_no_rate():
"""Test that TimeSeries without rate attribute passes."""
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=100),
timestamps=np.linspace(0, 100, 100),
)
assert check_rate_not_below_threshold(time_series) is None


def test_check_rate_not_below_threshold_pass_zero_rate():
"""Test that zero rate passes (handled by different check)."""
time_series = pynwb.TimeSeries(
name="test_time_series",
unit="test_units",
data=np.zeros(shape=1),
starting_time=0.0,
rate=0.0,
)
# Zero rate should pass this check (it's handled by check_rate_is_not_zero)
assert check_rate_not_below_threshold(time_series) is None
Loading