diff --git a/CHANGELOG.md b/CHANGELOG.md
index 880ab649..3cd37b6f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ### New Checks
 * Added `check_file_extension` for NWB file extension best practice recommendations (`.nwb`, `.nwb.h5`, or `.nwb.zarr`) [#625](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/625)
+* Added `check_time_series_duration` to detect unusually long TimeSeries durations (default threshold: 1 year). [#627](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/627)
+* Added `check_rate_not_below_threshold` to detect suspiciously low sampling rates that may indicate period was used instead of rate. [#627](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/627)
 
 ### Improvements
 * Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624)
diff --git a/src/nwbinspector/checks/__init__.py b/src/nwbinspector/checks/__init__.py
index 3011d187..63b5cae7 100644
--- a/src/nwbinspector/checks/__init__.py
+++ b/src/nwbinspector/checks/__init__.py
@@ -82,8 +82,10 @@
     check_missing_unit,
     check_rate_is_not_zero,
     check_rate_is_positive,
+    check_rate_not_below_threshold,
     check_regular_timestamps,
     check_resolution,
+    check_time_series_duration,
     check_timestamp_of_the_first_sample_is_not_negative,
     check_timestamps_ascending,
     check_timestamps_match_first_dimension,
@@ -151,6 +153,8 @@
     "check_timestamps_match_first_dimension",
     "check_timestamp_of_the_first_sample_is_not_negative",
     "check_rate_is_not_zero",
+    "check_rate_not_below_threshold",
+    "check_time_series_duration",
     "check_intracellular_electrode_cell_id_exists",
     "check_compass_direction_unit",
     "check_spatial_series_radians_magnitude",
diff --git a/src/nwbinspector/checks/_time_series.py b/src/nwbinspector/checks/_time_series.py
index dc3ea083..ac6de544 100644
--- a/src/nwbinspector/checks/_time_series.py
+++ b/src/nwbinspector/checks/_time_series.py
@@ -202,3 +202,76 @@ def check_rate_is_positive(time_series: TimeSeries) -> Optional[InspectorMessage
         )
 
     return None
+
+
+@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=TimeSeries)
+def check_time_series_duration(
+    time_series: TimeSeries, duration_threshold: float = 31557600.0
+) -> Optional[InspectorMessage]:
+    """
+    Check if the TimeSeries duration is longer than the specified threshold.
+
+    The default threshold is 1 year (31,557,600 seconds = 365.25 days).
+    Duration is calculated from the first and last timestamps or, if timestamps are absent, from rate and data length.
+    """
+    if time_series.data is None:
+        return None
+
+    data_shape = get_data_shape(time_series.data)
+    if data_shape is None or data_shape[0] <= 1:
+        return None
+
+    duration = None
+
+    # Calculate duration from timestamps if available
+    if time_series.timestamps is not None:
+        timestamps_shape = get_data_shape(time_series.timestamps)
+        if timestamps_shape is not None and timestamps_shape[0] > 1:
+            first_timestamp = time_series.timestamps[0]
+            last_timestamp = time_series.timestamps[-1]
+            duration = float(last_timestamp - first_timestamp)
+
+    # Calculate duration from rate and number of samples if timestamps not available
+    elif time_series.rate is not None and time_series.rate > 0:
+        num_samples = data_shape[0]
+        duration = (num_samples - 1) / time_series.rate
+
+    # If we have a duration, check if it exceeds the threshold
+    if duration is not None and duration > duration_threshold:
+        # Convert duration to years for the message
+        duration_years = duration / 31557600.0
+        return InspectorMessage(
+            message=(
+                f"TimeSeries '{time_series.name}' has an unusually long duration of {duration:.2f} seconds ({duration_years:.2f} years), "
+                f"which may indicate an error in the timestamps or rate data. "
+                "Please verify that this is correct."
+            )
+        )
+
+    return None
+
+
+@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=TimeSeries)
+def check_rate_not_below_threshold(
+    time_series: TimeSeries, low_rate_threshold: float = 0.01
+) -> Optional[InspectorMessage]:
+    """
+    Check if the sampling rate is suspiciously low (below threshold, default 0.01 Hz).
+
+    A very low rate likely indicates the period (time between samples) was provided instead of the frequency.
+    The default threshold of 0.01 Hz corresponds to a period of 100 seconds.
+    """
+    if not hasattr(time_series, "rate"):
+        return None
+
+    if time_series.rate is not None and 0 < time_series.rate < low_rate_threshold:
+        period = 1.0 / time_series.rate
+        return InspectorMessage(
+            message=(
+                f"TimeSeries '{time_series.name}' has a sampling rate of {time_series.rate}Hz (period of {period:.2f} seconds). "
+                "This low sampling rate may indicate that the period was specified instead of the rate. "
+                f"If the intended period is {time_series.rate} seconds, the rate should be {1.0 / time_series.rate}Hz."
+            )
+        )
+
+    return None
diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py
index ef96e312..7787b22f 100644
--- a/tests/unit_tests/test_time_series.py
+++ b/tests/unit_tests/test_time_series.py
@@ -8,8 +8,10 @@
     check_missing_unit,
     check_rate_is_not_zero,
     check_rate_is_positive,
+    check_rate_not_below_threshold,
     check_regular_timestamps,
     check_resolution,
+    check_time_series_duration,
     check_timestamp_of_the_first_sample_is_not_negative,
     check_timestamps_ascending,
     check_timestamps_match_first_dimension,
@@ -413,3 +415,193 @@ def test_check_rate_is_positive_fail():
         object_name="TimeSeriesTest",
         location="/",
     )
+
+
+def test_check_time_series_duration_pass_short_duration_with_timestamps():
+    """Test that a short duration TimeSeries with timestamps passes."""
+    time_series = pynwb.TimeSeries(
+        name="test_time_series",
+        unit="test_units",
+        data=np.zeros(shape=100),
+        timestamps=np.linspace(0, 100, 100),  # 100 seconds, much less than 1 year
+    )
+    assert check_time_series_duration(time_series) is None
+
+
+def test_check_time_series_duration_pass_short_duration_with_rate():
+    """Test that a short duration TimeSeries with rate passes."""
+    time_series = pynwb.TimeSeries(
+        name="test_time_series",
+        unit="test_units",
+        data=np.zeros(shape=1000),
+        starting_time=0.0,
+        rate=10.0,  # 1000 samples at 10Hz = 100 seconds
+    )
+    assert check_time_series_duration(time_series) is None
+
+
+def test_check_time_series_duration_fail_with_timestamps():
+    """Test that a TimeSeries exceeding 1 year duration with timestamps fails."""
+    # Create timestamps spanning more than 1 year (31557600 seconds)
+    one_year = 31557600.0
+    time_series = pynwb.TimeSeries(
+        name="long_time_series",
+        unit="test_units",
+        data=np.zeros(shape=100),
+        timestamps=np.linspace(0, one_year + 1000, 100),  # Exceeds 1 year
+    )
+    duration = one_year + 1000
+    duration_years = duration / 31557600.0
+    expected_message = (
+        f"TimeSeries 'long_time_series' has an unusually long duration of {duration:.2f} seconds ({duration_years:.2f} years), "
+        f"which may indicate an error in the timestamps or rate data. "
+        "Please verify that this is correct."
+    )
+    assert check_time_series_duration(time_series) == InspectorMessage(
+        message=expected_message,
+        importance=Importance.BEST_PRACTICE_SUGGESTION,
+        check_function_name="check_time_series_duration",
+        object_type="TimeSeries",
+        object_name="long_time_series",
+        location="/",
+    )
+
+
+def test_check_time_series_duration_fail_with_rate():
+    """Test that a TimeSeries exceeding 1 year duration with rate fails."""
+    # Create a time series with more than 1 year of data
+    # Use a lower rate to avoid creating a large array
+    one_year = 31557600.0
+    rate = 0.01  # 0.01 Hz = one sample every 100 seconds
+    num_samples = int((one_year + 1000) * rate) + 1  # Minimal samples needed
+    time_series = pynwb.TimeSeries(
+        name="long_time_series",
+        unit="test_units",
+        data=np.zeros(shape=num_samples),
+        starting_time=0.0,
+        rate=rate,
+    )
+    duration = (num_samples - 1) / rate
+    duration_years = duration / 31557600.0
+    expected_message = (
+        f"TimeSeries 'long_time_series' has an unusually long duration of {duration:.2f} seconds ({duration_years:.2f} years), "
+        f"which may indicate an error in the timestamps or rate data. "
+        "Please verify that this is correct."
+    )
+    assert check_time_series_duration(time_series) == InspectorMessage(
+        message=expected_message,
+        importance=Importance.BEST_PRACTICE_SUGGESTION,
+        check_function_name="check_time_series_duration",
+        object_type="TimeSeries",
+        object_name="long_time_series",
+        location="/",
+    )
+
+
+def test_check_time_series_duration_pass_custom_threshold():
+    """Test that the custom duration threshold works correctly."""
+    # Create a TimeSeries with 200 seconds duration
+    time_series = pynwb.TimeSeries(
+        name="test_time_series",
+        unit="test_units",
+        data=np.zeros(shape=100),
+        timestamps=np.linspace(0, 200, 100),
+    )
+    # Should fail with a threshold of 100 seconds
+    result = check_time_series_duration(time_series, duration_threshold=100.0)
+    assert result is not None
+
+    # Should pass with a threshold of 300 seconds
+    result = check_time_series_duration(time_series, duration_threshold=300.0)
+    assert result is None
+
+
+def test_check_time_series_duration_pass_single_sample():
+    """Test that TimeSeries with a single sample passes."""
+    time_series = pynwb.TimeSeries(
+        name="test_time_series",
+        unit="test_units",
+        data=np.zeros(shape=1),
+        timestamps=[0],
+    )
+    assert check_time_series_duration(time_series) is None
+
+
+def test_check_rate_not_below_threshold_pass_normal_rate():
+    """Test that a normal sampling rate passes."""
+    time_series = pynwb.TimeSeries(
+        name="test_time_series",
+        unit="test_units",
+        data=np.zeros(shape=100),
+        starting_time=0.0,
+        rate=30.0,  # 30 Hz is a normal rate
+    )
+    assert check_rate_not_below_threshold(time_series) is None
+
+
+def test_check_rate_not_below_threshold_fail_very_low_rate():
+    """Test that a very low sampling rate fails."""
+    low_rate = 0.001  # 0.001 Hz = period of 1000 seconds
+    time_series = pynwb.TimeSeries(
+        name="test_time_series",
+        unit="test_units",
+        data=np.zeros(shape=100),
+        starting_time=0.0,
+        rate=low_rate,
+    )
+    period = 1.0 / low_rate
+    expected_message = (
+        f"TimeSeries 'test_time_series' has a sampling rate of {low_rate}Hz (period of {period:.2f} seconds). "
+        "This low sampling rate may indicate that the period was specified instead of the rate. "
+        f"If the intended period is {low_rate} seconds, the rate should be {1.0 / low_rate}Hz."
+    )
+    assert check_rate_not_below_threshold(time_series) == InspectorMessage(
+        message=expected_message,
+        importance=Importance.BEST_PRACTICE_VIOLATION,
+        check_function_name="check_rate_not_below_threshold",
+        object_type="TimeSeries",
+        object_name="test_time_series",
+        location="/",
+    )
+
+
+def test_check_rate_not_below_threshold_pass_custom_threshold():
+    """Test that custom threshold works correctly."""
+    time_series = pynwb.TimeSeries(
+        name="test_time_series",
+        unit="test_units",
+        data=np.zeros(shape=100),
+        starting_time=0.0,
+        rate=0.005,  # Below default threshold of 0.01
+    )
+    # Should fail with default threshold
+    result = check_rate_not_below_threshold(time_series)
+    assert result is not None
+
+    # Should pass with lower custom threshold
+    result = check_rate_not_below_threshold(time_series, low_rate_threshold=0.001)
+    assert result is None
+
+
+def test_check_rate_not_below_threshold_pass_no_rate():
+    """Test that TimeSeries without rate attribute passes."""
+    time_series = pynwb.TimeSeries(
+        name="test_time_series",
+        unit="test_units",
+        data=np.zeros(shape=100),
+        timestamps=np.linspace(0, 100, 100),
+    )
+    assert check_rate_not_below_threshold(time_series) is None
+
+
+def test_check_rate_not_below_threshold_pass_zero_rate():
+    """Test that zero rate passes (handled by different check)."""
+    time_series = pynwb.TimeSeries(
+        name="test_time_series",
+        unit="test_units",
+        data=np.zeros(shape=1),
+        starting_time=0.0,
+        rate=0.0,
+    )
+    # Zero rate should pass this check (it's handled by check_rate_is_not_zero)
+    assert check_rate_not_below_threshold(time_series) is None