From d7f27685351d1fe3d050a3a71b913251f4805e4d Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 8 Oct 2025 20:11:35 -0400 Subject: [PATCH 01/12] Add time series duration check and corresponding tests --- src/nwbinspector/checks/__init__.py | 2 + src/nwbinspector/checks/_time_series.py | 48 +++++++++++ tests/unit_tests/test_time_series.py | 102 ++++++++++++++++++++++++ 3 files changed, 152 insertions(+) diff --git a/src/nwbinspector/checks/__init__.py b/src/nwbinspector/checks/__init__.py index cf231978..5434b543 100644 --- a/src/nwbinspector/checks/__init__.py +++ b/src/nwbinspector/checks/__init__.py @@ -83,6 +83,7 @@ check_rate_is_positive, check_regular_timestamps, check_resolution, + check_time_series_duration, check_timestamp_of_the_first_sample_is_not_negative, check_timestamps_ascending, check_timestamps_match_first_dimension, @@ -149,6 +150,7 @@ "check_timestamps_match_first_dimension", "check_timestamp_of_the_first_sample_is_not_negative", "check_rate_is_not_zero", + "check_time_series_duration", "check_intracellular_electrode_cell_id_exists", "check_compass_direction_unit", "check_spatial_series_radians_magnitude", diff --git a/src/nwbinspector/checks/_time_series.py b/src/nwbinspector/checks/_time_series.py index dc3ea083..7ea34583 100644 --- a/src/nwbinspector/checks/_time_series.py +++ b/src/nwbinspector/checks/_time_series.py @@ -202,3 +202,51 @@ def check_rate_is_positive(time_series: TimeSeries) -> Optional[InspectorMessage ) return None + + +@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=TimeSeries) +def check_time_series_duration( + time_series: TimeSeries, duration_threshold: float = 31557600.0 +) -> Optional[InspectorMessage]: + """ + Check if the TimeSeries duration is longer than the specified threshold. + + The default threshold is 1 year (31,557,600 seconds = 365.25 days). + Duration is calculated from either timestamps or starting_time + rate + data length. + """ + if time_series.data is None: + return None + + data_shape = get_data_shape(time_series.data) + if data_shape is None or data_shape[0] <= 1: + return None + + duration = None + + # Calculate duration from timestamps if available + if time_series.timestamps is not None: + timestamps_shape = get_data_shape(time_series.timestamps) + if timestamps_shape is not None and timestamps_shape[0] > 1: + first_timestamp = time_series.timestamps[0] + last_timestamp = time_series.timestamps[-1] + duration = float(last_timestamp - first_timestamp) + + # Calculate duration from starting_time and rate if timestamps not available + elif time_series.starting_time is not None and time_series.rate is not None and time_series.rate > 0: + num_samples = data_shape[0] + duration = (num_samples - 1) / time_series.rate + + # If we have a duration, check if it exceeds the threshold + if duration is not None and duration > duration_threshold: + # Convert threshold to years for the message (assuming 1 year = 365.25 days) + threshold_years = duration_threshold / 31557600.0 + duration_years = duration / 31557600.0 + return InspectorMessage( + message=( + f"TimeSeries '{time_series.name}' has a duration of {duration:.2f} seconds ({duration_years:.2f} years), " + f"which exceeds the threshold of {duration_threshold:.2f} seconds ({threshold_years:.2f} years). " + "Please verify that this is correct." + ) + ) + + return None diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index ef96e312..5abe5ebe 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -10,6 +10,7 @@ check_rate_is_positive, check_regular_timestamps, check_resolution, + check_time_series_duration, check_timestamp_of_the_first_sample_is_not_negative, check_timestamps_ascending, check_timestamps_match_first_dimension, @@ -413,3 +414,104 @@ def test_check_rate_is_positive_fail(): object_name="TimeSeriesTest", location="/", ) + + +def test_check_time_series_duration_pass_short_duration_with_timestamps(): + """Test that a short duration TimeSeries with timestamps passes.""" + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=100), + timestamps=np.linspace(0, 100, 100), # 100 seconds, much less than 1 year + ) + assert check_time_series_duration(time_series) is None + + +def test_check_time_series_duration_pass_short_duration_with_rate(): + """Test that a short duration TimeSeries with rate passes.""" + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=1000), + starting_time=0.0, + rate=10.0, # 1000 samples at 10Hz = 100 seconds + ) + assert check_time_series_duration(time_series) is None + + +def test_check_time_series_duration_fail_with_timestamps(): + """Test that a TimeSeries exceeding 1 year duration with timestamps fails.""" + # Create timestamps spanning more than 1 year (31557600 seconds) + one_year = 31557600.0 + time_series = pynwb.TimeSeries( + name="long_time_series", + unit="test_units", + data=np.zeros(shape=100), + timestamps=np.linspace(0, one_year + 1000, 100), # Exceeds 1 year + ) + result = check_time_series_duration(time_series) + assert result is not None + assert "long_time_series" in result.message + assert "exceeds the threshold" in result.message + assert result.importance == Importance.BEST_PRACTICE_SUGGESTION + + +def test_check_time_series_duration_fail_with_rate(): + """Test that a TimeSeries exceeding 1 year duration with rate fails.""" + # Create a time series with more than 1 year of data + one_year = 31557600.0 + rate = 1.0 # 1 Hz + num_samples = int(one_year + 1000) # More than 1 year worth of samples + time_series = pynwb.TimeSeries( + name="long_time_series", + unit="test_units", + data=np.zeros(shape=num_samples), + starting_time=0.0, + rate=rate, + ) + result = check_time_series_duration(time_series) + assert result is not None + assert "long_time_series" in result.message + assert "exceeds the threshold" in result.message + assert result.importance == Importance.BEST_PRACTICE_SUGGESTION + + +def test_check_time_series_duration_pass_custom_threshold(): + """Test that the custom duration threshold works correctly.""" + # Create a TimeSeries with 200 seconds duration + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=100), + timestamps=np.linspace(0, 200, 100), + ) + # Should fail with a threshold of 100 seconds + result = check_time_series_duration(time_series, duration_threshold=100.0) + assert result is not None + + # Should pass with a threshold of 300 seconds + result = check_time_series_duration(time_series, duration_threshold=300.0) + assert result is None + + +def test_check_time_series_duration_pass_single_sample(): + """Test that TimeSeries with a single sample passes.""" + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=1), + timestamps=[0], + ) + assert check_time_series_duration(time_series) is None + + +def test_check_time_series_duration_pass_two_samples(): + """Test that TimeSeries with only two samples passes (duration cannot be calculated reliably).""" + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=2), + timestamps=[0, 1], + ) + # Should pass - even though there's a duration, with only 2 samples we skip + assert check_time_series_duration(time_series) is None From 4fd527684187c53bf4607b83d4e159690cde4527 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 00:12:15 +0000 Subject: [PATCH 02/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unit_tests/test_time_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index 5abe5ebe..cee8daea 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -488,7 +488,7 @@ def test_check_time_series_duration_pass_custom_threshold(): # Should fail with a threshold of 100 seconds result = check_time_series_duration(time_series, duration_threshold=100.0) assert result is not None - + # Should pass with a threshold of 300 seconds result = check_time_series_duration(time_series, duration_threshold=300.0) assert result is None From 986ae0adf5c83d790233214479f8fd599670547f Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 9 Oct 2025 09:00:45 -0400 Subject: [PATCH 03/12] Add check for sampling rate below threshold and corresponding tests --- src/nwbinspector/checks/__init__.py | 2 + src/nwbinspector/checks/_time_series.py | 25 ++++++ tests/unit_tests/test_time_series.py | 105 ++++++++++++++++++++++++ 3 files changed, 132 insertions(+) diff --git a/src/nwbinspector/checks/__init__.py b/src/nwbinspector/checks/__init__.py index 5434b543..6fe096cf 100644 --- a/src/nwbinspector/checks/__init__.py +++ b/src/nwbinspector/checks/__init__.py @@ -81,6 +81,7 @@ check_missing_unit, check_rate_is_not_zero, check_rate_is_positive, + check_rate_not_below_threshold, check_regular_timestamps, check_resolution, check_time_series_duration, @@ -150,6 +151,7 @@ "check_timestamps_match_first_dimension", "check_timestamp_of_the_first_sample_is_not_negative", "check_rate_is_not_zero", + "check_rate_not_below_threshold", "check_time_series_duration", "check_intracellular_electrode_cell_id_exists", "check_compass_direction_unit", diff --git a/src/nwbinspector/checks/_time_series.py b/src/nwbinspector/checks/_time_series.py index 7ea34583..c5c7f361 100644 --- a/src/nwbinspector/checks/_time_series.py +++ b/src/nwbinspector/checks/_time_series.py @@ -250,3 +250,28 @@ def check_time_series_duration( ) return None + + +@register_check(importance=Importance.CRITICAL, neurodata_type=TimeSeries) +def check_rate_not_below_threshold(time_series: TimeSeries, low_rate_threshold: float = 0.01) -> Optional[InspectorMessage]: + """ + Check if the sampling rate is suspiciously low (below threshold, default 0.01 Hz). + + A very low rate likely indicates the period (time between samples) was provided instead of the frequency. + The default threshold of 0.01 Hz corresponds to a period of 100 seconds. + """ + if not hasattr(time_series, "rate"): + return None + + if time_series.rate is not None and 0 < time_series.rate < low_rate_threshold: + period = 1.0 / time_series.rate + return InspectorMessage( + message=( + f"TimeSeries '{time_series.name}' has a sampling rate of {time_series.rate}Hz (period of {period:.2f} seconds), " + f"which is below the expected threshold of {low_rate_threshold}Hz. " + "This may indicate that the period was specified instead of the rate. " + f"If the intended period is {time_series.rate} seconds, the rate should be {1.0 / time_series.rate}Hz." + ) + ) + + return None diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index cee8daea..f05d7988 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -8,6 +8,7 @@ check_missing_unit, check_rate_is_not_zero, check_rate_is_positive, + check_rate_not_below_threshold, check_regular_timestamps, check_resolution, check_time_series_duration, @@ -515,3 +516,107 @@ def test_check_time_series_duration_pass_two_samples(): ) # Should pass - even though there's a duration, with only 2 samples we skip assert check_time_series_duration(time_series) is None + + +def test_check_rate_not_below_threshold_pass_normal_rate(): + """Test that a normal sampling rate passes.""" + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=100), + starting_time=0.0, + rate=30.0, # 30 Hz is a normal rate + ) + assert check_rate_not_below_threshold(time_series) is None + + +def test_check_rate_not_below_threshold_pass_at_threshold(): + """Test that a rate exactly at the threshold passes.""" + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=100), + starting_time=0.0, + rate=0.01, # Exactly at the default threshold + ) + assert check_rate_not_below_threshold(time_series) is None + + +def test_check_rate_not_below_threshold_fail_very_low_rate(): + """Test that a very low sampling rate fails.""" + low_rate = 0.001 # 0.001 Hz = period of 1000 seconds + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=100), + starting_time=0.0, + rate=low_rate, + ) + result = check_rate_not_below_threshold(time_series) + assert result is not None + assert "test_time_series" in result.message + assert f"{low_rate}Hz" in result.message + assert "period" in result.message + assert result.importance == Importance.CRITICAL + + +def test_check_rate_not_below_threshold_fail_period_like_value(): + """Test detection when period was likely used instead of rate.""" + # If someone uses 2.0 thinking it's a 2 second period, the rate should be 0.5 Hz + period_value = 2.0 + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=100), + starting_time=0.0, + rate=period_value, + ) + result = check_rate_not_below_threshold(time_series) + # Should pass with default threshold since 2.0 > 0.01 + assert result is None + + # But should fail with a custom threshold + result = check_rate_not_below_threshold(time_series, low_rate_threshold=5.0) + assert result is not None + + +def test_check_rate_not_below_threshold_pass_custom_threshold(): + """Test that custom threshold works correctly.""" + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=100), + starting_time=0.0, + rate=0.005, # Below default threshold of 0.01 + ) + # Should fail with default threshold + result = check_rate_not_below_threshold(time_series) + assert result is not None + + # Should pass with lower custom threshold + result = check_rate_not_below_threshold(time_series, low_rate_threshold=0.001) + assert result is None + + +def test_check_rate_not_below_threshold_pass_no_rate(): + """Test that TimeSeries without rate attribute passes.""" + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=100), + timestamps=np.linspace(0, 100, 100), + ) + assert check_rate_not_below_threshold(time_series) is None + + +def test_check_rate_not_below_threshold_pass_zero_rate(): + """Test that zero rate passes (handled by different check).""" + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=np.zeros(shape=1), + starting_time=0.0, + rate=0.0, + ) + # Zero rate should pass this check (it's handled by check_rate_is_not_zero) + assert check_rate_not_below_threshold(time_series) is None From 25aaec4674355dd65624ea20de304068fdac037c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:00:58 +0000 Subject: [PATCH 04/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/nwbinspector/checks/_time_series.py | 6 ++++-- tests/unit_tests/test_time_series.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/nwbinspector/checks/_time_series.py b/src/nwbinspector/checks/_time_series.py index c5c7f361..25f25d4e 100644 --- a/src/nwbinspector/checks/_time_series.py +++ b/src/nwbinspector/checks/_time_series.py @@ -253,10 +253,12 @@ def check_time_series_duration( @register_check(importance=Importance.CRITICAL, neurodata_type=TimeSeries) -def check_rate_not_below_threshold(time_series: TimeSeries, low_rate_threshold: float = 0.01) -> Optional[InspectorMessage]: +def check_rate_not_below_threshold( + time_series: TimeSeries, low_rate_threshold: float = 0.01 +) -> Optional[InspectorMessage]: """ Check if the sampling rate is suspiciously low (below threshold, default 0.01 Hz). - + A very low rate likely indicates the period (time between samples) was provided instead of the frequency. The default threshold of 0.01 Hz corresponds to a period of 100 seconds. """ diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index f05d7988..526f2b65 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -574,7 +574,7 @@ def test_check_rate_not_below_threshold_fail_period_like_value(): result = check_rate_not_below_threshold(time_series) # Should pass with default threshold since 2.0 > 0.01 assert result is None - + # But should fail with a custom threshold result = check_rate_not_below_threshold(time_series, low_rate_threshold=5.0) assert result is not None @@ -592,7 +592,7 @@ def test_check_rate_not_below_threshold_pass_custom_threshold(): # Should fail with default threshold result = check_rate_not_below_threshold(time_series) assert result is not None - + # Should pass with lower custom threshold result = check_rate_not_below_threshold(time_series, low_rate_threshold=0.001) assert result is None From 006350caece0f0e534e141a09ae85c36709697e6 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 10 Oct 2025 13:16:59 -0400 Subject: [PATCH 05/12] Update src/nwbinspector/checks/_time_series.py Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- src/nwbinspector/checks/_time_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nwbinspector/checks/_time_series.py b/src/nwbinspector/checks/_time_series.py index 25f25d4e..88b97584 100644 --- a/src/nwbinspector/checks/_time_series.py +++ b/src/nwbinspector/checks/_time_series.py @@ -232,7 +232,7 @@ def check_time_series_duration( duration = float(last_timestamp - first_timestamp) # Calculate duration from starting_time and rate if timestamps not available - elif time_series.starting_time is not None and time_series.rate is not None and time_series.rate > 0: + elif time_series.rate is not None and time_series.rate > 0: num_samples = data_shape[0] duration = (num_samples - 1) / time_series.rate From de9af717994d7708645ef13b808b99b4d0dfa0dc Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 10 Oct 2025 13:17:31 -0400 Subject: [PATCH 06/12] Update src/nwbinspector/checks/_time_series.py Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- src/nwbinspector/checks/_time_series.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/nwbinspector/checks/_time_series.py b/src/nwbinspector/checks/_time_series.py index 88b97584..e711fd15 100644 --- a/src/nwbinspector/checks/_time_series.py +++ b/src/nwbinspector/checks/_time_series.py @@ -238,13 +238,12 @@ def check_time_series_duration( # If we have a duration, check if it exceeds the threshold if duration is not None and duration > duration_threshold: - # Convert threshold to years for the message (assuming 1 year = 365.25 days) - threshold_years = duration_threshold / 31557600.0 + # Convert duration to years for the message duration_years = duration / 31557600.0 return InspectorMessage( message=( - f"TimeSeries '{time_series.name}' has a duration of {duration:.2f} seconds ({duration_years:.2f} years), " - f"which exceeds the threshold of {duration_threshold:.2f} seconds ({threshold_years:.2f} years). " + f"TimeSeries '{time_series.name}' has an unusually long duration of {duration:.2f} seconds ({duration_years:.2f} years), " + f"which may indicate an error in the timestamps or rate data. " "Please verify that this is correct." ) ) From 190e5ec4d723e01b9a97dc3ac1a727e699916bb4 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 10 Oct 2025 13:18:06 -0400 Subject: [PATCH 07/12] Update src/nwbinspector/checks/_time_series.py Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- src/nwbinspector/checks/_time_series.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/nwbinspector/checks/_time_series.py b/src/nwbinspector/checks/_time_series.py index e711fd15..496d122b 100644 --- a/src/nwbinspector/checks/_time_series.py +++ b/src/nwbinspector/checks/_time_series.py @@ -268,9 +268,8 @@ def check_rate_not_below_threshold( period = 1.0 / time_series.rate return InspectorMessage( message=( - f"TimeSeries '{time_series.name}' has a sampling rate of {time_series.rate}Hz (period of {period:.2f} seconds), " - f"which is below the expected threshold of {low_rate_threshold}Hz. " - "This may indicate that the period was specified instead of the rate. " + f"TimeSeries '{time_series.name}' has a sampling rate of {time_series.rate}Hz (period of {period:.2f} seconds). " + "This low sampling rate may indicate that the period was specified instead of the rate. " f"If the intended period is {time_series.rate} seconds, the rate should be {1.0 / time_series.rate}Hz." ) ) From eaf9ab13ef97b3142413159197d070fffd6741cf Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 10 Oct 2025 13:18:24 -0400 Subject: [PATCH 08/12] Update tests/unit_tests/test_time_series.py Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- tests/unit_tests/test_time_series.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index 526f2b65..57223843 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -530,16 +530,6 @@ def test_check_rate_not_below_threshold_pass_normal_rate(): assert check_rate_not_below_threshold(time_series) is None -def test_check_rate_not_below_threshold_pass_at_threshold(): - """Test that a rate exactly at the threshold passes.""" - time_series = pynwb.TimeSeries( - name="test_time_series", - unit="test_units", - data=np.zeros(shape=100), - starting_time=0.0, - rate=0.01, # Exactly at the default threshold - ) - assert check_rate_not_below_threshold(time_series) is None def test_check_rate_not_below_threshold_fail_very_low_rate(): From 31b66477bf67d4e3904590aa5d8f5838dd1634ad Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Oct 2025 17:18:53 +0000 Subject: [PATCH 09/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unit_tests/test_time_series.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index 57223843..7d1f0552 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -530,8 +530,6 @@ def test_check_rate_not_below_threshold_pass_normal_rate(): assert check_rate_not_below_threshold(time_series) is None - - def test_check_rate_not_below_threshold_fail_very_low_rate(): """Test that a very low sampling rate fails.""" low_rate = 0.001 # 0.001 Hz = period of 1000 seconds From 39737e07dc88cb8dcfbf667f0be80fd3d41189cf Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 10 Oct 2025 13:27:39 -0400 Subject: [PATCH 10/12] Update tests/unit_tests/test_time_series.py Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- tests/unit_tests/test_time_series.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index 7d1f0552..f8c8d3cb 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -506,16 +506,6 @@ def test_check_time_series_duration_pass_single_sample(): assert check_time_series_duration(time_series) is None -def test_check_time_series_duration_pass_two_samples(): - """Test that TimeSeries with only two samples passes (duration cannot be calculated reliably).""" - time_series = pynwb.TimeSeries( - name="test_time_series", - unit="test_units", - data=np.zeros(shape=2), - timestamps=[0, 1], - ) - # Should pass - even though there's a duration, with only 2 samples we skip - assert check_time_series_duration(time_series) is None def test_check_rate_not_below_threshold_pass_normal_rate(): From 786aef35c45f7337a852247f8fd9c43dd71e853e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Oct 2025 17:27:50 +0000 Subject: [PATCH 11/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unit_tests/test_time_series.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index f8c8d3cb..22a327f1 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -506,8 +506,6 @@ def test_check_time_series_duration_pass_single_sample(): assert check_time_series_duration(time_series) is None - - def test_check_rate_not_below_threshold_pass_normal_rate(): """Test that a normal sampling rate passes.""" time_series = pynwb.TimeSeries( From 6cf9922d7390fcd197ba009e005d009511547a06 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 10 Oct 2025 13:59:12 -0400 Subject: [PATCH 12/12] Update CHANGELOG and improve checks for time series duration and rate --- CHANGELOG.md | 2 + src/nwbinspector/checks/_time_series.py | 2 +- tests/unit_tests/test_time_series.py | 83 ++++++++++++++----------- 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 880ab649..3cd37b6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ### New Checks * Added `check_file_extension` for NWB file extension best practice recommendations (`.nwb`, `.nwb.h5`, or `.nwb.zarr`) [#625](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/625) +* Added `check_time_series_duration` to detect unusually long TimeSeries durations (default threshold: 1 year). [#627](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/627) +* Added `check_rate_not_below_threshold` to detect suspiciously low sampling rates that may indicate period was used instead of rate. [#627](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/627) ### Improvements * Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624) diff --git a/src/nwbinspector/checks/_time_series.py b/src/nwbinspector/checks/_time_series.py index 496d122b..ac6de544 100644 --- a/src/nwbinspector/checks/_time_series.py +++ b/src/nwbinspector/checks/_time_series.py @@ -251,7 +251,7 @@ def check_time_series_duration( return None -@register_check(importance=Importance.CRITICAL, neurodata_type=TimeSeries) +@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=TimeSeries) def check_rate_not_below_threshold( time_series: TimeSeries, low_rate_threshold: float = 0.01 ) -> Optional[InspectorMessage]: diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index 22a327f1..7787b22f 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -450,19 +450,30 @@ def test_check_time_series_duration_fail_with_timestamps(): data=np.zeros(shape=100), timestamps=np.linspace(0, one_year + 1000, 100), # Exceeds 1 year ) - result = check_time_series_duration(time_series) - assert result is not None - assert "long_time_series" in result.message - assert "exceeds the threshold" in result.message - assert result.importance == Importance.BEST_PRACTICE_SUGGESTION + duration = one_year + 1000 + duration_years = duration / 31557600.0 + expected_message = ( + f"TimeSeries 'long_time_series' has an unusually long duration of {duration:.2f} seconds ({duration_years:.2f} years), " + f"which may indicate an error in the timestamps or rate data. " + "Please verify that this is correct." + ) + assert check_time_series_duration(time_series) == InspectorMessage( + message=expected_message, + importance=Importance.BEST_PRACTICE_SUGGESTION, + check_function_name="check_time_series_duration", + object_type="TimeSeries", + object_name="long_time_series", + location="/", + ) def test_check_time_series_duration_fail_with_rate(): """Test that a TimeSeries exceeding 1 year duration with rate fails.""" # Create a time series with more than 1 year of data + # Use a lower rate to avoid creating a large array one_year = 31557600.0 - rate = 1.0 # 1 Hz - num_samples = int(one_year + 1000) # More than 1 year worth of samples + rate = 0.01 # 0.01 Hz = one sample every 100 seconds + num_samples = int((one_year + 1000) * rate) + 1 # Minimal samples needed time_series = pynwb.TimeSeries( name="long_time_series", unit="test_units", @@ -470,11 +481,21 @@ def test_check_time_series_duration_fail_with_rate(): starting_time=0.0, rate=rate, ) - result = check_time_series_duration(time_series) - assert result is not None - assert "long_time_series" in result.message - assert "exceeds the threshold" in result.message - assert result.importance == Importance.BEST_PRACTICE_SUGGESTION + duration = (num_samples - 1) / rate + duration_years = duration / 31557600.0 + expected_message = ( + f"TimeSeries 'long_time_series' has an unusually long duration of {duration:.2f} seconds ({duration_years:.2f} years), " + f"which may indicate an error in the timestamps or rate data. " + "Please verify that this is correct." + ) + assert check_time_series_duration(time_series) == InspectorMessage( + message=expected_message, + importance=Importance.BEST_PRACTICE_SUGGESTION, + check_function_name="check_time_series_duration", + object_type="TimeSeries", + object_name="long_time_series", + location="/", + ) def test_check_time_series_duration_pass_custom_threshold(): @@ -528,32 +549,20 @@ def test_check_rate_not_below_threshold_fail_very_low_rate(): starting_time=0.0, rate=low_rate, ) - result = check_rate_not_below_threshold(time_series) - assert result is not None - assert "test_time_series" in result.message - assert f"{low_rate}Hz" in result.message - assert "period" in result.message - assert result.importance == Importance.CRITICAL - - -def test_check_rate_not_below_threshold_fail_period_like_value(): - """Test detection when period was likely used instead of rate.""" - # If someone uses 2.0 thinking it's a 2 second period, the rate should be 0.5 Hz - period_value = 2.0 - time_series = pynwb.TimeSeries( - name="test_time_series", - unit="test_units", - data=np.zeros(shape=100), - starting_time=0.0, - rate=period_value, + period = 1.0 / low_rate + expected_message = ( + f"TimeSeries 'test_time_series' has a sampling rate of {low_rate}Hz (period of {period:.2f} seconds). " + "This low sampling rate may indicate that the period was specified instead of the rate. " + f"If the intended period is {low_rate} seconds, the rate should be {1.0 / low_rate}Hz." + ) + assert check_rate_not_below_threshold(time_series) == InspectorMessage( + message=expected_message, + importance=Importance.BEST_PRACTICE_VIOLATION, + check_function_name="check_rate_not_below_threshold", + object_type="TimeSeries", + object_name="test_time_series", + location="/", ) - result = check_rate_not_below_threshold(time_series) - # Should pass with default threshold since 2.0 > 0.01 - assert result is None - - # But should fail with a custom threshold - result = check_rate_not_below_threshold(time_series, low_rate_threshold=5.0) - assert result is not None def test_check_rate_not_below_threshold_pass_custom_threshold():