Fix integration test: remove flaky freshness test, add event freshness test with proper window alignment

devin-ai-integration[bot] · arbiv · devin-ai-integration[bot] · commit 098d625bb525 · 2025-11-13T10:07:11.000Z
Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
diff --git a/integration_tests/tests/test_event_freshness_anomalies.py b/integration_tests/tests/test_event_freshness_anomalies.py
@@ -1,3 +1,4 @@
+import random
 from datetime import datetime, timedelta
 
 import pytest
@@ -88,3 +89,83 @@ def test_slower_rate_event_freshness(test_id: str, dbt_project: DbtProject):
         test_vars={"custom_run_started_at": test_started_at.isoformat()},
     )
     assert result["status"] == "fail"
+
+
+# Anomalies currently not supported on ClickHouse
+@pytest.mark.skip_targets(["clickhouse"])
+def test_exclude_detection_from_training(test_id: str, dbt_project: DbtProject):
+    """
+    Test the exclude_detection_period_from_training flag functionality for event freshness anomalies.
+
+    Scenario:
+    - 14 days total: 7 days normal (small jitter) + 7 days anomalous (large lag)
+    - Without exclusion: 7 anomalous days contaminate training, test passes
+    - With exclusion: only 7 normal days in training, anomaly detected, test fails
+    """
+    test_started_at = datetime.utcnow().replace(hour=0, minute=0, second=0)
+
+    random.seed(42)
+    normal_start = test_started_at - timedelta(days=14)
+    normal_data = []
+    for date in generate_dates(normal_start, step=STEP, days_back=7):
+        jitter_minutes = random.randint(0, 10)
+        normal_data.append(
+            {
+                EVENT_TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
+                UPDATE_TIMESTAMP_COLUMN: (
+                    date + timedelta(minutes=jitter_minutes)
+                ).strftime(DATE_FORMAT),
+            }
+        )
+
+    anomalous_start = test_started_at - timedelta(days=7)
+    anomalous_data = []
+    for date in generate_dates(anomalous_start, step=STEP, days_back=7):
+        anomalous_data.append(
+            {
+                EVENT_TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
+                UPDATE_TIMESTAMP_COLUMN: (date + timedelta(hours=5)).strftime(
+                    DATE_FORMAT
+                ),
+            }
+        )
+
+    all_data = normal_data + anomalous_data
+
+    test_args_without_exclusion = {
+        "event_timestamp_column": EVENT_TIMESTAMP_COLUMN,
+        "update_timestamp_column": UPDATE_TIMESTAMP_COLUMN,
+        "days_back": 14,
+        "backfill_days": 7,
+        "time_bucket": {"period": "hour", "count": 1},
+        "sensitivity": 3,
+    }
+
+    test_result_without_exclusion = dbt_project.test(
+        test_id + "_without_exclusion",
+        TEST_NAME,
+        test_args_without_exclusion,
+        data=all_data,
+        test_vars={"custom_run_started_at": test_started_at.isoformat()},
+    )
+
+    assert (
+        test_result_without_exclusion["status"] == "pass"
+    ), "Test should pass when anomaly is included in training"
+
+    test_args_with_exclusion = {
+        **test_args_without_exclusion,
+        "exclude_detection_period_from_training": True,
+    }
+
+    test_result_with_exclusion = dbt_project.test(
+        test_id + "_with_exclusion",
+        TEST_NAME,
+        test_args_with_exclusion,
+        data=all_data,
+        test_vars={"custom_run_started_at": test_started_at.isoformat()},
+    )
+
+    assert (
+        test_result_with_exclusion["status"] == "fail"
+    ), "Test should fail when anomaly is excluded from training"
diff --git a/integration_tests/tests/test_freshness_anomalies.py b/integration_tests/tests/test_freshness_anomalies.py
@@ -233,77 +233,3 @@ def test_first_metric_null(test_id, dbt_project: DbtProject):
             materialization="incremental",
         )
         assert result["status"] == "pass"
-
-
-# Test for exclude_detection_period_from_training functionality
-# This test demonstrates the use case where:
-# 1. Detection period contains anomalous freshness data that would normally be included in training
-# 2. With exclude_detection_period_from_training=False: anomaly is missed (test passes) because training includes the anomaly
-# 3. With exclude_detection_period_from_training=True: anomaly is detected (test fails) because training excludes the anomaly
-@pytest.mark.skip_targets(["clickhouse"])
-def test_exclude_detection_from_training(test_id: str, dbt_project: DbtProject):
-    """
-    Test the exclude_detection_period_from_training flag functionality for freshness anomalies.
-
-    Scenario:
-    - 30 days of normal data with consistent freshness (data arrives every 2 hours)
-    - 3 days of anomalous data (data arrives every 8 hours - slower/stale) in detection period
-    - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
-    - With exclusion: anomaly excluded from training, test fails (detects anomaly)
-    """
-    utc_now = datetime.utcnow()
-
-    # Generate 30 days of normal data with consistent freshness (every 2 hours)
-    normal_data = [
-        {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT)}
-        for date in generate_dates(
-            utc_now - timedelta(days=33), step=timedelta(hours=2), days_back=30
-        )
-    ]
-
-    anomalous_data = [
-        {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT)}
-        for date in generate_dates(utc_now, step=timedelta(hours=8), days_back=3)
-    ]
-
-    all_data = normal_data + anomalous_data
-
-    # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training)
-    test_args_without_exclusion = {
-        "timestamp_column": TIMESTAMP_COLUMN,
-        "training_period": {"period": "day", "count": 30},
-        "detection_period": {"period": "day", "count": 3},
-        "time_bucket": {"period": "day", "count": 1},
-        "sensitivity": 5,  # Higher sensitivity to allow anomaly to be absorbed
-        # exclude_detection_period_from_training is not set (defaults to False/None)
-    }
-
-    test_result_without_exclusion = dbt_project.test(
-        test_id + "_without_exclusion",
-        TEST_NAME,
-        test_args_without_exclusion,
-        data=all_data,
-    )
-
-    # This should PASS because the anomaly is included in training, making it part of the baseline
-    assert (
-        test_result_without_exclusion["status"] == "pass"
-    ), "Test should pass when anomaly is included in training"
-
-    # Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training)
-    test_args_with_exclusion = {
-        **test_args_without_exclusion,
-        "exclude_detection_period_from_training": True,
-    }
-
-    test_result_with_exclusion = dbt_project.test(
-        test_id + "_with_exclusion",
-        TEST_NAME,
-        test_args_with_exclusion,
-        data=all_data,
-    )
-
-    # This should FAIL because the anomaly is excluded from training, so it's detected as anomalous
-    assert (
-        test_result_with_exclusion["status"] == "fail"
-    ), "Test should fail when anomaly is excluded from training"