
Commit 8a70f2a

Fix test_column_anomalies_exclude_detection_period_from_training with more substantial dataset
- Use 30 days of normal data with a low null count (0-2 nulls/day) instead of 1 day
- Use 7 days of anomalous data with a high null count (20 nulls/day) instead of 1 day
- Update the training period to 30 days and the detection period to 7 days
- Add more data per day to create a clearer anomaly signal
- Use separate test IDs for the two test runs to avoid conflicts
- Match the pattern of the successful volume and freshness anomaly tests

Co-Authored-By: Yosef Arbiv <[email protected]>
1 parent 6b60a66 commit 8a70f2a
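For context, the flag works roughly as sketched below: with exclusion enabled, the training window ends where the detection window begins instead of overlapping it. This is only an illustration of the windowing the commit message describes; the real logic lives in the elementary dbt package, and the variable names and date arithmetic here are assumptions.

from datetime import date, timedelta

# Hypothetical windowing sketch; the package's actual implementation
# may differ in details.
today = date.today()
detection_period = timedelta(days=7)
training_period = timedelta(days=30)

detection_start = today - detection_period

# exclude_detection_period_from_training=False: training counts back from
# today, so the 7 anomalous days fall inside the baseline.
training_start_overlapping = today - training_period

# exclude_detection_period_from_training=True: training ends where the
# detection window begins, keeping the anomalous days out of the baseline.
training_start_excluded = detection_start - training_period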

File tree

1 file changed: +70 -38 lines changed

integration_tests/tests/test_column_anomalies.py

Lines changed: 70 additions & 38 deletions
@@ -483,67 +483,99 @@ def test_anomalous_boolean_column_anomalies(test_id: str, dbt_project: DbtProject
 def test_column_anomalies_exclude_detection_period_from_training(
     test_id: str, dbt_project: DbtProject
 ):
+    """
+    Test the exclude_detection_period_from_training flag functionality for column anomalies.
+
+    Scenario:
+    - 30 days of normal data with low null count (0-2 nulls per day)
+    - 7 days of anomalous data with high null count (20 nulls per day) in detection period
+    - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
+    - With exclusion: anomaly excluded from training, test fails (detects anomaly)
+    """
     utc_today = datetime.utcnow().date()
-    test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1))
-
-    data: List[Dict[str, Any]] = [
-        {
-            TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT),
-            "superhero": superhero,
-        }
-        for cur_date in training_dates
-        for superhero in ["Superman", "Batman"]
-    ]
-
-    data += [
-        {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": None}
-        for _ in range(10)
-    ]
 
-    test_args_false = {
+    # Generate 30 days of normal data with low null count (0-2 nulls per day)
+    normal_data = []
+    for i in range(30):
+        date = utc_today - timedelta(days=37 - i)
+        normal_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": superhero}
+                for superhero in ["Superman", "Batman", "Wonder Woman", "Flash"] * 5
+            ]
+        )
+        null_count = i % 3
+        normal_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None}
+                for _ in range(null_count)
+            ]
+        )
+
+    # Generate 7 days of anomalous data with high null count (20 nulls per day)
+    anomalous_data = []
+    for i in range(7):
+        date = utc_today - timedelta(days=7 - i)
+        anomalous_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": superhero}
+                for superhero in ["Superman", "Batman"]
+            ]
+        )
+        anomalous_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None}
+                for _ in range(20)
+            ]
+        )
+
+    all_data = normal_data + anomalous_data
+
+    # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training)
+    test_args_without_exclusion = {
         "timestamp_column": TIMESTAMP_COLUMN,
         "column_anomalies": ["null_count"],
         "time_bucket": {"period": "day", "count": 1},
-        "training_period": {"period": "day", "count": 1},
-        "detection_period": {"period": "day", "count": 1},
-        "min_training_set_size": 1,
+        "training_period": {"period": "day", "count": 30},
+        "detection_period": {"period": "day", "count": 7},
+        "min_training_set_size": 5,
         "anomaly_sensitivity": 3,
         "anomaly_direction": "spike",
         "exclude_detection_period_from_training": False,
     }
-    test_result_false = dbt_project.test(
-        test_id,
+
+    test_result_without_exclusion = dbt_project.test(
+        test_id + "_without_exclusion",
         DBT_TEST_NAME,
-        test_args_false,
-        data=data,
+        test_args_without_exclusion,
+        data=all_data,
         test_column="superhero",
         test_vars={"force_metrics_backfill": True},
     )
-    assert test_result_false["status"] == "pass", (
+
+    # This should PASS because the anomaly is included in training, making it part of the baseline
+    assert test_result_without_exclusion["status"] == "pass", (
         "Expected PASS when exclude_detection_period_from_training=False "
         "(detection data included in training baseline)"
     )
 
-    test_args_true = {
-        "timestamp_column": TIMESTAMP_COLUMN,
-        "column_anomalies": ["null_count"],
-        "time_bucket": {"period": "day", "count": 1},
-        "training_period": {"period": "day", "count": 1},
-        "detection_period": {"period": "day", "count": 1},
-        "min_training_set_size": 1,
-        "anomaly_sensitivity": 3,
-        "anomaly_direction": "spike",
+    # Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training)
+    test_args_with_exclusion = {
+        **test_args_without_exclusion,
         "exclude_detection_period_from_training": True,
     }
-    test_result_true = dbt_project.test(
-        test_id,
+
+    test_result_with_exclusion = dbt_project.test(
+        test_id + "_with_exclusion",
         DBT_TEST_NAME,
-        test_args_true,
-        data=data,
+        test_args_with_exclusion,
+        data=all_data,
         test_column="superhero",
         test_vars={"force_metrics_backfill": True},
     )
-    assert test_result_true["status"] == "fail", (
+
+    # This should FAIL because the anomaly is excluded from training, so it's detected as anomalous
+    assert test_result_with_exclusion["status"] == "fail", (
         "Expected FAIL when exclude_detection_period_from_training=True "
         "(detection data excluded from training baseline, anomaly detected)"
     )
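
As a rough sanity check on the two assertions, the back-of-the-envelope arithmetic below reproduces the pass/fail split under a plain z-score model using the test's sensitivity of 3. This is a sketch under that assumption; Elementary's actual detector is more involved, and the helper names here are illustrative only.

# Back-of-the-envelope check of the two assertions, assuming a simple
# z-score detector (a sketch, not the package's exact algorithm).
from statistics import mean, stdev

normal = [i % 3 for i in range(30)]   # daily null_count on the 30 normal days (0-2)
anomalous = [20] * 7                  # daily null_count on the 7 detection days
SENSITIVITY = 3                       # matches anomaly_sensitivity in the test


def zscore(value: float, training: list) -> float:
    return (value - mean(training)) / stdev(training)


# Without exclusion the detection days inflate the baseline's mean and
# spread, so a 20-null day scores only about 2 sigma -> no anomaly, PASS.
z_included = zscore(20, normal + anomalous)

# With exclusion the baseline is only the quiet normal days, so a 20-null
# day scores roughly 23 sigma -> clear spike, FAIL (anomaly detected).
z_excluded = zscore(20, normal)

assert z_included < SENSITIVITY < z_excluded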
