Skip to content

Commit 6b60a66

Browse files
Add integration test for exclude_detection_period_from_training flag
The test demonstrates that:
- When flag=False, detection-period data is included in the training baseline, which prevents anomaly detection.
- When flag=True, detection-period data is excluded from the training baseline, which enables anomaly detection.

The test uses constrained time windows (1 day training/detection) to make the behavior deterministic.

Co-Authored-By: Yosef Arbiv <[email protected]>
1 parent 8c2588a commit 6b60a66

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

integration_tests/tests/test_column_anomalies.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,3 +476,74 @@ def test_anomalous_boolean_column_anomalies(test_id: str, dbt_project: DbtProjec
476476
"count_true",
477477
"count_false",
478478
}
479+
480+
481+
# Anomalies currently not supported on ClickHouse
@pytest.mark.skip_targets(["clickhouse"])
def test_column_anomalies_exclude_detection_period_from_training(
    test_id: str, dbt_project: DbtProject
):
    """Check both settings of ``exclude_detection_period_from_training``.

    The same dataset is run through the column-anomalies test twice:

    * with the flag set to ``False`` the test is expected to ``pass``;
    * with the flag set to ``True`` the test is expected to ``fail``.

    The dataset holds one "Superman" and one "Batman" row for every
    baseline date, plus ten rows with a null ``superhero`` on the detection
    date, and ``null_count`` is monitored for spikes.
    """
    yesterday = datetime.utcnow().date() - timedelta(1)
    # The first generated date is used as the detection day and the rest as
    # the baseline; presumably generate_dates yields base_date first and then
    # walks backwards -- confirm against the helper.
    detection_date, *baseline_dates = generate_dates(base_date=yesterday)

    rows: List[Dict[str, Any]] = []
    for day in baseline_dates:
        day_str = day.strftime(DATE_FORMAT)
        for hero in ["Superman", "Batman"]:
            rows.append({TIMESTAMP_COLUMN: day_str, "superhero": hero})

    # Ten null superheroes on the detection day form the null_count spike.
    rows.extend(
        {TIMESTAMP_COLUMN: detection_date.strftime(DATE_FORMAT), "superhero": None}
        for _ in range(10)
    )

    # Every argument except the flag under test is shared by both runs.
    shared_args = {
        "timestamp_column": TIMESTAMP_COLUMN,
        "column_anomalies": ["null_count"],
        "time_bucket": {"period": "day", "count": 1},
        "training_period": {"period": "day", "count": 1},
        "detection_period": {"period": "day", "count": 1},
        "min_training_set_size": 1,
        "anomaly_sensitivity": 3,
        "anomaly_direction": "spike",
    }

    def run_with_flag(exclude_detection_period: bool):
        """Run the dbt test on ``rows`` with the flag set as requested."""
        return dbt_project.test(
            test_id,
            DBT_TEST_NAME,
            {
                **shared_args,
                "exclude_detection_period_from_training": exclude_detection_period,
            },
            data=rows,
            test_column="superhero",
            test_vars={"force_metrics_backfill": True},
        )

    included_result = run_with_flag(False)
    assert included_result["status"] == "pass", (
        "Expected PASS when exclude_detection_period_from_training=False "
        "(detection data included in training baseline)"
    )

    excluded_result = run_with_flag(True)
    assert excluded_result["status"] == "fail", (
        "Expected FAIL when exclude_detection_period_from_training=True "
        "(detection data excluded from training baseline, anomaly detected)"
    )

0 commit comments

Comments
 (0)