Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
f4b672b
Add exclude_detection_period_from_training parameter to freshness ano…
devin-ai-integration[bot] Nov 10, 2025
94002a8
Add integration test for exclude_detection_period_from_training with …
devin-ai-integration[bot] Nov 10, 2025
7576f81
Fix freshness anomalies test to create stronger anomaly
devin-ai-integration[bot] Nov 10, 2025
d5e8482
Pass exclude_detection_period_from_training via test_vars
devin-ai-integration[bot] Nov 10, 2025
ee928af
Pass exclude_detection_period_from_training via test_args
devin-ai-integration[bot] Nov 10, 2025
0f6d5cb
Fix freshness anomalies test with proper configuration
devin-ai-integration[bot] Nov 10, 2025
14e1a19
Add comprehensive configuration to freshness anomalies test
devin-ai-integration[bot] Nov 10, 2025
2558e32
Fix test with backfill_days and weaker anomaly
devin-ai-integration[bot] Nov 10, 2025
eb7d179
Fix test by making training period overlap detection period
devin-ai-integration[bot] Nov 10, 2025
38b64db
Align test data with detection period for clarity
devin-ai-integration[bot] Nov 12, 2025
dc423c0
Fix test by using 7-day training period and separating data ranges
devin-ai-integration[bot] Nov 12, 2025
5cd80c5
Revert "Align test data with detection period for clarity"
devin-ai-integration[bot] Nov 12, 2025
294b6ca
Align test data with detection period using 14-day training period
devin-ai-integration[bot] Nov 12, 2025
0473f90
Shorten test documentation for test_exclude_detection_from_training
devin-ai-integration[bot] Nov 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions integration_tests/tests/test_freshness_anomalies.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,92 @@ def test_first_metric_null(test_id, dbt_project: DbtProject):
materialization="incremental",
)
assert result["status"] == "pass"


@pytest.mark.skip_targets(["clickhouse"])
def test_exclude_detection_from_training(test_id: str, dbt_project: DbtProject):
    """
    Check that exclude_detection_period_from_training changes the freshness anomalies verdict.

    The same seeded data is tested twice with identical arguments, except that
    the second run also sets exclude_detection_period_from_training to True.

    Data:
    - Normal rows: timestamps 2 hours apart, generated with days_back=30 and a
      base date of midnight (UTC) seven days ago.
    - Anomalous rows: timestamps 24 hours apart at 12:00, generated with
      days_back=7 and a base date of yesterday.

    Expectations:
    - Flag not set: the test is expected to pass, since the sparse updates are
      part of the training baseline.
    - Flag set: the test is expected to fail, since the sparse updates are kept
      out of training and therefore stand out.
    """
    midnight_today = datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0
    )

    def build_rows(base_date, step, days_back):
        # One seed row per generated timestamp, rendered with DATE_FORMAT.
        return [
            {TIMESTAMP_COLUMN: moment.strftime(DATE_FORMAT)}
            for moment in generate_dates(
                base_date=base_date, step=step, days_back=days_back
            )
        ]

    # Frequent updates (every 2 hours) anchored a week before today.
    frequent_rows = build_rows(
        base_date=midnight_today - timedelta(days=7),
        step=timedelta(hours=2),
        days_back=30,
    )

    # Sparse updates (once a day, at noon) anchored at yesterday.
    yesterday_noon = (midnight_today - timedelta(days=1)).replace(hour=12, minute=0)
    sparse_rows = build_rows(
        base_date=yesterday_noon,
        step=timedelta(hours=24),
        days_back=7,
    )

    seed_rows = [*frequent_rows, *sparse_rows]

    # Both runs are pinned to the same "run started at" instant (today, midnight).
    run_started_at = midnight_today.isoformat()

    # Run 1: flag not passed, so the detection period stays in the training set.
    baseline_args = {
        "timestamp_column": TIMESTAMP_COLUMN,
        "training_period": {"period": "day", "count": 30},
        "detection_period": {"period": "day", "count": 7},
        "time_bucket": {"period": "day", "count": 1},
        "days_back": 40,
        "backfill_days": 0,
        "sensitivity": 3,
        "min_training_set_size": 5,
        "anomaly_direction": "spike",
        "ignore_small_changes": {
            "spike_failure_percent_threshold": 0,
            "drop_failure_percent_threshold": 0,
        },
    }

    baseline_result = dbt_project.test(
        f"{test_id}_without_exclusion",
        TEST_NAME,
        baseline_args,
        data=seed_rows,
        test_vars={"custom_run_started_at": run_started_at},
    )

    # The anomaly is part of the baseline here, so it is expected to go unnoticed.
    baseline_status = baseline_result["status"]
    assert (
        baseline_status == "pass"
    ), "Test should pass when anomaly is included in training"

    # Run 2: same arguments plus the exclusion flag.
    exclusion_args = dict(
        baseline_args,
        exclude_detection_period_from_training=True,
    )

    exclusion_result = dbt_project.test(
        f"{test_id}_with_exclusion",
        TEST_NAME,
        exclusion_args,
        data=seed_rows,
        test_vars={"custom_run_started_at": run_started_at},
    )

    # With the detection period left out of training, the anomaly is expected to be flagged.
    exclusion_status = exclusion_result["status"]
    assert (
        exclusion_status == "fail"
    ), "Test should fail when anomaly is excluded from training"
5 changes: 3 additions & 2 deletions macros/edr/tests/test_event_freshness_anomalies.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% test event_freshness_anomalies(model, event_timestamp_column, update_timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity, ignore_small_changes, detection_delay, anomaly_exclude_metrics, detection_period, training_period) %}
{% test event_freshness_anomalies(model, event_timestamp_column, update_timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity, ignore_small_changes, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_detection_period_from_training=false) %}
{{ config(tags = ['elementary-tests']) }}
{% if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
{% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
Expand Down Expand Up @@ -32,7 +32,8 @@
detection_delay=detection_delay,
anomaly_exclude_metrics=anomaly_exclude_metrics,
detection_period=detection_period,
training_period=training_period
training_period=training_period,
exclude_detection_period_from_training=exclude_detection_period_from_training
)
}}
{% endtest %}
5 changes: 3 additions & 2 deletions macros/edr/tests/test_freshness_anomalies.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% test freshness_anomalies(model, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity, ignore_small_changes, detection_delay, anomaly_exclude_metrics, detection_period, training_period) %}
{% test freshness_anomalies(model, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity, ignore_small_changes, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_detection_period_from_training=false) %}
{{ config(tags = ['elementary-tests']) }}
{{ elementary.test_table_anomalies(
model=model,
Expand All @@ -18,7 +18,8 @@
detection_delay=detection_delay,
anomaly_exclude_metrics=anomaly_exclude_metrics,
detection_period=detection_period,
training_period=training_period
training_period=training_period,
exclude_detection_period_from_training=exclude_detection_period_from_training
)
}}
{% endtest %}
Loading