Skip to content

Commit 2cddf06

Browse files
Add exclude_detection_period_from_training flag to event freshness anomaly tests (#888)
1 parent 2455fc7 commit 2cddf06

File tree

1 file changed

+102
-0
lines changed

1 file changed

+102
-0
lines changed

integration_tests/tests/test_event_freshness_anomalies.py

Lines changed: 102 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,3 +88,105 @@ def test_slower_rate_event_freshness(test_id: str, dbt_project: DbtProject):
8888
test_vars={"custom_run_started_at": test_started_at.isoformat()},
8989
)
9090
assert result["status"] == "fail"
91+
92+
93+
# Anomalies currently not supported on ClickHouse
@pytest.mark.skip_targets(["clickhouse"])
def test_exclude_detection_from_training(test_id: str, dbt_project: DbtProject):
    """
    Check how exclude_detection_period_from_training affects event freshness anomalies.

    Seed data (one event per day, stamped at 12:00):
    - 14 to 8 days before the run start: update lag of 2-8 minutes (training period)
    - 7 to 1 days before the run start: update lag of 5 hours (detection period)

    Expected outcomes:
    - Flag not set: the anomalous days are part of the training baseline, so the test passes.
    - Flag set to True: the anomalous days are left out of training, so the test fails.
    """
    # The simulated run starts at midnight of the following day.
    run_started_at = (datetime.utcnow() + timedelta(days=1)).replace(
        hour=0, minute=0, second=0, microsecond=0
    )

    def build_row(days_before_run: int, lag: timedelta) -> dict:
        # One seed row: an event at noon of the given day, updated `lag` later.
        event_time = (run_started_at - timedelta(days=days_before_run)).replace(
            hour=12, minute=0, second=0, microsecond=0
        )
        return {
            EVENT_TIMESTAMP_COLUMN: event_time.strftime(DATE_FORMAT),
            UPDATE_TIMESTAMP_COLUMN: (event_time + lag).strftime(DATE_FORMAT),
        }

    # Varying lags (2-8 minutes) keep training_stddev > 0 across the 7 normal days.
    training_lags_minutes = [2, 3, 4, 5, 6, 7, 8]
    training_rows = [
        build_row(14 - offset, timedelta(minutes=lag_minutes))
        for offset, lag_minutes in enumerate(training_lags_minutes)
    ]

    # 7 anomalous days with a constant 5-hour lag (detection period).
    detection_rows = [build_row(7 - offset, timedelta(hours=5)) for offset in range(7)]

    seed_rows = training_rows + detection_rows
    run_started_at_iso = run_started_at.isoformat()

    # Run 1: flag left unset (defaults to False/None) - the anomaly is expected to be missed.
    baseline_args = {
        "event_timestamp_column": EVENT_TIMESTAMP_COLUMN,
        "update_timestamp_column": UPDATE_TIMESTAMP_COLUMN,
        # Scoring window: 14 days, covering both training and detection data.
        "days_back": 14,
        # Detection period: the last 7 days before the run start.
        "backfill_days": 7,
        # Daily buckets to avoid boundary issues.
        "time_bucket": {"period": "day", "count": 1},
        "sensitivity": 3,
        # Explicit direction, since the scenario is an increase in lag.
        "anomaly_direction": "spike",
        # Explicit minimum to avoid threshold issues.
        "min_training_set_size": 5,
    }
    baseline_result = dbt_project.test(
        test_id + "_without_exclusion",
        TEST_NAME,
        baseline_args,
        data=seed_rows,
        test_vars={
            "custom_run_started_at": run_started_at_iso,
            "force_metrics_backfill": True,
        },
    )

    # Expected to PASS: the anomaly is part of the baseline it is compared against.
    assert (
        baseline_result["status"] == "pass"
    ), "Test should pass when anomaly is included in training"

    # Run 2: same configuration with the exclusion flag switched on.
    exclusion_args = dict(baseline_args)
    exclusion_args["exclude_detection_period_from_training"] = True
    exclusion_result = dbt_project.test(
        test_id + "_with_exclusion",
        TEST_NAME,
        exclusion_args,
        data=seed_rows,
        test_vars={
            "custom_run_started_at": run_started_at_iso,
            "force_metrics_backfill": True,
        },
    )

    # Expected to FAIL: with the anomaly out of training, it stands out from the baseline.
    assert (
        exclusion_result["status"] == "fail"
    ), "Test should fail when anomaly is excluded from training"

0 commit comments

Comments
 (0)