@@ -233,3 +233,79 @@ def test_first_metric_null(test_id, dbt_project: DbtProject):
233233 materialization = "incremental" ,
234234 )
235235 assert result ["status" ] == "pass"
236+
237+
@pytest.mark.skip_targets(["clickhouse"])
def test_exclude_detection_from_training(test_id: str, dbt_project: DbtProject):
    """
    Verify that exclude_detection_period_from_training flips the freshness verdict.

    Two timestamp series are generated and loaded together:
    - a "normal" series stepping every 2 hours
      (base_date = now - 37 days, days_back = 30)
    - a "slow" series stepping every 8 hours
      (base_date = now - 7 days, days_back = 7)

    The same data is then tested twice with a 30-day training period, a 7-day
    detection period and daily buckets:
    - flag unset: expected to pass, since the slow period is meant to be
      absorbed into the training baseline
    - flag set to True: expected to fail, since the slow period is meant to be
      kept out of the baseline and therefore flagged

    NOTE(review): which calendar window each series actually covers depends on
    how generate_dates interprets base_date / days_back - confirm the slow
    series really falls inside the 7-day detection period.
    """
    now = datetime.utcnow()

    def _timestamp_rows(base_date, step, days_back):
        # One row per generated timestamp, formatted the way the seed expects.
        return [
            {TIMESTAMP_COLUMN: moment.strftime(DATE_FORMAT)}
            for moment in generate_dates(
                base_date=base_date,
                step=step,
                days_back=days_back,
            )
        ]

    # Baseline series: one update every 2 hours.
    regular_rows = _timestamp_rows(
        base_date=now - timedelta(days=37),
        step=timedelta(hours=2),
        days_back=30,
    )
    # Slow series: one update every 8 hours (4x slower than the baseline).
    slow_rows = _timestamp_rows(
        base_date=now - timedelta(days=7),
        step=timedelta(hours=8),
        days_back=7,
    )
    seed_rows = regular_rows + slow_rows

    # Run 1: flag left unset, so the detection period stays in the training set.
    baseline_args = {
        "timestamp_column": TIMESTAMP_COLUMN,
        "training_period": {"period": "day", "count": 30},
        "detection_period": {"period": "day", "count": 7},
        "time_bucket": {"period": "day", "count": 1},
        # Sensitivity 5 is intended to let the slow period be tolerated when
        # it is part of the training data.
        "sensitivity": 5,
    }
    outcome_without_flag = dbt_project.test(
        f"{test_id}_without_exclusion",
        TEST_NAME,
        baseline_args,
        data=seed_rows,
    )
    # Expected to pass: the slow period is part of the baseline.
    assert (
        outcome_without_flag["status"] == "pass"
    ), "Test should pass when anomaly is included in training"

    # Run 2: identical arguments plus the exclusion flag.
    flagged_args = dict(baseline_args, exclude_detection_period_from_training=True)
    outcome_with_flag = dbt_project.test(
        f"{test_id}_with_exclusion",
        TEST_NAME,
        flagged_args,
        data=seed_rows,
    )
    # Expected to fail: the slow period is judged against a clean baseline.
    assert (
        outcome_with_flag["status"] == "fail"
    ), "Test should fail when anomaly is excluded from training"
0 commit comments