@@ -233,3 +233,77 @@ def test_first_metric_null(test_id, dbt_project: DbtProject):
233233 materialization = "incremental" ,
234234 )
235235 assert result ["status" ] == "pass"
236+
237+
238+ # Test for exclude_detection_period_from_training functionality
239+ # This test demonstrates the use case where:
240+ # 1. Detection period contains anomalous freshness data that would normally be included in training
241+ # 2. With exclude_detection_period_from_training=False: anomaly is missed (test passes) because training includes the anomaly
242+ # 3. With exclude_detection_period_from_training=True: anomaly is detected (test fails) because training excludes the anomaly
@pytest.mark.skip_targets(["clickhouse"])
def test_exclude_detection_from_training(test_id: str, dbt_project: DbtProject):
    """
    Test the exclude_detection_period_from_training flag for freshness anomalies.

    Scenario:
    - 30 days of normal data with consistent freshness (data arrives every 2 hours),
      ending where the detection period begins
    - 3 days of anomalous data (data arrives every 8 hours - slower/stale) in the
      detection period
    - Without exclusion: anomaly gets included in training baseline, test passes
      (misses anomaly)
    - With exclusion: anomaly excluded from training, test fails (detects anomaly)
    """
    utc_now = datetime.utcnow()
    detection_days = 3
    training_days = 30

    # generate_dates is called below with `utc_now, days_back=3` to fill the most
    # recent 3 days, i.e. it walks *backwards* from its anchor date. The normal
    # data must therefore be anchored at the start of the detection period so that
    # it covers the 30 days immediately preceding it. Anchoring it further back
    # (e.g. 33 days ago) would leave a 30-day hole right before the detection
    # period and push the baseline outside the training window.
    detection_start = utc_now - timedelta(days=detection_days)

    # 30 days of normal data with consistent freshness (every 2 hours)
    normal_data = [
        {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT)}
        for date in generate_dates(
            detection_start, step=timedelta(hours=2), days_back=training_days
        )
    ]

    # 3 days of anomalous data (every 8 hours instead of every 2 hours)
    anomalous_data = [
        {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT)}
        for date in generate_dates(
            utc_now, step=timedelta(hours=8), days_back=detection_days
        )
    ]

    all_data = normal_data + anomalous_data

    # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it is
    # included in training)
    test_args_without_exclusion = {
        "timestamp_column": TIMESTAMP_COLUMN,
        "training_period": {"period": "day", "count": training_days},
        "detection_period": {"period": "day", "count": detection_days},
        "time_bucket": {"period": "day", "count": 1},
        "sensitivity": 5,  # Higher sensitivity to allow anomaly to be absorbed
        # exclude_detection_period_from_training is not set (defaults to False/None)
    }

    test_result_without_exclusion = dbt_project.test(
        test_id + "_without_exclusion",
        TEST_NAME,
        test_args_without_exclusion,
        data=all_data,
    )

    # This should PASS because the anomaly is included in training, making it
    # part of the baseline
    assert (
        test_result_without_exclusion["status"] == "pass"
    ), "Test should pass when anomaly is included in training"

    # Test 2: WITH exclusion (should fail - detects the anomaly because it is
    # excluded from training)
    test_args_with_exclusion = {
        **test_args_without_exclusion,
        "exclude_detection_period_from_training": True,
    }

    test_result_with_exclusion = dbt_project.test(
        test_id + "_with_exclusion",
        TEST_NAME,
        test_args_with_exclusion,
        data=all_data,
    )

    # This should FAIL because the anomaly is excluded from training, so it is
    # detected as anomalous
    assert (
        test_result_with_exclusion["status"] == "fail"
    ), "Test should fail when anomaly is excluded from training"
0 commit comments