@@ -233,77 +233,3 @@ def test_first_metric_null(test_id, dbt_project: DbtProject):
233233 materialization = "incremental" ,
234234 )
235235 assert result ["status" ] == "pass"
236-
237-
238- # Test for exclude_detection_period_from_training functionality
239- # This test demonstrates the use case where:
240- # 1. Detection period contains anomalous freshness data that would normally be included in training
241- # 2. With exclude_detection_period_from_training=False: anomaly is missed (test passes) because training includes the anomaly
242- # 3. With exclude_detection_period_from_training=True: anomaly is detected (test fails) because training excludes the anomaly
@pytest.mark.skip_targets(["clickhouse"])
def test_exclude_detection_from_training(test_id: str, dbt_project: DbtProject):
    """
    Compare freshness-anomaly results with and without exclude_detection_period_from_training.

    The same rows are tested twice with otherwise identical arguments:

    - flag left unset: the run is expected to report "pass";
    - flag set to True: the run is expected to report "fail".

    Rows come from two generate_dates() series: one with a 2-hour step
    (days_back=30, anchored 33 days before now) and one with an 8-hour step
    (days_back=3, anchored at now).
    """
    now = datetime.utcnow()

    def to_rows(moments):
        # One single-column row per timestamp, rendered with DATE_FORMAT.
        return [{TIMESTAMP_COLUMN: moment.strftime(DATE_FORMAT)} for moment in moments]

    # NOTE(review): generate_dates is not visible from this block. If it walks
    # backwards from its first argument (as `days_back` suggests), the 2-hour
    # rows cover days 63..33 before now and leave a 30-day gap ahead of the
    # 8-hour rows -- confirm that placement is intended.
    steady_rows = to_rows(
        generate_dates(now - timedelta(days=33), step=timedelta(hours=2), days_back=30)
    )
    slow_rows = to_rows(generate_dates(now, step=timedelta(hours=8), days_back=3))
    rows = steady_rows + slow_rows

    # Run 1: exclude_detection_period_from_training is deliberately omitted.
    base_args = {
        "timestamp_column": TIMESTAMP_COLUMN,
        "training_period": {"period": "day", "count": 30},
        "detection_period": {"period": "day", "count": 3},
        "time_bucket": {"period": "day", "count": 1},
        # Set to 5 so the slow rows can be absorbed into the baseline in this run.
        "sensitivity": 5,
    }
    baseline_result = dbt_project.test(
        test_id + "_without_exclusion",
        TEST_NAME,
        base_args,
        data=rows,
    )
    baseline_status = baseline_result["status"]
    assert baseline_status == "pass", "Test should pass when anomaly is included in training"

    # Run 2: identical arguments plus the exclusion flag. Built only after run 1
    # has finished, matching the original statement order.
    exclusion_args = dict(base_args, exclude_detection_period_from_training=True)
    exclusion_result = dbt_project.test(
        test_id + "_with_exclusion",
        TEST_NAME,
        exclusion_args,
        data=rows,
    )
    exclusion_status = exclusion_result["status"]
    assert exclusion_status == "fail", "Test should fail when anomaly is excluded from training"
0 commit comments