@@ -218,3 +218,109 @@ def test_dimension_anomalies_with_timestamp_exclude_final_results(
218218 test_result = dbt_project .test (test_id , DBT_TEST_NAME , test_args , data = data )
219219 assert test_result ["status" ] == "fail"
220220 assert test_result ["failures" ] == 1
221+
222+
223+ # Test for exclude_detection_period_from_training functionality
224+ # This test demonstrates the use case where:
225+ # 1. Detection period contains anomalous distribution data that would normally be included in training
226+ # 2. With exclude_detection_period_from_training=False: anomaly is missed (test passes) because training includes the anomaly
227+ # 3. With exclude_detection_period_from_training=True: anomaly is detected (test fails) because training excludes the anomaly
@pytest.mark.skip_targets(["clickhouse"])
def test_dimension_exclude_detection_from_training(
    test_id: str, dbt_project: DbtProject
):
    """
    Check that exclude_detection_period_from_training flips the test outcome.

    Seeded data (100 rows per day, split between two "superhero" values):
    - 30 baseline days (37 down to 8 days before now): the Superman row count
      cycles through 45/50/55 and Spiderman gets the remainder.
    - 7 shifted days (7 down to 1 days before now): 72 Superman and
      28 Spiderman rows per day. These fall in the detection period.

    Expected outcomes:
    - Flag not passed: the shifted days are meant to be absorbed into the
      training baseline, so the test is expected to pass (anomaly missed).
    - Flag set to True: the shifted days are meant to be left out of
      training, so the test is expected to fail (anomaly detected).
    """
    utc_now = datetime.utcnow()
    rows_per_day = 100

    def rows_for_day(days_back: int, superman_rows: int) -> list:
        # Format the bucket timestamp once per day; every row of that day
        # carries the same string. Superman rows come first, then Spiderman.
        stamp = (utc_now - timedelta(days=days_back)).strftime(DATE_FORMAT)
        split = (
            ("Superman", superman_rows),
            ("Spiderman", rows_per_day - superman_rows),
        )
        day_rows = []
        for hero, row_count in split:
            # A fresh dict per row, so no two rows share the same object.
            day_rows.extend(
                {TIMESTAMP_COLUMN: stamp, "superhero": hero}
                for _ in range(row_count)
            )
        return day_rows

    # Baseline window: oldest day first, Superman share rotating 45 -> 50 -> 55.
    superman_cycle = (45, 50, 55)
    seeded_rows = []
    for offset in range(30):
        seeded_rows += rows_for_day(37 - offset, superman_cycle[offset % 3])

    # Detection window: a constant 72/28 split on each of the 7 latest days.
    for offset in range(7):
        seeded_rows += rows_for_day(7 - offset, 72)

    # Run 1: flag left out entirely (presumably treated as False/None by the
    # test macro - confirm against the macro defaults).
    baseline_args = {
        **DBT_TEST_ARGS,
        "training_period": {"period": "day", "count": 30},
        "detection_period": {"period": "day", "count": 7},
        "time_bucket": {"period": "day", "count": 1},
        "sensitivity": 5,
    }
    baseline_result = dbt_project.test(
        test_id + "_without_exclusion",
        DBT_TEST_NAME,
        baseline_args,
        data=seeded_rows,
    )
    baseline_status = baseline_result["status"]
    assert (
        baseline_status == "pass"
    ), "Test should pass when anomaly is included in training"

    # Run 2: identical arguments plus the exclusion flag. The args are derived
    # only after run 1 has finished, matching the original statement order.
    exclusion_args = {
        **baseline_args,
        "exclude_detection_period_from_training": True,
    }
    exclusion_result = dbt_project.test(
        test_id + "_with_exclusion",
        DBT_TEST_NAME,
        exclusion_args,
        data=seeded_rows,
    )
    exclusion_status = exclusion_result["status"]
    assert (
        exclusion_status == "fail"
    ), "Test should fail when anomaly is excluded from training"
0 commit comments