@@ -153,3 +153,121 @@ def test_anomalyless_all_columns_anomalies_all_monitors_sanity(
153153 test_id , DBT_TEST_NAME , test_args , data = data , multiple_results = True
154154 )
155155 assert all ([res ["status" ] == "pass" for res in test_results ])
156+
157+
# Anomalies currently not supported on ClickHouse
@pytest.mark.skip_targets(["clickhouse"])
def test_exclude_detection_from_training_all_columns(
    test_id: str, dbt_project: DbtProject
):
    """
    Test the exclude_detection_period_from_training flag for column anomalies.

    Scenario:
    - 30 days of normal data (days 37..8 before now): 2 nulls and 8 non-null
      values per day in the "superhero" column.
    - 7 days of anomalous data (days 7..1 before now): 10 nulls per day and no
      non-null values, so the daily row count stays at 10.
    - Without exclusion: the test is expected to pass, because the anomalous
      days are part of the training baseline.
    - With exclusion: the test is expected to fail, because the anomalous days
      are kept out of the training baseline and stand out against it.
    """
    utc_now = datetime.utcnow()

    def _superhero_result(results):
        # Return the result reported for the "superhero" column, or None when
        # the run produced no result for it.
        return next(
            (res for res in results if res["column_name"].lower() == "superhero"),
            None,
        )

    # 30 days of normal data: 2 nulls followed by 8 non-null values per day.
    normal_data = []
    for i in range(30):
        day = (utc_now - timedelta(days=37 - i)).strftime(DATE_FORMAT)
        hero = "Superman" if i % 2 == 0 else "Batman"
        normal_data.extend(
            {TIMESTAMP_COLUMN: day, "superhero": None} for _ in range(2)
        )
        normal_data.extend(
            {TIMESTAMP_COLUMN: day, "superhero": hero} for _ in range(8)
        )

    # 7 days of anomalous data that fall inside the detection period: every one
    # of the 10 daily rows is null.
    anomalous_data = []
    for i in range(7):
        day = (utc_now - timedelta(days=7 - i)).strftime(DATE_FORMAT)
        anomalous_data.extend(
            {TIMESTAMP_COLUMN: day, "superhero": None} for _ in range(10)
        )

    all_data = normal_data + anomalous_data

    # Run 1: WITHOUT exclusion. exclude_detection_period_from_training is
    # deliberately left unset so the default behaviour is exercised.
    test_args_without_exclusion = {
        "timestamp_column": TIMESTAMP_COLUMN,
        "column_anomalies": ["null_count"],
        "training_period": {"period": "day", "count": 30},
        "detection_period": {"period": "day", "count": 7},
        "time_bucket": {"period": "day", "count": 1},
        "sensitivity": 5,  # Higher sensitivity to allow anomaly to be absorbed
    }

    test_results_without_exclusion = dbt_project.test(
        test_id + "_without_exclusion",
        DBT_TEST_NAME,
        test_args_without_exclusion,
        data=all_data,
        multiple_results=True,
    )

    # Expected to PASS: the anomaly is included in training and therefore
    # becomes part of the baseline.
    superhero_result = _superhero_result(test_results_without_exclusion)
    assert superhero_result, "No result returned for column 'superhero'"
    assert (
        superhero_result["status"] == "pass"
    ), "Test should pass when anomaly is included in training"

    # Run 2: WITH exclusion. Same arguments, with the flag switched on.
    test_args_with_exclusion = {
        **test_args_without_exclusion,
        "exclude_detection_period_from_training": True,
    }

    test_results_with_exclusion = dbt_project.test(
        test_id + "_with_exclusion",
        DBT_TEST_NAME,
        test_args_with_exclusion,
        data=all_data,
        multiple_results=True,
    )

    # Expected to FAIL: the anomaly is excluded from training, so it is
    # detected as anomalous.
    superhero_result = _superhero_result(test_results_with_exclusion)
    assert superhero_result, "No result returned for column 'superhero'"
    assert (
        superhero_result["status"] == "fail"
    ), "Test should fail when anomaly is excluded from training"
0 commit comments