1- from datetime import datetime , timedelta
1+ from datetime import datetime , timedelta , timezone
22from typing import Any , Dict , List
33
44import pytest
@@ -175,21 +175,23 @@ def test_anomaly_in_detection_period(
175175 Test the exclude_detection_period_from_training flag functionality for column anomalies.
176176
177177 Scenario:
178- - 30 days of normal data with consistent null_count pattern (2 nulls per day)
179- - 7 days of anomalous data (10 nulls per day) in detection period
178+ - 30 days of normal data with variance in null_count pattern (8, 10, 12 nulls per day)
179+ - 7 days of anomalous data (20 nulls per day) in detection period
180180 - Without exclusion (exclude_detection=False): anomaly gets included in training baseline, test passes
181181 - With exclusion (exclude_detection=True): anomaly excluded from training, test fails (detects anomaly)
182182 """
183- utc_now = datetime .utcnow ( )
183+ utc_now = datetime .now ( timezone . utc )
184184
185- # Generate 30 days of normal data with consistent null_count (2 nulls per day)
185+ # Generate 30 days of normal data with variance in null_count (8, 10, 12 pattern)
186+ normal_pattern = [8 , 10 , 12 ]
186187 normal_data = []
187188 for i in range (30 ):
188189 date = utc_now - timedelta (days = 37 - i )
190+ null_count = normal_pattern [i % 3 ]
189191 normal_data .extend (
190192 [
191193 {TIMESTAMP_COLUMN : date .strftime (DATE_FORMAT ), "superhero" : None }
192- for _ in range (2 )
194+ for _ in range (null_count )
193195 ]
194196 )
195197 normal_data .extend (
@@ -198,18 +200,18 @@ def test_anomaly_in_detection_period(
198200 TIMESTAMP_COLUMN : date .strftime (DATE_FORMAT ),
199201 "superhero" : "Superman" if i % 2 == 0 else "Batman" ,
200202 }
201- for _ in range (8 )
203+ for _ in range (40 - null_count )
202204 ]
203205 )
204206
205- # Generate 7 days of anomalous data (10 nulls per day) - this will be in detection period
207+ # Generate 7 days of anomalous data (20 nulls per day) - 100% increase from mean
206208 anomalous_data = []
207209 for i in range (7 ):
208210 date = utc_now - timedelta (days = 7 - i )
209211 anomalous_data .extend (
210212 [
211213 {TIMESTAMP_COLUMN : date .strftime (DATE_FORMAT ), "superhero" : None }
212- for _ in range (10 )
214+ for _ in range (20 )
213215 ]
214216 )
215217 anomalous_data .extend (
@@ -218,7 +220,7 @@ def test_anomaly_in_detection_period(
218220 TIMESTAMP_COLUMN : date .strftime (DATE_FORMAT ),
219221 "superhero" : "Superman" if i % 2 == 0 else "Batman" ,
220222 }
221- for _ in range (0 )
223+ for _ in range (20 )
222224 ]
223225 )
224226
0 commit comments