Skip to content

Commit 9872781

Browse files
Add integration test for exclude_detection_period_from_training in dimension anomalies
- Added test_dimension_exclude_detection_from_training to demonstrate the flag's behavior
- Test shows that without exclusion, the anomaly is missed (test passes) because training includes the detection period
- Test shows that with exclusion, the anomaly is detected (test fails) because training excludes the detection period
- Uses 30 days of normal data with variance (45/50/55 pattern) and 7 days of anomalous data (72/28 distribution)
- Follows the same pattern as test_exclude_detection_from_training in test_volume_anomalies.py

Co-Authored-By: Yosef Arbiv <[email protected]>
1 parent f83265c commit 9872781

File tree

1 file changed

+106
-0
lines changed

1 file changed

+106
-0
lines changed

integration_tests/tests/test_dimension_anomalies.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,3 +218,109 @@ def test_dimension_anomalies_with_timestamp_exclude_final_results(
218218
test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
219219
assert test_result["status"] == "fail"
220220
assert test_result["failures"] == 1
221+
222+
223+
# Test for exclude_detection_period_from_training functionality
224+
# This test demonstrates the use case where:
225+
# 1. Detection period contains anomalous distribution data that would normally be included in training
226+
# 2. With exclude_detection_period_from_training=False: anomaly is missed (test passes) because training includes the anomaly
227+
# 3. With exclude_detection_period_from_training=True: anomaly is detected (test fails) because training excludes the anomaly
228+
@pytest.mark.skip_targets(["clickhouse"])
def test_dimension_exclude_detection_from_training(
    test_id: str, dbt_project: DbtProject
):
    """
    Check how exclude_detection_period_from_training affects a dimension anomaly test.

    Seeded data (all timestamps are offsets from a single utcnow() reading):
    - 37 down to 8 days ago: 100 rows per day, with the Superman count cycling
      through 45/50/55 and Spiderman receiving the remainder.
    - 7 down to 1 days ago: 72 Superman rows and 28 Spiderman rows per day.

    Assertions:
    - With the flag left unset, the test status must be "pass".
    - With the flag set to True, the test status must be "fail".
    """
    now = datetime.utcnow()

    def rows_for_day(days_ago, superman_rows, spiderman_rows):
        # One dict per row; Superman rows are emitted before Spiderman rows.
        stamp = (now - timedelta(days=days_ago)).strftime(DATE_FORMAT)
        per_hero = (("Superman", superman_rows), ("Spiderman", spiderman_rows))
        return [
            {TIMESTAMP_COLUMN: stamp, "superhero": hero}
            for hero, amount in per_hero
            for _ in range(amount)
        ]

    rows = []

    # Baseline: 30 consecutive days, oldest first, with a mild repeating variance.
    baseline_cycle = (45, 50, 55)
    for offset in range(30):
        superman_rows = baseline_cycle[offset % 3]
        rows += rows_for_day(37 - offset, superman_rows, 100 - superman_rows)

    # Skewed distribution: the 7 most recent days, which form the detection period.
    for offset in range(7):
        rows += rows_for_day(7 - offset, 72, 28)

    # Run 1: flag not provided, so the skewed days are part of the training set.
    baseline_args = {
        **DBT_TEST_ARGS,
        "training_period": {"period": "day", "count": 30},
        "detection_period": {"period": "day", "count": 7},
        "time_bucket": {"period": "day", "count": 1},
        "sensitivity": 5,
    }
    baseline_result = dbt_project.test(
        test_id + "_without_exclusion",
        DBT_TEST_NAME,
        baseline_args,
        data=rows,
    )
    baseline_status = baseline_result["status"]
    assert (
        baseline_status == "pass"
    ), "Test should pass when anomaly is included in training"

    # Run 2: identical arguments plus the flag, so training skips the skewed days.
    exclusion_args = {
        **baseline_args,
        "exclude_detection_period_from_training": True,
    }
    exclusion_result = dbt_project.test(
        test_id + "_with_exclusion",
        DBT_TEST_NAME,
        exclusion_args,
        data=rows,
    )
    exclusion_status = exclusion_result["status"]
    assert (
        exclusion_status == "fail"
    ), "Test should fail when anomaly is excluded from training"

0 commit comments

Comments
 (0)