118 changes: 118 additions & 0 deletions integration_tests/tests/test_all_columns_anomalies.py
@@ -153,3 +153,121 @@ def test_anomalyless_all_columns_anomalies_all_monitors_sanity(
test_id, DBT_TEST_NAME, test_args, data=data, multiple_results=True
)
assert all([res["status"] == "pass" for res in test_results])


# Anomalies currently not supported on ClickHouse
@pytest.mark.skip_targets(["clickhouse"])
def test_exclude_detection_from_training_all_columns(
test_id: str, dbt_project: DbtProject
):
"""
Test the exclude_detection_period_from_training flag functionality for column anomalies.

Scenario:
- 30 days of normal data with consistent null_count pattern (2 nulls per day)
- 7 days of anomalous data (10 nulls per day) in detection period
- Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
- With exclusion: anomaly excluded from training, test fails (detects anomaly)
"""
utc_now = datetime.utcnow()
⚠️ Potential issue | 🔴 Critical

Fix datetime handling for consistency and reliability.

This test uses datetime.utcnow() while existing tests in this file use datetime.utcnow().date() (see line 19). This inconsistency could cause time-of-day variance issues in date calculations and make the test behavior unpredictable.

Additionally, datetime.utcnow() is deprecated in Python 3.12+.

Apply this diff:

-    utc_now = datetime.utcnow()
+    utc_today = datetime.utcnow().date()

Then update the references at lines 178 and 199:

-        date = utc_now - timedelta(days=37 - i)
+        date = utc_today - timedelta(days=37 - i)
-        date = utc_now - timedelta(days=7 - i)
+        date = utc_today - timedelta(days=7 - i)
🤖 Prompt for AI Agents
In integration_tests/tests/test_all_columns_anomalies.py around line 172,
replace the use of datetime.utcnow() with datetime.now(timezone.utc).date() to
match the file's existing pattern (uses datetime.utcnow().date() at line 19) and
avoid the deprecated datetime.utcnow() in Python 3.12+; also update the
subsequent references at lines 178 and 199 to operate on the date value (or to
use datetime.now(timezone.utc) if a timezone-aware datetime is required) so all
comparisons and arithmetic use the same date type and timezone-aware semantics
for consistent, predictable tests.
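
A minimal sketch of the suggested fix, assuming only day-level granularity is needed (the utc_today name mirrors the diff proposed above; the loop bounds come from the test body below):

from datetime import datetime, timedelta, timezone

# Timezone-aware date, matching the date-based pattern used by the other tests in this
# file and avoiding the datetime.utcnow() deprecation in Python 3.12+.
utc_today = datetime.now(timezone.utc).date()

# Same arithmetic as in the test body: date objects support timedelta subtraction and
# strftime directly, so the generation loops need no other change.
training_dates = [utc_today - timedelta(days=37 - i) for i in range(30)]
detection_dates = [utc_today - timedelta(days=7 - i) for i in range(7)]

Either variant keeps the date arithmetic consistent with the rest of the file; the timezone-aware form also sidesteps the deprecation warning.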


# Generate 30 days of normal data with consistent null_count (2 nulls per day)
normal_data = []
for i in range(30):
date = utc_now - timedelta(days=37 - i)
normal_data.extend(
[
{TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None}
for _ in range(2)
]
)
normal_data.extend(
[
{
TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
"superhero": "Superman" if i % 2 == 0 else "Batman",
}
for _ in range(8)
]
)

# Generate 7 days of anomalous data (10 nulls per day) - this will be in detection period
anomalous_data = []
for i in range(7):
date = utc_now - timedelta(days=7 - i)
anomalous_data.extend(
[
{TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None}
for _ in range(10)
]
)
anomalous_data.extend(
[
{
TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
"superhero": "Superman" if i % 2 == 0 else "Batman",
}
for _ in range(0) # No non-null values to keep total similar
]
)

all_data = normal_data + anomalous_data

# Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training)
test_args_without_exclusion = {
"timestamp_column": TIMESTAMP_COLUMN,
"column_anomalies": ["null_count"],
"training_period": {"period": "day", "count": 30},
"detection_period": {"period": "day", "count": 7},
"time_bucket": {"period": "day", "count": 1},
"sensitivity": 5, # Higher sensitivity to allow anomaly to be absorbed
# exclude_detection_period_from_training is not set (defaults to False/None)
}

test_results_without_exclusion = dbt_project.test(
test_id + "_without_exclusion",
DBT_TEST_NAME,
test_args_without_exclusion,
data=all_data,
multiple_results=True,
)

# This should PASS because the anomaly is included in training, making it part of the baseline
superhero_result = next(
(
res
for res in test_results_without_exclusion
if res["column_name"].lower() == "superhero"
),
None,
)
assert (
superhero_result and superhero_result["status"] == "pass"
), "Test should pass when anomaly is included in training"

# Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training)
test_args_with_exclusion = {
**test_args_without_exclusion,
"exclude_detection_period_from_training": True,
}

test_results_with_exclusion = dbt_project.test(
test_id + "_with_exclusion",
DBT_TEST_NAME,
test_args_with_exclusion,
data=all_data,
multiple_results=True,
)

# This should FAIL because the anomaly is excluded from training, so it's detected as anomalous
superhero_result = next(
(
res
for res in test_results_with_exclusion
if res["column_name"].lower() == "superhero"
),
None,
)
assert (
superhero_result and superhero_result["status"] == "fail"
), "Test should fail when anomaly is excluded from training"
5 changes: 3 additions & 2 deletions macros/edr/tests/test_all_columns_anomalies.sql
@@ -1,4 +1,4 @@
{% test all_columns_anomalies(model, column_anomalies, exclude_prefix, exclude_regexp, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions) %}
{% test all_columns_anomalies(model, column_anomalies, exclude_prefix, exclude_regexp, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions, exclude_detection_period_from_training=false) %}
{{ config(tags = ['elementary-tests']) }}
{%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
{% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
@@ -37,7 +37,8 @@
anomaly_exclude_metrics=anomaly_exclude_metrics,
detection_period=detection_period,
training_period=training_period,
dimensions=dimensions) %}
dimensions=dimensions,
exclude_detection_period_from_training=exclude_detection_period_from_training) %}

{%- if not test_configuration %}
{{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}
5 changes: 3 additions & 2 deletions macros/edr/tests/test_column_anomalies.sql
@@ -1,4 +1,4 @@
{% test column_anomalies(model, column_name, column_anomalies, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions) %}
{% test column_anomalies(model, column_name, column_anomalies, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions, exclude_detection_period_from_training=false) %}
{{ config(tags = ['elementary-tests']) }}
{%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
{% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
@@ -36,7 +36,8 @@
anomaly_exclude_metrics=anomaly_exclude_metrics,
detection_period=detection_period,
training_period=training_period,
dimensions=dimensions) %}
dimensions=dimensions,
exclude_detection_period_from_training=exclude_detection_period_from_training) %}

{%- if not test_configuration %}
{{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}
5 changes: 3 additions & 2 deletions macros/edr/tests/test_dimension_anomalies.sql
@@ -1,4 +1,4 @@
{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results) %}
{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results, exclude_detection_period_from_training=false) %}
{{ config(tags = ['elementary-tests']) }}
{%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
{% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
@@ -39,7 +39,8 @@
anomaly_exclude_metrics=anomaly_exclude_metrics,
detection_period=detection_period,
training_period=training_period,
exclude_final_results=exclude_final_results) %}
exclude_final_results=exclude_final_results,
exclude_detection_period_from_training=exclude_detection_period_from_training) %}

{%- if not test_configuration %}
{{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}
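
The dimension_anomalies change above is parameter plumbing only and is not exercised by the new integration test. A rough sketch of how it could be covered through the same harness — the test name string and argument values here are assumptions modeled on the column test, not code from this PR:

# Hypothetical follow-up coverage; reuses the data and constants from the column test above.
test_args_dimension = {
    "timestamp_column": TIMESTAMP_COLUMN,
    "dimensions": ["superhero"],
    "training_period": {"period": "day", "count": 30},
    "detection_period": {"period": "day", "count": 7},
    "time_bucket": {"period": "day", "count": 1},
    "exclude_detection_period_from_training": True,  # the new flag
}

test_results = dbt_project.test(
    test_id + "_dimension_with_exclusion",
    "elementary.dimension_anomalies",  # assumed test name; the suite's constant may differ
    test_args_dimension,
    data=all_data,
    multiple_results=True,
)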