Skip to content

Commit 57e7b7c

Browse files
Address PR review comments: rename column, add validation comment, set default to false
Co-Authored-By: Yosef Arbiv <[email protected]>
1 parent 8b5791d commit 57e7b7c

File tree

3 files changed

+13
-10
lines changed

3 files changed

+13
-10
lines changed

macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@
3131
{%- set detection_end_expr = elementary.edr_cast_as_timestamp(elementary.edr_datetime_to_sql(detection_end)) %}
3232
{%- set min_bucket_start_expr = elementary.get_trunc_min_bucket_start_expr(detection_end, metric_properties, test_configuration.days_back) %}
3333

34-
{# Calculate detection period start for exclusion logic #}
34+
{# Calculate detection period start for exclusion logic.
35+
The detection period spans from (detection_end - backfill_days) to detection_end.
36+
This ensures that the most recent backfill_days' worth of data is excluded from training,
37+
because those are the metrics being actively tested for anomalies. #}
3538
{%- if test_configuration.exclude_detection_period_from_training %}
3639
{%- set detection_period_start = (detection_end - modules.datetime.timedelta(days=test_configuration.backfill_days)) %}
3740
{%- set detection_period_start_expr = elementary.edr_cast_as_timestamp(elementary.edr_datetime_to_sql(detection_period_start)) %}
@@ -153,7 +156,7 @@
153156
bucket_end > {{ detection_period_start_expr }}
154157
{% else %}
155158
FALSE
156-
{% endif %} as is_detection_period,
159+
{% endif %} as should_exclude_from_training,
157160
bucket_duration_hours,
158161
updated_at
159162
from grouped_metrics_duplicates
@@ -176,12 +179,12 @@
176179
bucket_seasonality,
177180
bucket_duration_hours,
178181
updated_at,
179-
is_detection_period,
180-
avg(case when not is_detection_period then metric_value end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_avg,
181-
{{ elementary.standard_deviation('case when not is_detection_period then metric_value end') }} over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_stddev,
182-
count(case when not is_detection_period then metric_value end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_set_size,
183-
last_value(case when not is_detection_period then bucket_end end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) training_end,
184-
first_value(case when not is_detection_period then bucket_end end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_start
182+
should_exclude_from_training,
183+
avg(case when not should_exclude_from_training then metric_value end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_avg,
184+
{{ elementary.standard_deviation('case when not should_exclude_from_training then metric_value end') }} over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_stddev,
185+
count(case when not should_exclude_from_training then metric_value end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_set_size,
186+
last_value(case when not should_exclude_from_training then bucket_end end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) training_end,
187+
first_value(case when not should_exclude_from_training then bucket_end end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_start
185188
from grouped_metrics
186189
where not is_excluded
187190
{{ dbt_utils.group_by(14) }}

macros/edr/tests/test_table_anomalies.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{% test table_anomalies(model, table_anomalies, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, mandatory_params=none, event_timestamp_column=none, freshness_column=none, sensitivity=none, ignore_small_changes={"spike_failure_percent_threshold": none, "drop_failure_percent_threshold": none}, fail_on_zero=false, detection_delay=none, anomaly_exclude_metrics=none, detection_period=none, training_period=none, exclude_detection_period_from_training=none) %}
1+
{% test table_anomalies(model, table_anomalies, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, mandatory_params=none, event_timestamp_column=none, freshness_column=none, sensitivity=none, ignore_small_changes={"spike_failure_percent_threshold": none, "drop_failure_percent_threshold": none}, fail_on_zero=false, detection_delay=none, anomaly_exclude_metrics=none, detection_period=none, training_period=none, exclude_detection_period_from_training=false) %}
22
{{ config(tags = ['elementary-tests']) }}
33
{%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
44
{% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}

macros/edr/tests/test_volume_anomalies.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{% test volume_anomalies(model, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity, ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_detection_period_from_training) %}
1+
{% test volume_anomalies(model, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity, ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_detection_period_from_training=false) %}
22
{{ config(tags = ['elementary-tests']) }}
33

44
{{ elementary.test_table_anomalies(

0 commit comments

Comments
 (0)