Skip to content

Commit b46d220

Browse files
authored
Ele 4942 dimension anomalies visualization (#847)
* fix
* return metrics only if fail
* fix tests
* fix tests
* only anomaly dim values
* undo change
* dim anomalies should return the data points of failed dimension values
* nit
* fix number of failed rows
1 parent 98750df commit b46d220

File tree

4 files changed

+53
-31
lines changed

4 files changed

+53
-31
lines changed

integration_tests/tests/test_dimension_anomalies.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,7 @@ def test_anomalous_dimension_anomalies(test_id: str, dbt_project: DbtProject):
8686
}
8787
for superhero in ["Superman", "Superman", "Superman", "Spiderman"]
8888
]
89+
8990
data += [
9091
{
9192
TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT),
@@ -98,12 +99,19 @@ def test_anomalous_dimension_anomalies(test_id: str, dbt_project: DbtProject):
9899
test_result = dbt_project.test(test_id, DBT_TEST_NAME, DBT_TEST_ARGS, data=data)
99100
assert test_result["status"] == "fail"
100101

101-
# Dimension anomalies only stores anomalous rows (unlike other anomaly tests) - so we should only get 1 row with the problematic value
102102
anomaly_test_points = get_latest_anomaly_test_points(dbt_project, test_id)
103-
assert len(anomaly_test_points) == 1
104-
assert anomaly_test_points[0]["is_anomalous"]
105-
assert anomaly_test_points[0]["dimension"] == "superhero"
106-
assert anomaly_test_points[0]["dimension_value"] == "Superman"
103+
104+
# Only dimension values with anomalies are stored in the test points
105+
dimension_values = set([x["dimension_value"] for x in anomaly_test_points])
106+
107+
superman_anomaly_test_points = [
108+
x for x in anomaly_test_points if x["dimension_value"] == "Superman"
109+
]
110+
111+
assert len(dimension_values) == 1
112+
assert "Superman" in dimension_values
113+
assert len(anomaly_test_points) == len(superman_anomaly_test_points)
114+
assert any(x["is_anomalous"] for x in superman_anomaly_test_points)
107115

108116

109117
# Anomalies currently not supported on ClickHouse

macros/edr/materializations/test/test.sql

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -51,20 +51,20 @@
5151
{% macro handle_dbt_test(flattened_test, materialization_macro) %}
5252
{% set result = materialization_macro() %}
5353
{% set sample_limit = elementary.get_config_var('test_sample_row_count') %}
54-
54+
5555
{% set disable_test_samples = false %}
5656
{% if "meta" in flattened_test and "disable_test_samples" in flattened_test["meta"] %}
5757
{% set disable_test_samples = flattened_test["meta"]["disable_test_samples"] %}
5858
{% endif %}
59-
59+
6060
{% if disable_test_samples %}
6161
{% set sample_limit = 0 %}
6262
{% elif elementary.is_pii_table(flattened_test) %}
6363
{% set sample_limit = 0 %}
6464
{% elif elementary.should_disable_sampling_for_pii(flattened_test) %}
6565
{% set sample_limit = 0 %}
6666
{% endif %}
67-
67+
6868
{% set result_rows = elementary.query_test_result_rows(sample_limit=sample_limit, ignore_passed_tests=true) %}
6969
{% set elementary_test_results_row = elementary.get_dbt_test_result_row(flattened_test, result_rows) %}
7070
{% do elementary.cache_elementary_test_results_rows([elementary_test_results_row]) %}
@@ -125,7 +125,7 @@
125125
{% do elementary.debug_log("Skipping sample query because the test passed.") %}
126126
{% do return([]) %}
127127
{% endif %}
128-
128+
129129
{% set query %}
130130
with test_results as (
131131
{{ sql }}
@@ -137,23 +137,23 @@
137137

138138
{% macro get_columns_to_exclude_from_sampling(flattened_test) %}
139139
{% set columns_to_exclude = [] %}
140-
140+
141141
{% if not flattened_test %}
142142
{% do return(columns_to_exclude) %}
143143
{% endif %}
144-
144+
145145
{% if elementary.get_config_var('disable_samples_on_pii_tags') %}
146146
{% set pii_columns = elementary.get_pii_columns_from_parent_model(flattened_test) %}
147147
{% set columns_to_exclude = columns_to_exclude + pii_columns %}
148148
{% endif %}
149-
149+
150150
{% if elementary.is_sampling_disabled_for_column(flattened_test) %}
151151
{% set test_column_name = elementary.insensitive_get_dict_value(flattened_test, 'test_column_name') %}
152152
{% if test_column_name and test_column_name not in columns_to_exclude %}
153153
{% do columns_to_exclude.append(test_column_name) %}
154154
{% endif %}
155155
{% endif %}
156-
156+
157157
{% do return(columns_to_exclude) %}
158158
{% endmacro %}
159159

@@ -162,48 +162,48 @@
162162
{% if not elementary.get_config_var('disable_samples_on_pii_tags') %}
163163
{% do return(false) %}
164164
{% endif %}
165-
165+
166166
{% set pii_columns = elementary.get_pii_columns_from_parent_model(flattened_test) %}
167167
{% if not pii_columns %}
168168
{% do return(false) %}
169169
{% endif %}
170-
170+
171171
{# Get the compiled test query #}
172172
{% set test_query = elementary.get_compiled_code(flattened_test) %}
173173
{% set test_query_lower = test_query.lower() %}
174-
174+
175175
{# Check if query uses * (select all columns) #}
176-
{# Note: This is intentionally conservative and may over-censor in cases like
176+
{# Note: This is intentionally conservative and may over-censor in cases like
177177
"SELECT * FROM other_table" in CTEs, but it's better to be safe with PII data #}
178178
{% if '*' in test_query_lower %}
179179
{% do return(true) %}
180180
{% endif %}
181-
181+
182182
{# Check if any PII column appears in the test query #}
183183
{% for pii_column in pii_columns %}
184184
{% if pii_column.lower() in test_query_lower %}
185185
{% do return(true) %}
186186
{% endif %}
187187
{% endfor %}
188-
188+
189189
{% do return(false) %}
190190
{% endmacro %}
191191
192192
{% macro is_sampling_disabled_for_column(flattened_test) %}
193193
{% set test_column_name = elementary.insensitive_get_dict_value(flattened_test, 'test_column_name') %}
194194
{% set parent_model_unique_id = elementary.insensitive_get_dict_value(flattened_test, 'parent_model_unique_id') %}
195-
195+
196196
{% if not test_column_name or not parent_model_unique_id %}
197197
{% do return(false) %}
198198
{% endif %}
199-
199+
200200
{% set parent_model = elementary.get_node(parent_model_unique_id) %}
201201
{% if parent_model and parent_model.get('columns') %}
202202
{% set column_config = parent_model.get('columns', {}).get(test_column_name, {}).get('config', {}) %}
203203
{% set disable_test_samples = elementary.safe_get_with_default(column_config, 'disable_test_samples', false) %}
204204
{% do return(disable_test_samples) %}
205205
{% endif %}
206-
206+
207207
{% do return(false) %}
208208
{% endmacro %}
209209

macros/edr/tests/test_dimension_anomalies.sql

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
detection_period=detection_period,
4141
training_period=training_period,
4242
exclude_final_results=exclude_final_results) %}
43-
43+
4444
{%- if not test_configuration %}
4545
{{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}
4646
{%- endif %}
@@ -71,11 +71,11 @@
7171
{{ elementary.test_log('end', full_table_name) }}
7272

7373
{% set flattened_test = elementary.flatten_test(context["model"]) %}
74-
{% set anomalous_rows_sql = elementary.get_anomaly_query(flatten_model) %}
74+
{% set anomalous_dimension_rows_sql = elementary.get_anomaly_query_for_dimension_anomalies(flattened_test) %}
7575
{% do elementary.store_metrics_table_in_cache() %}
76-
{% do elementary.store_anomaly_test_results(flattened_test, anomalous_rows_sql) %}
76+
{% do elementary.store_anomaly_test_results(flattened_test, anomalous_dimension_rows_sql) %}
7777

78-
{{ anomalous_rows_sql }}
78+
{{ elementary.get_anomaly_query(flattened_test) }}
7979

8080
{% else %}
8181

macros/edr/tests/test_utils/get_anomaly_query.sql

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,20 @@
66
{{- return(query) -}}
77
{%- endmacro -%}
88

9+
{%- macro get_anomaly_query_for_dimension_anomalies(flattened_test=none) -%}
10+
{%- set dimension_values_query -%}
11+
select distinct dimension_value from ({{ elementary.get_read_anomaly_scores_query(flattened_test) }}) results
12+
where is_anomalous = true
13+
{%- endset -%}
14+
15+
{% set dimension_anomalies_query -%}
16+
select * from ({{ elementary.get_read_anomaly_scores_query(flattened_test) }}) results
17+
where dimension_value in ({{ dimension_values_query }})
18+
{%- endset -%}
19+
20+
{{- return(dimension_anomalies_query) -}}
21+
{%- endmacro -%}
22+
923
{% macro get_read_anomaly_scores_query(flattened_test=none) %}
1024
{% if not flattened_test %}
1125
{% set flattened_test = elementary.flatten_test(model) %}
@@ -71,15 +85,15 @@ case when
7185
when is_anomalous = TRUE and '{{ test_configuration.anomaly_direction }}' != 'spike' then
7286
{{ elementary.lag('min_metric_value') }} over (partition by full_table_name, column_name, metric_name, dimension, dimension_value, bucket_seasonality order by bucket_end)
7387
when '{{ test_configuration.anomaly_direction }}' = 'spike' then metric_value
74-
else min_metric_value
88+
else min_metric_value
7589
end as min_value,
7690
case
7791
when is_anomalous = TRUE and '{{ test_configuration.anomaly_direction }}' = 'drop' then
7892
{{ elementary.lag('metric_value') }} over (partition by full_table_name, column_name, metric_name, dimension, dimension_value, bucket_seasonality order by bucket_end)
7993
when is_anomalous = TRUE and '{{ test_configuration.anomaly_direction }}' != 'drop' then
8094
{{ elementary.lag('max_metric_value') }} over (partition by full_table_name, column_name, metric_name, dimension, dimension_value, bucket_seasonality order by bucket_end)
8195
when '{{ test_configuration.anomaly_direction }}' = 'drop' then metric_value
82-
else max_metric_value
96+
else max_metric_value
8397
end as max_value,
8498
bucket_start as start_time,
8599
bucket_end as end_time,
@@ -121,7 +135,7 @@ case when
121135
{% set drop_filter %}
122136
(metric_value < ((1 - {{ drop_failure_percent_threshold }}/100.0) * training_avg))
123137
{% endset %}
124-
138+
125139
{% if spike_failure_percent_threshold and drop_failure_percent_threshold and (anomaly_direction | lower) == 'both' %}
126140
{{ spike_filter }} or {{ drop_filter }}
127141
{% else %}
@@ -144,7 +158,7 @@ case when
144158

145159
{% macro fail_on_zero(fail_on_zero) %}
146160
(
147-
metric_value = 0 and
161+
metric_value = 0 and
148162
{% if fail_on_zero %}
149163
1 = 1
150164
{% else %}
@@ -163,7 +177,7 @@ case when
163177
test_configuration.ignore_small_changes.spike_failure_percent_threshold,
164178
test_configuration.ignore_small_changes.drop_failure_percent_threshold,
165179
test_configuration.anomaly_direction
166-
)
180+
)
167181
}}
168182
)
169183
))

0 commit comments

Comments
 (0)