|
{% macro get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
    {#-
        Adapter-dispatching entry point.

        Looks up the 'get_anomaly_scores_query' implementation in the
        'elementary' namespace via adapter.dispatch and returns whatever that
        implementation returns. All arguments are forwarded positionally, in
        the same order as this signature.
    -#}
    {%- set adapter_impl = adapter.dispatch('get_anomaly_scores_query', 'elementary') -%}
    {%- do return(adapter_impl(
        test_metrics_table_relation,
        model_relation,
        test_configuration,
        metric_names,
        column_name,
        columns_only,
        metric_properties,
        data_monitoring_metrics_table
    )) -%}
{% endmacro %}
| 4 | + |
| 5 | +{% macro default__get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %} |
2 | 6 | {%- set model_graph_node = elementary.get_model_graph_node(model_relation) %} |
3 | 7 | {%- set full_table_name = elementary.model_node_to_full_name(model_graph_node) %} |
4 | 8 | {%- set test_execution_id = elementary.get_test_execution_id() %} |
|
228 | 232 | {{ return(anomaly_scores_query) }} |
229 | 233 | {% endmacro %} |
230 | 234 |
|
{% macro fabric__get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
    {#-
        Fabric (T-SQL) implementation of get_anomaly_scores_query.

        Builds and returns the SQL text that computes anomaly scores for the
        metrics of one model. The query is written as nested derived tables:
          1. data_monitoring_metrics    - stored metrics filtered to this model,
                                          metric names, column, dimensions and
                                          metric properties
          2. union_metrics              - the above plus every row of
                                          test_metrics_table_relation
          3. grouped_metrics_duplicates - ranks rows per id by updated_at desc
          4. grouped_metrics            - keeps the newest row per id and adds
                                          bucket_seasonality / is_excluded
          5. time_window_aggregation    - running avg / stdev / count per
                                          metric partition, ordered by bucket_end
          6. anomaly_scores             - the score and the final column set

        Arguments:
          test_metrics_table_relation   - relation unioned with the stored metrics.
          model_relation                - relation used to resolve the model graph
                                          node and its full table name.
          test_configuration            - reads seasonality, detection_delay,
                                          days_back, anomaly_sensitivity,
                                          anomaly_exclude_metrics, timestamp_column
                                          and dimensions; also passed to
                                          elementary.get_limit_metric_values.
          metric_names                  - metric names to keep.
          column_name                   - optional; compared case-insensitively.
          columns_only                  - when true, keep only rows whose
                                          column_name is not null.
          metric_properties             - matched against the stored
                                          metric_properties and passed to the
                                          bucket helpers.
          data_monitoring_metrics_table - optional; defaults to the elementary
                                          'data_monitoring_metrics' relation.

        Returns: the rendered query string.
    -#}
    {%- set model_graph_node = elementary.get_model_graph_node(model_relation) %}
    {%- set full_table_name = elementary.model_node_to_full_name(model_graph_node) %}
    {%- set test_execution_id = elementary.get_test_execution_id() %}
    {%- set test_unique_id = elementary.get_test_unique_id() %}
    {%- if not data_monitoring_metrics_table %}
        {%- set data_monitoring_metrics_table = elementary.get_elementary_relation('data_monitoring_metrics') %}
    {%- endif %}

    {#- For incremental models only metrics updated after the latest full refresh are used. -#}
    {%- if elementary.is_incremental_model(model_graph_node) %}
        {%- set latest_full_refresh = elementary.get_latest_full_refresh(model_graph_node) %}
    {%- else %}
        {%- set latest_full_refresh = none %}
    {%- endif %}

    {%- if test_configuration.seasonality == 'day_of_week' %}
        {%- set bucket_seasonality_expr = elementary.edr_day_of_week_expression('bucket_end') %}
    {%- elif test_configuration.seasonality == 'hour_of_day' %}
        {%- set bucket_seasonality_expr = elementary.edr_hour_of_day_expression('bucket_end') %}
    {%- elif test_configuration.seasonality == 'hour_of_week' %}
        {%- set bucket_seasonality_expr = elementary.edr_hour_of_week_expression('bucket_end') %}
    {%- else %}
        {%- set bucket_seasonality_expr = elementary.const_as_text('no_seasonality') %}
    {%- endif %}
    {%- set detection_end = elementary.get_detection_end(test_configuration.detection_delay) %}
    {%- set min_bucket_start_expr = elementary.get_trunc_min_bucket_start_expr(detection_end, metric_properties, test_configuration.days_back) %}

    {%- set metric_time_bucket_expr = 'case when bucket_start is not null then bucket_start else bucket_end end' %}

    {#- T-SQL has no boolean literals, so the "exclude nothing" default is an always-false predicate. -#}
    {%- set exclude_metrics_condition = test_configuration.anomaly_exclude_metrics or '1 = 0' %}

    {#- One window specification shared by every training_* aggregate below. -#}
    {%- set training_window = 'partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row' %}

    {%- set limit_values = elementary.get_limit_metric_values(test_configuration) %}

    {%- set anomaly_scores_query %}
        select * from (
            select
                {{ elementary.generate_surrogate_key([
                    'metric_id',
                    elementary.const_as_string(test_execution_id)
                ]) }} as id,
                metric_id,
                {{ elementary.const_as_string(test_execution_id) }} as test_execution_id,
                {{ elementary.const_as_string(test_unique_id) }} as test_unique_id,
                {{ elementary.current_timestamp_column() }} as detected_at,
                full_table_name,
                column_name,
                metric_name,
                {# A zero standard deviation yields score 0, which also avoids dividing by zero. #}
                case
                    when training_stddev is null then null
                    when training_stddev = 0 then 0
                    else (metric_value - training_avg) / (training_stddev)
                end as anomaly_score,
                {{ test_configuration.anomaly_sensitivity }} as anomaly_score_threshold,
                source_value as anomalous_value,
                {{ elementary.edr_cast_as_timestamp('bucket_start') }} as bucket_start,
                {{ elementary.edr_cast_as_timestamp('bucket_end') }} as bucket_end,
                bucket_seasonality,
                metric_value,
                {# The lower bound is floored at 0 unless it is positive or the metric is in the negative-value-supported list. #}
                case
                    when training_stddev is null then null
                    when {{ limit_values.min_metric_value }} > 0 or metric_name in {{ elementary.to_sql_list(elementary.get_negative_value_supported_metrics()) }} then {{ limit_values.min_metric_value }}
                    else 0
                end as min_metric_value,
                case
                    when training_stddev is null then null
                    else {{ limit_values.max_metric_value }}
                end as max_metric_value,
                training_avg,
                training_stddev,
                training_set_size,
                {{ elementary.edr_cast_as_timestamp('training_start') }} as training_start,
                {{ elementary.edr_cast_as_timestamp('training_end') }} as training_end,
                dimension,
                dimension_value
            from (
                select
                    metric_id,
                    full_table_name,
                    column_name,
                    dimension,
                    dimension_value,
                    metric_name,
                    metric_value,
                    source_value,
                    bucket_start,
                    bucket_end,
                    bucket_seasonality,
                    bucket_duration_hours,
                    updated_at,
                    avg(metric_value) over ({{ training_window }}) as training_avg,
                    {# T-SQL names the sample standard deviation aggregate STDEV, not STDDEV. #}
                    stdev(metric_value) over ({{ training_window }}) as training_stddev,
                    count(metric_value) over ({{ training_window }}) as training_set_size,
                    last_value(bucket_end) over ({{ training_window }}) as training_end,
                    first_value(bucket_end) over ({{ training_window }}) as training_start
                from (
                    select
                        id as metric_id,
                        full_table_name,
                        column_name,
                        dimension,
                        dimension_value,
                        metric_name,
                        metric_value,
                        source_value,
                        bucket_start,
                        bucket_end,
                        {{ bucket_seasonality_expr }} as bucket_seasonality,
                        {# T-SQL cannot select a predicate as a value: encode it as 1 / 0, and NULL when the predicate is unknown. #}
                        case
                            when ({{ exclude_metrics_condition }}) then 1
                            when not ({{ exclude_metrics_condition }}) then 0
                        end as is_excluded,
                        bucket_duration_hours,
                        updated_at
                    from (
                        select
                            id,
                            full_table_name,
                            column_name,
                            metric_name,
                            metric_value,
                            source_value,
                            bucket_start,
                            bucket_end,
                            bucket_duration_hours,
                            updated_at,
                            dimension,
                            dimension_value,
                            {{ metric_time_bucket_expr }} as metric_time_bucket,
                            {{ elementary.edr_cast_as_date(elementary.edr_date_trunc('day', metric_time_bucket_expr))}} as metric_date,
                            row_number() over (partition by id order by updated_at desc) as row_number
                        from (
                            select * from (
                                select
                                    id,
                                    full_table_name,
                                    column_name,
                                    metric_name,
                                    metric_type,
                                    metric_value,
                                    source_value,
                                    bucket_start,
                                    bucket_end,
                                    bucket_duration_hours,
                                    updated_at,
                                    dimension,
                                    dimension_value,
                                    metric_properties
                                from {{ data_monitoring_metrics_table }}
                                where
                                    bucket_end > {{ min_bucket_start_expr }}
                                    {% if test_configuration.timestamp_column %}
                                    {# Keep only metrics whose bucket matches a bucket produced by complete_buckets_cte within the detection range. #}
                                    and exists (
                                        select 1 from (
                                            select edr_bucket_start, edr_bucket_end
                                            from ({{ elementary.complete_buckets_cte(metric_properties, min_bucket_start_expr,
                                                elementary.edr_quote(detection_end)) }}) results
                                            where edr_bucket_start >= {{ elementary.edr_cast_as_timestamp(min_bucket_start_expr) }}
                                              and edr_bucket_end <= {{ elementary.edr_cast_as_timestamp(elementary.edr_quote(detection_end)) }}
                                        ) buckets
                                        where buckets.edr_bucket_start = cast(bucket_start as datetime2(2))
                                          and buckets.edr_bucket_end = cast(bucket_end as datetime2(2))
                                    )
                                    {% endif %}
                                    and metric_properties = {{ elementary.dict_to_quoted_json(metric_properties) }}
                                    {% if latest_full_refresh %}
                                    and updated_at > {{ elementary.edr_cast_as_timestamp(elementary.edr_quote(latest_full_refresh)) }}
                                    {% endif %}
                                    and upper(full_table_name) = upper('{{ full_table_name }}')
                                    and metric_name in {{ elementary.strings_list_to_tuple(metric_names) }}
                                    {%- if column_name %}
                                    and upper(column_name) = upper('{{ column_name }}')
                                    {%- endif %}
                                    {%- if columns_only %}
                                    and column_name is not null
                                    {%- endif %}
                                    {% if test_configuration.dimensions %}
                                    and dimension = {{ elementary.edr_quote(elementary.join_list(test_configuration.dimensions, '; ')) }}
                                    {% endif %}
                            ) data_monitoring_metrics
                            union all
                            select * from {{ test_metrics_table_relation }}
                        ) union_metrics
                    ) grouped_metrics_duplicates
                    where row_number = 1
                ) grouped_metrics
                {# Only rows whose exclude condition is definitely false feed the training window. #}
                where is_excluded = 0
            ) time_window_aggregation
            where
                metric_value is not null
                and training_avg is not null
        ) anomaly_scores
    {% endset %}
    {{ return(anomaly_scores_query) }}
{% endmacro %}
| 424 | + |
{% macro get_negative_value_supported_metrics() %}
    {#-
        Returns a fixed list of metric names. Judging by the macro name these
        are the metrics for which negative values are supported; the Fabric
        anomaly-scores query in this file checks metric_name against this list
        when deciding whether min_metric_value is floored at 0.
    -#}
    {%- set negative_value_metrics = ["min", "max", "average", "standard_deviation", "variance", "sum"] -%}
    {{ return(negative_value_metrics) }}
{% endmacro %}
|
0 commit comments