elementary-data
diff --git a/‎integration_tests/tests/dbt_project.py‎
Lines changed: 20 additions & 11 deletions b/‎integration_tests/tests/dbt_project.py‎
Lines changed: 20 additions & 11 deletions
diff --git a/‎macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql‎
Lines changed: 194 additions & 0 deletions b/‎macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql‎
Lines changed: 194 additions & 0 deletions
diff --git a/‎macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql‎
Lines changed: 3 additions & 3 deletions b/‎macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql‎
Lines changed: 3 additions & 3 deletions
@@ -42,7 +42,7 @@ def get_dbt_runner(target: str, project_dir: str) -> BaseDbtRunner:
 class DbtProject:
     def __init__(self, target: str, project_dir: str):
         self.dbt_runner = get_dbt_runner(target, project_dir)
-
+        self.target = target
         self.project_dir_path = Path(project_dir)
         self.models_dir_path = self.project_dir_path / "models"
         self.tmp_models_dir_path = self.models_dir_path / "tmp"
@@ -57,23 +57,32 @@ def run_query(self, prerendered_query: str):
         )
         return results
 
-    @staticmethod
     def read_table_query(
+        self,
         table_name: str,
         where: Optional[str] = None,
         group_by: Optional[str] = None,
         order_by: Optional[str] = None,
         limit: Optional[int] = None,
         column_names: Optional[List[str]] = None,
     ):
+        """Build an unrendered SELECT over ``ref(table_name)``.
+
+        The result still contains the Jinja ``{{ ref('...') }}`` call, so it
+        has to be rendered before it can be executed.
+
+        Args:
+            table_name: Model name placed inside ``ref()``.
+            where: Optional condition, emitted after ``WHERE``.
+            group_by: Optional expression, emitted after ``GROUP BY``.
+            order_by: Optional expression, emitted after ``ORDER BY``.
+            limit: Optional row cap; a falsy value (``None`` or ``0``) adds
+                no cap.
+            column_names: Columns to select; ``*`` when empty or ``None``.
+
+        Returns:
+            The query text. When ``self.target`` is ``"fabric"`` the row cap
+            is written as ``SELECT TOP n``; otherwise as a trailing
+            ``LIMIT n``.
+        """
+        columns = ", ".join(column_names) if column_names else "*"
+        # T-SQL has no LIMIT clause: the row cap goes right after SELECT.
+        is_fabric = self.target == "fabric"
+        top_clause = f"TOP {limit} " if limit and is_fabric else ""
+        limit_clause = f"LIMIT {limit}" if limit and not is_fabric else ""
         return f"""
-            SELECT {', '.join(column_names) if column_names else '*'}
+            SELECT {top_clause}{columns}
             FROM {{{{ ref('{table_name}') }}}}
             {f"WHERE {where}" if where else ""}
             {f"GROUP BY {group_by}" if group_by else ""}
             {f"ORDER BY {order_by}" if order_by else ""}
-            {f"LIMIT {limit}" if limit else ""}
+            {limit_clause}
             """
 
     def read_table(
         self,
 
@@ -1,4 +1,8 @@
 {% macro get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
+    {# Entry point: resolves the adapter-specific implementation (default__ / fabric__ prefix) and forwards every argument unchanged. #}
+    {% set adapter_macro = adapter.dispatch('get_anomaly_scores_query', 'elementary') %}
+    {% do return(adapter_macro(
+        test_metrics_table_relation,
+        model_relation,
+        test_configuration,
+        metric_names,
+        column_name,
+        columns_only,
+        metric_properties,
+        data_monitoring_metrics_table
+    )) %}
+{% endmacro %}
+
+{% macro default__get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
     {%- set model_graph_node = elementary.get_model_graph_node(model_relation) %}
     {%- set full_table_name = elementary.model_node_to_full_name(model_graph_node) %}
     {%- set test_execution_id = elementary.get_test_execution_id() %}
@@ -228,6 +232,196 @@
     {{ return(anomaly_scores_query) }}
 {% endmacro %}
 
+{% macro fabric__get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
+    {#
+        Fabric (T-SQL) implementation of get_anomaly_scores_query.
+
+        Returns the text of a query that:
+          1. reads stored metrics from data_monitoring_metrics_table (defaults to the
+             data_monitoring_metrics elementary relation), filtered by table, metric names,
+             metric_properties and optionally column, dimensions, complete buckets and
+             the latest full refresh of an incremental model;
+          2. unions them with the rows of test_metrics_table_relation;
+          3. keeps one row per metric id (the one with the latest updated_at);
+          4. drops rows flagged by test_configuration.anomaly_exclude_metrics;
+          5. computes running avg / stdev / count of metric_value per
+             (metric_name, full_table_name, column_name, dimension, dimension_value,
+             bucket_seasonality) ordered by bucket_end, and from them the anomaly score
+             (metric_value - training_avg) / training_stddev.
+
+        Arguments:
+          test_metrics_table_relation    relation holding the metrics of the current test
+          model_relation                 relation of the monitored model
+          test_configuration             reads seasonality, detection_delay, days_back,
+                                         anomaly_sensitivity, anomaly_exclude_metrics,
+                                         timestamp_column and dimensions
+          metric_names                   list of metric names to score
+          column_name                    optional column filter (case-insensitive match)
+          columns_only                   when true, only rows with a non-null column_name
+          metric_properties              compared against the stored metric_properties JSON
+          data_monitoring_metrics_table  optional override of the stored-metrics relation
+    #}
+    {%- set model_graph_node = elementary.get_model_graph_node(model_relation) %}
+    {%- set full_table_name = elementary.model_node_to_full_name(model_graph_node) %}
+    {%- set test_execution_id = elementary.get_test_execution_id() %}
+    {%- set test_unique_id = elementary.get_test_unique_id() %}
+    {%- if not data_monitoring_metrics_table %}
+        {%- set data_monitoring_metrics_table = elementary.get_elementary_relation('data_monitoring_metrics') %}
+    {%- endif %}
+
+    {%- if elementary.is_incremental_model(model_graph_node) %}
+      {%- set latest_full_refresh = elementary.get_latest_full_refresh(model_graph_node) %}
+    {%- else %}
+      {%- set latest_full_refresh = none %}
+    {%- endif %}
+
+    {%- if test_configuration.seasonality == 'day_of_week' %}
+        {%- set bucket_seasonality_expr = elementary.edr_day_of_week_expression('bucket_end') %}
+    {%- elif test_configuration.seasonality == 'hour_of_day' %}
+        {%- set bucket_seasonality_expr = elementary.edr_hour_of_day_expression('bucket_end') %}
+    {%- elif test_configuration.seasonality == 'hour_of_week' %}
+        {%- set bucket_seasonality_expr = elementary.edr_hour_of_week_expression('bucket_end') %}
+    {%- else %}
+        {%- set bucket_seasonality_expr = elementary.const_as_text('no_seasonality') %}
+    {%- endif %}
+    {%- set detection_end = elementary.get_detection_end(test_configuration.detection_delay) %}
+    {%- set min_bucket_start_expr = elementary.get_trunc_min_bucket_start_expr(detection_end, metric_properties, test_configuration.days_back) %}
+
+    {%- set metric_time_bucket_expr = 'case when bucket_start is not null then bucket_start else bucket_end end' %}
+
+    {#- T-SQL has no boolean type, so the exclusion flag is emitted as 1 / 0, and stays NULL when the condition is unknown. -#}
+    {%- if test_configuration.anomaly_exclude_metrics %}
+        {%- set is_excluded_expr = 'case when (' ~ test_configuration.anomaly_exclude_metrics ~ ') then 1 when not (' ~ test_configuration.anomaly_exclude_metrics ~ ') then 0 end' %}
+    {%- else %}
+        {%- set is_excluded_expr = '0' %}
+    {%- endif %}
+
+    {%- set anomaly_scores_query %}
+        select * from (
+            select
+                {{ elementary.generate_surrogate_key([
+                 'metric_id',
+                 elementary.const_as_string(test_execution_id)
+                ]) }} as id,
+                metric_id,
+                {{ elementary.const_as_string(test_execution_id) }} as test_execution_id,
+                {{ elementary.const_as_string(test_unique_id) }} as test_unique_id,
+                {{ elementary.current_timestamp_column() }} as detected_at,
+                full_table_name,
+                column_name,
+                metric_name,
+                case
+                    when training_stddev is null then null
+                    when training_stddev = 0 then 0
+                    else (metric_value - training_avg) / (training_stddev)
+                end as anomaly_score,
+                {{ test_configuration.anomaly_sensitivity }} as anomaly_score_threshold,
+                source_value as anomalous_value,
+                {{ elementary.edr_cast_as_timestamp('bucket_start') }} as bucket_start,
+                {{ elementary.edr_cast_as_timestamp('bucket_end') }} as bucket_end,
+                bucket_seasonality,
+                metric_value,
+
+                {% set limit_values =  elementary.get_limit_metric_values(test_configuration) %}
+                {# The lower limit is kept only when it is positive or the metric is in the negative-value-supported list; otherwise it becomes 0. #}
+                case
+                    when training_stddev is null then null
+                    when {{ limit_values.min_metric_value }} > 0 or metric_name in {{ elementary.to_sql_list(elementary.get_negative_value_supported_metrics()) }} then {{ limit_values.min_metric_value }}
+                    else 0
+                end as min_metric_value,
+                case
+                    when training_stddev is null then null
+                    else {{ limit_values.max_metric_value }}
+                end as max_metric_value,
+                training_avg,
+                training_stddev,
+                training_set_size,
+                {{ elementary.edr_cast_as_timestamp('training_start') }} as training_start,
+                {{ elementary.edr_cast_as_timestamp('training_end') }} as training_end,
+                dimension,
+                dimension_value
+            from (
+                select
+                    metric_id,
+                    full_table_name,
+                    column_name,
+                    dimension,
+                    dimension_value,
+                    metric_name,
+                    metric_value,
+                    source_value,
+                    bucket_start,
+                    bucket_end,
+                    bucket_seasonality,
+                    bucket_duration_hours,
+                    updated_at,
+                    {# Running statistics over every bucket up to and including the current one. The T-SQL sample standard deviation function is STDEV, not STDDEV. #}
+                    avg(metric_value) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_avg,
+                    stdev(metric_value) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_stddev,
+                    count(metric_value) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_set_size,
+                    last_value(bucket_end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_end,
+                    first_value(bucket_end) over (partition by metric_name, full_table_name, column_name, dimension, dimension_value, bucket_seasonality order by bucket_end asc rows between unbounded preceding and current row) as training_start
+                from (
+                    select
+                        id as metric_id,
+                        full_table_name,
+                        column_name,
+                        dimension,
+                        dimension_value,
+                        metric_name,
+                        metric_value,
+                        source_value,
+                        bucket_start,
+                        bucket_end,
+                        {{ bucket_seasonality_expr }} as bucket_seasonality,
+                        {{ is_excluded_expr }} as is_excluded,
+                        bucket_duration_hours,
+                        updated_at
+                    from (
+                        select
+                            id,
+                            full_table_name,
+                            column_name,
+                            metric_name,
+                            metric_value,
+                            source_value,
+                            bucket_start,
+                            bucket_end,
+                            bucket_duration_hours,
+                            updated_at,
+                            dimension,
+                            dimension_value,
+                            {{ metric_time_bucket_expr }} as metric_time_bucket,
+                            {{ elementary.edr_cast_as_date(elementary.edr_date_trunc('day', metric_time_bucket_expr))}} as metric_date,
+                            {# Rank duplicates of the same metric id, newest first; only rank 1 survives below. #}
+                            row_number() over (partition by id order by updated_at desc) as row_number
+                        from (
+                            select * from (
+                                select
+                                    id,
+                                    full_table_name,
+                                    column_name,
+                                    metric_name,
+                                    metric_type,
+                                    metric_value,
+                                    source_value,
+                                    bucket_start,
+                                    bucket_end,
+                                    bucket_duration_hours,
+                                    updated_at,
+                                    dimension,
+                                    dimension_value,
+                                    metric_properties
+                                from {{ data_monitoring_metrics_table }}
+                                where
+                                    bucket_end > {{ min_bucket_start_expr }}
+                                    {% if test_configuration.timestamp_column %}
+                                        {# Keep only metrics whose bucket matches a complete bucket between the minimal bucket start and detection_end. #}
+                                        and exists (
+                                            select 1 from (
+                                                select edr_bucket_start, edr_bucket_end
+                                                from ({{ elementary.complete_buckets_cte(metric_properties, min_bucket_start_expr,
+                                                                                     elementary.edr_quote(detection_end)) }}) results
+                                                where edr_bucket_start >= {{ elementary.edr_cast_as_timestamp(min_bucket_start_expr) }}
+                                                  and edr_bucket_end <= {{ elementary.edr_cast_as_timestamp(elementary.edr_quote(detection_end)) }}
+                                            ) buckets
+                                            where buckets.edr_bucket_start = cast(bucket_start as datetime2(2))
+                                              and buckets.edr_bucket_end = cast(bucket_end as datetime2(2))
+                                        )
+                                    {% endif %}
+                                    and metric_properties = {{ elementary.dict_to_quoted_json(metric_properties) }}
+                                    {% if latest_full_refresh %}
+                                        and updated_at > {{ elementary.edr_cast_as_timestamp(elementary.edr_quote(latest_full_refresh)) }}
+                                    {% endif %}
+                                    and upper(full_table_name) = upper('{{ full_table_name }}')
+                                    and metric_name in {{ elementary.strings_list_to_tuple(metric_names) }}
+                                    {%- if column_name %}
+                                        and upper(column_name) = upper('{{ column_name }}')
+                                    {%- endif %}
+                                    {%- if columns_only %}
+                                        and column_name is not null
+                                    {%- endif %}
+                                    {% if test_configuration.dimensions %}
+                                        and dimension = {{ elementary.edr_quote(elementary.join_list(test_configuration.dimensions, '; ')) }}
+                                    {% endif %}
+                            ) data_monitoring_metrics
+                            union all
+                            select * from {{ test_metrics_table_relation }}
+                        ) union_metrics
+                    ) grouped_metrics_duplicates
+                    where row_number = 1
+                ) grouped_metrics
+                {# A NULL flag (unknown exclusion condition) is filtered out as well, like a negated NULL boolean would be. #}
+                where is_excluded = 0
+            ) time_window_aggregation
+            where
+                metric_value is not null
+                and training_avg is not null
+        ) anomaly_scores
+    {% endset %}
+    {{ return(anomaly_scores_query) }}
+{% endmacro %}
+
 {% macro get_negative_value_supported_metrics() %}
+    {# Returns the names of metrics treated as supporting negative values. The anomaly scores query keeps the computed min_metric_value for these names; for any other metric a non-positive lower limit is replaced by 0. #}
     {% do return(["min", "max", "average", "standard_deviation", "variance", "sum"]) %}
 {% endmacro %}
 
@@ -14,7 +14,7 @@
     {{ return(trunc_min_bucket_start_expr) }}
 {% endmacro %}
 
-{# This macro can't be used without truncating to full buckets #}
+{# This macro cannot be used without truncating to full buckets #}
 {% macro get_backfill_bucket_start(detection_end, backfill_days) %}
+    {# Returns detection_end minus backfill_days days as a string formatted YYYY-MM-DD 00:00:00, i.e. with the time of day zeroed. Assumes detection_end is a datetime object - TODO confirm against callers. #}
     {% do return((detection_end - modules.datetime.timedelta(backfill_days)).strftime("%Y-%m-%d 00:00:00")) %}
 {% endmacro %}
@@ -40,7 +40,7 @@
     {%- endif %}
 
     {%- set regular_bucket_times_query %}
-        with bucket_times as (
+        ;with bucket_times as (
             select
             {{ trunc_min_bucket_start_expr }} as days_back_start
            , {{ detection_end_expr }} as detection_end
@@ -58,7 +58,7 @@
     {%- endset %}
 
     {%- set incremental_bucket_times_query %}
-        with all_buckets as (
+        ;with all_buckets as (
             select edr_bucket_start as bucket_start, edr_bucket_end as bucket_end
             from ({{ elementary.complete_buckets_cte(metric_properties, trunc_min_bucket_start_expr, detection_end_expr) }}) results
             where edr_bucket_start >= {{ trunc_min_bucket_start_expr }}