diff --git a/.gitignore b/.gitignore index d8c7c72f2..15e8d8196 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ target/ dbt_packages/ +dbt_internal_packages/ logs/ scripts/ diff --git a/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql b/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql index b8b8847f5..8268b395e 100644 --- a/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql +++ b/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql @@ -28,6 +28,7 @@ {%- set bucket_seasonality_expr = elementary.const_as_text('no_seasonality') %} {%- endif %} {%- set detection_end = elementary.get_detection_end(test_configuration.detection_delay) %} + {%- set detection_end_expr = elementary.edr_cast_as_timestamp(elementary.edr_datetime_to_sql(detection_end)) %} {%- set min_bucket_start_expr = elementary.get_trunc_min_bucket_start_expr(detection_end, metric_properties, test_configuration.days_back) %} {# For timestamped tests, this will be the bucket start, and for non-timestamped tests it will be the @@ -39,9 +40,9 @@ with buckets as ( select edr_bucket_start, edr_bucket_end from ({{ elementary.complete_buckets_cte(metric_properties, min_bucket_start_expr, - elementary.edr_quote(detection_end)) }}) results - where edr_bucket_start >= {{ elementary.edr_cast_as_timestamp(min_bucket_start_expr) }} - and edr_bucket_end <= {{ elementary.edr_cast_as_timestamp(elementary.edr_quote(detection_end)) }} + detection_end_expr) }}) results + where edr_bucket_start >= {{ min_bucket_start_expr }} + and edr_bucket_end <= {{ detection_end_expr }} ), {% else %} with @@ -121,7 +122,7 @@ {{ metric_time_bucket_expr }} as metric_time_bucket, {{ elementary.edr_cast_as_date(elementary.edr_date_trunc('day', metric_time_bucket_expr))}} as metric_date, - row_number() over (partition by id order by updated_at desc) as row_number + row_number() over (partition by id order by updated_at desc) as row_num from union_metrics ), @@ -144,7 +145,7 @@ bucket_duration_hours, updated_at from grouped_metrics_duplicates - where row_number = 1 + where row_num = 1 ), time_window_aggregation as ( diff --git a/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql b/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql index 09ceeb1d2..7d3d88be9 100644 --- a/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql +++ b/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql @@ -1,10 +1,10 @@ {% macro get_detection_end(detection_delay) %} {% if not detection_delay %} - {% do return(elementary.get_run_started_at()) %} + {% do return(elementary.get_run_started_at().replace(microsecond=0)) %} {% endif %} {%- set kwargs = {detection_delay.period+'s': detection_delay.count} %} - {%- set detection_end = elementary.get_run_started_at() - modules.datetime.timedelta(**kwargs) %} + {%- set detection_end = elementary.get_run_started_at().replace(microsecond=0) - modules.datetime.timedelta(**kwargs) %} {% do return(detection_end) %} {% endmacro %} @@ -105,8 +105,8 @@ {%- set buckets = elementary.agate_to_dicts(elementary.run_query(incremental_bucket_times_query))[0] %} {% endif %} {%- if buckets %} - {%- set min_bucket_start = elementary.edr_quote(buckets.get('min_bucket_start')) %} - {%- set max_bucket_end = elementary.edr_quote(buckets.get('max_bucket_end')) %} + {%- set min_bucket_start = elementary.edr_datetime_to_sql(buckets.get('min_bucket_start')) %} + {%- set max_bucket_end = elementary.edr_datetime_to_sql(buckets.get('max_bucket_end')) %} {{ return([min_bucket_start, max_bucket_end]) }} {%- else %} {{ exceptions.raise_compiler_error("Failed to calc test buckets min and max") }} diff --git a/macros/edr/data_monitoring/monitors/column_numeric_monitors.sql b/macros/edr/data_monitoring/monitors/column_numeric_monitors.sql index 781007ef6..00571747b 100644 --- a/macros/edr/data_monitoring/monitors/column_numeric_monitors.sql +++ b/macros/edr/data_monitoring/monitors/column_numeric_monitors.sql @@ -34,6 +34,10 @@ stddevPop(cast({{ column_name }} as {{ elementary.edr_type_float() }})) {%- endmacro %} +{% macro dremio__standard_deviation(column_name) -%} + stddev_pop(cast({{ column_name }} as {{ elementary.edr_type_float() }})) +{%- endmacro %} + {% macro variance(column_name) -%} {{ return(adapter.dispatch('variance', 'elementary')(column_name)) }} {%- endmacro %} diff --git a/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql b/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql index 5e735ee6b..81005ccb3 100644 --- a/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql +++ b/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql @@ -40,7 +40,6 @@ bucket_end, dimension_value, metric_value, - row_number () over (partition by dimension_value order by bucket_end desc) as row_number from {{ data_monitoring_metrics_relation }} where full_table_name = {{ full_table_name_str }} and metric_name = {{ elementary.edr_quote(metric_name) }} @@ -148,7 +147,6 @@ bucket_end, dimension_value, metric_value, - row_number () over (partition by dimension_value order by bucket_end desc) as row_number from {{ data_monitoring_metrics_relation }} where full_table_name = {{ full_table_name_str }} and metric_name = {{ elementary.edr_quote(metric_name) }} diff --git a/macros/edr/data_monitoring/monitors_query/table_monitoring_query.sql b/macros/edr/data_monitoring/monitors_query/table_monitoring_query.sql index 67aca02c6..56a1b4085 100644 --- a/macros/edr/data_monitoring/monitors_query/table_monitoring_query.sql +++ b/macros/edr/data_monitoring/monitors_query/table_monitoring_query.sql @@ -269,7 +269,7 @@ bucket_freshness_ranked as ( select *, - row_number () over (partition by edr_bucket_end order by freshness is null, freshness desc) as row_number + row_number () over (partition by edr_bucket_end order by freshness is null, freshness desc) as row_num from bucket_all_freshness_metrics ) @@ -281,7 +281,7 @@ {{ elementary.edr_cast_as_string('update_timestamp') }} as source_value, freshness as metric_value from bucket_freshness_ranked - where row_number = 1 + where row_num = 1 {% endmacro %} {% macro event_freshness_metric_query(metric, metric_properties) %} diff --git a/macros/edr/system/system_utils/buckets_cte.sql b/macros/edr/system/system_utils/buckets_cte.sql index fe82b61e8..f81baa20b 100644 --- a/macros/edr/system/system_utils/buckets_cte.sql +++ b/macros/edr/system/system_utils/buckets_cte.sql @@ -144,3 +144,20 @@ {{ return(complete_buckets_cte) }} {% endmacro %} +{% macro dremio__complete_buckets_cte(time_bucket, bucket_end_expr, min_bucket_start_expr, max_bucket_end_expr) %} + {%- set complete_buckets_cte %} + with integers as ( + select (row_number() over (order by t1.val, t2.val, t3.val, t4.val)) - 1 as num + from (values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10)) t1(val) + cross join (values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10)) t2(val) + cross join (values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10)) t3(val) + cross join (values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10)) t4(val) + ) + select + {{ elementary.edr_timeadd(time_bucket.period, 'num * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_start, + {{ elementary.edr_timeadd(time_bucket.period, '(num + 1) * ' ~ time_bucket.count, min_bucket_start_expr) }} as edr_bucket_end + from integers + where {{ elementary.edr_timeadd(time_bucket.period, '(num + 1) * ' ~ time_bucket.count, min_bucket_start_expr) }} <= {{ max_bucket_end_expr }} + {%- endset %} + {{ return(complete_buckets_cte) }} +{% endmacro %} diff --git a/macros/edr/tests/on_run_end/union_columns_snapshot_query.sql b/macros/edr/tests/on_run_end/union_columns_snapshot_query.sql index 225e7dc2b..c364744d4 100644 --- a/macros/edr/tests/on_run_end/union_columns_snapshot_query.sql +++ b/macros/edr/tests/on_run_end/union_columns_snapshot_query.sql @@ -9,7 +9,7 @@ ), columns_snapshot_with_duplicates as ( select *, - row_number() over (partition by column_state_id order by detected_at desc) as row_number + row_number() over (partition by column_state_id order by detected_at desc) as row_num from union_temp_columns_snapshot ) select @@ -21,7 +21,7 @@ is_new, detected_at from columns_snapshot_with_duplicates - where row_number = 1 + where row_num = 1 {%- endset %} {{ return(union_temp_query) }} {%- endif %} diff --git a/macros/edr/tests/on_run_end/union_metrics_query.sql b/macros/edr/tests/on_run_end/union_metrics_query.sql index 3ea0f32ef..541adf88d 100644 --- a/macros/edr/tests/on_run_end/union_metrics_query.sql +++ b/macros/edr/tests/on_run_end/union_metrics_query.sql @@ -9,7 +9,7 @@ ), metrics_with_duplicates as ( select *, - row_number() over (partition by id order by updated_at desc) as row_number + row_number() over (partition by id order by updated_at desc) as row_num from union_temps_metrics ) select @@ -28,7 +28,7 @@ dimension_value, metric_properties from metrics_with_duplicates - where row_number = 1 + where row_num = 1 {%- endset %} {{ return(union_temp_query) }} {%- endif %} diff --git a/macros/edr/tests/test_utils/clean_elementary_test_tables.sql b/macros/edr/tests/test_utils/clean_elementary_test_tables.sql index 870d81fac..e734e049f 100644 --- a/macros/edr/tests/test_utils/clean_elementary_test_tables.sql +++ b/macros/edr/tests/test_utils/clean_elementary_test_tables.sql @@ -48,11 +48,11 @@ {% endmacro %} {% macro trino__get_clean_elementary_test_tables_queries(test_table_relations) %} - {% set queries = [] %} - {% for test_relation in test_table_relations %} - {% do queries.append("DROP TABLE IF EXISTS {}".format(test_relation)) %} - {% endfor %} - {% do return(queries) %} + {% do return(elementary.get_transactionless_clean_elementary_test_tables_queries(test_table_relations)) %} +{% endmacro %} + +{% macro dremio__get_clean_elementary_test_tables_queries(test_table_relations) %} + {% do return(elementary.get_transactionless_clean_elementary_test_tables_queries(test_table_relations)) %} {% endmacro %} {% macro get_transaction_clean_elementary_test_tables_queries(test_table_relations) %} diff --git a/macros/edr/tests/test_utils/get_anomaly_query.sql b/macros/edr/tests/test_utils/get_anomaly_query.sql index 73a53e934..3ab1f7e62 100644 --- a/macros/edr/tests/test_utils/get_anomaly_query.sql +++ b/macros/edr/tests/test_utils/get_anomaly_query.sql @@ -62,7 +62,7 @@ case when final_results as ( select - metric_value as value, + metric_value as {{ elementary.escape_reserved_keywords('value') }}, training_avg as average, {# when there is an anomaly we would want to use the last value of the metric (lag), otherwise visually the expectations would look out of bounds #} case diff --git a/macros/utils/cross_db_utils/current_timestamp.sql b/macros/utils/cross_db_utils/current_timestamp.sql index c86aa2f8b..4e0b76b87 100644 --- a/macros/utils/cross_db_utils/current_timestamp.sql +++ b/macros/utils/cross_db_utils/current_timestamp.sql @@ -77,4 +77,4 @@ {% macro dremio__edr_current_timestamp_in_utc() -%} -- Dremio CURRENT_TIMESTAMP() is always in UTC CURRENT_TIMESTAMP() -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/utils/cross_db_utils/datediff.sql b/macros/utils/cross_db_utils/datediff.sql index 3091bffac..701d4d720 100644 --- a/macros/utils/cross_db_utils/datediff.sql +++ b/macros/utils/cross_db_utils/datediff.sql @@ -156,3 +156,28 @@ {% endif %} {{ return(macro(elementary.edr_cast_as_timestamp(first_date), elementary.edr_cast_as_timestamp(second_date), date_part)) }} {% endmacro %} + +{% macro dremio__edr_datediff(first_date, second_date, date_part) %} + {% if date_part.lower() == 'second' %} + {# Use a different method specifically for seconds diff - as it's helpful in a specific case + when subtracting aggregate values (problematic with the statement containing "select" - see below) #} + {%- set expr -%} + (unix_timestamp(substr(cast(({{ second_date }}) as varchar), 1, 19)) - + unix_timestamp(substr(cast(({{ first_date }}) as varchar), 1, 19))) + {%- endset -%} + {% do return(expr) %} + {% endif %} + + {% set macro = dbt.datediff or dbt_utils.datediff %} + {% if not macro %} + {{ exceptions.raise_compiler_error("Did not find a `datediff` macro.") }} + {% endif %} + + {% set sql = macro(elementary.edr_cast_as_timestamp(first_date), elementary.edr_cast_as_timestamp(second_date), date_part) %} + + {# Hack - dbt-dremio implements this macro as a select statement (which seems to be necessary), but in order + for it to really work we wrap it in parentheses and remove ; if it is there #} + {% set sql = '(' ~ sql.strip().replace(';', '') ~ ')' %} + + {% do return(sql) %} +{% endmacro %} diff --git a/macros/utils/cross_db_utils/datetime_to_sql.sql b/macros/utils/cross_db_utils/datetime_to_sql.sql new file mode 100644 index 000000000..dbe84a9d6 --- /dev/null +++ b/macros/utils/cross_db_utils/datetime_to_sql.sql @@ -0,0 +1,14 @@ +{% macro edr_datetime_to_sql(dt) %} + {% do return(adapter.dispatch("edr_datetime_to_sql", "elementary")(dt)) %} +{% endmacro %} + +{% macro default__edr_datetime_to_sql(dt) %} + {% do return(elementary.edr_quote(dt)) %} +{% endmacro %} + +{% macro dremio__edr_datetime_to_sql(dt) %} + {% if dt is string %} + {% set dt = modules.datetime.datetime.fromisoformat(dt) %} + {% endif %} + {% do return(elementary.edr_quote(dt.strftime(elementary.get_time_format()))) %} +{% endmacro %} diff --git a/macros/utils/data_types/cast_column.sql b/macros/utils/data_types/cast_column.sql index 2a93629f0..af8980576 100644 --- a/macros/utils/data_types/cast_column.sql +++ b/macros/utils/data_types/cast_column.sql @@ -29,6 +29,10 @@ ) {%- endmacro -%} +{%- macro dremio__edr_cast_as_timestamp(timestamp_field) -%} + cast({{ timestamp_field }} as {{ elementary.edr_type_timestamp() }}) +{%- endmacro -%} + {%- macro edr_cast_as_float(column) -%} cast({{ column }} as {{ elementary.edr_type_float() }}) {%- endmacro -%} @@ -85,6 +89,10 @@ ) {%- endmacro -%} +{%- macro dremio__edr_cast_as_date(timestamp_field) -%} + cast({{ timestamp_field }} as {{ elementary.edr_type_date() }}) +{%- endmacro -%} + {%- macro const_as_text(string) -%} {{ return(adapter.dispatch('const_as_text', 'elementary')(string)) }} diff --git a/macros/utils/data_types/data_type.sql b/macros/utils/data_types/data_type.sql index f44d7455d..b18e88c27 100644 --- a/macros/utils/data_types/data_type.sql +++ b/macros/utils/data_types/data_type.sql @@ -156,3 +156,7 @@ {% macro trino__edr_type_timestamp() %} timestamp(6) {% endmacro %} + +{% macro dremio__edr_type_timestamp() %} + timestamp +{% endmacro %} diff --git a/macros/utils/graph/get_package_database_and_schema.sql b/macros/utils/graph/get_package_database_and_schema.sql index 99d9d2796..280d9d4fb 100644 --- a/macros/utils/graph/get_package_database_and_schema.sql +++ b/macros/utils/graph/get_package_database_and_schema.sql @@ -24,4 +24,17 @@ {% endif %} {% endif %} {{ return([none, none]) }} -{% endmacro %} \ No newline at end of file +{% endmacro %} + +{% macro dremio__get_package_database_and_schema(package_name='elementary') %} + {% if execute %} + {% set node_in_package = graph.nodes.values() + | selectattr("resource_type", "==", "model") + | selectattr("package_name", "==", package_name) + | selectattr("config.materialized", "!=", "view") | first %} + {% if node_in_package %} + {{ return([node_in_package.database, node_in_package.schema]) }} + {% endif %} + {% endif %} + {{ return([none, none]) }} +{% endmacro %} diff --git a/macros/utils/sql_utils/escape_reserved_keywords.sql b/macros/utils/sql_utils/escape_reserved_keywords.sql index 6af639da8..d7d67ecab 100644 --- a/macros/utils/sql_utils/escape_reserved_keywords.sql +++ b/macros/utils/sql_utils/escape_reserved_keywords.sql @@ -14,7 +14,7 @@ {% endmacro %} {% macro dremio__is_reserved_keywords(keyword) %} - {% do return(keyword in ['filter', 'sql', 'timestamp']) %} + {% do return(keyword in ['filter', 'sql', 'timestamp', 'value']) %} {% endmacro %} {% macro escape_keywords(keyword) %}