diff --git a/.github/workflows/test-all-warehouses.yml b/.github/workflows/test-all-warehouses.yml
index ee3722726..7e2d1be5e 100644
--- a/.github/workflows/test-all-warehouses.yml
+++ b/.github/workflows/test-all-warehouses.yml
@@ -35,6 +35,7 @@ jobs:
databricks,
databricks_catalog,
athena,
+ clickhouse,
]
uses: ./.github/workflows/test-warehouse.yml
with:
diff --git a/.github/workflows/test-release.yml b/.github/workflows/test-release.yml
index 34edd7000..51c979daf 100644
--- a/.github/workflows/test-release.yml
+++ b/.github/workflows/test-release.yml
@@ -55,6 +55,7 @@ jobs:
databricks,
databricks_catalog,
athena,
+ clickhouse,
]
needs: get-latest-release-tags
uses: ./.github/workflows/test-warehouse.yml
diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml
index dcb074371..2d8a8329b 100644
--- a/.github/workflows/test-warehouse.yml
+++ b/.github/workflows/test-warehouse.yml
@@ -15,6 +15,7 @@ on:
- databricks_catalog
- spark
- athena
+ - clickhouse
elementary-ref:
type: string
required: false
diff --git a/docs/_snippets/cloud/integrations/clickhouse.mdx b/docs/_snippets/cloud/integrations/clickhouse.mdx
new file mode 100644
index 000000000..ee7f614fa
--- /dev/null
+++ b/docs/_snippets/cloud/integrations/clickhouse.mdx
@@ -0,0 +1,15 @@
+You will connect Elementary Cloud to ClickHouse for syncing the Elementary schema (created by the [Elementary dbt package](/cloud/onboarding/quickstart-dbt-package)).
+
+
+
+### Fill the connection form
+
+Provide the following fields:
+
+- **Host**: The hostname or IP address of your ClickHouse instance to connect to.
+- **Port**: The port of your ClickHouse instance to connect to.
+- **Elementary schema**: The name of your Elementary schema. Usually `[schema name]_elementary`.
+- **User**: The name of the Elementary user.
+- **Password**: The password associated with the provided user.
+
+
diff --git a/docs/_snippets/install-cli.mdx b/docs/_snippets/install-cli.mdx
index 4e88b84d0..9da9c2215 100644
--- a/docs/_snippets/install-cli.mdx
+++ b/docs/_snippets/install-cli.mdx
@@ -15,6 +15,7 @@ pip install 'elementary-data[redshift]'
pip install 'elementary-data[databricks]'
pip install 'elementary-data[athena]'
pip install 'elementary-data[trino]'
+pip install 'elementary-data[clickhouse]'
## Postgres doesn't require this step
```
diff --git a/docs/_snippets/oss/adapters-cards.mdx b/docs/_snippets/oss/adapters-cards.mdx
index 298862230..44ceba5f9 100644
--- a/docs/_snippets/oss/adapters-cards.mdx
+++ b/docs/_snippets/oss/adapters-cards.mdx
@@ -245,6 +245,19 @@
}
+ >
+
+
+
+ }
>
diff --git a/docs/cloud/integrations/dwh/clickhouse.mdx b/docs/cloud/integrations/dwh/clickhouse.mdx
index 32c1d91ca..005388b41 100644
--- a/docs/cloud/integrations/dwh/clickhouse.mdx
+++ b/docs/cloud/integrations/dwh/clickhouse.mdx
@@ -4,13 +4,13 @@ sidebarTitle: "ClickHouse"
---
-
-
-}
->
+ title="ClickHouse"
+ href="https://tally.so/r/3N6DlW?integration=ClickHouse"
+ icon={
+
+ }
+ >
Click for details
\ No newline at end of file
diff --git a/elementary/monitor/dbt_project/macros/get_test_results.sql b/elementary/monitor/dbt_project/macros/get_test_results.sql
index fc55df0ff..ecc9331a8 100644
--- a/elementary/monitor/dbt_project/macros/get_test_results.sql
+++ b/elementary/monitor/dbt_project/macros/get_test_results.sql
@@ -1,4 +1,8 @@
-{%- macro get_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) -%}
+{% macro prepare_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) %}
+ {% do return(adapter.dispatch('prepare_test_results', 'elementary_cli')(days_back, invocations_per_test, disable_passed_test_metrics)) %}
+{% endmacro %}
+
+{% macro default__prepare_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) %}
{% set elementary_tests_allowlist_status = ['fail', 'warn'] if disable_passed_test_metrics else ['fail', 'warn', 'pass'] %}
{% set select_test_results %}
@@ -10,7 +14,6 @@
select
*,
{{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('detected_at'), elementary.edr_current_timestamp(), 'day') }} as days_diff,
- {# When we split test into multiple test results, we want to have the same invocation order for the test results from the same run so we use rank. #}
rank() over (partition by elementary_unique_id order by {{elementary.edr_cast_as_timestamp('detected_at')}} desc) as invocations_rank_index
from test_results
)
@@ -55,11 +58,131 @@
where test_results.invocations_rank_index <= {{ invocations_per_test }}
order by test_results.elementary_unique_id, test_results.invocations_rank_index desc
{%- endset -%}
+ {% set elementary_database, elementary_schema = elementary.get_package_database_and_schema() %}
+ {% set ordered_test_results_relation = elementary.create_temp_table(elementary_database, elementary_schema, 'ordered_test_results', select_test_results) %}
+ {% do return(ordered_test_results_relation) %}
+{% endmacro %}
- {% set test_results = [] %}
+{% macro clickhouse__prepare_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) %}
+ {% do elementary.run_query('drop table if exists ordered_test_results') %}
+ {% set create_table_query %}
+ CREATE TABLE ordered_test_results (
+ id String,
+ invocation_id String,
+ test_execution_id String,
+ model_unique_id String,
+ test_unique_id String,
+ elementary_unique_id String,
+ detected_at DateTime,
+ database_name String,
+ schema_name String,
+ table_name String,
+ column_name String,
+ test_type String,
+ test_sub_type String,
+ test_results_description String,
+ test_description String,
+ original_path String,
+ package_name String,
+ owners String,
+ model_owner String,
+ tags String,
+ test_tags String,
+ model_tags String,
+ meta String,
+ model_meta String,
+ test_results_query String,
+ other String,
+ test_name String,
+ test_params String,
+ severity String,
+ status String,
+ execution_time Float64,
+ days_diff Int32,
+ invocations_rank_index UInt32,
+ failures Int64,
+ result_rows String
+ )
+ ENGINE = MergeTree()
+ ORDER BY (elementary_unique_id, invocations_rank_index);
+ {% endset %}
+ {% do elementary.run_query(create_table_query) %}
+ {% set insert_query %}
+ INSERT INTO ordered_test_results
+ SELECT
+ etr.id,
+ etr.invocation_id,
+ etr.test_execution_id,
+ etr.model_unique_id,
+ etr.test_unique_id,
+ CASE
+ WHEN etr.test_type = 'schema_change' THEN etr.test_unique_id
+ WHEN dt.short_name = 'dimension_anomalies' THEN etr.test_unique_id
+ ELSE coalesce(etr.test_unique_id, 'None') || '.' || coalesce(nullif(etr.column_name, ''), 'None') || '.' || coalesce(etr.test_sub_type, 'None')
+ END AS elementary_unique_id,
+ etr.detected_at,
+ etr.database_name,
+ etr.schema_name,
+ etr.table_name,
+ etr.column_name,
+ etr.test_type,
+ etr.test_sub_type,
+ etr.test_results_description,
+ dt.description AS test_description,
+ dt.original_path,
+ dt.package_name,
+ etr.owners,
+ da.owner AS model_owner,
+ etr.tags,
+ dt.tags AS test_tags,
+ da.tags AS model_tags,
+ dt.meta,
+ da.meta AS model_meta,
+ etr.test_results_query,
+ etr.other,
+ CASE
+ WHEN dt.short_name IS NOT NULL THEN dt.short_name
+ ELSE etr.test_name
+ END AS test_name,
+ etr.test_params,
+ etr.severity,
+ etr.status,
+ drr.execution_time,
+ {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('etr.detected_at'), elementary.edr_current_timestamp(), 'day') }} AS days_diff,
+ RANK() OVER (PARTITION BY elementary_unique_id ORDER BY etr.detected_at DESC) AS invocations_rank_index,
+ etr.failures,
+ etr.result_rows
+ FROM {{ ref('elementary', 'elementary_test_results') }} etr
+ JOIN {{ ref('elementary', 'dbt_tests') }} dt ON etr.test_unique_id = dt.unique_id
+ LEFT JOIN (
+ SELECT
+ min(detected_at) AS first_time_occurred,
+ test_unique_id
+ FROM {{ ref('elementary', 'elementary_test_results') }}
+ GROUP BY test_unique_id
+ ) first_occurred ON etr.test_unique_id = first_occurred.test_unique_id
+ LEFT JOIN (
+ SELECT unique_id, meta, tags, owner FROM {{ ref('elementary', 'dbt_models') }}
+ UNION ALL
+ SELECT unique_id, meta, tags, owner FROM {{ ref('elementary', 'dbt_sources') }}
+ ) da ON etr.model_unique_id = da.unique_id
+ LEFT JOIN {{ ref('elementary', 'dbt_run_results') }} drr ON etr.test_execution_id = drr.model_execution_id
+ WHERE {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('etr.detected_at'), elementary.edr_current_timestamp(), 'day') }} < {{ days_back }}
+ {% endset %}
+ {% do elementary.run_query(insert_query) %}
{% set elementary_database, elementary_schema = elementary.get_package_database_and_schema() %}
- {% set ordered_test_results_relation = elementary.create_temp_table(elementary_database, elementary_schema, 'ordered_test_results', select_test_results) %}
+ {% set ordered_test_results_relation = api.Relation.create(
+ database=elementary_database,
+ schema=elementary_schema,
+ identifier='ordered_test_results',
+ type='table'
+ ) %}
+ {% do return(ordered_test_results_relation) %}
+{% endmacro %}
+
+{%- macro get_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) -%}
+ {% set ordered_test_results_relation = elementary_cli.prepare_test_results(days_back, invocations_per_test, disable_passed_test_metrics) %}
{% set test_results_agate_sql %}
select * from {{ ordered_test_results_relation }}
@@ -73,6 +196,7 @@
{% set test_results_agate = elementary.run_query(test_results_agate_sql) %}
{% set test_result_rows_agate = elementary_cli.get_result_rows_agate(days_back, valid_ids_query) %}
+
{% if not elementary.has_temp_table_support() %}
{% do elementary.fully_drop_relation(ordered_test_results_relation) %}
{% endif %}
@@ -85,7 +209,7 @@
{% do filtered_tests.append(test) %}
{% endif %}
{% endfor %}
-
+ {% set test_results = [] %}
{% for test in filtered_tests %}
{% set test_rows_sample = none %}
{% if test.invocations_rank_index == 1 %}
@@ -117,7 +241,6 @@
{% do anomalous_row.update({dimensions[index]: diemsions_values[index]}) %}
{% endfor %}
{% if loop.last %}
- {# Adding dimensions to the headers #}
{% for index in range(dimensions | length) %}
{% do headers.append({'id': dimensions[index], 'display_name': dimensions[index], 'type': 'str'},) %}
{% endfor %}
@@ -126,7 +249,6 @@
{% do anomalous_rows.append(anomalous_row) %}
{% endif %}
{% endfor %}
- {# Adding the rest of the static headers (metrics headers) #}
{% do headers.extend([
{'id': 'anomalous_value_' ~ metric_name, 'display_name': ' '.join(metric_name.split('_')), 'type': 'int'},
{'id': 'anomalous_value_average_' ~ metric_name, 'display_name': 'average ' ~ ' '.join(metric_name.split('_')), 'type': 'int'}
@@ -138,10 +260,8 @@
{% endif %}
{%- endif -%}
{% endif %}
- {# Adding sample data to test results #}
{% do test.update({"sample_data": test_rows_sample}) %}
{% do test_results.append(test) %}
- {%- endfor -%}
-
+ {%- endfor %}
{% do return(test_results) %}
-{%- endmacro -%}
+{%- endmacro %}
diff --git a/elementary/monitor/dbt_project/macros/get_tests.sql b/elementary/monitor/dbt_project/macros/get_tests.sql
index 707561795..1b04f3d3e 100644
--- a/elementary/monitor/dbt_project/macros/get_tests.sql
+++ b/elementary/monitor/dbt_project/macros/get_tests.sql
@@ -63,22 +63,22 @@
)
select
- tests.unique_id,
+ tests.unique_id as unique_id,
tests.parent_model_unique_id as model_unique_id,
tests.database_name,
tests.schema_name,
nodes.name as model_name,
tests.test_column_name as column_name,
- tests.name,
+ tests.name as name,
tests.description,
tests.package_name,
tests.original_path,
tests.test_params,
- tests.meta,
+ tests.meta as meta,
nodes.meta as model_meta,
tests.tags,
nodes.tags as model_tags,
- tests.type,
+ tests.type as type,
last_test_results.test_type,
last_test_results.test_sub_type,
test_results_times.first_detected_at as created_at,
@@ -89,7 +89,6 @@
left join last_test_results on tests.unique_id = last_test_results.test_unique_id
left join nodes on tests.parent_model_unique_id = nodes.unique_id
{% endset %}
-
{% set tests_agate = run_query(get_tests_query) %}
{% do return(elementary.agate_to_dicts(tests_agate)) %}
{%- endif -%}
diff --git a/elementary/monitor/dbt_project/package-lock.yml b/elementary/monitor/dbt_project/package-lock.yml
index 92f2f01af..f4cc3abdd 100644
--- a/elementary/monitor/dbt_project/package-lock.yml
+++ b/elementary/monitor/dbt_project/package-lock.yml
@@ -1,6 +1,6 @@
packages:
- package: dbt-labs/dbt_utils
version: 0.8.6
- - package: elementary-data/elementary
- version: 0.18.2
-sha1_hash: 0e32f8128c729542efc1b5c8e2c4fbe6f6534369
+ - git: https://github.com/elementary-data/dbt-data-reliability.git
+ revision: 1517d9df8954be91f2c4b2eb6cc835647f029622
+sha1_hash: 57951992378e0dbee2916b0bef246d539e3000b1
diff --git a/elementary/monitor/dbt_project/packages.yml b/elementary/monitor/dbt_project/packages.yml
index ccf59ead3..eb4b2c23e 100644
--- a/elementary/monitor/dbt_project/packages.yml
+++ b/elementary/monitor/dbt_project/packages.yml
@@ -1,8 +1,8 @@
packages:
- package: dbt-labs/dbt_utils
version: [">=0.8.0", "<0.9.0"]
- - package: elementary-data/elementary
- version: 0.18.2
+ - git: https://github.com/elementary-data/dbt-data-reliability.git
+ revision: 1517d9df8954be91f2c4b2eb6cc835647f029622
# NOTE - for unreleased CLI versions we often need to update the package version to a commit hash (please leave this
# commented, so it will be easy to access)
diff --git a/pyproject.toml b/pyproject.toml
index 5b87f5510..9cc4eb11c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,7 +44,7 @@ dbt-databricks = {version = ">=0.20,<2.0.0", optional = true}
dbt-spark = {version = ">=0.20,<2.0.0", optional = true}
dbt-athena-community = {version = ">=1.6.3,<2.0.0", optional = true}
dbt-trino = {version = ">=1.5.0,<2.0.0", optional = true}
-
+dbt-clickhouse = {version = ">=0.20,<2.0.0", optional = true}
[tool.poetry.extras]
snowflake = ["dbt-snowflake"]
bigquery = ["dbt-bigquery"]
@@ -53,8 +53,9 @@ postgres = ["dbt-postgres"]
databricks = ["dbt-databricks"]
spark = ["dbt-spark"]
athena = ["dbt-athena-community"]
+clickhouse = ["dbt-clickhouse"]
trino = ["dbt-trino"]
-all = ["dbt-snowflake", "dbt-bigquery", "dbt-redshift", "dbt-postgres", "dbt-databricks", "dbt-spark"]
+all = ["dbt-snowflake", "dbt-bigquery", "dbt-redshift", "dbt-postgres", "dbt-databricks", "dbt-spark", "dbt-clickhouse", "dbt-athena-community", "dbt-trino"]
[build-system]
requires = ["poetry-core>=1.0.0"]