diff --git a/.github/workflows/test-all-warehouses.yml b/.github/workflows/test-all-warehouses.yml
index ee3722726..7e2d1be5e 100644
--- a/.github/workflows/test-all-warehouses.yml
+++ b/.github/workflows/test-all-warehouses.yml
@@ -35,6 +35,7 @@ jobs:
             databricks,
             databricks_catalog,
             athena,
+            clickhouse,
           ]
     uses: ./.github/workflows/test-warehouse.yml
     with:
diff --git a/.github/workflows/test-release.yml b/.github/workflows/test-release.yml
index 34edd7000..51c979daf 100644
--- a/.github/workflows/test-release.yml
+++ b/.github/workflows/test-release.yml
@@ -55,6 +55,7 @@ jobs:
             databricks,
             databricks_catalog,
             athena,
+            clickhouse,
           ]
     needs: get-latest-release-tags
     uses: ./.github/workflows/test-warehouse.yml
diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml
index dcb074371..2d8a8329b 100644
--- a/.github/workflows/test-warehouse.yml
+++ b/.github/workflows/test-warehouse.yml
@@ -15,6 +15,7 @@ on:
           - databricks_catalog
           - spark
           - athena
+          - clickhouse
       elementary-ref:
         type: string
         required: false
diff --git a/docs/_snippets/cloud/integrations/clickhouse.mdx b/docs/_snippets/cloud/integrations/clickhouse.mdx
new file mode 100644
index 000000000..ee7f614fa
--- /dev/null
+++ b/docs/_snippets/cloud/integrations/clickhouse.mdx
@@ -0,0 +1,15 @@
+You will connect Elementary Cloud to ClickHouse to sync the Elementary schema (created by the [Elementary dbt package](/cloud/onboarding/quickstart-dbt-package)).
+
+
+
+### Fill in the connection form
+
+Provide the following fields:
+
+- **Host**: The hostname or IP address of the ClickHouse server to connect to.
+- **Port**: The port to connect to on the ClickHouse server.
+- **Elementary schema**: The name of your Elementary schema. Usually `[schema name]_elementary`.
+- **User**: The name of the user created for Elementary.
+- **Password**: The password associated with the provided user.
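+
+For orientation, these fields mirror a ClickHouse target in dbt's `profiles.yml`. The sketch below is illustrative only; the profile name, host, port, and schema are placeholders rather than values Elementary requires:
+
+```yaml
+my_dbt_project:                      # placeholder profile name
+  target: prod
+  outputs:
+    prod:
+      type: clickhouse
+      host: clickhouse.example.com   # corresponds to the Host field above
+      port: 8443                     # corresponds to the Port field above
+      user: elementary               # corresponds to the User field above
+      password: "<password>"         # corresponds to the Password field above
+      schema: analytics              # the Elementary schema would then usually be analytics_elementary
+      secure: true
+```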
+
+
diff --git a/docs/_snippets/install-cli.mdx b/docs/_snippets/install-cli.mdx
index 4e88b84d0..9da9c2215 100644
--- a/docs/_snippets/install-cli.mdx
+++ b/docs/_snippets/install-cli.mdx
@@ -15,6 +15,7 @@ pip install 'elementary-data[redshift]'
 pip install 'elementary-data[databricks]'
 pip install 'elementary-data[athena]'
 pip install 'elementary-data[trino]'
+pip install 'elementary-data[clickhouse]'
 
 ## Postgres doesn't require this step
 ```
diff --git a/docs/_snippets/oss/adapters-cards.mdx b/docs/_snippets/oss/adapters-cards.mdx
index 298862230..44ceba5f9 100644
--- a/docs/_snippets/oss/adapters-cards.mdx
+++ b/docs/_snippets/oss/adapters-cards.mdx
@@ -245,6 +245,19 @@
     }
+  >
+
+
+
+  }
   >
diff --git a/docs/cloud/integrations/dwh/clickhouse.mdx b/docs/cloud/integrations/dwh/clickhouse.mdx
index 32c1d91ca..005388b41 100644
--- a/docs/cloud/integrations/dwh/clickhouse.mdx
+++ b/docs/cloud/integrations/dwh/clickhouse.mdx
@@ -4,13 +4,13 @@ sidebarTitle: "ClickHouse"
 ---
-
-
-  }
->
+  title="ClickHouse"
+  href="https://tally.so/r/3N6DlW?integration=ClickHouse"
+  icon={
+
+
+
+  }
+>
   Click for details
\ No newline at end of file
diff --git a/elementary/monitor/dbt_project/macros/get_test_results.sql b/elementary/monitor/dbt_project/macros/get_test_results.sql
index fc55df0ff..ecc9331a8 100644
--- a/elementary/monitor/dbt_project/macros/get_test_results.sql
+++ b/elementary/monitor/dbt_project/macros/get_test_results.sql
@@ -1,4 +1,8 @@
-{%- macro get_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) -%}
+{% macro prepare_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) %}
+    {% do return(adapter.dispatch('prepare_test_results', 'elementary_cli')(days_back, invocations_per_test, disable_passed_test_metrics)) %}
+{% endmacro %}
+
+{% macro default__prepare_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) %}
     {% set elementary_tests_allowlist_status = ['fail', 'warn'] if disable_passed_test_metrics else ['fail', 'warn', 'pass'] %}
 
     {% set select_test_results %}
@@ -10,7 +14,6 @@
        select *,
            {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('detected_at'), elementary.edr_current_timestamp(), 'day') }} as days_diff,
-            {# When we split test into multiple test results, we want to have the same invocation order for the test results from the same run so we use rank. #}
            rank() over (partition by elementary_unique_id order by {{elementary.edr_cast_as_timestamp('detected_at')}} desc) as invocations_rank_index
        from test_results
    )
@@ -55,11 +58,131 @@
        where test_results.invocations_rank_index <= {{ invocations_per_test }}
        order by test_results.elementary_unique_id, test_results.invocations_rank_index desc
    {%- endset -%}
+    {% set elementary_database, elementary_schema = elementary.get_package_database_and_schema() %}
+    {% set ordered_test_results_relation = elementary.create_temp_table(elementary_database, elementary_schema, 'ordered_test_results', select_test_results) %}
+    {% do return(ordered_test_results_relation) %}
+{% endmacro %}
 
-    {% set test_results = [] %}
+{% macro clickhouse__prepare_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) %}
+    {% do elementary.run_query('drop table if exists ordered_test_results') %}
+    {% set create_table_query %}
+        CREATE TABLE ordered_test_results (
+            id String,
+            invocation_id String,
+            test_execution_id String,
+            model_unique_id String,
+            test_unique_id String,
+            elementary_unique_id String,
+            detected_at DateTime,
+            database_name String,
+            schema_name String,
+            table_name String,
+            column_name String,
+            test_type String,
+            test_sub_type String,
+            test_results_description String,
+            test_description String,
+            original_path String,
+            package_name String,
+            owners String,
+            model_owner String,
+            tags String,
+            test_tags String,
+            model_tags String,
+            meta String,
+            model_meta String,
+            test_results_query String,
+            other String,
+            test_name String,
+            test_params String,
+            severity String,
+            status String,
+            execution_time Float64,
+            days_diff Int32,
+            invocations_rank_index UInt32,
+            failures Int64,
+            result_rows String
+        )
+        ENGINE = MergeTree()
+        ORDER BY (elementary_unique_id, invocations_rank_index);
+    {% endset %}
+    {% do elementary.run_query(create_table_query) %}
+    {% set insert_query %}
+        INSERT INTO ordered_test_results
+        SELECT
+            etr.id,
+            etr.invocation_id,
+            etr.test_execution_id,
+            etr.model_unique_id,
+            etr.test_unique_id,
+            CASE
+                WHEN etr.test_type = 'schema_change' THEN etr.test_unique_id
+                WHEN dt.short_name = 'dimension_anomalies' THEN etr.test_unique_id
+                ELSE coalesce(etr.test_unique_id, 'None') || '.' || coalesce(nullif(etr.column_name, ''), 'None') || '.' || coalesce(etr.test_sub_type, 'None')
+            END AS elementary_unique_id,
+            etr.detected_at,
+            etr.database_name,
+            etr.schema_name,
+            etr.table_name,
+            etr.column_name,
+            etr.test_type,
+            etr.test_sub_type,
+            etr.test_results_description,
+            dt.description AS test_description,
+            dt.original_path,
+            dt.package_name,
+            etr.owners,
+            da.owner AS model_owner,
+            etr.tags,
+            dt.tags AS test_tags,
+            da.tags AS model_tags,
+            dt.meta,
+            da.meta AS model_meta,
+            etr.test_results_query,
+            etr.other,
+            CASE
+                WHEN dt.short_name IS NOT NULL THEN dt.short_name
+                ELSE etr.test_name
+            END AS test_name,
+            etr.test_params,
+            etr.severity,
+            etr.status,
+            drr.execution_time,
+            {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('etr.detected_at'), elementary.edr_current_timestamp(), 'day') }} AS days_diff,
+            RANK() OVER (PARTITION BY elementary_unique_id ORDER BY etr.detected_at DESC) AS invocations_rank_index,
+            etr.failures,
+            etr.result_rows
+        FROM {{ ref('elementary', 'elementary_test_results') }} etr
+        JOIN {{ ref('elementary', 'dbt_tests') }} dt ON etr.test_unique_id = dt.unique_id
+        LEFT JOIN (
+            SELECT
+                min(detected_at) AS first_time_occurred,
+                test_unique_id
+            FROM {{ ref('elementary', 'elementary_test_results') }}
+            GROUP BY test_unique_id
+        ) first_occurred ON etr.test_unique_id = first_occurred.test_unique_id
+        LEFT JOIN (
+            SELECT unique_id, meta, tags, owner FROM {{ ref('elementary', 'dbt_models') }}
+            UNION ALL
+            SELECT unique_id, meta, tags, owner FROM {{ ref('elementary', 'dbt_sources') }}
+        ) da ON etr.model_unique_id = da.unique_id
+        LEFT JOIN {{ ref('elementary', 'dbt_run_results') }} drr ON etr.test_execution_id = drr.model_execution_id
+        WHERE {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('etr.detected_at'), elementary.edr_current_timestamp(), 'day') }} < {{ days_back }}
+    {% endset %}
+    {% do elementary.run_query(insert_query) %}
     {% set elementary_database, elementary_schema = elementary.get_package_database_and_schema() %}
-    {% set ordered_test_results_relation = elementary.create_temp_table(elementary_database, elementary_schema, 'ordered_test_results', select_test_results) %}
+    {% set ordered_test_results_relation = api.Relation.create(
+        database=elementary_database,
+        schema=elementary_schema,
+        identifier='ordered_test_results',
+        type='table'
+    ) %}
+    {% do return(ordered_test_results_relation) %}
+{% endmacro %}
+
+{%- macro get_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) -%}
+    {% set ordered_test_results_relation = elementary_cli.prepare_test_results(days_back, invocations_per_test, disable_passed_test_metrics) %}
 
     {% set test_results_agate_sql %}
         select * from {{ ordered_test_results_relation }}
@@ -73,6 +196,7 @@
     {% set test_results_agate = elementary.run_query(test_results_agate_sql) %}
     {% set test_result_rows_agate = elementary_cli.get_result_rows_agate(days_back, valid_ids_query) %}
+
     {% if not elementary.has_temp_table_support() %}
         {% do elementary.fully_drop_relation(ordered_test_results_relation) %}
     {% endif %}
@@ -85,7 +209,7 @@
                 {% do filtered_tests.append(test) %}
             {% endif %}
         {% endfor %}
-
+    {% set test_results = [] %}
     {% for test in filtered_tests %}
         {% set test_rows_sample = none %}
         {% if test.invocations_rank_index == 1 %}
@@ -117,7 +241,6 @@
                        {% do anomalous_row.update({dimensions[index]: diemsions_values[index]}) %}
                    {% endfor %}
                    {% if loop.last %}
-                        {# Adding dimensions to the headers #}
                        {% for index in range(dimensions | length) %}
                            {% do headers.append({'id': dimensions[index], 'display_name': dimensions[index], 'type': 'str'},) %}
                        {% endfor %}
@@ -126,7 +249,6 @@
                        {% do anomalous_rows.append(anomalous_row) %}
                    {% endif %}
                {% endfor %}
-                {# Adding the rest of the static headers (metrics headers) #}
                {% do headers.extend([
                    {'id': 'anomalous_value_' ~ metric_name, 'display_name': ' '.join(metric_name.split('_')), 'type': 'int'},
                    {'id': 'anomalous_value_average_' ~ metric_name, 'display_name': 'average ' ~ ' '.join(metric_name.split('_')), 'type': 'int'}
@@ -138,10 +260,8 @@
                {% endif %}
            {%- endif -%}
        {% endif %}
-        {# Adding sample data to test results #}
        {% do test.update({"sample_data": test_rows_sample}) %}
        {% do test_results.append(test) %}
-    {%- endfor -%}
-
+    {%- endfor %}
    {% do return(test_results) %}
-{%- endmacro -%}
+{%- endmacro %}
diff --git a/elementary/monitor/dbt_project/macros/get_tests.sql b/elementary/monitor/dbt_project/macros/get_tests.sql
index 707561795..1b04f3d3e 100644
--- a/elementary/monitor/dbt_project/macros/get_tests.sql
+++ b/elementary/monitor/dbt_project/macros/get_tests.sql
@@ -63,22 +63,22 @@
    )
 
    select
-        tests.unique_id,
+        tests.unique_id as unique_id,
        tests.parent_model_unique_id as model_unique_id,
        tests.database_name,
        tests.schema_name,
        nodes.name as model_name,
        tests.test_column_name as column_name,
-        tests.name,
+        tests.name as name,
        tests.description,
        tests.package_name,
        tests.original_path,
        tests.test_params,
-        tests.meta,
+        tests.meta as meta,
        nodes.meta as model_meta,
        tests.tags,
        nodes.tags as model_tags,
-        tests.type,
+        tests.type as type,
        last_test_results.test_type,
        last_test_results.test_sub_type,
        test_results_times.first_detected_at as created_at,
@@ -89,7 +89,6 @@
    left join last_test_results on tests.unique_id = last_test_results.test_unique_id
    left join nodes on tests.parent_model_unique_id = nodes.unique_id
    {% endset %}
-
    {% set tests_agate = run_query(get_tests_query) %}
    {% do return(elementary.agate_to_dicts(tests_agate)) %}
{%- endif -%}
diff --git a/elementary/monitor/dbt_project/package-lock.yml b/elementary/monitor/dbt_project/package-lock.yml
index 92f2f01af..f4cc3abdd 100644
--- a/elementary/monitor/dbt_project/package-lock.yml
+++ b/elementary/monitor/dbt_project/package-lock.yml
@@ -1,6 +1,6 @@
 packages:
   - package: dbt-labs/dbt_utils
     version: 0.8.6
-  - package: elementary-data/elementary
-    version: 0.18.2
-sha1_hash: 0e32f8128c729542efc1b5c8e2c4fbe6f6534369
+  - git: https://github.com/elementary-data/dbt-data-reliability.git
+    revision: 1517d9df8954be91f2c4b2eb6cc835647f029622
+sha1_hash: 57951992378e0dbee2916b0bef246d539e3000b1
diff --git a/elementary/monitor/dbt_project/packages.yml b/elementary/monitor/dbt_project/packages.yml
index ccf59ead3..eb4b2c23e 100644
--- a/elementary/monitor/dbt_project/packages.yml
+++ b/elementary/monitor/dbt_project/packages.yml
@@ -1,8 +1,8 @@
 packages:
   - package: dbt-labs/dbt_utils
     version: [">=0.8.0", "<0.9.0"]
-  - package: elementary-data/elementary
-    version: 0.18.2
+  - git: https://github.com/elementary-data/dbt-data-reliability.git
+    revision: 1517d9df8954be91f2c4b2eb6cc835647f029622
 
 # NOTE - for unreleased CLI versions we often need to update the package version to a commit hash (please leave this
 # commented, so it will be easy to access)
diff --git a/pyproject.toml b/pyproject.toml
index 5b87f5510..9cc4eb11c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,7 +44,7 @@ dbt-databricks = {version = ">=0.20,<2.0.0", optional = true}
 dbt-spark = {version = ">=0.20,<2.0.0", optional = true}
 dbt-athena-community = {version = ">=1.6.3,<2.0.0", optional = true}
 dbt-trino = {version = ">=1.5.0,<2.0.0", optional = true}
">=1.5.0,<2.0.0", optional = true} - +dbt-clickhouse = {version = ">=0.20,<2.0.0", optional = true} [tool.poetry.extras] snowflake = ["dbt-snowflake"] bigquery = ["dbt-bigquery"] @@ -53,8 +53,9 @@ postgres = ["dbt-postgres"] databricks = ["dbt-databricks"] spark = ["dbt-spark"] athena = ["dbt-athena-community"] +clickhouse = ["dbt-clickhouse"] trino = ["dbt-trino"] -all = ["dbt-snowflake", "dbt-bigquery", "dbt-redshift", "dbt-postgres", "dbt-databricks", "dbt-spark"] +all = ["dbt-snowflake", "dbt-bigquery", "dbt-redshift", "dbt-postgres", "dbt-databricks", "dbt-spark", "dbt-clickhouse", "dbt-athena-community", "dbt-trino"] [build-system] requires = ["poetry-core>=1.0.0"]