diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e12833ec1..f749b7a38 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,8 +44,11 @@ repos: language: system files: ^elementary/.*\.py$ - - repo: https://github.com/CoderJoshDK/precommit-mintlify-validate/ - rev: v0.2.0 + - repo: local hooks: - id: mintlify-validate - args: [docs] + name: Mintlify validate + entry: bash -c "cd docs && mintlify broken-links | tee /dev/stderr | grep -q 'no broken links found'" + language: system + require_serial: true + pass_filenames: false diff --git a/docs/_snippets/alerts/description.mdx b/docs/_snippets/alerts/description.mdx index 6b3fcb508..76e249ffe 100644 --- a/docs/_snippets/alerts/description.mdx +++ b/docs/_snippets/alerts/description.mdx @@ -4,10 +4,11 @@ It's recommended to add an explanation of what does it mean if this test fails, ```yml test -tests: +data_tests: - not_null: - meta: - description: "This is the test description" + config: + meta: + description: "This is the test description" ``` ```yml test config block @@ -20,7 +21,7 @@ tests: ``` ```yml dbt_project.yml -tests: +data_tests: path: subfolder: +meta: diff --git a/docs/_snippets/alerts/owner.mdx b/docs/_snippets/alerts/owner.mdx index f9f2876f1..7d422109b 100644 --- a/docs/_snippets/alerts/owner.mdx +++ b/docs/_snippets/alerts/owner.mdx @@ -8,15 +8,17 @@ Elementary enriches alerts with [owners for models or tests](https://docs.getdbt ```yml model models: - name: my_model_name - meta: - owner: "@jessica.jones" + config: + meta: + owner: "@jessica.jones" ``` ```yml test -tests: +data_tests: - not_null: - meta: - owner: ["@jessica.jones", "@joe.joseph"] + config: + meta: + owner: ["@jessica.jones", "@joe.joseph"] ``` ```yml test/model config block @@ -36,7 +38,7 @@ models/sources: +meta: owner: "@jessica.jones" -tests: +data_tests: path: subfolder: +meta: diff --git a/docs/_snippets/alerts/subscribers.mdx b/docs/_snippets/alerts/subscribers.mdx index a76111903..92f2c1f92 100644 --- a/docs/_snippets/alerts/subscribers.mdx +++ b/docs/_snippets/alerts/subscribers.mdx @@ -8,15 +8,17 @@ If you want additional users besides the owner to be tagged on an alert, add the ```yml model models: - name: my_model_name - meta: - subscribers: "@jessica.jones" + config: + meta: + subscribers: "@jessica.jones" ``` ```yml test -tests: +data_tests: - not_null: - meta: - subscribers: ["@jessica.jones", "@joe.joseph"] + config: + meta: + subscribers: ["@jessica.jones", "@joe.joseph"] ``` ```yml test/model config block @@ -34,7 +36,7 @@ models: +meta: subscribers: "@jessica.jones" -tests: +data_tests: path: subfolder: +meta: diff --git a/docs/_snippets/alerts/tags.mdx b/docs/_snippets/alerts/tags.mdx index 4414d90c7..08138bab6 100644 --- a/docs/_snippets/alerts/tags.mdx +++ b/docs/_snippets/alerts/tags.mdx @@ -12,7 +12,7 @@ models: ``` ```yml test -tests: +data_tests: - not_null: tags: ["#marketing", "#data_ops"] ``` @@ -30,7 +30,7 @@ models: subfolder: tags: ["#marketing", "#data_ops"] -tests: +data_tests: path: subfolder: tags: ["#marketing", "#data_ops"] diff --git a/docs/_snippets/cloud/ai-privacy-policy-short.mdx b/docs/_snippets/cloud/ai-privacy-policy-short.mdx index cfe9343f5..0e761fb9f 100644 --- a/docs/_snippets/cloud/ai-privacy-policy-short.mdx +++ b/docs/_snippets/cloud/ai-privacy-policy-short.mdx @@ -17,4 +17,4 @@ Each agent may use the following data to provide recommendations: Elementary does **not collect or share any sensitive data**, and you can review and align our AI use with your organization’s 
governance policies at any time. -For full details, see our [AI Privacy Policy](/cloud/ai-privacy). \ No newline at end of file +For full details, see our [AI Privacy Policy](/cloud/general/ai-privacy-policy). \ No newline at end of file diff --git a/docs/_snippets/guides/alerts-code-configuration.mdx b/docs/_snippets/guides/alerts-code-configuration.mdx index 8e16d87ac..aab2625ab 100644 --- a/docs/_snippets/guides/alerts-code-configuration.mdx +++ b/docs/_snippets/guides/alerts-code-configuration.mdx @@ -76,15 +76,17 @@ the CLI or the `config.yml` file. ```yml model models: - name: my_model_name - meta: - channel: data_ops + config: + meta: + channel: data_ops ``` ```yml test -tests: +data_tests: - not_null: - meta: - channel: data_ops + config: + meta: + channel: data_ops ``` ```yml test/model config block @@ -102,7 +104,7 @@ models: +meta: channel: data_ops -tests: +data_tests: path: subfolder: +meta: @@ -126,15 +128,17 @@ Note: if you configure a suppression interval using this method, it will overrid ```yml model models: - name: my_model_name - meta: - alert_suppression_interval: 24 + config: + meta: + alert_suppression_interval: 24 ``` ```yml test -tests: +data_tests: - not_null: - meta: - alert_suppression_interval: 12 + config: + meta: + alert_suppression_interval: 12 ``` ```yml test/model config block @@ -152,7 +156,7 @@ models: +meta: alert_suppression_interval: 24 -tests: +data_tests: path: subfolder: +meta: @@ -176,15 +180,17 @@ Due to their nature, grouped alerts will contain less information on each issue. ```yml model models: - name: my_model_name - meta: - slack_group_alerts_by: table + config: + meta: + slack_group_alerts_by: table ``` ```yml test -tests: +data_tests: - not_null: - meta: - slack_group_alerts_by: table + config: + meta: + slack_group_alerts_by: table ``` ```yml test/model config block @@ -202,7 +208,7 @@ models: +meta: slack_group_alerts_by: table -tests: +data_tests: path: subfolder: +meta: @@ -236,15 +242,17 @@ Supported alert fields: ```yml model models: - name: my_model_name - meta: - alert_fields: ["description", "owners", "tags", "subscribers"] + config: + meta: + alert_fields: ["description", "owners", "tags", "subscribers"] ``` ```yml test -tests: +data_tests: - not_null: - meta: - alert_fields: ["description", "owners", "tags", "subscribers"] + config: + meta: + alert_fields: ["description", "owners", "tags", "subscribers"] ``` ```yml test/model config block @@ -262,7 +270,7 @@ models: +meta: alert_fields: ["description", "owners", "tags", "subscribers"] -tests: +data_tests: path: subfolder: +meta: diff --git a/docs/cloud/best-practices/governance-for-observability.mdx b/docs/cloud/best-practices/governance-for-observability.mdx index fc16f438c..66ba2d95b 100644 --- a/docs/cloud/best-practices/governance-for-observability.mdx +++ b/docs/cloud/best-practices/governance-for-observability.mdx @@ -43,14 +43,15 @@ A data asset or test should have only one owner, but other people might want to ```yaml models: - name: return_on_ad_spend - tags: - - marketing-public - - marketing - meta: - owner: :"@analytics.engineer" - subscribers: - - "@marketing.data.analyst" - - "@another.marketing.data.analyst" + config: + tags: + - marketing-public + - marketing + meta: + owner: :"@analytics.engineer" + subscribers: + - "@marketing.data.analyst" + - "@another.marketing.data.analyst" ``` ## Business domains & Data products @@ -108,12 +109,13 @@ Ownership and subscribers: ```yaml sources: - name: fivetran_salesforce_sync - tags: - - fivetran - - salesforce - meta: - 
owner: :"@data.engineer" - subscribers: "@analytics.engineer" + config: + tags: + - fivetran + - salesforce + meta: + owner: :"@data.engineer" + subscribers: "@analytics.engineer" ``` diff --git a/docs/cloud/best-practices/triage-and-response.mdx b/docs/cloud/best-practices/triage-and-response.mdx index 4ba4bf310..c80d3b267 100644 --- a/docs/cloud/best-practices/triage-and-response.mdx +++ b/docs/cloud/best-practices/triage-and-response.mdx @@ -38,14 +38,14 @@ According to these answers, you should add configuration that will impact the al ```yaml -tests: +data_tests: - unique: config: error_if: ">10" - meta: - description: "More than 10 duplicate records arriving from the source, as this is a staging table" - owner: "@data.provider" - tags: "critical", "mongo-db", "raw-production-replica" + meta: + description: "More than 10 duplicate records arriving from the source, as this is a staging table" + owner: "@data.provider" + tags: "critical", "mongo-db", "raw-production-replica" ``` ## Alert distribution @@ -129,7 +129,7 @@ These are the questions that should be asked, and product tips on how to answer - Does the incident break the pipeline / create delay? - Is the failure is a model failure, or a freshness issue? - - Do we run `dbt build` and this failure stoped the pipeline? + - Do we run `dbt build` and this failure stopped the pipeline? - Check the **Model runs** section of the dashboard to see if there are skipped models, as failures in build cause the downstream models to be skipped. diff --git a/docs/cloud/features/anomaly-detection/metrics.mdx b/docs/cloud/features/anomaly-detection/metrics.mdx index f17164049..65511875b 100644 --- a/docs/cloud/features/anomaly-detection/metrics.mdx +++ b/docs/cloud/features/anomaly-detection/metrics.mdx @@ -27,18 +27,19 @@ No mandatory configuration, however it is highly recommended to configure a `tim {/* prettier-ignore */}
  
-  tests:
+  data_tests:
       -- elementary.collect_metrics:
-          timestamp_column: column name
-          time_bucket:
-            period: [hour | day]
-            count: int
-          dimensions: sql expression
-          metrics: monitors list
-           name: string
-           type: monitor type
-           columns: list
-          where_expression: sql expression
+          arguments:
+              timestamp_column: column name
+              time_bucket:
+                period: [hour | day]
+                count: int
+              dimensions: sql expression
+              metrics: monitors list
+               name: string
+               type: monitor type
+               columns: list
+              where_expression: sql expression
  
 
@@ -47,45 +48,46 @@ No mandatory configuration, however it is highly recommended to configure a `tim ```yml Models models: - name: < model name > - tests: + data_tests: - elementary.collect_metrics: - timestamp_column: < timestamp column > - time_bucket: # Daily by default - period: < time period > - count: < number of periods > - dimensions: < list of dimensions to group by > - metrics: < list of metrics > - - name: < user defined name for metric > - type: < which metric to calculate > - columns: < which columns to calculate the metric on- for column metrics > - where_expression: < sql expression > - cloud_monitored: < boolean, should Elementary automatically create anomaly tests for the collected metrics? > + arguments: + timestamp_column: < timestamp column > + time_bucket: # Daily by default + period: < time period > + count: < number of periods > + dimensions: < list of dimensions to group by > + metrics: < list of metrics > + - name: < user defined name for metric > + type: < which metric to calculate > + columns: < which columns to calculate the metric on- for column metrics > + where_expression: < sql expression > + cloud_monitored: < boolean, should Elementary automatically create anomaly tests for the collected metrics? > ``` ```yml Models example models: - name: login_events - tests: + data_tests: - elementary.collect_metrics: - timestamp_column: 'loaded_at' - time_bucket: - period: hour - count: 1 - dimensions: - - country_id - - platform_id - metrics: - - name: row_count - type: row_count - - name: filtered_row_count - type: row_count - - name: null_count - type: null_count - columns: ["hello", "world"] - where_expression: "country = 'USA'" - cloud_monitored: true - + arguments: + timestamp_column: 'loaded_at' + time_bucket: + period: hour + count: 1 + dimensions: + - country_id + - platform_id + metrics: + - name: row_count + type: row_count + - name: filtered_row_count + type: row_count + - name: null_count + type: null_count + columns: ["hello", "world"] + where_expression: "country = 'USA'" + cloud_monitored: true ``` diff --git a/docs/cloud/features/collaboration-and-communication/data-health.mdx b/docs/cloud/features/collaboration-and-communication/data-health.mdx index d08d055b8..d4bf6d5a0 100644 --- a/docs/cloud/features/collaboration-and-communication/data-health.mdx +++ b/docs/cloud/features/collaboration-and-communication/data-health.mdx @@ -48,10 +48,11 @@ The way to do so is to add `quality_dimension` to the test definition in your db ```yml test -tests: +data_tests: - not_null: - meta: - quality_dimension: completeness + config: + meta: + quality_dimension: completeness ``` ```yml test/model config block diff --git a/docs/cloud/features/data-tests/test-coverage-screen.mdx b/docs/cloud/features/data-tests/test-coverage-screen.mdx index 94d59e872..63d02a18a 100644 --- a/docs/cloud/features/data-tests/test-coverage-screen.mdx +++ b/docs/cloud/features/data-tests/test-coverage-screen.mdx @@ -31,7 +31,7 @@ For each asset, you’ll see: - **Which dimensions are covered** by existing tests - **Where coverage is missing** -- [A **coverage score** between **0–100%**](docs/cloud/features/data-tests/test-coverage-screen#how-coverage-calculation-works). +- [A **coverage score** between **0–100%**](/cloud/features/data-tests/test-coverage-screen#how-coverage-calculation-works). 
- **Links to test results** ## What You Can Do from This Screen diff --git a/docs/cloud/features/performance-monitoring/performance-alerts.mdx b/docs/cloud/features/performance-monitoring/performance-alerts.mdx index 8ae3e905e..e23b813e1 100644 --- a/docs/cloud/features/performance-monitoring/performance-alerts.mdx +++ b/docs/cloud/features/performance-monitoring/performance-alerts.mdx @@ -57,18 +57,19 @@ sources: - name: model_run_results columns: - name: execution_time - tests: + data_tests: - elementary.column_anomalies: config: severity: warn - tags: ["model_performance"] - column_anomalies: - - max - dimensions: ["package_name", "name"] - timestamp_column: generated_at - anomaly_direction: spike - ignore_small_changes: - spike_failure_percent_threshold: 10 + tags: ["model_performance"] + arguments: + column_anomalies: + - max + dimensions: ["package_name", "name"] + timestamp_column: generated_at + anomaly_direction: spike + ignore_small_changes: + spike_failure_percent_threshold: 10 ``` In this configuration: @@ -78,7 +79,7 @@ In this configuration: - Small changes under 10% are ignored (`spike_failure_percent_threshold: 10`) - The severity is set to "warn" but can be adjusted as needed -This test will detect when a model's execution time increases significantly compared to its historical performance, triggering an alert when the increase exceeds the normal basline. +This test will detect when a model's execution time increases significantly compared to its historical performance, triggering an alert when the increase exceeds the normal baseline. ## Choosing the Right Approach diff --git a/docs/cloud/general/ai-privacy-policy.mdx b/docs/cloud/general/ai-privacy-policy.mdx index bba9dfbe5..15d62835b 100644 --- a/docs/cloud/general/ai-privacy-policy.mdx +++ b/docs/cloud/general/ai-privacy-policy.mdx @@ -3,7 +3,7 @@ title: "AI Privacy Policy" icon: "user-lock" --- -**We are committed to maintaining the highest standards of data privacy and protection. This AI Privacy Policy outlines how we handle data in the context of our AI-powered features, including our [suite of AI agents](/cloud/agents).** +**We are committed to maintaining the highest standards of data privacy and protection. This AI Privacy Policy outlines how we handle data in the context of our AI-powered features, including our [suite of AI agents](/cloud/ai-agents).** Elementary’s AI features are designed to enhance the user experience across key workflows, enabling natural language responses, automated data exploration, intelligent issue triage and resolution, proactive test and governance recommendations, and query optimization. These features are strictly opt-in and must be explicitly enabled for each customer instance. 
diff --git a/docs/data-tests/add-elementary-tests.mdx b/docs/data-tests/add-elementary-tests.mdx index f9fbcac6f..777c40a41 100644 --- a/docs/data-tests/add-elementary-tests.mdx +++ b/docs/data-tests/add-elementary-tests.mdx @@ -86,35 +86,39 @@ models: config: elementary: timestamp_column: < timestamp column > - tests: + data_tests: - elementary.freshness_anomalies: - # optional - configure different freshness column than timestamp column - where_expression: < sql expression > - time_bucket: - period: < time period > - count: < number of periods > + arguments: + # optional - configure different freshness column than timestamp column + where_expression: < sql expression > + time_bucket: + period: < time period > + count: < number of periods > - elementary.all_columns_anomalies: - column_anomalies: < specific monitors, all if null > - where_expression: < sql expression > - time_bucket: - period: < time period > - count: < number of periods > + arguments: + column_anomalies: < specific monitors, all if null > + where_expression: < sql expression > + time_bucket: + period: < time period > + count: < number of periods > - elementary.schema_changes - elementary.dimension_anomalies: - dimensions: < columns or sql expressions of columns > - # optional - configure a where a expression to accurate the dimension monitoring - where_expression: < sql expression > - time_bucket: - period: < time period > - count: < number of periods > + arguments: + dimensions: < columns or sql expressions of columns > + # optional - configure a where a expression to accurate the dimension monitoring + where_expression: < sql expression > + time_bucket: + period: < time period > + count: < number of periods > - name: < model name > ## if no timestamp is configured, elementary will monitor without time filtering columns: - name: < column name > - tests: + data_tests: - elementary.column_anomalies: - column_anomalies: < specific monitors, all if null > + arguments: + column_anomalies: < specific monitors, all if null > ``` ```yml Models example @@ -125,51 +129,59 @@ models: config: elementary: timestamp_column: 'loaded_at' - tests: + data_tests: - elementary.volume_anomalies: - # optional - use tags to run elementary tests on a dedicated run - tags: ['elementary'] config: - # optional - change severity + # optional - use tags to run elementary tests on a dedicated run + tags: ['elementary'] + # optional - change severity severity: warn - elementary.all_columns_anomalies: - tags: ['elementary'] - # optional - change global sensitivity - anomaly_sensitivity: 3.5 - timestamp_column: 'updated_at' + config: + tags: ['elementary'] + arguments: + # optional - change global sensitivity + anomaly_sensitivity: 3.5 + timestamp_column: 'updated_at' - elementary.schema_changes: - tags: ['elementary'] config: + tags: ['elementary'] severity: warn - elementary.dimension_anomalies: - dimensions: - - event_type - - country_name - where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" - # optional - use tags to run elementary tests on a dedicated run - tags: ['elementary'] + arguments: + dimensions: + - event_type + - country_name + where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" config: - # optional - change severity + # optional - use tags to run elementary tests on a dedicated run + tags: ['elementary'] + # optional - change severity severity: warn - name: users ## if no timestamp is configured, elementary will monitor without time filtering - 
tests: + data_tests: elementary.volume_anomalies - tags: ['elementary'] + config: + tags: ['elementary'] columns: - name: user_id - tests: + data_tests: - elementary.column_anomalies: - tags: ['elementary'] - timestamp_column: 'updated_at' + config: + tags: ['elementary'] + arguments: + timestamp_column: 'updated_at' - name: user_name - tests: + data_tests: - elementary.column_anomalies: - column_anomalies: - - missing_count - - min_length - tags: ['elementary'] + arguments: + column_anomalies: + - missing_count + - min_length + config: + tags: ['elementary'] ``` ```yml Sources @@ -179,11 +191,10 @@ sources: schema: < schema > tables: - name: < table_name > - ## sources don't have config, so elementary config is placed under 'meta' - meta: + config: elementary: timestamp_column: < source timestamp column > - tests: + data_tests: ``` ```yml Sources example @@ -193,25 +204,26 @@ sources: schema: "product" tables: - name: "raw_product_login_events" - ## sources don't have config, so elementary config is placed under 'meta' - meta: + config: elementary: timestamp_column: "loaded_at" - tests: + data_tests: - elementary.freshness_anomalies - elementary.dimension_anomalies: - dimensions: - - event_type + arguments: + dimensions: + - event_type - elementary.all_columns_anomalies: - column_anomalies: - - null_count - - missing_count - - zero_count + arguments: + column_anomalies: + - null_count + - missing_count + - zero_count - elementary.schema_changes_from_baseline columns: - name: user_id data_type: text - tests: + data_tests: - elementary.column_anomalies - name: event_name data_type: text diff --git a/docs/data-tests/ai-data-tests/ai_data_validations.mdx b/docs/data-tests/ai-data-tests/ai_data_validations.mdx index e4bd0c373..999a5fa9f 100644 --- a/docs/data-tests/ai-data-tests/ai_data_validations.mdx +++ b/docs/data-tests/ai-data-tests/ai_data_validations.mdx @@ -71,10 +71,11 @@ models: - name: < model name > columns: - name: < column name > - tests: + data_tests: - elementary.ai_data_validation: - expectation_prompt: "Description of what the data should satisfy" - llm_model_name: "model_name" # Optional + arguments: + expectation_prompt: "Description of what the data should satisfy" + llm_model_name: "model_name" # Optional ``` ```yml Example - Date Validation @@ -86,9 +87,10 @@ models: columns: - name: contract_date description: "The date when the contract was signed." - tests: + data_tests: - elementary.ai_data_validation: - expectation_prompt: "There should be no contract date in the future" + arguments: + expectation_prompt: "There should be no contract date in the future" ``` ```yml Example - Numeric Validation @@ -100,10 +102,11 @@ models: columns: - name: discount_percentage description: "The discount percentage applied to the sale." - tests: + data_tests: - elementary.ai_data_validation: - expectation_prompt: "The discount percentage should be between 0 and 50, and should only be a whole number." - llm_model_name: "claude-3-5-sonnet" + arguments: + expectation_prompt: "The discount percentage should be between 0 and 50, and should only be a whole number." + llm_model_name: "claude-3-5-sonnet" config: severity: warn ``` @@ -117,10 +120,11 @@ models: columns: - name: account_status description: "The current status of the customer account." - tests: + data_tests: - elementary.ai_data_validation: - expectation_prompt: "The account status should be one of: 'active', 'inactive', 'suspended', or 'pending'. 
If the account is 'suspended', there should be a reason code in the suspension_reason column." - llm_model_name: "gemini-1.5-pro" + arguments: + expectation_prompt: "The account status should be one of: 'active', 'inactive', 'suspended', or 'pending'. If the account is 'suspended', there should be a reason code in the suspension_reason column." + llm_model_name: "gemini-1.5-pro" ``` diff --git a/docs/data-tests/ai-data-tests/supported-platforms/bigquery.mdx b/docs/data-tests/ai-data-tests/supported-platforms/bigquery.mdx index 37b522033..0eb9464ef 100644 --- a/docs/data-tests/ai-data-tests/supported-platforms/bigquery.mdx +++ b/docs/data-tests/ai-data-tests/supported-platforms/bigquery.mdx @@ -96,10 +96,11 @@ models: columns: - name: text_data description: "Unstructured text data stored as a string." - tests: + data_tests: - elementary.validate_unstructured_data: - expectation_prompt: "The text data should represent an example of unstructured data." - llm_model_name: "gemini-1.5-pro" + arguments: + expectation_prompt: "The text data should represent an example of unstructured data." + llm_model_name: "gemini-1.5-pro" ``` diff --git a/docs/data-tests/ai-data-tests/unstructured_data_validations.mdx b/docs/data-tests/ai-data-tests/unstructured_data_validations.mdx index 95a8046b2..e6192524d 100644 --- a/docs/data-tests/ai-data-tests/unstructured_data_validations.mdx +++ b/docs/data-tests/ai-data-tests/unstructured_data_validations.mdx @@ -70,10 +70,11 @@ models: - name: < model name > columns: - name: < column name > - tests: + data_tests: - elementary.unstructured_data_validation: - expectation_prompt: "Description of what the text should contain or represent" - llm_model_name: "model_name" + arguments: + expectation_prompt: "Description of what the text should contain or represent" + llm_model_name: "model_name" ``` ```yml Example @@ -85,10 +86,11 @@ models: columns: - name: text_data description: "Unstructured text data stored as a string." - tests: + data_tests: - elementary.unstructured_data_validation: - expectation_prompt: "The text data should represent an example of unstructured data." - llm_model_name: "test_model" + arguments: + expectation_prompt: "The text data should represent an example of unstructured data." + llm_model_name: "test_model" ``` ```yml Example - Validating Customer Feedback @@ -100,10 +102,11 @@ models: columns: - name: feedback_text description: "Customer feedback in free text format." - tests: + data_tests: - elementary.unstructured_data_validation: - expectation_prompt: "The text should be a customer feedback comment in English, it should describe only a bug or a feature request." - llm_model_name: "claude-3-5-sonnet" + arguments: + expectation_prompt: "The text should be a customer feedback comment in English, it should describe only a bug or a feature request." + llm_model_name: "claude-3-5-sonnet" config: severity: warn ``` @@ -124,10 +127,11 @@ models: columns: - name: doctor_notes description: "A column containing the doctor notes on the prescription" - tests: + data_tests: - elementary.unstructured_data_validation: - expectation_prompt: "The prescription has to include a limited time period and recommendations to the patient" - llm_model_name: "claude-3-5-sonnet" + arguments: + expectation_prompt: "The prescription has to include a limited time period and recommendations to the patient" + llm_model_name: "claude-3-5-sonnet" ``` Test fails if: A doctor's note does not specify a time period or lacks recommendations for the patient. 
@@ -141,10 +145,11 @@ models: columns: - name: negative_feedbacks description: "A column containing negative feedbacks about our product." - tests: + data_tests: - elementary.unstructured_data_validation: - expectation_prompt: "The customer feedback's sentiment has to be negative" - llm_model_name: "claude-3-5-sonnet" + arguments: + expectation_prompt: "The customer feedback's sentiment has to be negative" + llm_model_name: "claude-3-5-sonnet" ``` Test fails if: Any feedback in `negative_feedbacks` is not actually negative. @@ -153,16 +158,17 @@ Test fails if: Any feedback in `negative_feedbacks` is not actually negative. ```yml models: - - name: summerized_pdfs + - name: summarized_pdfs description: "A table containing a summary of our ingested PDFs." columns: - name: pdf_summary description: "A column containing the main PDF's content summary." - tests: + data_tests: - elementary.validate_similarity: - to: ref('pdf_source_table') - column: pdf_content - match_by: pdf_name + arguments: + to: ref('pdf_source_table') + column: pdf_content + match_by: pdf_name ``` Test fails if: A PDF summary does not accurately represent the original PDF's content. The validation will use the pdf name as the key to match a summary from the pdf_summary table to the pdf_content in the pdf_source_table. @@ -172,9 +178,10 @@ models: - name: jobs columns: - name: job_title - tests: + data_tests: - elementary.validate_similarity: - column: job_description + arguments: + column: job_description ``` Test fails if: The job title does not align with the job description. @@ -188,9 +195,10 @@ models: columns: - name: issue_description description: "A column containing customer-reported issues." - tests: + data_tests: - elementary.accepted_categories: - categories: ['billing', 'technical_support', 'account_access', 'other'] + arguments: + categories: ['billing', 'technical_support', 'account_access', 'other'] ``` Test fails if: A support ticket does not fall within the predefined categories. @@ -204,15 +212,16 @@ models: columns: - name: article_text description: "A column containing full article text." - tests: + data_tests: - elementary.extract_and_validate_entities: - entities: - organization: - required: true - accepted_values: ['Google', 'Amazon', 'Microsoft', 'Apple'] - location: - required: false - accepted_values: {{ run_query('select zip_code from locations') }} + arguments: + entities: + organization: + required: true + accepted_values: ['Google', 'Amazon', 'Microsoft', 'Apple'] + location: + required: false + accepted_values: {{ run_query('select zip_code from locations') }} ``` Test fails if: @@ -228,20 +237,21 @@ models: columns: - name: meeting_notes description: "A column containing the full summary of the board meeting." 
- tests: + data_tests: - elementary.extract_and_validate_numbers: - entities: - revenue: - compare_with: ref('crm_financials') - column: sum(revenue) - required: true - net_profit: - compare_with: ref('crm_financials') - column: sum(net_profit) - customer_count: - compare_with: ref('crm_customers') - column: count(customers) - required: true + arguments: + entities: + revenue: + compare_with: ref('crm_financials') + column: sum(revenue) + required: true + net_profit: + compare_with: ref('crm_financials') + column: sum(net_profit) + customer_count: + compare_with: ref('crm_customers') + column: count(customers) + required: true ``` Test fails if: diff --git a/docs/data-tests/anomaly-detection-configuration/anomaly-direction.mdx b/docs/data-tests/anomaly-detection-configuration/anomaly-direction.mdx index 75e36e4e2..37e09af22 100644 --- a/docs/data-tests/anomaly-detection-configuration/anomaly-direction.mdx +++ b/docs/data-tests/anomaly-detection-configuration/anomaly-direction.mdx @@ -28,16 +28,18 @@ The anomaly_direction configuration is used to configure the direction of the ex ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - anomaly_direction: drop + arguments: + anomaly_direction: drop - elementary.all_columns_anomalies: - column_anomalies: - - null_count - - missing_count - - zero_count - anomaly_direction: spike + arguments: + column_anomalies: + - null_count + - missing_count + - zero_count + anomaly_direction: spike ``` ```yml model diff --git a/docs/data-tests/anomaly-detection-configuration/anomaly-params.mdx b/docs/data-tests/anomaly-detection-configuration/anomaly-params.mdx index 490a12281..6e160aead 100644 --- a/docs/data-tests/anomaly-detection-configuration/anomaly-params.mdx +++ b/docs/data-tests/anomaly-detection-configuration/anomaly-params.mdx @@ -70,11 +70,11 @@ models: config: elementary: timestamp_column: < model timestamp column > - tests: < here you will add elementary monitors as tests > + data_tests: < here you will add elementary monitors as tests > - name: ## if no timestamp is configured, elementary will monitor without time filtering - tests: + data_tests: ``` ```yml Models example @@ -85,17 +85,20 @@ models: config: elementary: timestamp_column: updated_at - tests: + data_tests: - elementary.freshness_anomalies: - tags: ["elementary"] + config: + tags: ["elementary"] - elementary.all_columns_anomalies: - tags: ["elementary"] + config: + tags: ["elementary"] - name: users ## if no timestamp is configured, elementary will monitor without time filtering - tests: + data_tests: - elementary.volume_anomalies: - tags: ["elementary"] + config: + tags: ["elementary"] ``` ```yml Sources @@ -107,11 +110,10 @@ sources: schema: < schema > tables: - name: < table_name > - ## sources don't have config, so elementary config is placed under 'meta' - meta: + config: elementary: timestamp_column: < source timestamp column > - tests: + data_tests: ``` ```yml Sources example @@ -123,20 +125,20 @@ sources: schema: "product" tables: - name: "raw_product_login_events" - ## sources don't have config, so elementary config is placed under 'meta' - meta: + config: elementary: timestamp_column: "loaded_at" - tests: + data_tests: - elementary.volume_anomalies - elementary.all_columns_anomalies: - column_anomalies: - - null_count - - missing_count - - zero_count + arguments: + column_anomalies: + - null_count + - missing_count + - zero_count columns: - name: user_id - tests: + data_tests: - elementary.column_anomalies ``` diff --git 
a/docs/data-tests/anomaly-detection-configuration/anomaly-sensitivity.mdx b/docs/data-tests/anomaly-detection-configuration/anomaly-sensitivity.mdx index d2f6b7e60..bd3695c5f 100644 --- a/docs/data-tests/anomaly-detection-configuration/anomaly-sensitivity.mdx +++ b/docs/data-tests/anomaly-detection-configuration/anomaly-sensitivity.mdx @@ -26,16 +26,18 @@ Larger values will have the opposite effect and will reduce the number of anomal ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - anomaly_sensitivity: 2.5 + arguments: + anomaly_sensitivity: 2.5 - elementary.all_columns_anomalies: - column_anomalies: - - null_count - - missing_count - - zero_count - anomaly_sensitivity: 4 + arguments: + column_anomalies: + - null_count + - missing_count + - zero_count + anomaly_sensitivity: 4 ``` ```yml model diff --git a/docs/data-tests/anomaly-detection-configuration/column-anomalies.mdx b/docs/data-tests/anomaly-detection-configuration/column-anomalies.mdx index b06fa1878..6b5922569 100644 --- a/docs/data-tests/anomaly-detection-configuration/column-anomalies.mdx +++ b/docs/data-tests/anomaly-detection-configuration/column-anomalies.mdx @@ -17,12 +17,13 @@ Select which monitors to activate as part of the test. ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.column_anomalies: - column_anomalies: - - null_count - - missing_count - - average + arguments: + column_anomalies: + - null_count + - missing_count + - average ``` diff --git a/docs/data-tests/anomaly-detection-configuration/detection-delay.mdx b/docs/data-tests/anomaly-detection-configuration/detection-delay.mdx index 973f870d2..30daf80d3 100644 --- a/docs/data-tests/anomaly-detection-configuration/detection-delay.mdx +++ b/docs/data-tests/anomaly-detection-configuration/detection-delay.mdx @@ -23,11 +23,12 @@ That's useful in cases which the latest data should be excluded from the test. F ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - detection_delay: - period: day - count: 1 + arguments: + detection_delay: + period: day + count: 1 ``` ```yml model diff --git a/docs/data-tests/anomaly-detection-configuration/detection-period.mdx b/docs/data-tests/anomaly-detection-configuration/detection-period.mdx index 3d6176dd4..7851cdf63 100644 --- a/docs/data-tests/anomaly-detection-configuration/detection-period.mdx +++ b/docs/data-tests/anomaly-detection-configuration/detection-period.mdx @@ -28,11 +28,12 @@ This configuration should be changed according to your data delays. 
```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - detection_period: - period: day - count: 30 + arguments: + detection_period: + period: day + count: 30 ``` ```yml model diff --git a/docs/data-tests/anomaly-detection-configuration/dimensions.mdx b/docs/data-tests/anomaly-detection-configuration/dimensions.mdx index 0448744e6..5cb68de8e 100644 --- a/docs/data-tests/anomaly-detection-configuration/dimensions.mdx +++ b/docs/data-tests/anomaly-detection-configuration/dimensions.mdx @@ -28,11 +28,12 @@ models: config: elementary: timestamp_column: updated_at - tests: + data_tests: - elementary.dimension_anomalies: - dimensions: - - device_os - - device_browser + arguments: + dimensions: + - device_os + - device_browser ``` diff --git a/docs/data-tests/anomaly-detection-configuration/event_timestamp_column.mdx b/docs/data-tests/anomaly-detection-configuration/event_timestamp_column.mdx index d9022c689..aeeb6d6e3 100644 --- a/docs/data-tests/anomaly-detection-configuration/event_timestamp_column.mdx +++ b/docs/data-tests/anomaly-detection-configuration/event_timestamp_column.mdx @@ -23,10 +23,11 @@ The test can work in a couple of modes: ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.event_timestamp_column: - event_timestamp_column: "event_timestamp" - update_timestamp_column: "created_at" + arguments: + event_timestamp_column: "event_timestamp" + update_timestamp_column: "created_at" ``` diff --git a/docs/data-tests/anomaly-detection-configuration/exclude-final-results.mdx b/docs/data-tests/anomaly-detection-configuration/exclude-final-results.mdx index 4f62979c1..6859afee0 100644 --- a/docs/data-tests/anomaly-detection-configuration/exclude-final-results.mdx +++ b/docs/data-tests/anomaly-detection-configuration/exclude-final-results.mdx @@ -24,12 +24,13 @@ models: config: elementary: timestamp_column: updated_at - tests: + data_tests: - elementary.dimension_anomalies: - dimensions: - - device_os - - device_browser - exclude_final_results: 'value > 1000 or average > 10' + arguments: + dimensions: + - device_os + - device_browser + exclude_final_results: 'value > 1000 or average > 10' ``` diff --git a/docs/data-tests/anomaly-detection-configuration/exclude_prefix.mdx b/docs/data-tests/anomaly-detection-configuration/exclude_prefix.mdx index bdd705786..486bd48b8 100644 --- a/docs/data-tests/anomaly-detection-configuration/exclude_prefix.mdx +++ b/docs/data-tests/anomaly-detection-configuration/exclude_prefix.mdx @@ -17,9 +17,10 @@ Param for the `all_columns_anomalies` test only, which enables to exclude a colu ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.column_anomalies: - exclude_prefix: "id_" + arguments: + exclude_prefix: "id_" ``` diff --git a/docs/data-tests/anomaly-detection-configuration/exclude_regexp.mdx b/docs/data-tests/anomaly-detection-configuration/exclude_regexp.mdx index a2df8a96c..c3f566222 100644 --- a/docs/data-tests/anomaly-detection-configuration/exclude_regexp.mdx +++ b/docs/data-tests/anomaly-detection-configuration/exclude_regexp.mdx @@ -17,9 +17,10 @@ Param for the `all_columns_anomalies` test only, which enables to exclude a colu ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.column_anomalies: - exclude_regexp: ".*SDC$" + arguments: + exclude_regexp: ".*SDC$" ``` diff --git a/docs/data-tests/anomaly-detection-configuration/fail_on_zero.mdx b/docs/data-tests/anomaly-detection-configuration/fail_on_zero.mdx index 
2aaa9ff75..b8d5bab62 100644 --- a/docs/data-tests/anomaly-detection-configuration/fail_on_zero.mdx +++ b/docs/data-tests/anomaly-detection-configuration/fail_on_zero.mdx @@ -17,9 +17,10 @@ If undefined, default is false. ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - fail_on_zero: true + arguments: + fail_on_zero: true ``` ```yml model diff --git a/docs/data-tests/anomaly-detection-configuration/ignore_small_changes.mdx b/docs/data-tests/anomaly-detection-configuration/ignore_small_changes.mdx index 6158e73ae..84c95f1ee 100644 --- a/docs/data-tests/anomaly-detection-configuration/ignore_small_changes.mdx +++ b/docs/data-tests/anomaly-detection-configuration/ignore_small_changes.mdx @@ -29,11 +29,12 @@ If undefined, default is null for both spike and drop. ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - ignore_small_changes: - spike_failure_percent_threshold: 2 - drop_failure_percent_threshold: 50 + arguments: + ignore_small_changes: + spike_failure_percent_threshold: 2 + drop_failure_percent_threshold: 50 ``` ```yml model @@ -51,7 +52,7 @@ sources: schema: raw tables: - name: source_table - meta: + config: elementary: ignore_small_changes: drop_failure_percent_threshold: 50 diff --git a/docs/data-tests/anomaly-detection-configuration/seasonality.mdx b/docs/data-tests/anomaly-detection-configuration/seasonality.mdx index cafdec2be..d21fccece 100644 --- a/docs/data-tests/anomaly-detection-configuration/seasonality.mdx +++ b/docs/data-tests/anomaly-detection-configuration/seasonality.mdx @@ -49,9 +49,10 @@ The expected range for Monday will be based on a training set of previous Monday ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - seasonality: day_of_week + arguments: + seasonality: day_of_week ``` ```yml model diff --git a/docs/data-tests/anomaly-detection-configuration/time-bucket.mdx b/docs/data-tests/anomaly-detection-configuration/time-bucket.mdx index 124440b85..8b2399cff 100644 --- a/docs/data-tests/anomaly-detection-configuration/time-bucket.mdx +++ b/docs/data-tests/anomaly-detection-configuration/time-bucket.mdx @@ -33,11 +33,12 @@ For example, if you want to detect volume anomalies in an hourly resolution, you ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - time_bucket: - period: day - count: 2 + arguments: + time_bucket: + period: day + count: 2 ``` ```yml model diff --git a/docs/data-tests/anomaly-detection-configuration/timestamp-column.mdx b/docs/data-tests/anomaly-detection-configuration/timestamp-column.mdx index dcd2d2984..15522255f 100644 --- a/docs/data-tests/anomaly-detection-configuration/timestamp-column.mdx +++ b/docs/data-tests/anomaly-detection-configuration/timestamp-column.mdx @@ -30,9 +30,10 @@ If undefined, default is null (no time buckets). 
```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - timestamp_column: created_at + arguments: + timestamp_column: created_at ``` ```yml model @@ -49,7 +50,7 @@ sources: schema: raw tables: - name: source_table - meta: + config: elementary: timestamp_column: loaded_at ``` diff --git a/docs/data-tests/anomaly-detection-configuration/training-period.mdx b/docs/data-tests/anomaly-detection-configuration/training-period.mdx index 3a3b28e5e..09ae5aed8 100644 --- a/docs/data-tests/anomaly-detection-configuration/training-period.mdx +++ b/docs/data-tests/anomaly-detection-configuration/training-period.mdx @@ -23,11 +23,12 @@ This timeframe includes the training period and detection period. If a detection ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - training_period: - period: day - count: 30 + arguments: + training_period: + period: day + count: 30 ``` ```yml model diff --git a/docs/data-tests/anomaly-detection-configuration/update_timestamp_column.mdx b/docs/data-tests/anomaly-detection-configuration/update_timestamp_column.mdx index a520fdf4b..0ddbe6414 100644 --- a/docs/data-tests/anomaly-detection-configuration/update_timestamp_column.mdx +++ b/docs/data-tests/anomaly-detection-configuration/update_timestamp_column.mdx @@ -23,10 +23,11 @@ The test can work in a couple of modes: ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.event_timestamp_column: - event_timestamp_column: "event_timestamp" - update_timestamp_column: "created_at" + arguments: + event_timestamp_column: "event_timestamp" + update_timestamp_column: "created_at" ``` diff --git a/docs/data-tests/anomaly-detection-configuration/where-expression.mdx b/docs/data-tests/anomaly-detection-configuration/where-expression.mdx index d8be5b5fb..e0beb045e 100644 --- a/docs/data-tests/anomaly-detection-configuration/where-expression.mdx +++ b/docs/data-tests/anomaly-detection-configuration/where-expression.mdx @@ -22,9 +22,10 @@ where_expression: EXTRACT(DOW FROM timestamp_column) BETWEEN 2 AND 6 ```yml test models: - name: this_is_a_model - tests: + data_tests: - elementary.volume_anomalies: - where_expression: "user_name != 'test'" + arguments: + where_expression: "user_name != 'test'" ``` ```yml model diff --git a/docs/data-tests/anomaly-detection-tests/Anomaly-troubleshooting-guide.mdx b/docs/data-tests/anomaly-detection-tests/Anomaly-troubleshooting-guide.mdx index 7e3d9fb96..b24933b5b 100644 --- a/docs/data-tests/anomaly-detection-tests/Anomaly-troubleshooting-guide.mdx +++ b/docs/data-tests/anomaly-detection-tests/Anomaly-troubleshooting-guide.mdx @@ -10,9 +10,10 @@ First, check if your test uses a timestamp column: ```yaml # In your YAML configuration -tests: +data_tests: - elementary.volume_anomalies: - timestamp_column: created_at# If this is configured, you have a timestamp-based test + arguments: + timestamp_column: created_at# If this is configured, you have a timestamp-based test ``` @@ -135,9 +136,10 @@ If your test isn't appearing in `data_monitoring_metrics`: Verify test configuration: ```yaml -tests: +data_tests: - elementary.volume_anomalies: - timestamp_column: created_at# Check if specified correctly + arguments: + timestamp_column: created_at# Check if specified correctly ``` ### Common causes: diff --git a/docs/data-tests/anomaly-detection-tests/all-columns-anomalies.mdx b/docs/data-tests/anomaly-detection-tests/all-columns-anomalies.mdx index 7093413d0..a2c177135 100644 --- 
a/docs/data-tests/anomaly-detection-tests/all-columns-anomalies.mdx
+++ b/docs/data-tests/anomaly-detection-tests/all-columns-anomalies.mdx
@@ -22,33 +22,34 @@ No mandatory configuration, however it is highly recommended to configure a `tim
{/* prettier-ignore */}
  
-  tests:
+  data_tests:
       -- elementary.all_columns_anomalies:
-          timestamp_column: column name
-          column_anomalies: column monitors list
-          dimensions: sql expression
-          exclude_prefix: string
-          exclude_regexp: regex
-          where_expression: sql expression
-          anomaly_sensitivity: int
-          anomaly_direction: [both | spike | drop]
-          detection_period:
-            period: [hour | day | week | month]
-            count: int
-          training_period:
-            period: [hour | day | week | month]
-            count: int
-          time_bucket:
-            period: [hour | day | week | month]
-            count: int
-          seasonality: day_of_week
-          detection_delay:
-            period: [hour | day | week | month]
-            count: int
-          ignore_small_changes:
-            spike_failure_percent_threshold: int
-            drop_failure_percent_threshold: int
-          anomaly_exclude_metrics: [SQL expression]
+          arguments:
+            timestamp_column: column name
+            column_anomalies: column monitors list
+            dimensions: sql expression
+            exclude_prefix: string
+            exclude_regexp: regex
+            where_expression: sql expression
+            anomaly_sensitivity: int
+            anomaly_direction: [both | spike | drop]
+            detection_period:
+              period: [hour | day | week | month]
+              count: int
+            training_period:
+              period: [hour | day | week | month]
+              count: int
+            time_bucket:
+              period: [hour | day | week | month]
+              count: int
+            seasonality: day_of_week
+            detection_delay:
+              period: [hour | day | week | month]
+              count: int
+            ignore_small_changes:
+              spike_failure_percent_threshold: int
+              drop_failure_percent_threshold: int
+            anomaly_exclude_metrics: [SQL expression]
  
 
@@ -60,13 +61,14 @@ models: config: elementary: timestamp_column: < timestamp column > - tests: + data_tests: - elementary.all_columns_anomalies: - column_anomalies: < specific monitors, all if null > - where_expression: < sql expression > - time_bucket: # Daily by default - period: < time period > - count: < number of periods > + arguments: + column_anomalies: < specific monitors, all if null > + where_expression: < sql expression > + time_bucket: # Daily by default + period: < time period > + count: < number of periods > ``` ```yml Models example @@ -75,15 +77,18 @@ models: config: elementary: timestamp_column: "loaded_at" - tests: + data_tests: - elementary.all_columns_anomalies: - where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" - time_bucket: - period: day - count: 1 - tags: ["elementary"] - # optional - change global sensitivity - anomaly_sensitivity: 3.5 + config: + tags: ["elementary"] + arguments: + where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" + time_bucket: + period: day + count: 1 + + # optional - change global sensitivity + anomaly_sensitivity: 3.5 ``` diff --git a/docs/data-tests/anomaly-detection-tests/column-anomalies.mdx b/docs/data-tests/anomaly-detection-tests/column-anomalies.mdx index ebdca10c5..b7caa1277 100644 --- a/docs/data-tests/anomaly-detection-tests/column-anomalies.mdx +++ b/docs/data-tests/anomaly-detection-tests/column-anomalies.mdx @@ -20,31 +20,32 @@ No mandatory configuration, however it is highly recommended to configure a `tim {/* prettier-ignore */}
  
-  tests:
+  data_tests:
       -- elementary.column_anomalies:
-          column_anomalies: column monitors list
-          dimensions: sql expression
-          timestamp_column: column name
-          where_expression: sql expression
-          anomaly_sensitivity: int
-          anomaly_direction: [both | spike | drop]
-          detection_period:
-            period: [hour | day | week | month]
-            count: int
-          training_period:
-            period: [hour | day | week | month]
-            count: int
-          time_bucket:
-            period: [hour | day | week | month]
-            count: int
-          seasonality: day_of_week
-          detection_delay:
-            period: [hour | day | week | month]
-            count: int
-          ignore_small_changes:
-            spike_failure_percent_threshold: int
-            drop_failure_percent_threshold: int
-          anomaly_exclude_metrics: [SQL expression]
+          arguments:
+              column_anomalies: column monitors list
+              dimensions: sql expression
+              timestamp_column: column name
+              where_expression: sql expression
+              anomaly_sensitivity: int
+              anomaly_direction: [both | spike | drop]
+              detection_period:
+                period: [hour | day | week | month]
+                count: int
+              training_period:
+                period: [hour | day | week | month]
+                count: int
+              time_bucket:
+                period: [hour | day | week | month]
+                count: int
+              seasonality: day_of_week
+              detection_delay:
+                period: [hour | day | week | month]
+                count: int
+              ignore_small_changes:
+                spike_failure_percent_threshold: int
+                drop_failure_percent_threshold: int
+              anomaly_exclude_metrics: [SQL expression]
  
 
@@ -58,22 +59,24 @@ models: timestamp_column: < timestamp column > columns: - name: < column name > - tests: + data_tests: - elementary.column_anomalies: - column_anomalies: < specific monitors, all if null > - where_expression: < sql expression > - time_bucket: # Daily by default - period: < time period > - count: < number of periods > + arguments: + column_anomalies: < specific monitors, all if null > + where_expression: < sql expression > + time_bucket: # Daily by default + period: < time period > + count: < number of periods > - name: < model name > ## if no timestamp is configured, elementary will monitor without time filtering columns: - name: < column name > - tests: + data_tests: - elementary.column_anomalies: - column_anomalies: < specific monitors, all if null > - where_expression: < sql expression > + arguments: + column_anomalies: < specific monitors, all if null > + where_expression: < sql expression > ``` ```yml Models example @@ -85,39 +88,46 @@ models: timestamp_column: 'loaded_at' columns: - name: user_name - tests: + data_tests: - elementary.column_anomalies: - column_anomalies: - - missing_count - - min_length - where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" - time_bucket: - period: day - count: 1 - tags: ['elementary'] + arguments: + column_anomalies: + - missing_count + - min_length + where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" + time_bucket: + period: day + count: 1 + config: + tags: ['elementary'] - name: users ## if no timestamp is configured, elementary will monitor without time filtering - tests: + data_tests: elementary.volume_anomalies - tags: ['elementary'] + config: + tags: ['elementary'] columns: - name: user_id - tests: + data_tests: - elementary.column_anomalies: - tags: ['elementary'] - timestamp_column: 'updated_at' - where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" - time_bucket: - period: < time period > - count: < number of periods > + config: + tags: ['elementary'] + arguments: + timestamp_column: 'updated_at' + where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" + time_bucket: + period: < time period > + count: < number of periods > - name: user_name - tests: + data_tests: - elementary.column_anomalies: - column_anomalies: - - missing_count - - min_length - tags: ['elementary'] + arguments: + column_anomalies: + - missing_count + - min_length + config: + tags: ['elementary'] ``` diff --git a/docs/data-tests/anomaly-detection-tests/dimension-anomalies.mdx b/docs/data-tests/anomaly-detection-tests/dimension-anomalies.mdx index cb5d71de4..fab3592d1 100644 --- a/docs/data-tests/anomaly-detection-tests/dimension-anomalies.mdx +++ b/docs/data-tests/anomaly-detection-tests/dimension-anomalies.mdx @@ -21,31 +21,32 @@ _Required configuration: `dimensions`_ {/* prettier-ignore */}
  
-  tests:
+  data_tests:
       -- elementary.dimension_anomalies:
-          dimensions: sql expression
-          timestamp_column: column name
-          where_expression: sql expression
-          anomaly_sensitivity: int
-          anomaly_direction: [both | spike | drop]
-          detection_period:
-            period: [hour | day | week | month]
-            count: int
-          training_period:
-            period: [hour | day | week | month]
-            count: int
-          time_bucket:
-            period: [hour | day | week | month]
-            count: int
-          seasonality: day_of_week
-          detection_delay:
-            period: [hour | day | week | month]
-            count: int
-          ignore_small_changes:
-            spike_failure_percent_threshold: int
-            drop_failure_percent_threshold: int
-          anomaly_exclude_metrics: [SQL expression]
-          exclude_final_results: [SQL expression]
+          arguments:
+              dimensions: sql expression
+              timestamp_column: column name
+              where_expression: sql expression
+              anomaly_sensitivity: int
+              anomaly_direction: [both | spike | drop]
+              detection_period:
+                period: [hour | day | week | month]
+                count: int
+              training_period:
+                period: [hour | day | week | month]
+                count: int
+              time_bucket:
+                period: [hour | day | week | month]
+                count: int
+              seasonality: day_of_week
+              detection_delay:
+                period: [hour | day | week | month]
+                count: int
+              ignore_small_changes:
+                spike_failure_percent_threshold: int
+                drop_failure_percent_threshold: int
+              anomaly_exclude_metrics: [SQL expression]
+              exclude_final_results: [SQL expression]
  
 
@@ -57,14 +58,15 @@ models: config: elementary: timestamp_column: < timestamp column > - tests: + data_tests: - elementary.dimension_anomalies: - dimensions: < columns or sql expressions of columns > - # optional - configure a where a expression to accurate the dimension monitoring - where_expression: < sql expression > - time_bucket: # Daily by default - period: < time period > - count: < number of periods > + arguments: + dimensions: < columns or sql expressions of columns > + # optional - configure a where expression to make the dimension monitoring more accurate + where_expression: < sql expression > + time_bucket: # Daily by default + period: < time period > + count: < number of periods > ``` ```yml Models example models: - name: product_logins config: elementary: timestamp_column: "loaded_at" - tests: + data_tests: - elementary.dimension_anomalies: - dimensions: - - event_type - - country_name - where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" - time_bucket: - period: hour - count: 4 - # optional - use tags to run elementary tests on a dedicated run - tags: ["elementary"] + arguments: + dimensions: + - event_type + - country_name + where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" + time_bucket: + period: hour + count: 4 config: + # optional - use tags to run elementary tests on a dedicated run + tags: ["elementary"] # optional - change severity severity: warn - name: users # if no timestamp is configured, elementary will monitor without time filtering - tests: + data_tests: - elementary.dimension_anomalies: - dimensions: - - event_type - tags: ["elementary"] + arguments: + dimensions: + - event_type + config: + tags: ["elementary"] ``` diff --git a/docs/data-tests/anomaly-detection-tests/event-freshness-anomalies.mdx b/docs/data-tests/anomaly-detection-tests/event-freshness-anomalies.mdx index 1d92f9bcc..2425a8482 100644 --- a/docs/data-tests/anomaly-detection-tests/event-freshness-anomalies.mdx +++ b/docs/data-tests/anomaly-detection-tests/event-freshness-anomalies.mdx @@ -27,29 +27,30 @@ _Default configuration: `anomaly_direction: spike` to alert only on delays._ {/* prettier-ignore */}
  
-  tests:
+  data_tests:
       -- elementary.event_freshness_anomalies:
-          event_timestamp_column: column name
-          update_timestamp_column: column name
-          where_expression: sql expression
-          anomaly_sensitivity: int
-          detection_period:
-            period: [hour | day | week | month]
-            count: int
-          training_period:
-            period: [hour | day | week | month]
-            count: int
-          time_bucket:
-            period: [hour | day | week | month]
-            count: int
-          seasonality: day_of_week
-          detection_delay:
-            period: [hour | day | week | month]
-            count: int
-          ignore_small_changes:
-            spike_failure_percent_threshold: int
-            drop_failure_percent_threshold: int
-          anomaly_exclude_metrics: [SQL expression]
+          arguments:
+              event_timestamp_column: column name
+              update_timestamp_column: column name
+              where_expression: sql expression
+              anomaly_sensitivity: int
+              detection_period:
+                period: [hour | day | week | month]
+                count: int
+              training_period:
+                period: [hour | day | week | month]
+                count: int
+              time_bucket:
+                period: [hour | day | week | month]
+                count: int
+              seasonality: day_of_week
+              detection_delay:
+                period: [hour | day | week | month]
+                count: int
+              ignore_small_changes:
+                spike_failure_percent_threshold: int
+                drop_failure_percent_threshold: int
+              anomaly_exclude_metrics: [SQL expression]
  
 
@@ -58,26 +59,28 @@ _Default configuration: `anomaly_direction: spike` to alert only on delays._
 ```yml Models
 models:
   - name: < model name >
-    tests:
+    data_tests:
       - elementary.event_freshness_anomalies:
-          event_timestamp_column: < timestamp column > # Mandatory
-          update_timestamp_column: < timestamp column > # Optional
-          where_expression: < sql expression >
-          time_bucket: # Daily by default
-            period: < time period >
-            count: < number of periods >
+          arguments:
+            event_timestamp_column: < timestamp column > # Mandatory
+            update_timestamp_column: < timestamp column > # Optional
+            where_expression: < sql expression >
+            time_bucket: # Daily by default
+              period: < time period >
+              count: < number of periods >
 ```

 ```yml Models example
 models:
   - name: login_events
-    tests:
+    data_tests:
       - elementary.event_freshness_anomalies:
-          event_timestamp_column: "occurred_at"
-          update_timestamp_column: "updated_at"
-          # optional - use tags to run elementary tests on a dedicated run
-          tags: ["elementary"]
+          arguments:
+            event_timestamp_column: "occurred_at"
+            update_timestamp_column: "updated_at"
           config:
+            # optional - use tags to run elementary tests on a dedicated run
+            tags: ["elementary"]
             # optional - change severity
             severity: warn
 ```

diff --git a/docs/data-tests/anomaly-detection-tests/freshness-anomalies.mdx b/docs/data-tests/anomaly-detection-tests/freshness-anomalies.mdx
index 5a5f49fbe..0dfb87a62 100644
--- a/docs/data-tests/anomaly-detection-tests/freshness-anomalies.mdx
+++ b/docs/data-tests/anomaly-detection-tests/freshness-anomalies.mdx
@@ -23,27 +23,28 @@ _Default configuration: `anomaly_direction: spike` to alert only on delays._

 {/* prettier-ignore */}
  
-  tests:
+  data_tests:
       -- elementary.freshness_anomalies:
-          timestamp_column: column name
-          where_expression: sql expression
-          anomaly_sensitivity: int
-          detection_period:
-            period: [hour | day | week | month]
-            count: int
-          training_period:
-            period: [hour | day | week | month]
-            count: int
-          time_bucket:
-            period: [hour | day | week | month]
-            count: int
-          detection_delay:
-            period: [hour | day | week | month]
-            count: int
-          ignore_small_changes:
-            spike_failure_percent_threshold: int
-            drop_failure_percent_threshold: int
-          anomaly_exclude_metrics: [SQL expression]
+          arguments:
+              timestamp_column: column name
+              where_expression: sql expression
+              anomaly_sensitivity: int
+              detection_period:
+                period: [hour | day | week | month]
+                count: int
+              training_period:
+                period: [hour | day | week | month]
+                count: int
+              time_bucket:
+                period: [hour | day | week | month]
+                count: int
+              detection_delay:
+                period: [hour | day | week | month]
+                count: int
+              ignore_small_changes:
+                spike_failure_percent_threshold: int
+                drop_failure_percent_threshold: int
+              anomaly_exclude_metrics: [SQL expression]
  
 
@@ -52,24 +53,26 @@ _Default configuration: `anomaly_direction: spike` to alert only on delays._
 ```yml Models
 models:
   - name: < model name >
-    tests:
+    data_tests:
       - elementary.freshness_anomalies:
-          timestamp_column: < timestamp column > # Mandatory
-          where_expression: < sql expression >
-          time_bucket: # Daily by default
-            period: < time period >
-            count: < number of periods >
+          arguments:
+            timestamp_column: < timestamp column > # Mandatory
+            where_expression: < sql expression >
+            time_bucket: # Daily by default
+              period: < time period >
+              count: < number of periods >
 ```

 ```yml Models example
 models:
   - name: login_events
-    tests:
+    data_tests:
       - elementary.freshness_anomalies:
-          timestamp_column: "updated_at"
-          # optional - use tags to run elementary tests on a dedicated run
-          tags: ["elementary"]
+          arguments:
+            timestamp_column: "updated_at"
           config:
+            # optional - use tags to run elementary tests on a dedicated run
+            tags: ["elementary"]
             # optional - change severity
             severity: warn
 ```

diff --git a/docs/data-tests/anomaly-detection-tests/volume-anomalies.mdx b/docs/data-tests/anomaly-detection-tests/volume-anomalies.mdx
index 16ebdca8c..36db1ee5a 100644
--- a/docs/data-tests/anomaly-detection-tests/volume-anomalies.mdx
+++ b/docs/data-tests/anomaly-detection-tests/volume-anomalies.mdx
@@ -24,30 +24,31 @@ No mandatory configuration, however it is highly recommended to configure a `tim

 {/* prettier-ignore */}
  
-  tests:
+  data_tests:
       - elementary.volume_anomalies:
-          timestamp_column: column name
-          where_expression: sql expression
-          anomaly_sensitivity: int
-          anomaly_direction: [both | spike | drop]
-          detection_period:
-            period: [hour | day | week | month]
-            count: int
-          training_period:
-            period: [hour | day | week | month]
-            count: int
-          time_bucket:
-            period: [hour | day | week | month]
-            count: int
-          seasonality: day_of_week
-          fail_on_zero: [true | false]
-          ignore_small_changes:
-            spike_failure_percent_threshold: int
-            drop_failure_percent_threshold: int
-          detection_delay:
-            period: [hour | day | week | month]
-            count: int
-          anomaly_exclude_metrics: [SQL expression]
+          arguments:
+              timestamp_column: column name
+              where_expression: sql expression
+              anomaly_sensitivity: int
+              anomaly_direction: [both | spike | drop]
+              detection_period:
+                period: [hour | day | week | month]
+                count: int
+              training_period:
+                period: [hour | day | week | month]
+                count: int
+              time_bucket:
+                period: [hour | day | week | month]
+                count: int
+              seasonality: day_of_week
+              fail_on_zero: [true | false]
+              ignore_small_changes:
+                spike_failure_percent_threshold: int
+                drop_failure_percent_threshold: int
+              detection_delay:
+                period: [hour | day | week | month]
+                count: int
+              anomaly_exclude_metrics: [SQL expression]
  
 
@@ -56,13 +57,14 @@ No mandatory configuration, however it is highly recommended to configure a `tim
 ```yml Models
 models:
   - name: < model name >
-    tests:
+    data_tests:
       - elementary.volume_anomalies:
-          timestamp_column: < timestamp column >
-          where_expression: < sql expression >
-          time_bucket: # Daily by default
-            period: < time period >
-            count: < number of periods >
+          arguments:
+            timestamp_column: < timestamp column >
+            where_expression: < sql expression >
+            time_bucket: # Daily by default
+              period: < time period >
+              count: < number of periods >
 ```

 ```yml Models example
@@ -71,23 +73,25 @@ models:
     config:
       elementary:
         timestamp_column: "loaded_at"
-    tests:
+    data_tests:
       - elementary.volume_anomalies:
-          where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'"
-          time_bucket:
-            period: day
-            count: 1
-          # optional - use tags to run elementary tests on a dedicated run
-          tags: ["elementary"]
+          arguments:
+            where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'"
+            time_bucket:
+              period: day
+              count: 1
           config:
+            # optional - use tags to run elementary tests on a dedicated run
+            tags: ["elementary"]
             # optional - change severity
             severity: warn

   - name: users
     # if no timestamp is configured, elementary will monitor without time filtering
-    tests:
+    data_tests:
       - elementary.volume_anomalies:
-          tags: ["elementary"]
+          config:
+            tags: ["elementary"]
 ```

diff --git a/docs/data-tests/python-tests.mdx b/docs/data-tests/python-tests.mdx
index dfb2e16ff..699cbcd34 100644
--- a/docs/data-tests/python-tests.mdx
+++ b/docs/data-tests/python-tests.mdx
@@ -40,9 +40,10 @@ A Python test is defined like any other dbt test.
 ```yaml models/schema.yml
   - name: orders
-    tests:
+    data_tests:
       - elementary.python:
-          code_macro: check_undelivered_orders
+          arguments:
+            code_macro: check_undelivered_orders
 ```

 Then, we need to define a macro under `macros/.sql` that contains the Python code the test will execute.

@@ -86,11 +87,12 @@ Let's compare two different tables, or views, within our warehouse.
 ```yaml models/schema.yml
   - name: orders
-    tests:
+    data_tests:
       - elementary.python:
-          code_macro: compare_tables
-          macro_args:
-            other_table: raw_orders
+          arguments:
+            code_macro: compare_tables
+            macro_args:
+              other_table: raw_orders
 ```

 We're passing an additional argument to the test called `macro_args`.

@@ -122,13 +124,14 @@ In this example we'll validate a JSON column according to a pre-defined schema.
 ```yaml models/schema.yml
   - name: login_events
-    tests:
+    data_tests:
       - elementary.python:
-          code_macro: validate_json
-          macro_args:
-            schema: "{'type': 'object', 'properties': {'country': {'type': 'string'}}}"
-            column: geolocation_json
-          packages: ["jsonschema"]
+          arguments:
+            code_macro: validate_json
+            macro_args:
+              schema: "{'type': 'object', 'properties': {'country': {'type': 'string'}}}"
+              column: geolocation_json
+            packages: ["jsonschema"]
 ```

 Here we'll be testing that the `country` that is provided in the `geolocation_json` column in the `login_events`

diff --git a/docs/data-tests/schema-tests/exposure-tests.mdx b/docs/data-tests/schema-tests/exposure-tests.mdx
index 54a7800b8..13e49d24c 100644
--- a/docs/data-tests/schema-tests/exposure-tests.mdx
+++ b/docs/data-tests/schema-tests/exposure-tests.mdx
@@ -130,12 +130,15 @@ For each module schema you wish to verify the exposure dependencies, add the ele
     config:
       tags: ["finance"]

-    tests:
+    data_tests:
       - elementary.volume_anomalies:
-          tags: ["table_anomalies"]
-          timestamp_column: "order_date"
+          config:
+            tags: ["table_anomalies"]
+          arguments:
+            timestamp_column: "order_date"
       - elementary.exposure_schema_validity:
-          tags: [elementary]
+          config:
+            tags: [elementary]
 ```

diff --git a/docs/data-tests/schema-tests/json-schema.mdx b/docs/data-tests/schema-tests/json-schema.mdx
index 3b55131b2..3a0c85c84 100644
--- a/docs/data-tests/schema-tests/json-schema.mdx
+++ b/docs/data-tests/schema-tests/json-schema.mdx
@@ -34,21 +34,22 @@ Please add the following test to your model configuration:
     columns:
       - name: raw_event_data
-        tests:
+        data_tests:
           - elementary.json_schema:
-              type: object
-              properties:
-                event_id:
-                  type: integer
-                event_name:
-                  type: string
-                event_args:
-                  type: array
-                  items:
+              arguments:
+                type: object
+                properties:
+                  event_id:
+                    type: integer
+                  event_name:
                     type: string
-              required:
-                - event_id
-                - event_name
+                  event_args:
+                    type: array
+                    items:
+                      type: string
+                required:
+                  - event_id
+                  - event_name
 ```

 _Note: The `generate_json_schema_test` macro relies on a 3rd-party python library called `genson`. If you are using

@@ -64,8 +65,10 @@ models:
   - name: < model name >
     columns:
       - name: < column name >
-        tests:
-          - elementary.json_schema:
+        data_tests:
+          - elementary.json_schema:
+              arguments:
+
 ```

 ```yml Models example
@@ -75,21 +78,22 @@ models:
   - name: login_events
     columns:
       - name: raw_event_data
-        tests:
+        data_tests:
           - elementary.json_schema:
-              type: object
-              properties:
-                event_id:
-                  type: integer
-                event_name:
-                  type: string
-                event_args:
-                  type: array
-                  items:
+              arguments:
+                type: object
+                properties:
+                  event_id:
+                    type: integer
+                  event_name:
                     type: string
-              required:
-                - event_id
-                - event_name
+                  event_args:
+                    type: array
+                    items:
+                      type: string
+                required:
+                  - event_id
+                  - event_name
 ```

diff --git a/docs/data-tests/schema-tests/schema-changes-from-baseline.mdx b/docs/data-tests/schema-tests/schema-changes-from-baseline.mdx
index 5281e5d41..5092ebd57 100644
--- a/docs/data-tests/schema-tests/schema-changes-from-baseline.mdx
+++ b/docs/data-tests/schema-tests/schema-changes-from-baseline.mdx
@@ -61,7 +61,7 @@ dbt run-operation elementary.generate_schema_baseline_test --args '{"fail_on_add
 #>     - name:
 #>       columns:
 #>         ...
-#>       tests:
+#>       data_tests:
 #>         - elementary.schema_changes_from_baseline:
 #>             ...
 ```

@@ -84,7 +84,7 @@ sources:
         data_type: < data type 1 >
       - name: < column 2 >
         data_type: < data type 2 >
-    tests:
+    data_tests:
       - elementary.schema_changes_from_baseline
 ```

@@ -102,9 +102,10 @@ sources:
         data_type: text
       - name: event_id
         data_type: integer
-    tests:
+    data_tests:
       - elementary.schema_changes_from_baseline
-          tags: ["elementary"]
+          config:
+            tags: ["elementary"]
 ```

 ```yml Models
@@ -117,7 +118,7 @@ models:
         data_type: < data type 1 >
       - name: < column 2 >
         data_type: < data type 1 >
-    tests:
+    data_tests:
       - elementary.schema_changes_from_baseline
 ```

@@ -131,9 +132,10 @@ models:
         data_type: text
       - name: event_id
         data_type: integer
-    tests:
+    data_tests:
       - elementary.schema_changes_from_baseline:
-          tags: ["elementary"]
+          config:
+            tags: ["elementary"]
 ```

diff --git a/docs/data-tests/schema-tests/schema-changes.mdx b/docs/data-tests/schema-tests/schema-changes.mdx
index 8a0edec7b..4aff2e91e 100644
--- a/docs/data-tests/schema-tests/schema-changes.mdx
+++ b/docs/data-tests/schema-tests/schema-changes.mdx
@@ -19,7 +19,7 @@ version: 2
 models:
   - name: < model name >
-    tests:
+    data_tests:
      - elementary.schema_changes
 ```

@@ -28,10 +28,10 @@ version: 2
 models:
   - name: login_events
-    tests:
+    data_tests:
       - elementary.schema_changes:
-          tags: ["elementary"]
           config:
+            tags: ["elementary"]
             severity: warn
 ```

diff --git a/docs/docs.json b/docs/docs.json
index f3c3071f6..3a83c72fd 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -454,6 +454,7 @@
             "group": "Integrations",
             "pages": [
               "oss/integrations/dbt",
+              "oss/integrations/dbt-fusion",
               "oss/deployment-and-configuration/slack",
               "oss/deployment-and-configuration/teams"
             ]
diff --git a/docs/oss/general/troubleshooting.mdx b/docs/oss/general/troubleshooting.mdx
index 8fa9f3b0e..2c1c534de 100644
--- a/docs/oss/general/troubleshooting.mdx
+++ b/docs/oss/general/troubleshooting.mdx
@@ -166,12 +166,27 @@ python3 -m pip install elementary-data[]
-  The warning above may appear in one of the following two cases:
-  * If you are using the most recent version of dbt 1.6 or 1.7 - this warning will appear by default, since it indicates the aforementioned behavior change in dbt 1.8.
-  * If you are using dbt 1.8 and above, this warning will NOT appear by default, however it will start appearing once you set the flag `require_explicit_package_overrides_for_builtin_materializations` to `false` - as required in the [dbt package installation guide](/oss/quickstart/quickstart-cli-package#step-by-step-install-elementary-dbt-package).
-
-  In either case, please ignore it for now. This is a temporary measure and we are working with the dbt team on a [longer term solution](https://github.com/dbt-labs/dbt-core/issues/10090).
+  If you are encountering the warning above, it means that you previously added the flag
+  `require_explicit_package_overrides_for_builtin_materializations=False` to `dbt_project.yml`.
+  This is no longer required!
+
+  Instead, please add a file named `elementary_materialization.sql` to your macros folder, with the following contents:
+
+  If you use Snowflake:
+  ```
+  {% materialization test, adapter='snowflake' %}
+    {{ return(elementary.materialization_test_snowflake()) }}
+  {% endmaterialization %}
+  ```
+
+  If you use any other DWH:
+  ```
+  {% materialization test, default %}
+    {{ return(elementary.materialization_test_default()) }}
+  {% endmaterialization %}
+  ```
+
+  This will ensure Elementary's test materialization is run but will avoid the warning.
@@ -221,6 +236,15 @@ vars:
+
+
+dbt-fusion support in Elementary is still in beta.
+
+While most of the core features should work, some features may not work as expected.
+For more details, please see the [dbt-fusion integration guide](/oss/integrations/dbt-fusion).
+
+
+
 If you're experiencing issues of any kind, reach out on the [#community-support](https://elementary-community.slack.com/archives/C02CTC89LAX) channel. Elementary AI and the team will be happy to help.
diff --git a/docs/oss/integrations/dbt-fusion.mdx b/docs/oss/integrations/dbt-fusion.mdx
new file mode 100644
index 000000000..6c53d0b7d
--- /dev/null
+++ b/docs/oss/integrations/dbt-fusion.mdx
@@ -0,0 +1,108 @@
+---
+title: "dbt fusion (Beta)"
+---
+
+Note: dbt-fusion support in Elementary is still in beta, as is dbt-fusion itself. Please see below a list of features that are not yet implemented.
+
+Elementary OSS integrates with dbt-fusion, starting with version 0.20.
+Fusion is a complete rewrite of the dbt engine, and provides many benefits, including enhanced performance and static analysis.
+
+For more details about dbt-fusion capabilities, please consult the [dbt-fusion docs](https://docs.getdbt.com/docs/fusion/about-fusion).
+
+### Upgrading to dbt-fusion
+
+As part of the migration to dbt-fusion, you must remove deprecated syntax from various areas of the dbt project, and from YAML files in particular.
+Specifically for tests, the following changes are important:
+
+1. Test arguments must be encapsulated under an `arguments` field.
+2. Configuration fields such as `meta`, `tags` or `severity` must be encapsulated under a `config` field.
+3. Recommended (but not required) - change the `tests` field to `data_tests` to conform with the current dbt guidelines.
+
+Here's an example of an Elementary anomaly test with the old and new syntax:
+
+
+```yml Old syntax
+models:
+  - name: login_events
+    config:
+      elementary:
+        timestamp_column: "loaded_at"
+    tests:
+      - elementary.volume_anomalies:
+          where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'"
+          time_bucket:
+            period: day
+            count: 1
+          tags: ["elementary"]
+          severity: warn
+          meta:
+            owner: "@jessica.jones"
+
+  - name: users
+    tests:
+      - elementary.volume_anomalies:
+          tags: ["elementary"]
+```
+
+```yml New syntax
+models:
+  - name: login_events
+    config:
+      elementary:
+        timestamp_column: "loaded_at"
+    data_tests:
+      - elementary.volume_anomalies:
+          arguments:
+            where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'"
+            time_bucket:
+              period: day
+              count: 1
+          config:
+            tags: ["elementary"]
+            severity: warn
+            meta:
+              owner: "@jessica.jones"
+
+  - name: users
+    data_tests:
+      - elementary.volume_anomalies:
+          config:
+            tags: ["elementary"]
+```
+
+
+To facilitate the process of making these changes within your project, dbt introduced a tool called [dbt-autofix](https://github.com/dbt-labs/dbt-autofix) that
+can be used to automatically migrate your project to the new syntax (see the example invocation below).
+
+Before running this tool, **please upgrade** to the most recent dbt-core version, as some of the syntax changes are not supported on older versions.
+
+For full information on upgrading from dbt-core to dbt-fusion, please check [dbt-fusion's official upgrade guide](https://docs.getdbt.com/docs/dbt-versions/core-upgrade/upgrading-to-fusion).
+
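+As a rough sketch of what the migration step can look like in practice, the commands below show one possible way to run dbt-autofix from the root of a dbt project. The install method and subcommand here are assumptions - please refer to the dbt-autofix README for the authoritative usage:
+
+```shell
+# Assumed invocation - verify the exact commands against the dbt-autofix README before running.
+# Rewrite deprecated test syntax (arguments/config nesting, data_tests) across the project's YAML files:
+uvx dbt-autofix deprecations
+
+# Review the rewritten schema files before committing the changes:
+git diff
+```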
+
+### Supported Elementary capabilities
+
+Most of the main capabilities of the Elementary dbt package are supported in Fusion, including:
+1. Anomaly detection tests.
+2. Schema tests.
+3. Artifacts collection.
+
+However, the following capabilities are not supported right now for dbt Fusion:
+1. Python tests.
+2. JSON schema tests.
+3. The missing dbt-fusion features listed below.
+
+### Missing features in dbt-fusion
+
+In addition to the above, there are some features that are currently missing from dbt-fusion, and therefore are not yet available in the Elementary package.
+
+For each one of the issues below, we've included a link to the dbt-fusion GitHub repository - **please upvote these features if they are important to you!**
+
+1. [Tests with error status are not being reported](https://github.com/dbt-labs/dbt-fusion/issues/686) - If a test fails "normally" (e.g. not_null fails on rows with null values), Elementary will report it as expected with a "fail" status.
+However, if there is a compilation error / any error that comes before the actual test query ("error" status in dbt-core), it will currently be missing.
+2. [Source freshness results are not reported](https://github.com/dbt-labs/dbt-fusion/issues/720)
+3. [Exposure artifacts are not reported](https://github.com/dbt-labs/dbt-fusion/issues/859)
+4. [Group artifacts are not reported](https://github.com/dbt-labs/dbt-fusion/issues/25)
+5. [Compiled code is missing from dbt artifact tables](https://github.com/dbt-labs/dbt-fusion/issues/723)
+6. [Failed row count is missing from dbt artifact tables](https://github.com/dbt-labs/dbt-fusion/issues/724)
diff --git a/docs/tutorial/adding-elementary-tests.mdx b/docs/tutorial/adding-elementary-tests.mdx
index 9c55d9e74..98169074c 100644
--- a/docs/tutorial/adding-elementary-tests.mdx
+++ b/docs/tutorial/adding-elementary-tests.mdx
@@ -29,7 +29,7 @@ models:
     description: This table has basic information about a customer, as well as some derived facts based on a customer's orders
     config:
       tags: ["PII"]
-    tests:
+    data_tests:
       - elementary.volume_anomalies
 ```

@@ -43,7 +43,7 @@ models:
       tags: ["PII"]
       elementary:
         timestamp_column: "signup_date"
-    tests:
+    data_tests:
       - elementary.volume_anomalies
 ```

@@ -59,10 +59,12 @@ Similar to Test 1, we will use the **volume_anomalies** test to detect an anomal
     config:
       tags: ["finance"]

-    tests:
+    data_tests:
       - elementary.volume_anomalies:
-          tags: ["table_anomalies"]
-          timestamp_column: "order_date"
+          config:
+            tags: ["table_anomalies"]
+          arguments:
+            timestamp_column: "order_date"
 ````

@@ -75,10 +77,11 @@ The **elementary.dimension_anomalies** tests can be used to check for anomalies
     config:
       tags: ["finance"]

-    tests:
+    data_tests:
       - elementary.dimension_anomalies:
-          dimensions:
-            - status
+          arguments:
+            dimensions:
+              - status
 ````

 Next, we will define a timestamp column for determining time buckets:

@@ -91,10 +94,11 @@ Next, we will define a timestamp column for determining time buckets:
       elementary:
         timestamp_column: "order_date"

-    tests:
+    data_tests:
       - elementary.dimension_anomalies:
-          dimensions:
-            - status
+          arguments:
+            dimensions:
+              - status
 ```

 This test will gather row count metrics for each value in the **status** column and will fail if the distribution deviates substantially from the mean.
@@ -106,14 +110,15 @@ We will use the **elementary.column_anomalies** test to monitor the count of cus
 ```yaml
   - name: number_of_orders
     description: Count of the number of orders a customer has placed
-    tests:
+    data_tests:
      - elementary.column_anomalies:
          config:
            severity: warn
-          tags: ["column_anomalies"]
-          column_anomalies:
-            - zero_count
-          timestamp_column: "signup_date"
+            tags: ["column_anomalies"]
+          arguments:
+            column_anomalies:
+              - zero_count
+            timestamp_column: "signup_date"
 ```

 Notice that we already defined the **timestamp_column** at the model level, so we don't have to define it in the test. [This page](/data-tests/elementary-tests-configuration) has in-depth details on test priorities.

@@ -133,7 +138,7 @@ models:
       tags: ["PII"]
       elementary:
         timestamp_column: "signup_date"
-    tests:
+    data_tests:
       - elementary.volume_anomalies

     columns:

@@ -154,14 +159,15 @@ models:
       - name: number_of_orders
         description: Count of the number of orders a customer has placed
-        tests:
+        data_tests:
           - elementary.column_anomalies:
               config:
                 severity: warn
-              tags: ["column_anomalies"]
-              column_anomalies:
-                - zero_count
-              timestamp_column: "signup_date"
+                tags: ["column_anomalies"]
+              arguments:
+                column_anomalies:
+                  - zero_count
+                timestamp_column: "signup_date"

       - name: customer_lifetime_value
         description: Total value (AUD) of a customer's orders

@@ -179,10 +185,11 @@ models:
       elementary:
         timestamp_column: "order_date"

-    tests:
+    data_tests:
       - elementary.dimension_anomalies:
-          dimensions:
-            - status
+          arguments:
+            dimensions:
+              - status

     columns:
       - name: order_id
         description: This is a unique identifier for an order

@@ -216,10 +223,12 @@ models:
     config:
       tags: ["finance"]

-    tests:
+    data_tests:
       - elementary.volume_anomalies:
-          tags: ["table_anomalies"]
-          timestamp_column: "order_date"
+          config:
+            tags: ["table_anomalies"]
+          arguments:
+            timestamp_column: "order_date"

     columns:
       - name: order_id