diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 025cf92a5..e12833ec1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,3 +43,9 @@ repos: entry: mypy --no-error-summary language: system files: ^elementary/.*\.py$ + + - repo: https://github.com/CoderJoshDK/precommit-mintlify-validate/ + rev: v0.2.0 + hooks: + - id: mintlify-validate + args: [docs] diff --git a/docs/Dockerfile b/docs/Dockerfile index 1ee2e6c29..1ad7cc928 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -1,8 +1,7 @@ -FROM node:19 +FROM node:20.3.0 WORKDIR /app RUN npm i -g mintlify -RUN mintlify install EXPOSE 3000 CMD ["mintlify", "dev"] diff --git a/docs/_snippets/alerts/alerts-configuration.mdx b/docs/_snippets/alerts/alerts-configuration.mdx index fb21b075d..7f5d6dccd 100644 --- a/docs/_snippets/alerts/alerts-configuration.mdx +++ b/docs/_snippets/alerts/alerts-configuration.mdx @@ -1,5 +1,5 @@ - + Use Alert Rules to distribute your alerts to the right channels. diff --git a/docs/_snippets/alerts/description.mdx b/docs/_snippets/alerts/description.mdx new file mode 100644 index 000000000..6b3fcb508 --- /dev/null +++ b/docs/_snippets/alerts/description.mdx @@ -0,0 +1,30 @@ +Elementary supports configuring description for tests that are included in alerts. +It's recommended to add an explanation of what does it mean if this test fails, so alert will include this context. + + + +```yml test +tests: + - not_null: + meta: + description: "This is the test description" +``` + +```yml test config block +{{ config( + tags=["Tag1","Tag2"] + meta={ + description: "This is the test description" + } +) }} +``` + +```yml dbt_project.yml +tests: + path: + subfolder: + +meta: + description: "This is the test description" +``` + + diff --git a/docs/_snippets/alerts/owner.mdx b/docs/_snippets/alerts/owner.mdx new file mode 100644 index 000000000..174918236 --- /dev/null +++ b/docs/_snippets/alerts/owner.mdx @@ -0,0 +1,46 @@ +Elementary enriches alerts with [owners for models or tests](https://docs.getdbt.com/reference/resource-configs/meta#designate-a-model-owner)). + +- If you want the owner to be tagged on slack use '@' and the email prefix of the slack user (@jessica.jones to tag jessica.jones@marvel.com). +- You can configure a single owner or a list of owners (`["@jessica.jones", "@joe.joseph"]`). + + + +```yml model +models: + - name: my_model_name + meta: + owner: "@jessica.jones" +``` + +```yml test +tests: + - not_null: + meta: + owner: ["@jessica.jones", "@joe.joseph"] +``` + +```yml test/model config block +{{ config( + tags=["Tag1","Tag2"] + meta={ + "description": "This is a description", + "owner": "@jessica.jones" + } +) }} +``` + +```yml dbt_project.yml +models: + path: + subfolder: + +meta: + owner: "@jessica.jones" + +tests: + path: + subfolder: + +meta: + owner: "@jessica.jones" +``` + + \ No newline at end of file diff --git a/docs/_snippets/alerts/subscribers.mdx b/docs/_snippets/alerts/subscribers.mdx new file mode 100644 index 000000000..a76111903 --- /dev/null +++ b/docs/_snippets/alerts/subscribers.mdx @@ -0,0 +1,44 @@ +If you want additional users besides the owner to be tagged on an alert, add them as subscribers. + +- If you want the subscriber to be tagged on slack use '@' and the email prefix of the slack user (@jessica.jones to tag jessica.jones@marvel.com). +- You can configure a single subscriber or a list (`["@jessica.jones", "@joe.joseph"]`). 
+ + + +```yml model +models: + - name: my_model_name + meta: + subscribers: "@jessica.jones" +``` + +```yml test +tests: + - not_null: + meta: + subscribers: ["@jessica.jones", "@joe.joseph"] +``` + +```yml test/model config block +{{ config( + meta={ + "subscribers": "@jessica.jones" + } +) }} +``` + +```yml dbt_project.yml +models: + path: + subfolder: + +meta: + subscribers: "@jessica.jones" + +tests: + path: + subfolder: + +meta: + subscribers: "@jessica.jones" +``` + + \ No newline at end of file diff --git a/docs/_snippets/alerts/tags.mdx b/docs/_snippets/alerts/tags.mdx new file mode 100644 index 000000000..4414d90c7 --- /dev/null +++ b/docs/_snippets/alerts/tags.mdx @@ -0,0 +1,39 @@ +You can use [tags](https://docs.getdbt.com/reference/resource-configs/tags) to provide context to your alerts. + +- You can tag a group or a channel in a slack alert by adding `#channel_name` as a tag. +- Tags are aggregated,so a test alert will include both the test and the parent model tags. + + + +```yml model +models: + - name: my_model_name + tags: ["#marketing", "#data_ops"] +``` + +```yml test +tests: + - not_null: + tags: ["#marketing", "#data_ops"] +``` + +```yml test/model config block +{{ config( + tags=["#marketing", "#data_ops"] + } +) }} +``` + +```yml dbt_project.yml +models: + path: + subfolder: + tags: ["#marketing", "#data_ops"] + +tests: + path: + subfolder: + tags: ["#marketing", "#data_ops"] +``` + + \ No newline at end of file diff --git a/docs/_snippets/cloud/features.mdx b/docs/_snippets/cloud/features.mdx new file mode 100644 index 000000000..64c976a14 --- /dev/null +++ b/docs/_snippets/cloud/features.mdx @@ -0,0 +1,81 @@ + +### Detection & Coverage + +Elementary integrates powerful anomaly detection and dbt tests into a unified detection strategy. +Effective detection of data issues requires a comprehensive approach, +including both pipeline and data monitoring, validation tests, +anomaly detection for unexpected behavior, and a single interface to manage it all at scale. + + + + ML-powered monitors automatically detect data quality issues. + Out-of-the-box for volume and freshness, and opt-in for data quality metrics. + + + Validate data and track the results of dbt tests, dbt packages tests (dbt-utils, dbt-expectations, elementary) and custom SQL tests. + + + Validate there are no breaking changes in tables schema, JSON schema, and downstream exposures such as dashboards. + + + Track failures and runs of jobs, models, and tests overtime. + Pipeline failures and performance issues can cause data incidents, and create unneceserry costs. + + + Configure Elementary in code, or via the UI for non-technical users or for adding tests in bulk. + The platform opens PRs to your repo, saving hours of tedious YAML edits. + + + Coming soon! + + + +### Triage & Response + +Detecting issues is just the first step to achieve data reliability. +Elementary offers tools to create an effective response plan, for faster recovery. +This includes investigating the root cause and impact of issues, communicating issues to the relevant people, assigning owners to fix issues, keeping track of open incidents and more. + + + + Column-level lineage that spans through sources, models and BI tools, enriched with monitoring results. Enables granular root cause and impact analysis. + + + Define clear ownership of data assets and enable owners to be informed and accountable for the health and status of their data. + + + Distribute highly configurable alerts to different channels and integrations. 
+ Automatically tag owners, and enable setting status and assigns at the alert level. + + + Different failures related to the same issue are grouped automatically to a single incident. + This accelerates triage and response, and reduces alerts fautigue. + + + Manage all open incidents in a single interface, with a clear view of status and assignees. + Track historical incidents and high-level incidents metrics. + + + +### Collaboration & Communication + +The data team doesn’t live in a silo - you have many stakeholders. +The only way to improve data trust is by bringing in more team members, users and stakeholders to the data health process. +Elementary fosters collaboration by allowing you to easily share and communicate the status of issues, +the overall health of the data platform and progress made to improve it with the broader organization. + + + + Up to date dashboard with current status and trends of data issues. + Share the dashboard with others, enable them to slice results and stay informed. + + + Enable effective collaboration and communication by grouping related data assets and tests by business domains, data products, priority, etc. + + + Search and explore your datasets information - descriptions, columns, column descriptions, compiled code, datasets health and more. + + + See the Data Health scores of all your datasets by domain and share with stakeholders. + + \ No newline at end of file diff --git a/docs/_snippets/cloud/features/alerts-and-incidents/alert-types.mdx b/docs/_snippets/cloud/features/alerts-and-incidents/alert-types.mdx new file mode 100644 index 000000000..bd5e74083 --- /dev/null +++ b/docs/_snippets/cloud/features/alerts-and-incidents/alert-types.mdx @@ -0,0 +1,7 @@ +Elementary can be configured to send alerts on: + +- Model run failures +- Failures and/or warnings of dbt tests (including Elementary dbt package and other packages) +- Failures and/or warnings Elementary Anomaly Detection monitors +- Failures and/or warning of custom SQL tests +- dbt source freshness failures \ No newline at end of file diff --git a/docs/_snippets/cloud/features/anomaly-detection/all-anomalies-configuration.mdx b/docs/_snippets/cloud/features/anomaly-detection/all-anomalies-configuration.mdx new file mode 100644 index 000000000..b4b5053d5 --- /dev/null +++ b/docs/_snippets/cloud/features/anomaly-detection/all-anomalies-configuration.mdx @@ -0,0 +1,2 @@ +- **Severity** - Should a failure be considered a warning or a failure. Default is warning. +- **Test metadata** - Add metadata such as tags and owner to the test. \ No newline at end of file diff --git a/docs/_snippets/cloud/features/anomaly-detection/automated-monitors-cards.mdx b/docs/_snippets/cloud/features/anomaly-detection/automated-monitors-cards.mdx new file mode 100644 index 000000000..578c45146 --- /dev/null +++ b/docs/_snippets/cloud/features/anomaly-detection/automated-monitors-cards.mdx @@ -0,0 +1,10 @@ + + + Monitors updates to tables and how frequently a table is updated, + and fails if there is an unexpected delay. + + + Monitors how many rows were added or removed to a table on each update, + and fails if there is an unexpected drop or spike in rows. 
+ + \ No newline at end of file diff --git a/docs/_snippets/cloud/features/anomaly-detection/automated-monitors-intro.mdx b/docs/_snippets/cloud/features/anomaly-detection/automated-monitors-intro.mdx new file mode 100644 index 000000000..ad7abf84a --- /dev/null +++ b/docs/_snippets/cloud/features/anomaly-detection/automated-monitors-intro.mdx @@ -0,0 +1,5 @@ +Out-of-the-box ML-powered monitoring for freshness and volume issues on all production tables. +The automated monitors feature provides broad coverage and detection of critical pipeline issues, without any configuration effort. + +These monitors track updates to tables, and will detect data delays, incomplete updates, and significant volume changes. +Additionally, there will be no increase in compute costs as the monitors leverage only warehouse metadata (e.g. information schema, query history). \ No newline at end of file diff --git a/docs/_snippets/cloud/features/anomaly-detection/freshness-configuration.mdx b/docs/_snippets/cloud/features/anomaly-detection/freshness-configuration.mdx new file mode 100644 index 000000000..51daa1354 --- /dev/null +++ b/docs/_snippets/cloud/features/anomaly-detection/freshness-configuration.mdx @@ -0,0 +1,6 @@ +You can choose between 2 detection methods for the Freshness monitor- Automatic and Manual. +- **Automatic** - Elementary uses machine learning models to detect anomalies in the data freshness. This is the default setting. You can change the sensitivity level to *Low*, *Medium*, or *High*. +For each level, you will see a simulation of the change impact on the latest result, and you can use the`Simulate Configuration` button to examine the change impact. +- **Manual** - You can set the SLA breach threshold for the freshness monitor manually. This is useful for assets that are updated regularly at the same time every day, hour or week. + +Freshness monitor configuration \ No newline at end of file diff --git a/docs/_snippets/cloud/features/anomaly-detection/volume-configuration.mdx b/docs/_snippets/cloud/features/anomaly-detection/volume-configuration.mdx new file mode 100644 index 000000000..13729e492 --- /dev/null +++ b/docs/_snippets/cloud/features/anomaly-detection/volume-configuration.mdx @@ -0,0 +1,3 @@ +- **Anomaly Direction** - Whether you want the monitor to fail on anomalous drops, spikes, or both. Default is both. +- **Sensitivity** - You can set the monitor's sensitivity levels to *Low*, *Medium*, or *High*. In the future, we plan to allow for more nuanced adjustments to this parameter. You can use the `Simulate Configuration` button for testing how the change will affect the monitor. +- **Detection Period** - The period in which the monitor look for anomalies. Default is the last 2 days. \ No newline at end of file diff --git a/docs/_snippets/cloud/features/data-health/data-health-intro.mdx b/docs/_snippets/cloud/features/data-health/data-health-intro.mdx new file mode 100644 index 000000000..c68644d11 --- /dev/null +++ b/docs/_snippets/cloud/features/data-health/data-health-intro.mdx @@ -0,0 +1,11 @@ +Once you start sharing data with downstream consumers and stakeholders one of the most important things that you want to create is trust. +Trust that the data that is being used is “healthy”. Imagine being a data analyst using a specific data asset but you constantly run into data quality issues. +You will eventually lose trust. + +This is why we created **data health scores** in Elementary. It is a way to share an overview of the health of your data assets. 
+ +To measure health we use an industry standard framework of [Data Quality Dimensions](/features/collaboration-and-communication/data-quality-dimensions#data-quality-dimensions). +These dimensions help assess the reliability of data in various business contexts. +Ensuring high-quality data across these dimensions is critical for accurate analysis, informed decision-making, and operational efficiency. + +To learn more, **watch the webinar** [**Measuring Data Health with Elementary**](https://www.elementary-data.com/webinar/measuring-data-health-with-elementary) diff --git a/docs/_snippets/cloud/features/data-health/data-quality-dimensions.mdx b/docs/_snippets/cloud/features/data-health/data-quality-dimensions.mdx new file mode 100644 index 000000000..47577b8d8 --- /dev/null +++ b/docs/_snippets/cloud/features/data-health/data-quality-dimensions.mdx @@ -0,0 +1,20 @@ + + + Ensures that data is up to date and reflects the latest information. + + + Ensures all required data is available, without missing values. + + + Ensures that data represents the real-world scenario correctly. + + + The degree to which data remains uniform across multiple instances. + + + Ensures that each entity is represented only once and there are no duplicates. + + + Ensures that data conforms to rules or expectations, such as acceptable ranges or formats. + + \ No newline at end of file diff --git a/docs/_snippets/cloud/features/data-tests/benefits-dbt-tests.mdx b/docs/_snippets/cloud/features/data-tests/benefits-dbt-tests.mdx new file mode 100644 index 000000000..c44bafe6b --- /dev/null +++ b/docs/_snippets/cloud/features/data-tests/benefits-dbt-tests.mdx @@ -0,0 +1,10 @@ +dbt tests are very powerful. The ease of use, simplicity, and usefulness in the dev process is unmatched. +When you adopt any observability tool, you will still use dbt tests. This is why in Elementary, dbt tests are first class citizens. + +There are several benefits to this approach: + +- **Single interface for all observability** - Prevent the distribution of monitoring between different tools. All configuration is in code, all the results are in one interface. +- **Avoid duplicate work and vendor lock in** - The tests you implemented already are effective in Elementary, as well as additional configuration. The future tests you add will remain in your code if you decide to offboard. +- **Control of schedule and cost** - You have control of configuration and scheduling, tests can be executed when data is actually loaded and validation is needed. +- **Prevent bad data from propagating** - As tests are in pipeline, you can leverage `dbt build` and fail the pipeline on critical test failures. +- **Rich ecosystem** - The community of dbt users developes and supports various testing use cases. \ No newline at end of file diff --git a/docs/_snippets/cloud/features/data-tests/data-tests-cards.mdx b/docs/_snippets/cloud/features/data-tests/data-tests-cards.mdx new file mode 100644 index 000000000..9f562d0a0 --- /dev/null +++ b/docs/_snippets/cloud/features/data-tests/data-tests-cards.mdx @@ -0,0 +1,14 @@ + + + Native dbt tests such as `not_null`, `unique`, etc. + + + Tests of packages such as `dbt-expectations`, `dbt-utils`, etc. + + + Tests to validate an explicit business logic. + + + Schema tests by Elementary, implemented as dbt tests. 
+ + \ No newline at end of file diff --git a/docs/_snippets/cloud/features/data-tests/dbt-test-hub.mdx b/docs/_snippets/cloud/features/data-tests/dbt-test-hub.mdx new file mode 100644 index 000000000..412e1c700 --- /dev/null +++ b/docs/_snippets/cloud/features/data-tests/dbt-test-hub.mdx @@ -0,0 +1,4 @@ +To help you find the test that is right for your use case, we created the [dbt Test Hub](https://www.elementary-data.com/dbt-test-hub). +It's a searchable catalog of all the tests supported in Elementary, with their descriptions and example use cases. + +The tests are also segmented to use cases, so you can easily find the different options for addressing your detection use case. \ No newline at end of file diff --git a/docs/_snippets/cloud/how-it-works.mdx b/docs/_snippets/cloud/how-it-works.mdx new file mode 100644 index 000000000..941bef9a2 --- /dev/null +++ b/docs/_snippets/cloud/how-it-works.mdx @@ -0,0 +1,12 @@ +1. You install the Elementary dbt package in your dbt project and configure it to write to it's own schema, the Elementary schema. +2. The package writes test results, run results, logs and metadata to the Elementary schema. +3. The cloud service only requires `read access` to the Elementary schema, not to schemas where your sensitive data is stored. +4. The cloud service connects to sync the Elementary schema using an **encrypted connection** and a **static IP address** that you will need to add to your allowlist. + + + Elementary cloud security + \ No newline at end of file diff --git a/docs/_snippets/cloud/integrations/cards-groups/alerts-destination-cards.mdx b/docs/_snippets/cloud/integrations/cards-groups/alerts-destination-cards.mdx index bf9a777ac..25f4577ce 100644 --- a/docs/_snippets/cloud/integrations/cards-groups/alerts-destination-cards.mdx +++ b/docs/_snippets/cloud/integrations/cards-groups/alerts-destination-cards.mdx @@ -51,6 +51,14 @@ } > + + } + > + - - - - } - > - Click for details - - - + } > - Click for details - - + } > - Click for details - - + + + + + } > - Click for details - - + } > - Click for details - - + } > - Click for details diff --git a/docs/_snippets/cloud/integrations/cards-groups/cloud-integrations-cards.mdx b/docs/_snippets/cloud/integrations/cards-groups/cloud-integrations-cards.mdx index 68ba9c32b..121020434 100644 --- a/docs/_snippets/cloud/integrations/cards-groups/cloud-integrations-cards.mdx +++ b/docs/_snippets/cloud/integrations/cards-groups/cloud-integrations-cards.mdx @@ -18,6 +18,6 @@ -### Communication and collaboration +### Alerts & incidents \ No newline at end of file diff --git a/docs/_snippets/cloud/integrations/cards-groups/code-repo-cards.mdx b/docs/_snippets/cloud/integrations/cards-groups/code-repo-cards.mdx index bcc19ad1f..f2efe1822 100644 --- a/docs/_snippets/cloud/integrations/cards-groups/code-repo-cards.mdx +++ b/docs/_snippets/cloud/integrations/cards-groups/code-repo-cards.mdx @@ -23,8 +23,8 @@ href="/cloud/integrations/code-repo/gitlab" icon={ } > + + + } + > + + + + + + + + + + + + + } + > - + \ No newline at end of file diff --git a/docs/_snippets/cloud/integrations/postgres.mdx b/docs/_snippets/cloud/integrations/postgres.mdx index 22b5a5d24..9e7b7fd5f 100644 --- a/docs/_snippets/cloud/integrations/postgres.mdx +++ b/docs/_snippets/cloud/integrations/postgres.mdx @@ -14,3 +14,8 @@ Provide the following fields: - **Password**: The password associated with the provided user. + + +### Connect via SSH tunnel + +Elementary supports connecting via SSH or reverse SSH tunnel. 
Reach out to our team for details and support in this deployment. \ No newline at end of file diff --git a/docs/_snippets/cloud/integrations/redshift.mdx b/docs/_snippets/cloud/integrations/redshift.mdx index b044ec531..3604ce7a0 100644 --- a/docs/_snippets/cloud/integrations/redshift.mdx +++ b/docs/_snippets/cloud/integrations/redshift.mdx @@ -14,3 +14,8 @@ Provide the following fields: - **Password**: The password associated with the provided user. + + +### Connect via SSH tunnel + +Elementary supports connecting via SSH or reverse SSH tunnel. Reach out to our team for details and support in this deployment. \ No newline at end of file diff --git a/docs/_snippets/cloud/integrations/repo-connection-settings.mdx b/docs/_snippets/cloud/integrations/repo-connection-settings.mdx new file mode 100644 index 000000000..b017d0b3e --- /dev/null +++ b/docs/_snippets/cloud/integrations/repo-connection-settings.mdx @@ -0,0 +1,12 @@ +After the authentication, you need to fill in the following details: +- **Repository** - The full name of the code repo. +- _Optional_ **Environment base branch** - If you want Elementary to open PRs in a target branch different than default, detail the branch name here. +- _Optional_ **Project path** - If your dbt project isn't on the root directory of the repo, detail it's path here. +- _Optional_ **Update token** - When the github token expires, regenerate a fine-grained token and paste it here. + + + Repository connection settings + \ No newline at end of file diff --git a/docs/_snippets/cloud/integrations/snowflake.mdx b/docs/_snippets/cloud/integrations/snowflake.mdx index 6eb762ada..6a2ba656b 100644 --- a/docs/_snippets/cloud/integrations/snowflake.mdx +++ b/docs/_snippets/cloud/integrations/snowflake.mdx @@ -12,7 +12,7 @@ Provide the following fields: - **Elementary schema**: The name of your Elementary schema. Usually `[schema name]_elementary`. - **Role (optional)**: e.g. `ELEMENTARY_ROLE`. -Elementary cloud supports the user password and key pair authentication connection methods. +Elementary Cloud supports the user password and key pair authentication connection methods. - **User password**: - User: The user created for Elementary. diff --git a/docs/_snippets/faq/question-disable-elementary-models.mdx b/docs/_snippets/faq/question-disable-elementary-models.mdx index 407e3ee2d..22bfe4276 100644 --- a/docs/_snippets/faq/question-disable-elementary-models.mdx +++ b/docs/_snippets/faq/question-disable-elementary-models.mdx @@ -1,6 +1,6 @@ -Elementary only needs you to run the models once after you install, and on upgrades of minor versions (like 0.7.X -> 0.8.X). +Elementary only needs you to run the models once after you install, and on upgrades of minor versions (like 0.15.X -> 0.16.X). On such upgrades we make schema changes, so we need you to rebuild the tables. For excluding the elementary models from your runs we suggest 2 options: @@ -20,10 +20,14 @@ models: +enabled: "{{ var('enable_elementary_models', false) }}" ``` -- When you upgrade elementary run: +You will run the Elementary models explicitly in one of two cases: +1. When you upgrade elementary run +2. If you choose to disable Elementary models from your runs and want to update them at your own time. 
+ +To run Elementary models: ```shell -dbt run --select elementary --vars {enable_elementary_models: true} +dbt run --select elementary --vars '{"enable_elementary_models": true}' ``` diff --git a/docs/_snippets/guides/alerts-code-configuration.mdx b/docs/_snippets/guides/alerts-code-configuration.mdx index 559f0b662..5e8c98edc 100644 --- a/docs/_snippets/guides/alerts-code-configuration.mdx +++ b/docs/_snippets/guides/alerts-code-configuration.mdx @@ -1,14 +1,14 @@ You can enrich your alerts by adding properties to tests, models and sources in your `.yml` files. -The supported attributes are: [owner](./alerts-configuration/#owner), -[subscribers](./alerts-configuration#subscribers), -[description](./alerts-configuration#test-description), -[tags](./alerts-configuration#tags). +The supported attributes are: [owner](./alerts-code-configuration/#owner), +[subscribers](./alerts-code-configuration#subscribers), +[description](./alerts-code-configuration#test-description), +[tags](./alerts-code-configuration#tags). You can configure and customize your alerts by configuring: -[custom channel](./alerts-configuration#custom-channel), -[suppression interval](./alerts-configuration#suppression-interval), -[alert fields](./alerts-configuration#alert-fields)(for test alerts only), [alert grouping](./alerts-configuration#group-alerts-by-table), -[alert filters](./alerts-configuration#filter-alerts). +[custom channel](./alerts-code-configuration#custom-channel), +[suppression interval](./alerts-code-configuration#suppression-interval), +[alert fields](./alerts-code-configuration#alert-fields)(for test alerts only), [alert grouping](./alerts-code-configuration#group-alerts-by-table), +[alert filters](./alerts-code-configuration#filter-alerts). ## Alert properties in `.yml` files @@ -45,174 +45,19 @@ Elementary prioritizes configuration in the following order: #### Owner -Elementary enriches alerts with [owners for models or tests](https://docs.getdbt.com/reference/resource-configs/meta#designate-a-model-owner)). - -- If you want the owner to be tagged on slack use '@' and the email prefix of the slack user (@jessica.jones to tag jessica.jones@marvel.com). -- You can configure a single owner or a list of owners (`["@jessica.jones", "@joe.joseph"]`). - - - -```yml model -models: - - name: my_model_name - meta: - owner: "@jessica.jones" -``` - -```yml test -tests: - - not_null: - meta: - owner: ["@jessica.jones", "@joe.joseph"] -``` - -```yml test/model config block -{{ config( - tags=["Tag1","Tag2"] - meta={ - "description": "This is a description", - "owner": "@jessica.jones" - } -) }} -``` - -```yml dbt_project.yml -models: - path: - subfolder: - +meta: - owner: "@jessica.jones" - -tests: - path: - subfolder: - +meta: - owner: "@jessica.jones" -``` - - + #### Subscribers -If you want additional users besides the owner to be tagged on an alert, add them as subscribers. - -- If you want the subscriber to be tagged on slack use '@' and the email prefix of the slack user (@jessica.jones to tag jessica.jones@marvel.com). -- You can configure a single subscriber or a list (`["@jessica.jones", "@joe.joseph"]`). 
- - - -```yml model -models: - - name: my_model_name - meta: - subscribers: "@jessica.jones" -``` - -```yml test -tests: - - not_null: - meta: - subscribers: ["@jessica.jones", "@joe.joseph"] -``` - -```yml test/model config block -{{ config( - meta={ - "subscribers": "@jessica.jones" - } -) }} -``` - -```yml dbt_project.yml -models: - path: - subfolder: - +meta: - subscribers: "@jessica.jones" - -tests: - path: - subfolder: - +meta: - subscribers: "@jessica.jones" -``` - - + #### Test description -Elementary supports configuring description for tests that are included in alerts. -It's recommended to add an explanation of what does it mean if this test fails, so alert will include this context. - - - -```yml test -tests: - - not_null: - meta: - description: "This is the test description" -``` - -```yml test config block -{{ config( - tags=["Tag1","Tag2"] - meta={ - description: "This is the test description" - } -) }} -``` - -```yml dbt_project.yml -tests: - path: - subfolder: - +meta: - description: "This is the test description" -``` - - + #### Tags -You can use [tags](https://docs.getdbt.com/reference/resource-configs/tags) to provide context to your alerts. - -- You can tag a group or a channel in a slack alert by adding `#channel_name` as a tag. -- Tags are aggregated,so a test alert will include both the test and the parent model tags. - - - -```yml model -models: - - name: my_model_name - tags: ["#marketing", "#data_ops"] -``` - -```yml test -tests: - - not_null: - tags: ["#marketing", "#data_ops"] -``` - -```yml test/model config block -{{ config( - tags=["#marketing", "#data_ops"] - } -) }} -``` - -```yml dbt_project.yml -models: - path: - subfolder: - tags: ["#marketing", "#data_ops"] - -tests: - path: - subfolder: - tags: ["#marketing", "#data_ops"] -``` - - + ### Alerts distribution @@ -426,25 +271,10 @@ tests: -## Alerts global configuration - -#### Enable/disable alerts - -You can choose to enable / disable alert types by adding a var to your `dbt_project.yml`. - Vars will be deprecated soon! For OSS users, we recommend filtering the alerts + Alert vars are deprecated! We recommend filtering the alerts using [CLI selectors](/oss/guides/alerts/alerts-configuration#alerts-cli-flags) instead. -Below are the available vars and their default config: - -```yml dbt_project.yml -vars: - disable_model_alerts: false - disable_test_alerts: false - disable_warn_alerts: false - disable_skipped_model_alerts: true - disable_skipped_test_alerts: true -``` diff --git a/docs/_snippets/guides/collect-job-data.mdx b/docs/_snippets/guides/collect-job-data.mdx index 85b8e38ea..b221413ea 100644 --- a/docs/_snippets/guides/collect-job-data.mdx +++ b/docs/_snippets/guides/collect-job-data.mdx @@ -15,7 +15,7 @@ The goal is to provide context that is useful to triage and resolve data issues, - The ID of a specific run execution: `job_run_id` - Job run results URL: `job_run_url` -## How Elementary collects jobs metadata? +## How Elementary collects jobs metadata #### Environment variables @@ -31,7 +31,7 @@ To configure `env_var` for your orchestrator, refer to your orchestrator's docs. Elementary also supports passing job metadata as dbt vars. If `env_var` and `var` exist, the `var` will be prioritized. 
-To pass job data to elementary using `var`, use the `--vars` flag in your invocations: +To pass job data to Elementary using `var`, use the `--vars` flag in your invocations: ```shell dbt run --vars '{"orchestrator": "Airflow", "job_name": "dbt_marketing_night_load"}' @@ -57,7 +57,7 @@ The following default environment variables are supported out of the box: | Github actions | orchestrator
job_run_id: `GITHUB_RUN_ID`
job_url: generated from `GITHUB_SERVER_URL`, `GITHUB_REPOSITORY`, `GITHUB_RUN_ID` | | Airflow | orchestrator | -## What if I use dbt cloud + orchestrator? +## What if I use dbt Cloud + orchestrator? By default, Elementary will collect the dbt cloud jobs info. If you wish to override that, change your dbt cloud invocations to pass the orchestrator job info using `--vars`: diff --git a/docs/_snippets/guides/dbt-source-freshness.mdx b/docs/_snippets/guides/dbt-source-freshness.mdx index 85bc288c3..a237b7304 100644 --- a/docs/_snippets/guides/dbt-source-freshness.mdx +++ b/docs/_snippets/guides/dbt-source-freshness.mdx @@ -1,10 +1,20 @@ -Unlike dbt and Elementary tests, the results of the command `dbt source-freshness` are not automatically collected. -You can collect the results using Elementary CLI tool. +For users of dbt version 1.8 and above -If dbt source freshness results are collected, they will be presented in the UI, and in alerts upon failure. +Add the following flag to your `dbt_project.yml` file: +```yaml dbt_project.yml +flags: + source_freshness_run_project_hooks: True +``` + +This flag enables Elementary to automatically collect `source-freshness` results, just like any other test results. + +For dbt version under 1.8 + +In dbt versions lower than 1.8, the results of the command `dbt source-freshness` are not automatically collected. +You can collect the results using the Elementary CLI tool. -## Collect source freshness failures +If dbt source freshness results are collected, they will be presented in the UI, and in alerts upon failure. #### dbt core users @@ -21,7 +31,7 @@ This operation will upload the results to a table, and the execution of `edr mon - Note that `dbt source freshness` and `upload-source-freshness` needs to run from the same machine. - Note that `upload-source-freshness` requires passing `--project-dir` argument. -#### dbt cloud users +#### dbt Cloud users -The results can't be collected from dbt cloud. +The results can't be collected from dbt Cloud. Here is a [suggestion from an Elementary user](https://elementary-community.slack.com/archives/C02CTC89LAX/p1688113609829869) for a solution you can implement. \ No newline at end of file diff --git a/docs/_snippets/install-cli.mdx b/docs/_snippets/install-cli.mdx index 4e88b84d0..214d87b8d 100644 --- a/docs/_snippets/install-cli.mdx +++ b/docs/_snippets/install-cli.mdx @@ -20,4 +20,4 @@ pip install 'elementary-data[trino]' Run `edr --help` in order to ensure the installation was successful. -If you're receiving `command not found: edr` please check our [troubleshooting guide](/general/troubleshooting). +If you're receiving `command not found: edr` please check our [troubleshooting guide](/oss/general/troubleshooting). diff --git a/docs/_snippets/oss/oss-introduction-opening.mdx b/docs/_snippets/oss/oss-introduction-opening.mdx deleted file mode 100644 index 3574e16e5..000000000 --- a/docs/_snippets/oss/oss-introduction-opening.mdx +++ /dev/null @@ -1 +0,0 @@ -Elementary OSS is a CLI tool you can deploy and orchestrate to send Slack alerts and self-host the Elementary report. diff --git a/docs/_snippets/oss/oss-introduction.mdx b/docs/_snippets/oss/oss-introduction.mdx deleted file mode 100644 index ae893786b..000000000 --- a/docs/_snippets/oss/oss-introduction.mdx +++ /dev/null @@ -1,40 +0,0 @@ -### CLI Guides - - - - - - - - -
- - - Demo - - -### Supported adapters - - diff --git a/docs/_snippets/products-cards.mdx b/docs/_snippets/products-cards.mdx index afd9cefcb..c3bccb528 100644 --- a/docs/_snippets/products-cards.mdx +++ b/docs/_snippets/products-cards.mdx @@ -5,18 +5,18 @@ Read about the key features and product offerings: title="Key Features" icon="stars" iconType="solid" - href="/key-features" + href="/cloud/introduction" > diff --git a/docs/_snippets/quickstart-package-install.mdx b/docs/_snippets/quickstart-package-install.mdx index 79be05758..abec5321b 100644 --- a/docs/_snippets/quickstart-package-install.mdx +++ b/docs/_snippets/quickstart-package-install.mdx @@ -39,7 +39,7 @@ Some packages we recommend you check out: [dbt_utils](https://github.com/dbt-lab ```yml packages.yml packages: - package: elementary-data/elementary - version: 0.15.2 + version: 0.16.4 ## Docs: https://docs.elementary-data.com ``` @@ -97,9 +97,10 @@ Some packages we recommend you check out: [dbt_utils](https://github.com/dbt-lab ## To disable elementary for dev, uncomment this: # enabled: "{{ target.name in ['prod','analytics'] }}" - # Required from dbt 1.8 and above for certain Elementary features (please see more details above) + # Required from dbt 1.8 and above for certain Elementary features flags: - require_explicit_package_overrides_for_builtin_materializations: false + require_explicit_package_overrides_for_builtin_materializations: False + source_freshness_run_project_hooks: True ``` diff --git a/docs/_snippets/quickstart/quickstart-cards.mdx b/docs/_snippets/quickstart/quickstart-cards.mdx index 5d4ecca61..ba2dbbb0b 100644 --- a/docs/_snippets/quickstart/quickstart-cards.mdx +++ b/docs/_snippets/quickstart/quickstart-cards.mdx @@ -3,7 +3,7 @@ title="Elementary Cloud Platform" icon="cloud" iconType="solid" - href="https://elementary-data.frontegg.com/oauth/account/sign-up" + href="/cloud/introduction" >
Built on top of the OSS package, ideal for teams monitoring mission-critical data pipelines, requiring guaranteed uptime and reliability, short time-to-value, advanced features, collaboration, and professional support. @@ -12,7 +12,6 @@
Integrations: @@ -26,7 +25,6 @@




Integrations: diff --git a/docs/_snippets/setup-teams-integration.mdx b/docs/_snippets/setup-teams-integration.mdx index 9780e3c97..dd9aa0a70 100644 --- a/docs/_snippets/setup-teams-integration.mdx +++ b/docs/_snippets/setup-teams-integration.mdx @@ -34,11 +34,13 @@ Call it `Elementary` (or whatever you prefer) and connect it to the workspace of -Now it is time to setup the webhook for this channel. +Now it's time to set up a webhook. You have two options for creating a webhook: - + -## Create a webhook +## Create a webhook using Connectors + +**Note:** Microsoft 365 Connectors are set to be deprecated end of 2025. Consider using Power Automate Workflows (Option 2) for new integrations. Go to a channel in your Team and choose `Manage channel` @@ -61,6 +63,7 @@ Search for `Incoming webhook` and choose `Add`. alt="Teams add incoming webhook" /> + Choose `Add` again and add name your webhook `ElementaryWebhook` (or whatever you prefer). And `Create` the webhook. -Lastly, pass the webhook to the CLI as a param or in the `config.yml` file: + + +## Create a webhook using Power Automate + +You can create a webhook using Power Automate in two ways: + +### Method 1: Directly from Teams (Recommended) + +1. Go to your Teams channel +2. Click the three dots (...) next to the channel name +3. Select `Workflows` +4. Choose the template "Post to channel when a webhook request is received" +5. Copy the webhook URL + +### Method 2: From Power Automate Website + +1. Go to [Power Automate](https://flow.microsoft.com) +2. Create a new instant cloud flow +3. Search for "When a HTTP request is received" as your trigger +4. In the flow, add a "Post adaptive card in a chat or channel" action +5. Configure the team and channel where you want to post +6. Save the flow and copy the HTTP POST URL + +**Important Notes:** + +- When using Power Automate Workflows, Elementary CLI cannot directly verify if messages were successfully delivered. You'll need to monitor your workflow runs in Power Automate to check for any delivery issues. +- Workflows can't post in private channels as a flow bot, but can post on behalf of a user +- Workflows can only be created in your default environment + + + +Lastly, pass the webhook URL (from either method) to the CLI as a param or in the `config.yml` file: diff --git a/docs/best-practices/detection-and-coverage.mdx b/docs/best-practices/detection-and-coverage.mdx new file mode 100644 index 000000000..854169468 --- /dev/null +++ b/docs/best-practices/detection-and-coverage.mdx @@ -0,0 +1,150 @@ +--- +title: "Detection and coverage" +--- + +In Elementary you can detect data issues by combining data validations (as dbt tests, custom SQL) and anomaly detection monitors. + +As you expand your coverage, it's crucial to balance between coverage and meaningful detections. While it may seem attractive to implement extensive monitoring throughout your data infrastructure, this approach is often suboptimal. Excessive failures can lead to alerts fatigue, potentially causing them to overlook significant issues. Additionally, such approach will incur unnecessary compute costs. + +In this section we will cover the available tests in Elementary, recommended tests for common use cases, and how to use the data quality dimensions framework to improve coverage. + +## Supported data tests and monitors + +Elementary detection includes: + +- Data tests - Validate an explicit expectation, and fail if it is not met. + - Example: validate there are no null values in a column. 
- Anomaly detection monitors - Track a data quality metric over time, and fail if there is an anomaly compared to previous values and the trend.
  - Example: track the rate of null values in a column over time, fail if there is a spike.

### Data tests

- dbt tests - Built-in dbt tests (`not_null`, `unique`, `accepted_values`, `relationship`)
- dbt packages - Any dbt package test, we recommend installing `dbt-utils` and `dbt-expectations`.
- Custom SQL tests - Custom query, will pass if no results and fail if any results are returned.

### Anomaly detection monitors

Elementary offers two types of anomaly detection monitors:

- **Automated Monitors** - Out-of-the-box volume and freshness monitors activated automatically, that query metadata only.
- **Opt-in anomaly detection tests** - Monitors that query raw data and require configuration.

### Recommendations

- Deploy the packages dbt-utils and dbt-expectations in your dbt projects, to enrich your available tests
- Refer to the [dbt test hub](https://www.elementary-data.com/dbt-test-hub) by Elementary, to explore available tests by use case

## Fine-tuning automated monitors

As soon as you connect Elementary Cloud Platform to your data warehouse, a backfill process will begin to collect historical metadata. Within an average of a few hours, your automated monitors will be operational. By default, Elementary collects at least 21 days of historical metadata.

You can fine-tune the [**configuration**](https://docs.elementary-data.com/features/anomaly-detection/monitors-configuration) and [**provide feedback**](https://docs.elementary-data.com/features/anomaly-detection/monitors-feedback) to adjust the detection to your needs.

You can read here about how to interpret the results, and what the available settings of each monitor are:

- [Automated Freshness](https://docs.elementary-data.com/features/anomaly-detection/automated-freshness)
- [Automated Volume](https://docs.elementary-data.com/features/anomaly-detection/automated-volume)

## Common testing use cases

We have the following recommendations for testing different data assets:

### Data sources

To detect issues in source updates, you should monitor volume, freshness and schema:

- Volume and freshness
  - Data updates - Elementary Cloud provides automated monitors for freshness and volume. **These are metadata monitors.**
  - Updates freshness vs. data freshness - The automated freshness monitor will detect delays in **updates**. However, sometimes the update will be on time, but the data itself will be outdated.
  - Data freshness (advanced) - Sometimes a table can update on time, but the data itself will be outdated. If you want to validate the freshness of the raw data by relying on the actual timestamp, you can use:
    - Explicit threshold [freshness dbt tests](https://www.elementary-data.com/dbt-test-hub) such as `dbt_utils.recency`, or [dbt source freshness](https://docs.getdbt.com/docs/deploy/source-freshness).
    - Elementary `event_freshness_anomalies` to detect anomalies.
  - Data volume (advanced) - Although a table can be updated as expected, the data itself might still be imbalanced in terms of volume per specific segment. There are several tests available to monitor that:
    - Explicit [volume expectations](https://www.elementary-data.com/dbt-test-hub) such as `expect_table_row_count_to_be_between`.
+ - Elementary `dimension_anomalies` , that will count rows grouped by a column or combination of columns and can detect drops or spikes in volume in specific subsets of the data. +- Schema changes + + - Automated schema monitors are coming soon: + - These monitors will detect breaking changes to the schema only for columns being consumed based on lineage. + - For now, we recommend defining schema tests on the sources consumed by downstream staging models. + + Some validations on the data itself should be added in the source tables, to test early in the pipeline and detect when data is arriving with an issue from the source. + + - Low cardinality columns / strict set of values - If there are fields with a specific set of values you expect use `accepted_values`. If you also expect a consistency in ratio of these values, use `dimension_anomalies` and group by this column. + - Business requirements - If you are aware of expectations specific to your business, try to enforce early to detect when issues are at the source. Some examples: `expect_column_values_to_be_between`, `expect_column_values_to_be_increasing`, `expect-column-values-to-have-consistent-casing` + + +### Recommendations + +- Add data freshness and volume validations for relevant source tables, on top of the automated monitors (advanced) +- Add schema tests for source tables + + +### Primary / foreign key columns in your transformation models + +Tables should be covered with: + +- Unique checks on primary / foreign key columns to detect unnecessary duplications during data transformations. +- Not null checks on primary / foreign key columns to detect missing values during data transformations. + +For incremental tables, it’s recommended to use a `where` clause in the tests, and only validate recent data. This will prevent running the tests on large data sets which is costly and slow. + + +#### Recommendations + +- Add `unique` and `not_null` tests to key columns + + +### Public tables + +As these are your data products, coverage here is highly important. + +- Consistency with sources (based on aggregation/primary keys) +- Volume and freshness +- Unique and not null checks on primary keys +- Schema to ensure the "API" to data consumers is not broken +- Business Metrics / KPIs + - Sum / max anomalies group by your critical dimensions / segments (For example - country, platform…) + +### Data quality dimensions framework + +To ensure your detection and coverage have a solid baseline, we recommend leveraging the quality dimensions framework for your critical and public assets. + +The quality dimensions framework divides data validation into six common dimensions: + +- **Completeness**: No missing values, empty values, nulls, etc. +- **Uniqueness**: The data is unique, with no duplicates. +- **Freshness**: The data is up to date and within the expected SLAs. +- **Validity**: The data is in the correct format and structure. +- **Accuracy**: The data adheres to our business requirements and constraints. +- **Consistency**: The data is consistent from sources to targets, and from sources to where it is consumed. + +Elementary has already categorized all the existing tests in the dbt ecosystem, including all elementary anomaly detection monitors, into these quality dimensions and provides health scores per dimension automatically. It also shows if there are coverage gaps per dimension. + +We highly recommend going to the relevant quality dimension, then filtering by a business domain tag to see your coverage gaps in that domain. 
+ +Example - + +![Data health dashboard](https://res.cloudinary.com/diuctyblm/image/upload/v1738149955/Docs/data-health-dashboard_czfhhp.webp) + +In this example, you can see that accuracy tests are missing for our sales domain. This means we don't know if the data in our public-facing "sales" tables adheres to our business constraints. For example, if we have an e-commerce shop where no product has a price below $100 or above $1000, we can easily add a test to validate this. Implementing validations for the main constraints in this domain will allow us to get a quality score for the accuracy level of our data. + +NOTE: The `Test Coverage` page in Elementary allows adding any dbt test from the ecosystem, Elementary anomaly detection monitors, and custom SQL tests. We are working on making it easier to add tests by creating a test catalog organized by quality dimensions and common use cases. + +Example for tests in each quality dimension - + +- **Completeness**: + - not_null, null count, null percent, missing values, empty values, column anomalies on null count, null percent, etc +- **Uniqueness**: + - unique, expect_column_values_to_be_unique, expect_column_unique_value_count_to_be_between, expect_compound_columns_to_be_unique +- **Freshness**: The data is up to date and within the expected SLAs. + - Elementary automated freshness monitor, dbt source freshness, dbt_utils.recency, expect_grouped_row_values_to_have_recent_data +- **Validity**: The data is in the correct format and structure. + - expect_column_values_to_match_regex, expect_column_min_to_be_between, expect_column_max_to_be_between, expect_column_value_lengths_to_be_between, column anomalies on min, max, string lengths +- **Accuracy**: The data adheres to our business requirements and constraints. + - expression_is_true, custom SQL +- **Consistency**: The data is consistent from sources to targets, and from sources to where it is consumed. + - relationship, expect_table_row_count_to_equal_other_table, expect_table_aggregation_to_equal_other_table \ No newline at end of file diff --git a/docs/best-practices/governance-for-observability.mdx b/docs/best-practices/governance-for-observability.mdx new file mode 100644 index 000000000..fc16f438c --- /dev/null +++ b/docs/best-practices/governance-for-observability.mdx @@ -0,0 +1,134 @@ +--- +title: "Governance for observability" +--- + +For an effective data observability process, it’s recommended to establish clear ownership, priorities and segmentation of data assets. This structure enhances governance, speeds up issue resolution, and improves data health tracking. + +Segmenting assets organizes data into manageable units, making monitoring and triage easier. Ownership ensures accountability, with specific individuals responsible for quality and response to incidents. + +## Introduction to tags, owners and subscribers + +### Tags + +As your data platform evolves and more people are maintaining it, structure and context become significantly more important. Tags are a great tool to create that context, and segment your data assets by business domains, data products, priority, etc. + +In Elementary tags are automatically included in alerts, and you can create rules to distribute alerts to different channels by tag. Additionally, different views in the platform can be filtered by tag, and provide a view for a subset of your data assets. + +- Tags for tables can be added in code at the model or folder level, and the `tags` key. 
- It's recommended to leverage the dbt directory hierarchy to set tags for entire directories (in the dbt_project.yml). Tags are aggregated, so if a specific model under the directory has a different tag, the model will have both tags.

```yaml
models:
  analytics:
    marketing:
      +tags: marketing
      public:
        +tags: marketing-public
```

- Tags for tests can be added in code or in the Elementary UI when adding a test.

### Owners and subscribers

The best method to reduce time to response when there is a data issue is having a clear owner that is in charge of initial triage and accountable for the asset health. In Elementary, owners are automatically tagged in alerts. Additionally, different views in the platform can be filtered by owner.

A data asset or test should have only one owner, but other people might want to be notified of issues. These people can be listed as subscribers, and will be automatically tagged in alerts.

- If you use a valid Slack / MS Teams user as owner / subscriber, they will be tagged in alerts.
- The owner of an asset should be the person / team that is expected to respond to an issue in that asset.
- If there are specific tests or monitors that are relevant to other people, they can be the owners of these tests.
  For example: A data engineer is the owner of a model and will be notified of freshness, volume, and data validation issues. A data analyst added some custom SQL tests to validate business logic on this model, and they own these tests.
- It's recommended to leverage the dbt directory hierarchy to set owners for entire directories (in the dbt_project.yml). Owners are unique, so an owner defined on a model overrides the directory configuration. (Subscribers are aggregated.)

```yaml
models:
  - name: return_on_ad_spend
    tags:
      - marketing-public
      - marketing
    meta:
      owner: "@analytics.engineer"
      subscribers:
        - "@marketing.data.analyst"
        - "@another.marketing.data.analyst"
```

## Business domains & Data products

- We recommend configuring the following tags for models:
  - **Business domains** - These tags should be useful to understand what is the business context of the asset, and for stakeholders to filter and view the status of assets relevant to their business unit. Relevant examples are tags such as: `product-analytics`, `marketing`, `finance`, etc.
  - **Data products** - Public tables that are exposed as "data products" to data consumers. These are the most important tables within a specific domain, similar to an API for an application. Public tables are usually the interface and focal point between analytics engineers and data analysts. It's crucial for both to be aware of any data issues in these tables. Relevant examples are tags such as: `product-analytics-public`, `marketing-public`, `data-science-public`, etc.
  - Another possible implementation is using 3 types of tags:
    - `marketing-internal` for all internal transformations on marketing data.
    - `marketing-public` for all public-facing marketing data.
    - `marketing` for all marketing-related data assets.
- **Owners and subscribers**
  - Make sure to have clear ownership defined for all your public-facing tables. We also recommend adding subscribers to the relevant public tables.
  - Usually, the owners of these public tables are the analytics engineering team, and the subscribers are the relevant data analysts who rely on the data from these tables (see the sketch below).
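Putting the tagging and ownership conventions above together, a folder-level configuration could look like the following sketch. It is illustrative only - the `analytics` / `marketing` folder structure and the user handles are placeholders, not a required layout:

```yaml
models:
  analytics:
    marketing:
      # everything in the marketing domain gets the domain tag
      +tags: marketing
      internal:
        # internal transformations
        +tags: marketing-internal
      public:
        # public-facing "data product" tables
        +tags: marketing-public
        +meta:
          owner: "@analytics.engineer"
          subscribers: ["@marketing.data.analyst"]
```

Because tags are aggregated and owners can be set at the directory level, every public marketing model in this sketch carries both the domain and data-product tags, a clear owner, and subscribers, without per-model configuration.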
+ + +### Recommendations + +- Add business domain tags to public tables +- Define owners for public facing tables +- Add data consumers as subscribers to relevant public facing tables + + + + +## Priorities (optional) + +Another useful tagging convention can be to set a tag that filters a subset of assets by their priority, so you could establish a process of response to issues with higher criticality. + +Decide how many levels of priority you wish to maintain, and implement by adding a `critical` tag to your critical assets, or create a `P0`, `P1` , `P2` tags for several priority levels. + +This will enable you to filter the results in Elementary by priority, and establish workflows such as sending `critical` alerts to Pagerduty, and the rest to Slack. + + +### Recommendations + +- Add priorities / critical tags to tables / tests (Optional) +- Add owners to all top priority tables / tests (Optional) + + +## Data sources + +Many data issues are a result of a problem in the source data, so effectively monitoring source tables is significant to your pipeline health. + +Use tags to segment your source tables: + +- If multiple source tables are loaded from the same source, we recommend grouping them by tags, such as: `mongo-db-replica`, `salesforce`, `prod-postgres`, etc. +- To make triage easier, you can also add tags of the ingestion system, such as: `fivetran`, `airflow` , `airbyte` , `kafka` , etc. + +Ownership and subscribers: + +- Usually, sources are managed by data engineers and analytics engineers are their consumers. One common way to manage this is to set data engineers as the owners and analytics engineering team members as the subscribers. + +```yaml +sources: + - name: fivetran_salesforce_sync + tags: + - fivetran + - salesforce + meta: + owner: :"@data.engineer" + subscribers: "@analytics.engineer" +``` + + +### Recommendations + +- Add tags to source tables that describe the source system and / or ingestion method +- Add owners and subscribers to source tables + + +## Recommendations + +- Add business domain tags to public tables +- Define owners for public facing tables +- Add data consumers as subscribers to relevant public facing tables +- (Optional) Add priorities / critical tags to tables / tests +- (Optional) Add owners to all top priority tables / tests +- Add tags to source tables that describe the source system and / or ingestion method +- Add owners and subscribers to source tables \ No newline at end of file diff --git a/docs/best-practices/introduction.mdx b/docs/best-practices/introduction.mdx new file mode 100644 index 000000000..ee9012a66 --- /dev/null +++ b/docs/best-practices/introduction.mdx @@ -0,0 +1,31 @@ +--- +title: "Elementary Best Practices" +sidebarTitle: "Introduction" +--- + +The goal of this collection of guides is to help you effectively implement and use Elementary. We'll cover best practices and provide practical tips to enhance your governance, detection, +coverage, response and collaboration. + +Whether you're new to Elementary or looking to optimize your current usage, these guides will help you leverage its full potential to improve your data +reliability. 
+ + + + + + \ No newline at end of file diff --git a/docs/best-practices/triage-and-response.mdx b/docs/best-practices/triage-and-response.mdx new file mode 100644 index 000000000..4ba4bf310 --- /dev/null +++ b/docs/best-practices/triage-and-response.mdx @@ -0,0 +1,195 @@ +--- +title: "Triage & response" +--- + +Maintaining high data quality is much more than adding tests - It’s about creating processes. + +The processes that will improve your data quality, reduce response times, and prevent repeating incidents have to do with: + +- Clear ownership and response plan +- Incident management +- Effective triage and resolution +- Ending incidents with improvements, not just resolution + +Elementary has tools in place to help, and this guide is meant to help get as much value as possible from Elementary in the process of handling data incidents. + +## Plan the response in advance + +Your response to a data incident doesn’t actually start when the failure happens. An effective response starts when you add a test / monitor / dataset. + +For every test or monitor you add, think about the following - + +- Who should look into a failure? +- Who should be notified of the failure? +- What is the potential impact and severity of a failure? +- What information should the notification include? +- How to go about resolving the issue? What are the steps? + +According to these answers, you should add configuration that will impact the alert, alert distribution, and triage: + + + ### Recommendations + + - Add a test description that details what it means if this test fails, and context on resolving it. Descriptions can be added in UI or in code. + - Each failure should have an owner, that should look into the failure. It can be the owner of the data set or an owner of a specific test. + - If others need to be notified, add subscribers. + - Use the [severity of failures](https://docs.getdbt.com/reference/resource-configs/severity) intentionally, and even leverage conditional expressions (`error_if`, `warn_if`) + - Test failures and alerts include a sample of the failed results, and the test query. You can change the test query and / or add comments to it, that can provide triage context. + + +```yaml +tests: + - unique: + config: + error_if: ">10" + meta: + description: "More than 10 duplicate records arriving from the source, as this is a staging table" + owner: "@data.provider" + tags: "critical", "mongo-db", "raw-production-replica" +``` + +## Alert distribution + +As far as alerts are concerned, the desired situation is that team members will only get alerts they need to do something about - Fix the issue, wait for resolution to refresh a dashboard, etc. +Alert distribution can be configured in the [Alert rules](https://docs.elementary-data.com/features/alerts-and-incidents/alert-rules) feature. +The alerts can be distributed to different channels (within Slack / MS Teams) and to different tools (Pagerduty, Ops Genie, etc). +Elementary users usually distribute alerts by: + +1. Business domain tags - In teams where each domain has their own data teams, it’s recommended to have a separate Slack channel for alerts on that domain’s models. The domain alert rules are usually defined by tags. +2. Responsible team - For example, if there is a problem with null values in a Salesforce source, it makes sense to send the alert straight to the Salesforce team. These alert rules can be defined by model / source name, tag or owner. +3. 
Criticality - The most critical alerts are usually model error alerts, and handling them is critical because they block the pipeline. Since those issues are sometimes time sensitive, some teams choose to send them to PagerDuty or Opsgenie, or at least a dedicated Slack channel with different notification settings. +4. Low priority alerts / warnings - We generally recommend refraining from sending Slack alerts for failures that don’t have a clear response plan yet. These failures can either not be sent at all, or be sent to a muted channel that operates as a “feed”. + Such failures can be: 1. Newly configured anomaly detection tests or explicit tests where you have low certainty about the threshold / expectation. 2. Anomaly detection tests that you consider a safety measure, not a clear failure. + This is not to say that they are not interesting - but they can be investigated within the Elementary UI, using the incidents page, at a convenient time. We believe alerts are an interruption to the daily schedule, and such an interruption should only occur if it’s justified. To avoid getting such alerts, we recommend filtering your alert rules on “Failure” or “Error” statuses. + +## Notifying stakeholders + +There are several ways to notify data consumers and stakeholders about ongoing problems. +While some customers prefer to do it personally after triaging the incidents, others prefer saving this time and going with automated notifications. +For models intended for public consumption (by BI dashboards, ML models, etc.) we recommend setting up [subscribers](https://docs.elementary-data.com/oss/guides/alerts/alerts-configuration#subscribers). Those subscribers will be tagged in Slack on every alert that is sent on those tables. Unlike owners, there can be many subscribers to an alert. +Tagging subscribers is of course optional, and simply adding them to the relevant channels can also suffice. +Coming soon: +As part of the data health scores release, we will be supporting a new type of alert that notifies on a drop in the health score of an asset. This type of alert is intended for data consumers, who don’t need the details and just want a high-level notification in case the data asset shouldn’t be used. We will also support sending daily digests on all assets’ health scores. + +## Incident management + +Elementary has an incidents page; new failures will either create an incident or be attached to an open incident. +This page is designed to enable your team to stay on top of open incidents and collaborate on resolving them. The page gives a comprehensive overview of all current and previous incidents, where users can view the status, prioritize, assign and resolve incidents. +![Incident management dashboard](https://res.cloudinary.com/diuctyblm/image/upload/v1738149956/Docs/incident-management_up6jzx.png) + +### Incident management usability + +- Each incident has 3 settings: assignee, status and severity. + - These can be changed directly from the Slack notification. + - The severity is set to `high` for failures and `normal` for warnings. You can manually change it to `critical` or `low`. + - You can select several incidents and make changes to the settings in bulk. +- Failures of the same test / model with an open incident will not open a new incident; they will be added to the ongoing incident. + +### Incident management best practices + +- Your goal should be to lower the time to resolution of incidents. + - Incidents should have a clear assignee.
+ - Use the quick view of open and unassigned incidents to monitor this. + - The best implementation for this is pre-defining the assignee as the owner, so they will get tagged on the failure. + - Set clear expectations with assignees. + - These can be set based on the severity of incidents. For example: + - Critical - Should be handled immediately. + - High - Should be resolved by end of day. + - Normal - Should be resolved by end of week. + - Low - Should be evaluated weekly, might trigger a change in coverage. +- If no one cares about an incident, this should impact coverage. + +### Coming soon + +Incidents is a beta feature, and we are working on adding functionality. The immediate roadmap includes: + +- Notifications to assignees +- Mute / Snooze +- Advanced grouping of failures to incidents according to lineage (example: model failure + all downstream freshness and volume failures) +- Initiating triage from incident management (see picture) + +![An interface showing initiating triage from incident management](https://res.cloudinary.com/diuctyblm/image/upload/v1738149956/Docs/triage-response-via-incident-management_acjqow.png) + +## Triage incidents + +When triaging incidents, there are 4 steps to go through: + +1. Impact analysis - Although root cause analysis will lead you to resolve the issue, impact analysis should be done first. The reason is that the impact determines the criticality of the incident, and therefore the priority and response time. +2. Root cause analysis +3. Resolution +4. Post mortem - Quality learnings from incidents are how you improve over time, and reduce the time to resolution and frequency of future incidents. + +### Impact analysis + +The goal of doing an impact analysis is to determine the severity and urgency of the incident, and understand if you need to communicate the incident to consumers (if there isn’t a relevant alert rule). +These are the questions that should be asked, along with product tips on how to answer them with Elementary: + +- Was this a failure or just a warning? + - As long as you and your team are intentional in determining severities, this can help you focus on failures first. +- Does the incident break the pipeline / create a delay? + + - Is the failure a model failure or a freshness issue? + - Do we run `dbt build`, and did this failure stop the pipeline? + + - Check the **Model runs** section of the dashboard to see if there are skipped models, as failures in build cause the downstream models to be skipped. + + ![Model runs portion of the dashboard](https://res.cloudinary.com/diuctyblm/image/upload/v1738149955/Docs/dashboard-model-runs_zzgnd2.png) + + +- How important is the data asset? + - Check in the catalog or the node info section in the lineage if it has a tag like `critical`, `public` or a data product tag. You can also look at the description of the data asset, whether it’s a table or a column. +- Does the failure impact important downstream assets? Did the issue propagate to downstream assets? + + - A table might not be critical, but it’s upstream from a critical one, making it part of a critical path. + + - Check in the lineage if there are important downstream BI assets / public tables. To see the downstream assets you can navigate to the lineage directly from the test results, by clicking `view in lineage`. If the incident is a failed column test, you can filter only the downstream lineage of the specific column by clicking on `filter column`.
+ + ![Lineage filters](https://res.cloudinary.com/diuctyblm/image/upload/v1738149955/Docs/lineage-filters_ipjze3.png) + + - Use the lineage filters to color and highlight all the tables in a path that match your filtering criteria. + + ![Lineage filters showing the add filter interface](https://res.cloudinary.com/diuctyblm/image/upload/v1738149955/Docs/lineage-filters-2_cda4on.webp) + + - If there are downstream critical tables, you might want to check if the issue actually propagated to them. A quick way to do this is to copy the test query, and run it on downstream assets (by changing the referenced table and column). + +- What is the magnitude of the failure? How many failed results out of the total volume? + + - Most tests return the number of failed results. A failure in a `unique` test can be dramatic if it impacts many rows, but insignificant if there is just one case of duplicates. + + - You can see the total number of failures as part of the test result / alert. + - On the `Test performance` page, you can compare this number to previous failures of the same tests. + + ![Graph showing test performance](https://res.cloudinary.com/diuctyblm/image/upload/v1738149955/Docs/test-performance-graph_g5t5p5.png) + +## Root cause analysis + +If the incident is not important, we recommend resolving the incident and then removing / disabling the test. +If the incident is important, we need to start the investigation process and understand the root cause. Your failures would usually be caused by issues at the source, code changes, or an infrastructure issue. + +- Is there a data issue at the source? + - Check in the lineage and see if there are coverage and failures on upstream tables; you can use the lineage filters to limit the scope to relevant failures (if `not_null` failed, filter on `not_null` tests). + - Check the test result sample. If you want to see more results, copy the test SQL and run it in your DWH console. + - Sometimes an issue would be in a certain dimension, like a specific product event that stopped arriving or changed. Aggregate the test query by key dimensions in the table to understand if it’s relevant to a specific subset of the data. + - _Coming soon - Automated post failure queries._ + - Check if the test is flaky in the `test performance` screen. This usually means it’s a problem that happens frequently in the source data. + - _Coming soon - Check the metric graphs of the source tables._ +- Is it a code issue? + - Check recent PRs to the underlying monitored table. + - _Coming soon - Incident timeline with recent PRs and changes._ + - Check recent PRs merged to upstream tables. + - Are there any other related failures that happen at the same time following a recent release? + - Check metrics and test results, like table volume, to see if there is a bad join. +- If the result is an `error` and not `fail` or `warning`, it means the test / model failed to run. This can either be caused by a timeout or an issue in the DWH, or by a code change that led to a syntax error / broken lineage. + - Look at the error message to understand if it comes from dbt or the DWH, and what the issue is. + +## Post mortem - Learning from incidents + +Learning from past incidents is how we improve our coverage, response times and reliability. + +Here are some common actions to take following an incident: + +- Incident wasn’t important - If the incident wasn’t important or significant, remove the test or change the severity to warning.
+- It was hard to determine the severity of the incident - Make changes to the tags and descriptions of the test / asset, to make it easier next time. +- The relevant people weren’t notified - Make changes to owners and subscribers, and create the relevant alert rules. +- The result sample was not helpful - Make changes to the test query, to make it easier next time. +- Recurring incidents at the source - For incidents that keep happening, the most productive approach is to have a conversation with your data providers, and figure out how to improve the response. You can use the `test performance` page and past incidents on the `incidents` page to communicate stats on previous incidents. \ No newline at end of file diff --git a/docs/changelog.mdx b/docs/changelog.mdx new file mode 100644 index 000000000..499f16670 --- /dev/null +++ b/docs/changelog.mdx @@ -0,0 +1,499 @@ +--- +title: Elementary Changelog +description: "See what's new on the Elementary Cloud Platform" +--- + +export const Tag = ({ children = "Text goes here", type }) => (
+ {children} +
+); + +export const Tags = ({ date, tags = [] }) => ( +
+ {date} + {tags.map((tag, i) => ( + {tag} + ))} +
+); + +## Critical Assets + + + +We’re excited to introduce the **Critical Assets** feature, designed to help you prioritize and protect your most important data assets. + +**What is a Critical Asset?** + +A critical asset is ***any*** data asset (such as a model, exposure, or report) that plays a crucial role in your ***company's*** data ecosystem. Issues affecting these assets can have a significant impact on business operations, dashboards, and decision-making. + +Marking an asset as **critical** ensures it receives **higher priority in monitoring and alerting**, helping you quickly identify and respond to issues that may impact it. + +Once an asset is marked as **critical**, you will be able to: + +✅ **Identify it in the UI**, where it will be visually highlighted. + +✅ **Receive alerts** when upstream issues may impact the critical asset. + +✅ **Filter incidents** by their impact on critical assets. + +Learn more about how to use Critical Assets in our [docs.](https://docs.elementary-data.com/features/data-governance/critical_assets) + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740079005/Changelog/Critical_Assets_3_gjw9nc.gif) + + + +## Mute tests + + + +We’re pleased to introduce the Mute Test feature! + +This allows you to run tests without triggering alerts, giving you greater control over notifications while still monitoring your data. It’s perfect for scenarios where you’re testing new data sets, refining thresholds, or adjusting test logic—without unnecessary noise. + +With this feature, you can ensure everything is working as expected before enabling alerts, keeping your team focused and informed only when it truly matters. + +Learn more how to [mute tests](https://docs.elementary-data.com/features/alerts-and-incidents/alerts-and-incidents-overview) in our docs. + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740079310/Changelog/Mute_Tests_gif_omflcw.gif) + + +## Custom metadata + + + +Custom attributes from ****[dbt’s meta field](https://docs.getdbt.com/reference/resource-configs/meta) are now visible in the Elementary catalog, enhancing context and improving collaboration by bringing key metadata directly into your observability workflows. + +We understand that not all meta attributes are relevant for every team. If there are specific meta attributes you’d like to see in the catalog, please reach out to us at Elementary. Let us know your preferences, and we’ll configure the catalog to display the metadata most valuable to you. + + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740079225/Changelog/Fields_j6jumx.png) + +## Manually Set SLAs for Freshness Tests + + + +You can now set a manual threshold for Elementary's freshness tests. + +While our automated freshness test uses anomaly detection to identify unusual delays in table updates, sometimes you need more precise control. With manual thresholds, you can explicitly define when a freshness test should fail, giving you full control over monitoring your data freshness requirements. Simply set your desired threshold, and you'll be notified whenever a table hasn't refreshed within that time limit. + +Learn more about [automated freshness tests](https://docs.elementary-data.com/features/anomaly-detection/automated-freshness) in our docs. + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740079192/Changelog/image_55_ktvski.png) + +## Connect Multiple BI Tools in Lineage + + + +Elementary now supports connecting multiple BI tools, bringing even more visibility into your data. 
Once connected, BI metadata will appear in both the **catalog** and the **lineage graph**. + +Currently, Elementary integrates with **Looker**, **Tableau**, **Power BI**, and **Sigma**—with more to come! + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740079164/Changelog/image_43_hgu2zu.png) + +## Easier Test Creation + + + +We improved the test creation UI to make it easier to create tests. Given the fantastic response to the [dbt tests hub](https://www.elementary-data.com/dbt-test-hub) we introduced a few months ago, we decided to bring the test hub into the platform. You can now search directly from the Elementary UI, select the desired test, and create it directly from the UI using the YAML format provided with examples and explanations, then add owners, tags, severity, etc, and open a PR. + +Learn more about [test creation in Elementary](https://docs.elementary-data.com/features/data-tests/data-tests-overview). + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740079128/Changelog/New_Test_Creation_UI_new_eftsw3.gif) + +## New Integrations: Jira & Linear + + + +We’re excited to announce that you can now integrate Elementary with Jira or Linear to streamline your incident management process. + +With this integration, you can create new Jira or Linear tickets directly from Elementary. Once you connect your account, a **‘Create Ticket’** button will appear next to each incident in the incident management screen. + +Jira and Linear tickets created through Elementary will automatically include key details like the test name, description, query, results, and more—ensuring all relevant context is captured and shared. + +Learn more about connecting Elementary to [Jira](https://docs.elementary-data.com/cloud/integrations/alerts/jira) or [Linear](https://docs.elementary-data.com/cloud/integrations/alerts/linear) in our docs. + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740079070/Changelog/image_6_dubecv.png) + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740079094/Changelog/image_51_r2pbsr.png) + + +## Compact navbar + + + +You can now minimize the navigation bar for a better experience on smaller screens. + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740078816/Changelog/Compact_Nav_wcfmif.gif) + +## Sigma integration + + + +Elementary now connects with Sigma! + +With this integration, Elementary automatically extends data lineage down to the workbook page and element level. This means you’ll have full end-to-end visibility into your data, making it easier to understand downstream dependencies, also known as exposures. [Learn more about connecting to Sigma](https://docs.elementary-data.com/cloud/integrations/bi/sigma). + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740078782/Changelog/Screenshot_2024-12-09_at_16.31.03_etmoap.png) + +## Performance improvements + + + +In the past few months, we have made significant performance efforts to improve loading times within our platform. In particular, we created dedicated and efficient GraphQL API endpoints in our backend to speed up the dashboard, which now loads in a couple of seconds and usually less. + +We have also made considerable efforts to improve our database infrastructure to support additional scale and customers, which has resulted in an overall improvement throughout the platform. + +## Lineage Export + + + +Before making changes to a column or any other asset, you can assess the impact by exporting a textual summary of its lineage as a .csv file. 
The export includes: + +- Upstream and downstream assets +- Number of hops +- Names, owners, and tags of each dependency + +You can export the lineage for either a column or a table. This feature is accessible from both the lineage screen and the test overview dependencies tab. + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740078717/Changelog/Lineage_Export_mesth8.gif) + +## Enhanced Test Results + + + +The test overview side panel, previously only accessible from the incidents page, is now available on the test results page as well. This means you can view a full test overview even for tests that aren’t failing, not just for incidents. + +We’ve also introduced several updates to the panel: + +- View asset dependencies directly in a table format, with the option to export them as a .csv file or explore the lineage graph. +- Inspect the asset in the catalog with a single click. +- Disable automated freshness or volume tests directly from the test overview screen. + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740078639/Changelog/Test_Overview_naed5h.gif) + +## Custom Resource-based Roles + + + +Elementary now allows the creation of custom roles with access controls tailored to specific data. + +These roles are based on criteria such as environments, model path, dbt tags, or dbt owners. This is in addition to the existing access control (View, Edit, Admin). This ensures that users can only access the data they need, helping create focus and security. + +For now, our team will create the custom role for you. In the future, you will be able to do this on your own in the UI. To create a custom role reach out to us in the mutual Slack support channel, and we will create it for you. When the role is created, it will appear in the roles dropdown you see when inviting a new user to Elementary. + +## Bitbucket + + + +You can now connect Elementary to your Bitbucket code repository, where your dbt project code is managed. Once connected, Elementary will open PRs with configuration changes. + +https://docs.elementary-data.com/cloud/integrations/code-repo/bitbucket + +## Power BI + + + +You can now connect Power BI to Elementary! + +This will provide you with end-to-end data lineage to understand your downstream dependencies. +Once connected, Elementary will automatically and continuously extend the lineage to the report/dashboard level. + +https://docs.elementary-data.com/cloud/integrations/bi/power-bi + +## Elementary + Atlan: See your Elementary data quality insights in Atlan! + + + +We introduced a new test overview side panel that will make it much easier to investigate incidents. + +This overview is available directly from the Incidents Management screen and will soon be available from additional screens (test results, test coverage). + +The new view includes the complete test configuration and execution history, and for each result, it includes the result description, test query, and a row sample/anomaly chart (depending on the test type) + +This new side panel is also available for model build error incidents, allowing you to view dbt model build error messages right in the Elementary UI for the first time! + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740078239/Changelog/Screenshot_2024-09-29_at_17.17.16_mpw6ni.png) + +## A better way to triage test and model failures + + + +We introduced a new test overview side panel that will make it much easier to investigate incidents. 
+ +This overview is available directly from the Incidents Management screen and will soon be available from additional screens (test results, test coverage). + +The new view includes the complete test configuration and execution history, and for each result, it includes the result description, test query, and a row sample/anomaly chart (depending on the test type) + +This new side panel is also available for model build error incidents, allowing you to view dbt model build error messages right in the Elementary UI for the first time! + +![](https://res.cloudinary.com/diuctyblm/image/upload/v1740078239/Changelog/Screenshot_2024-09-29_at_17.17.16_mpw6ni.png) + +## Introducing: Incident Management + + + +Managing alerts is a common challenge for our users. Daily test failures generate numerous alerts, making tracking each issue's status difficult. Alerts are just the starting point—users need a tool to manage the entire process. + +We’ve been working hard to solve these issues and are excited to introduce incidents in Elementary. + +**What are Incidents?** +An incident consists of one or more failure events. Each failure or warning opens a new incident or is added to an existing one. Different failures are grouped based on automated linking rules. Each incident has a start time, status, severity, assignee, and end time. Read more about [Incidents](https://docs.elementary-data.com/features/alerts-and-incidents/incidents). + +**New Incident Management Page** +Our new [Incident Management Page](https://docs.elementary-data.com/features/alerts-and-incidents/incident-management) helps your team stay on top of open incidents and collaborate to resolve them. It provides a comprehensive overview of all incidents, allowing users to view statuses, prioritize issues, assign responsibilities, and resolve incidents efficiently. + + + +## New Alert Integrations! + + + +Introducing three new integrations with communication and incident management tools: - [Microsoft Teams](https://docs.elementary-data.com/cloud/integrations/alerts/ms-teams) - [PagerDuty](https://docs.elementary-data.com/cloud/integrations/alerts/pagerduty) - [Opsgenie](https://docs.elementary-data.com/cloud/integrations/alerts/opsgenie) + +## Add any dbt test via UI + + + +Elementary now supports adding any dbt test in bulk and directly from the UI! +We added support for dbt-expectation and dbt-utils packages, and you can use it to add your own custom generic tests to tables and columns. + + + +## Column anomalies by dimensions + + + +You can now add a new parameter to your column anomaly tests - `dimensions`. + +This will calculate the column metrics for every time bucket and table dimension values, allowing you to detect anomalies in specific segments of your data. + +For example, if you want to detect anomalies in a revenue column and you have multiple apps in different countries - now you can detect anomalies in revenue in a specific country. + +Here is an example of how this can be configured - + +```yaml + columns: + - name: in_app_purchase_revenue + tests: + - elementary.column_anomalies: + column_anomalies: + - sum + dimensions: + - app_name + - country +``` + +## Column level test results in lineage + + + +You can now investigate test results right on top of your column level lineage graph. +With this new release you can filter the lineage graph on a specific column that has an issue, and see if upstream or downstream columns +have similar test failures to understand the root cause fast. 
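For context, the column-level results shown on the lineage graph come from tests defined on specific columns in your dbt project. A minimal sketch of such a configuration (the model, column, and test choices below are illustrative assumptions, not a prescribed setup):

```yaml
models:
  - name: orders            # hypothetical model name
    columns:
      - name: customer_id   # hypothetical column name
        tests:
          # Generic dbt tests attached to a column like this produce
          # column-level results that can be tied to the column in lineage.
          - not_null
          - unique
```

Any generic test configured on a column this way is associated with that specific column, which is what makes it possible to filter the lineage graph by a column and review its test results.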
+ + + +## Monitor test durations + + + + We added test durations to the Test Execution History (the command and control center for your tests). You can now monitor your tests performance and see which tests are taking the longest or if there was any degradation in performance of specific tests. + + Easily sort your tests by execution duration and see which ones are taking the longest and choose the most promising candidates for optimization. This can also be used for cost analysis as slow tests tend to be more expensive. + + Check it out: + + + +## Sync now + + + +We are excited to launch a new `Sync Now` button right in your environments page. You can think of is as a "refresh now" button for your environment. +If you introduced a change in your environment and you want to see it in Elementary immediately - just click ‘sync now’. + +Here is what it looks like - + + + +## Status and assignee for alerts + + + +An alert is just the start of a triage and response process. We have big plans for making this process much more effective. +The first step was alert rules, and now we are introducing status and assignee selection in our alerts. + +This should help you manage incidents and collaborate more effectively with your team. + + + + + +## Custom SQL tests in UI + + + +Custom query testing is a must-have to validate business logic specific to your data team. You can now create custom SQL tests with Elementary, which will be translated into singular tests in your dbt project. + + + + + +## Model runs in dashboard + + + +The results of your jobs are critical to the data health. These are now included in the Elementary dashboard, for you to get a complete overview of the status of your pipelines. We added a breakdown of the latest result of each model, and the aggregated failures over time. + + + + + +## DAG subset in Lineage + + + +To improve UX and performance, we changed the default behavior in the lineage. +When a node or group of nodes is chosen, only the first hop in both directions is loaded. To see the rest of the hops you can use the +/- buttons (see video). + + + + + +## Role-based access control + + + +Elementary now allows creating users with different roles. +This will allow you to add more users from your team to Elementary, allowing them to view results without giving them the ability to change environment settings. + + + + diff --git a/docs/cloud/features.mdx b/docs/cloud/features.mdx new file mode 100644 index 000000000..4a27276bb --- /dev/null +++ b/docs/cloud/features.mdx @@ -0,0 +1,6 @@ +--- +title: "Platform features" +icon: "browsers" +--- + + \ No newline at end of file diff --git a/docs/cloud/general/security-and-privacy.mdx b/docs/cloud/general/security-and-privacy.mdx index cd513679f..79c0c4748 100644 --- a/docs/cloud/general/security-and-privacy.mdx +++ b/docs/cloud/general/security-and-privacy.mdx @@ -6,7 +6,7 @@ icon: "lock" ## Security highlights -Our product is designed with security and compliance in mind. +Our product is designed with security and privacy in mind. - Elementary Cloud does not have read access to raw data in your data warehouse. - Elementary Cloud only extracts and stores metadata, logs and aggregated metrics. @@ -34,18 +34,7 @@ Our product and architecture are always evolving, but our commitment to secure d ## How it works? -1. You install the Elementary dbt package in your dbt project and configure it to write to it's own schema, the Elementary schema. -2. The package writes test results, run results, logs and metadata to the Elementary schema. 
-3. The cloud service only requires `read access` to the Elementary schema, not to schemas where your sensitive data is stored. -4. The cloud service connects to sync the Elementary schema using an **encrypted connection** and a **static IP address** that you will need to add to your allowlist. - - - Elementary cloud security - + ## What information does Elementary collect? diff --git a/docs/cloud/guides/alert-rules.mdx b/docs/cloud/guides/alert-rules.mdx deleted file mode 100644 index 3716c2893..000000000 --- a/docs/cloud/guides/alert-rules.mdx +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "Alert rules" ---- - -Elementary cloud allows you to create rules that route your alerts. -Each rule is a combination of a filter and a destination. - -The Slack channel you choose when connecting your Slack workspace is automatically added as a default alert rule, that sends all the alerts to that channel without any filtering. -To modify, deactivate or add more rules, simply navigate to the "Alert routing rules" page in the left menu. - -

- -

- Alert routing rules screen -
- -

- -

- -

- Create a new alert rule or edit an existing one -
- -

- - - When the alerts are fetched, each alert is evaluated against all the rules, - until a rule matches. The alert is then routed to the destination of the - matching rule, and no further rules are evaluated. This is why the order of - the rules matters! - diff --git a/docs/cloud/guides/collect-job-data.mdx b/docs/cloud/guides/collect-job-data.mdx index 72361e2b1..cfd3749bc 100644 --- a/docs/cloud/guides/collect-job-data.mdx +++ b/docs/cloud/guides/collect-job-data.mdx @@ -1,5 +1,5 @@ --- -title: "Collect jobs info from orchestrator" +title: "Collect Jobs Info From Orchestrator" sidebarTitle: "Collect jobs data" --- diff --git a/docs/cloud/guides/sync-scheduling.mdx b/docs/cloud/guides/sync-scheduling.mdx index 666c29b28..6e55c36da 100644 --- a/docs/cloud/guides/sync-scheduling.mdx +++ b/docs/cloud/guides/sync-scheduling.mdx @@ -1,10 +1,10 @@ --- -title: "Environment syncs schedule" +title: "Environment Syncs Schedule" --- ## Synchronizing the Elementary schema -The data on your Elementary cloud environments is updated by syncing the local Elementary schema from the data warehouse. +The data on your Elementary Cloud environments is updated by syncing the local Elementary schema from the data warehouse. There are 2 available scheduling options: @@ -24,9 +24,9 @@ In the _Schedule Settings_, you're provided with a webhook URL. Next, you will n -Heading to dbt Cloud, you can [create a webhook subscription](https://docs.getdbt.com/docs/deploy/webhooks#create-a-webhook-subscription) that would trigger a sync after your jobs are done. +Heading to dbt Cloud, you can [create a webhook subscription](https://docs.getdbt.com/docs/deploy/webhooks#create-a-webhook-subscription) that will trigger a sync after your jobs are done. -- Make sure the webhook is triggered on `Run completed` events +- Make sure the webhook is triggered on `Run completed` events. - Select **only** the main jobs of the relevant environment. Make sure to select only the main jobs of the relevant environment. Selecting all jobs will trigger a sync for each job, which may result in unnecessary updates and therefore increased cost on the data warehouse. diff --git a/docs/cloud/guides/troubleshoot.mdx b/docs/cloud/guides/troubleshoot.mdx index df12af1c3..6b0c531de 100644 --- a/docs/cloud/guides/troubleshoot.mdx +++ b/docs/cloud/guides/troubleshoot.mdx @@ -4,7 +4,7 @@ title: "Troubleshooting" ### I connected my data warehouse but I don't see any test results -If you already connected your data warehouse to Elementary but don't see anything in Elementary UI, there could be several reasons. +If you already connected your data warehouse to Elementary but are not seeing anything in the Elementary UI, there could be several reasons. Try following these steps to troubleshoot: @@ -18,15 +18,15 @@ Try following these steps to troubleshoot: - If you have, make sure the table was created as an incremental table (not a regular table or view). - If not, there is a materialization configuration in your `dbt_project.yml` file that overrides the package config. Remove it, and run `dbt run --select elementary --full-refresh` to recreate the tables. After that run `dbt test` again and check if there is data. -**4. Still no data in the table? Reach out to the elementary team by starting an intercom chat from Elementary UI.** +**4. Still no data in the table? Reach out to the Elementary team by starting an intercom chat from the Elementary UI.** ### Column information cannot be retrieved This error can happen because of a few reasons: -1. 
check that your elementary dbt package version is 0.12.0 or higher -2. check that the user you are using to connect to your database has permission to access the information schema of all the schemas built or used by your dbt project +1. Check that your elementary dbt package version is 0.12.0 or higher. +2. Check that the user you are using to connect to your database has permission to access the information schema of all the schemas built or used by your dbt project. For more information on the permissions required by each data warehouse: @@ -39,3 +39,22 @@ For more information on the permissions required by each data warehouse: [Databricks](/cloud/integrations/dwh/databricks#permissions-and-security) [Postgres](/cloud/integrations/dwh/postgres#permissions-and-security) + + +### How do I set up the table name of my Singular test? + +Singular tests are SQL queries that can reference more than one table, but are often intended to test logic that is related to one table in particular. +To have that table name appear in the UI on the test results, test execution and other screens, you can set it up by adding the following to the config block of your singular test file: +``` +{{ config( + override_primary_test_model_id="model_unique_id" +) }} +``` + +Note: Use the `model_unique_id`, not the model name. +The `model_unique_id` is the unique identifier of the model in dbt, and can be found by running the query: +``` +SELECT unique_id +FROM .dbt_models +WHERE name= +``` diff --git a/docs/cloud/integrations/alerts/jira.mdx b/docs/cloud/integrations/alerts/jira.mdx index 9c824ddb3..f5ea802a7 100644 --- a/docs/cloud/integrations/alerts/jira.mdx +++ b/docs/cloud/integrations/alerts/jira.mdx @@ -2,14 +2,30 @@ title: "Jira" --- - - - -} -> - Click for details - \ No newline at end of file +Elementary's Jira integration enables creating Jira issues from incidents. + + + +## How to connect Jira +1. Go to the `Environments` page on the sidebar. +2. Select an environment and click connect on the `Connect ticketing system` card, and select `Jira`. +3. Authorize the Elementary app for your workspace. **This step may require a workspace admin approval.** +4. Select a default project for tickets. +5. Click `Save` to finish the setup. + + + + + + +## Creating Jira issues from incidents +When an incident is created, you can create a Jira issue from the incident page by simply clicking on "Create Jira Ticket". +The ticket will automatically be created in Jira, in the project you chose upon connecting Jira. + +After the ticket is created, you can see the Jira issue link on the incident page. +The ticket will also contain a link to the incident in Elementary. + + +Note: Elementary will not update the ticket in Jira when the incident is resolved or changed in any way. + + \ No newline at end of file diff --git a/docs/cloud/integrations/alerts/linear.mdx b/docs/cloud/integrations/alerts/linear.mdx index 5837a8c8a..ddd19c653 100644 --- a/docs/cloud/integrations/alerts/linear.mdx +++ b/docs/cloud/integrations/alerts/linear.mdx @@ -2,14 +2,30 @@ title: "Linear" --- - - - -} -> - Click for details - \ No newline at end of file +Elementary's Linear integration enables creating Linear tickets from incidents. + + + +## How to connect Linear +1. Go to the `Environments` page on the sidebar. +2. Select an environment and click connect on the `Connect ticketing system` card, and select `Linear`. +3. Authorize the Elementary app for your workspace. **This step may require a workspace admin approval.** +4.
Select a default team for new tickets +5. Click `Save` to finish the setup + + + + + + +## Creating Linear ticket from incidents +When an incident is created, you can create a Linear ticket from the incident page by simply clicking on "Create Linear Ticket". +The ticket will automatically be created in Linear, in the team you chose upon connecting Linear. + +After the ticket is created you can see the Linear ticket link in the incident page. +The ticket will also contain a link to the incident in Elementary. + + +Note: Elementary will not update the ticket in Linear when the incident is resolved or changed in any way + + \ No newline at end of file diff --git a/docs/cloud/integrations/alerts/ms-teams.mdx b/docs/cloud/integrations/alerts/ms-teams.mdx index 542a1dfaa..08746bded 100644 --- a/docs/cloud/integrations/alerts/ms-teams.mdx +++ b/docs/cloud/integrations/alerts/ms-teams.mdx @@ -1,6 +1,124 @@ --- -title: "MS Teams (Beta)" +title: "Microsoft Teams" --- -Routing alerts to MS Teams is supported as a beta integration. -Reach out to us to enable it for your instance! \ No newline at end of file +Elementary's Microsoft Teams integration enables sending alerts when data issues happen. +The alerts are sent using Adaptive Cards format, which provides rich formatting and interactive capabilities. + +The alerts include rich context, and you can create [alert rules](/features/alerts-and-incidents/alert-rules) to distribute alerts to different channels and destinations. + + +
+ MS teams alert screenshot +
+ + +## Enabling Microsoft Teams alerts + +1. Go to the `Environments` page on the sidebar. +2. Select an environment and click connect on the `Connect messaging app` card (first card), and select `Microsoft Teams`. + + +
+ Connect messaging app +
+ + +3. For each MS Teams channel you connect to Elementary, you will need to create a Webhook Using Microsoft Teams Connectors. Go to a channel in your Team and choose `Manage channel`. + + +
+ Teams manage channel +
+ + +4. Click on `Edit` connectors.
+ Teams edit connectors +
+ + +5. Search for `Incoming webhook` and choose `Add`.
+ Teams add incoming webhook +
+ + +6. Choose `Add` again and add a name to your webhook, then click on `Create`.
+ Teams create webhook +
+ + +7. Copy the URL of the webhook.
+ Teams copy URL webhook +
+ + + +8. Configure your Microsoft Teams webhooks, and give each one a name indicating its connected channel:
+ Provide webhooks +
+ + +9. Select a default channel for alerts, and set the suppression interval. + + + The default channel you select will automatically add a default [alert + rule](/features/alerts-and-incidents/alert-rules) to send all failures to + this channel. Alerts on warnings are not sent by default. To modify and add + rules, navigate to the `Alert Rules` page.
+ Select channel and suppression interval +
+ diff --git a/docs/cloud/integrations/alerts/opsgenie.mdx b/docs/cloud/integrations/alerts/opsgenie.mdx index e78907cf8..74df5e663 100644 --- a/docs/cloud/integrations/alerts/opsgenie.mdx +++ b/docs/cloud/integrations/alerts/opsgenie.mdx @@ -2,14 +2,68 @@ title: "Opsgenie" --- - - - -} -> - Click for details - \ No newline at end of file +Elementary's Opsgenie integration enables sending alerts when data issues happen. + +It is recommended to create [alert rules](/features/alerts-and-incidents/alert-rules) to filter and select the alerts that will create incidents in Opsgenie.
+ Opsgenie alerts screen +
+ + + +
+ Opsgenie alerts detail +
+ + +## Enabling Opsgenie alerts + +### Create an Opsgenie API key + +To create an `Opsgenie API key`, go to `Opsgenie` and follow these steps: + +- Create or select an `Opsgenie` team - this team will be responsible for alerts generated by Elementary. +- On the selected team, go to the `Integrations` tab and press `Add Integration`: + - Select `API` and press `Add` + - Select a name for the `API integration` - “Elementary” for example + - Make sure `Create and update access` is selected + - Press `Save Integration` + - Copy the `API key` and provide it in the Elementary UI. + +### Add API key to an environment + +1. Go to the `Environments` page on the sidebar. + +2. Select an environment and click connect on the `Connect incident management tool` card (second card), and select `Opsgenie`.
+ Connect incident management tool +
+ + +3. Fill the `API key` and select the `API URL` and save the integration: + + +
+ Enter Opsgenie API key +
+ + +4. `Opsgenie` will now be available as a destination on the [`alert rules`](/features/alerts-and-incidents/alert-rules) page. You can add rules to create Opsgenie incidents out of alerts that match your rule. \ No newline at end of file diff --git a/docs/cloud/integrations/alerts/pagerduty.mdx b/docs/cloud/integrations/alerts/pagerduty.mdx index 331ef7fde..5b629aa6e 100644 --- a/docs/cloud/integrations/alerts/pagerduty.mdx +++ b/docs/cloud/integrations/alerts/pagerduty.mdx @@ -1,6 +1,45 @@ --- -title: "PagerDuty (Beta)" +title: "PagerDuty" --- -Routing alerts to PagerDuty is supported as a beta integration. -Reach out to us to enable it for your instance! \ No newline at end of file +Elementary's PagerDuty integration enables sending alerts when data issues happen. + +It is recommended to create [alert rules](/features/alerts-and-incidents/alert-rules) to filter and select the alerts that will create incidents in PagerDuty.
+ PagerDuty Alerts +
+ + +## Enabling PagerDuty alerts + +1. Go to the `Environments` page on the sidebar. + +2. Select an environment and click connect on the `Connect incident management tool` card (second card), and select `PagerDuty`. + + +
+ Connect incident management tool +
+ + +3. Authorize Elementary for your account. **This step may require admin approval.**
+ PagerDuty approval +
+ + +4. `PagerDuty` will now be available as a destination on the [`alert rules`](/features/alerts-and-incidents/alert-rules) page. You can add rules to create PagerDuty incidents out of alerts that match your rule. \ No newline at end of file diff --git a/docs/cloud/integrations/alerts/slack.mdx b/docs/cloud/integrations/alerts/slack.mdx index a364f3725..3738e79cf 100644 --- a/docs/cloud/integrations/alerts/slack.mdx +++ b/docs/cloud/integrations/alerts/slack.mdx @@ -2,13 +2,78 @@ title: "Slack" --- - +Elementary's Slack integration enables sending Slack alerts when data issues happen. -## Enable Slack alerts +The alerts include rich context, and you can change the incident status and assignee from the alert itself. +You can also create [alert rules](/features/alerts-and-incidents/alert-rules) to distribute alerts to different channels and destinations. -On the environments page, select an environment and click `connect` on the **Connect Slack** card. -After connecting your workspace, you will need to select a default channel for alerts.
+ Slack alert screenshot +
+ -## Alerts configuration +## Enabling Slack alerts - \ No newline at end of file +1. Go to the `Environments` page on the sidebar. +2. Select an environment and click connect on the `Connect messaging app` card (first card), and select `Slack`. + + +
+ Connect messaging app +
+ + +3. Authorize the Elementary app for your workspace. **This step may require a workspace admin approval.**
+ Select Slack channel and alert suppression +
+ + +4. Select a default channel for alerts, and set the suppression interval. + + +The default channel you select will automatically add a default [alert rule](/features/alerts-and-incidents/alert-rules) +to send all failures to this channel. Alerts on warnings are not sent by default. To modify and add rules, navigate to the `Alert Rules` page.
+ Select Slack channel and alert suppression +
+ + + +## Alerts to private channels + +If the channel you want to send alerts to is private (🔒), it will not appear in the channels dropdown on the onboarding or the alert rules screen. + +You will need to invite the Elementary bot to the private channel by typing `@Elementary` in the channel and clicking to invite the bot in, and then it will appear in the UI. + + +
+ Add Elementary to private channel +
+ \ No newline at end of file diff --git a/docs/cloud/integrations/bi/connect-bi-tool.mdx b/docs/cloud/integrations/bi/connect-bi-tool.mdx index 26e47e92d..2900585ec 100644 --- a/docs/cloud/integrations/bi/connect-bi-tool.mdx +++ b/docs/cloud/integrations/bi/connect-bi-tool.mdx @@ -14,15 +14,15 @@ This will provide you end-to-end data lineage to understand your downstream depe - **Change impact**: Analyze which exposures will be impacted by a planned change. - **Unused datasets**: Detect datasets that no exposure consumes, that could be removed to save costs. - - +frameborder="0" +allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" +allowfullscreen +alt="Elementary Lineage" +> ### Supported BI tools diff --git a/docs/cloud/integrations/bi/explo.mdx b/docs/cloud/integrations/bi/explo.mdx index 8c3a3b46f..7096444f6 100644 --- a/docs/cloud/integrations/bi/explo.mdx +++ b/docs/cloud/integrations/bi/explo.mdx @@ -3,7 +3,7 @@ title: "Explo" --- diff --git a/docs/cloud/integrations/bi/hex.mdx b/docs/cloud/integrations/bi/hex.mdx index 490caed4d..9166292e5 100644 --- a/docs/cloud/integrations/bi/hex.mdx +++ b/docs/cloud/integrations/bi/hex.mdx @@ -3,7 +3,7 @@ title: "Hex" --- diff --git a/docs/cloud/integrations/bi/looker.mdx b/docs/cloud/integrations/bi/looker.mdx index 26d64f603..1cb253bda 100644 --- a/docs/cloud/integrations/bi/looker.mdx +++ b/docs/cloud/integrations/bi/looker.mdx @@ -55,7 +55,7 @@ Choose the Looker BI connection and provide the following details to validate an - **LookML code repository**: - Token - [Github](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic) - - [Gitlab](https://docs.gitlab.com/ee/user/project/settings/project_access_tokens.html) + - [Gitlab](https://docs.gitlab.com/ee/user/project/settings/project_access_tokens.html) - make sure the role is `developer` and the scopes include `read_api, read_repository` - Repository - The repository name where your LookML code is. diff --git a/docs/cloud/integrations/bi/mode.mdx b/docs/cloud/integrations/bi/mode.mdx index 8f559fe0a..15a49129e 100644 --- a/docs/cloud/integrations/bi/mode.mdx +++ b/docs/cloud/integrations/bi/mode.mdx @@ -3,7 +3,7 @@ title: "Mode" --- diff --git a/docs/cloud/integrations/bi/power-bi.mdx b/docs/cloud/integrations/bi/power-bi.mdx index 4274b5175..e16244bfa 100644 --- a/docs/cloud/integrations/bi/power-bi.mdx +++ b/docs/cloud/integrations/bi/power-bi.mdx @@ -2,14 +2,41 @@ title: "Power BI" --- - - - -} -> - Click for details - \ No newline at end of file +After you connect Power BI, Elementary will automatically and continuously extend the lineage to the report/dashboard level. +This will provide you end-to-end data lineage to understand your downstream dependencies, called exposures. + +### Create & authorize a Service Principal + +Elementary makes use of a service principal secret to authenticate with the Power BI API on your behalf. +Please follow the [official guide by Microsoft](https://learn.microsoft.com/en-us/power-bi/developer/embedded/embed-service-principal) for creating and authorizing a service principal for Power BI. +Make sure you're going through all steps up until but not including "Step 5 - Embed your content" (Which isn't currently our goal). +Also make sure that the security group you've created is allowed access to every relevant Workspace in your account. 
+Do NOTICE - "My Workspace" isn't accessible by service principals and thus can't be analyzed with our product. + + +### Allow Power BI's Admin API +Go to Power BI's Admin portal -> Tenant Settings -> Admin API settings +Enable the following features, and apply at least for the Service principal's security group (created on the last step): +- `Service principals can access read-only admin APIs` +- `Enhance admin APIs responses with detailed metadata` +- `Enhance admin APIs responses with DAX and mashup expressions` + +Those features will allow Elementary to get all required info for computing the data lineage graph. + +### Connecting Power BI to Elementary + +Navigate to the **Account settings > Environments** and choose the environment to which you would like to connect Elementary. +Choose the Power BI connection and provide the following details to validate and complete the integration. + +- **Tenant:** Your Microsoft tenant which is usaully your company's domain. e.g. `my-company.com` +- **Client ID**: The client ID of the new Microsoft Entra app you've created on the first step. +- **Client Secret:** The new client secret you've created on the first step. + + +### Limitations + +- Due to service principal limitations - we cannot analyze the default "My Workspace" Workspace. +- The lineage currently doesn't apply to the following entities: (But would be in the future) + - Paginated Reports + - Dataflows + - Datamarts diff --git a/docs/cloud/integrations/bi/sigma.mdx b/docs/cloud/integrations/bi/sigma.mdx index 11207d4f0..d4e783660 100644 --- a/docs/cloud/integrations/bi/sigma.mdx +++ b/docs/cloud/integrations/bi/sigma.mdx @@ -2,14 +2,31 @@ title: "Sigma" --- - - - -} -> - Click for details - \ No newline at end of file +After you connect Sigma, Elementary will automatically and continuously extend the lineage to the workbook page & element level. +This will provide you end-to-end data lineage to understand your downstream dependencies, called exposures. + +### Create API Client Credentials + +Elementary needs authorized client credentials in your account in order to access Sigma's API on your behalf.
+To create those, please follow the [official Sigma documentation](https://help.sigmacomputing.com/reference/generate-client-credentials#generate-api-client-credentials). +Make sure you enable 'REST API' privileges for that client. + +### Connecting Sigma to Elementary + +Navigate to the **Account settings > Environments** and choose the environment to which you would like to connect Elementary. +Choose the Sigma connection and provide the following details to validate and complete the integration. + +- **Cloud Provider:** To determine your Sigma cloud provider, navigate to **Account -> General Settings** under Sigma's **Administration** menu and look for **'Cloud: ...'**.
Should be one of the following: + - `AWS US` + - `AWS Canada` + - `AWS Europe` + - `AWS UK` + - `Azure US` + - `GCP` +- **Client ID**: The Sigma client ID you've created in the previous step. +- **Client Secret:** The new Sigma client secret you've created in the previous step. + + +### Limitations + +`Datasets` or `Data Models` are currently excluded from the computed lineage graph, which will point from the DWH directly to your Workbook Elements.
\ No newline at end of file diff --git a/docs/cloud/integrations/bi/tableau.mdx b/docs/cloud/integrations/bi/tableau.mdx index 9a1a5a362..5c61b2407 100644 --- a/docs/cloud/integrations/bi/tableau.mdx +++ b/docs/cloud/integrations/bi/tableau.mdx @@ -3,7 +3,7 @@ title: "Tableau" --- After you connect Tableau, Elementary will automatically and continuously extend the lineage to the dashboard level. -This will provide you end-to-end data lineage to understand your downstream dependencies, called exposures. +This will provide end-to-end data lineage to help you understand your downstream dependencies, called exposures. In order for Elementary to extract your metadata from Tableau you must meet all of the Tableau Metadata GraphQL requirements (most are set by default): @@ -15,7 +15,7 @@ In order for Elementary to extract your metadata from Tableau you must meet all ## Tableau Cloud -### Creating Personal Access Token +### Creating a Personal Access Token Create a Personal Access Token in Tableau. For details on how to create a user token please refer to the **[Tableau guide](https://help.tableau.com/current/pro/desktop/en-us/useracct.htm#create-a-personal-access-token)**. diff --git a/docs/cloud/integrations/bi/thoughtspot.mdx b/docs/cloud/integrations/bi/thoughtspot.mdx index 52b31d8e9..dd828c187 100644 --- a/docs/cloud/integrations/bi/thoughtspot.mdx +++ b/docs/cloud/integrations/bi/thoughtspot.mdx @@ -1,15 +1,32 @@ --- -title: "ThoughtSpot" +title: "Thoughtspot" --- - - - -} -> - Click for details - \ No newline at end of file +After you connect Thoughtspot, Elementary will automatically and continuously extend the lineage to the liveboard and answer level. +This will provide you with end-to-end data lineage to understand your downstream dependencies, called exposures. + + +### Enable Trusted Authentication on a privileged user + +For Elementary to access your Thoughtspot instance's API on behalf of your user, that user should have Trusted Authentication enabled.
+To enable Trusted Authentication on a user, please follow the [official Thoughtspot documentation](https://developers.thoughtspot.com/docs/trusted-auth-secret-key).
+Make sure you copy the generated token (`Secret Key`) as you will need it to connect Thoughtspot to Elementary. + +### User Privileges + +For an easy integration, it's recommended that the connected user be an administrator (`ADMINISTRATION` privilege), as this will ensure Elementary can access all of your Liveboards and Answers.
+It is also possible, though, to integrate with a regular user; just make sure it can download data (has the `DATADOWNLOADING` privilege) for all the relevant ThoughtSpot entities you want Elementary to discover and show lineage for.
\ No newline at end of file diff --git a/docs/cloud/integrations/catalog/atlan.mdx b/docs/cloud/integrations/catalog/atlan.mdx new file mode 100644 index 000000000..7f97b1d44 --- /dev/null +++ b/docs/cloud/integrations/catalog/atlan.mdx @@ -0,0 +1,31 @@ +--- +title: "Atlan" +--- + +Elementary aims to meet business users where they live, so we believe it's important to allow anyone who uses data to + know whether the data is healthy quickly, easily and without any technical knowledge required. This is why we integrated with Atlan. + +The integration works by pushing key insights to Atlan assets as custom metadata. +This metadata includes data health scores and open incidents, providing visibility into the quality and status of your +data. + + +
+ Elementary metadata in Atlan asset view +
+ + +## Required Authentication details for the Atlan integration +### Atlan API Key +To generate an API key, follow these steps: +1. Create a new Persona in Atlan, with the following permissions: + - Assets: Read, Update + - Governance: Update custom metadata values + +### Atlan Base Url +Your Atlan base URL, for example `https://my-company.atlan.com` + + diff --git a/docs/cloud/integrations/code-repo/azure-devops.mdx b/docs/cloud/integrations/code-repo/azure-devops.mdx new file mode 100644 index 000000000..a3dbce7d4 --- /dev/null +++ b/docs/cloud/integrations/code-repo/azure-devops.mdx @@ -0,0 +1,35 @@ +--- +title: "Azure DevOps Integration" +sidebarTitle: "Azure DevOps" +--- + +Elementary can integrate with Azure DevOps to connect to the code repository where your **dbt project code** is managed, and it opens pull requests with configuration changes. + +## Connecting Through the Azure DevOps App + +1. Navigate to **Settings > Code repository** in Elementary Cloud. +2. Click on **Connect** and select **Azure DevOps**. +3. Enter your Azure DevOps organization URL \ +(e.g., `https://dev.azure.com/your-organization`). +4. Click **Save**. +5. Connect through OAuth to authenticate between Azure DevOps and Elementary Cloud. During this process, a temporary token is issued, which can be used to make API calls. Along with the temporary token, a refresh token is also provided. The refresh token is used when Azure DevOps indicates that the temporary token has expired. For Microsoft services, OAuth is managed by Microsoft Entra ID (formerly known as Active Directory). + +--- + +## Required Permissions + +Elementary requires the following permissions in your Azure DevOps **dbt repository**: + +- **Read and write** access to the repository +- Access to **file contents** +- Permission to **open and read pull requests** + +--- + +## Troubleshooting + +If you encounter issues with the Azure DevOps integration, ensure the following: + +1. Your **organization URL** is correct. +2. You have **sufficient permissions** in Azure DevOps. +3. Elementary is properly **authorized** in your Azure DevOps organization. diff --git a/docs/cloud/integrations/code-repo/bitbucket.mdx b/docs/cloud/integrations/code-repo/bitbucket.mdx new file mode 100644 index 000000000..b18a8fab4 --- /dev/null +++ b/docs/cloud/integrations/code-repo/bitbucket.mdx @@ -0,0 +1,22 @@ +--- +title: "Bitbucket" +--- + +Elementary connects to the code repository where your dbt project code is managed, and opens PRs with configuration changes. + +## Recommended: Connect using Elementary Bitbucket App + +Navigate to the **Account settings > Environments** and choose the environment to which you would like to connect the dbt project code repository. + +Simply Click the blue button that says "Connect with Elementary Bitbucket App" and follow the instructions. +In the menu that opens up later on, select the repository where your dbt project is stored, and if needed the branch and path to the dbt project. + +Requires a user with permissions to install new applications in the repository + +### Alternative: Create a Bitbucket project token + +If connecting the Elementary Bitbucket App isn't an option, you can connect using a token managed by your team instead. 
+ +## Repository connection settings + + diff --git a/docs/cloud/integrations/code-repo/github.mdx b/docs/cloud/integrations/code-repo/github.mdx index 67b37af4a..752b6a63c 100644 --- a/docs/cloud/integrations/code-repo/github.mdx +++ b/docs/cloud/integrations/code-repo/github.mdx @@ -4,8 +4,21 @@ title: "Github" Elementary connects to the code repository where your dbt project code is managed, and opens PRs with configuration changes. -### Create a Github [fine-grained token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token) +## Recommended: Connect using Elementary Github App +Navigate to the **Account settings > Environments** and choose the environment to which you would like to connect the dbt project code repository. + +Simply Click the blue button that says "Connect with Elementary Github App" and follow the instructions. +In the menu that opens up later on, select the repository where your dbt project is stored, and if needed the branch and path to the dbt project. + +Requires a user with permissions to install new applications in the repository. + + +## Alternative: Create a Github [fine-grained token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token) + +If connecting the Elementary Github App isn't an option, you can connect to Github using a fine-grained token managed by your team instead. + + 1. In the upper-right corner of any page, click your profile photo, then click **Settings**. 2. On the bottom of the left sidebar, click **Developer settings**. 3. On the left sidebar, select **Personal access tokens > Fine-grained tokens**. @@ -34,15 +47,8 @@ Elementary connects to the code repository where your dbt project code is manage 9. Click **Generate token**. + -### Connect Github to Elementary - -Navigate to the **Account settings > Environments** and choose the environment to which you would like to connect the dbt project code repository. -Select **Connect code repository**, and under Github enter the generated token and repo full name: +## Repository connection settings - - Github connection to Elementary - + diff --git a/docs/cloud/integrations/code-repo/gitlab.mdx b/docs/cloud/integrations/code-repo/gitlab.mdx index 9372273af..26383035d 100644 --- a/docs/cloud/integrations/code-repo/gitlab.mdx +++ b/docs/cloud/integrations/code-repo/gitlab.mdx @@ -4,7 +4,22 @@ title: "Gitlab" Elementary connects to the code repository where your dbt project code is managed, and opens PRs with configuration changes. -### Create a Gitlab project token + +## Recommended: Connect using Elementary Gitlab App + +Navigate to the **Account settings > Environments** and choose the environment to which you would like to connect the dbt project code repository. + +Simply Click the blue button that says "Connect with Elementary Gitlab App" and follow the instructions. +In the menu that opens up later on, select the repository where your dbt project is stored, and if needed the branch and path to the dbt project. + +Requires a user with permissions to install new applications in the repository + + +### Alternative: Create a Gitlab project token + +If connecting the Elementary Gitlab App isn't an option, you can connect using a token managed by your team instead. 
+ + You need to create a [project access token](https://docs.gitlab.com/ee/user/project/settings/project_access_tokens.html) (token for a specific repository) with by following these steps: @@ -15,14 +30,8 @@ You need to create a [project access token](https://docs.gitlab.com/ee/user/proj 5. Select the following scopes: `api`, `read_api`, `read_repository`, `write_repository`. 6. Select **Create project access token**. -### Connect Gitlab to Elementary + -Navigate to the **Account settings > Environments** and choose the environment to which you would like to connect the dbt project code repository. -Select **Connect code repository**, and under Gitlab enter the generated token and repo full name: - - - Gitlab connection to Elementary - +## Repository connection settings + + \ No newline at end of file diff --git a/docs/cloud/integrations/dwh/trino.mdx b/docs/cloud/integrations/dwh/trino.mdx index 5cdecba3c..a3ea4a76c 100644 --- a/docs/cloud/integrations/dwh/trino.mdx +++ b/docs/cloud/integrations/dwh/trino.mdx @@ -3,4 +3,14 @@ title: "Connect to Trino" sidebarTitle: "Trino" --- - + + + +} +> + Click for details + diff --git a/docs/cloud/integrations/security-and-connectivity/aws-privatelink-integration.mdx b/docs/cloud/integrations/security-and-connectivity/aws-privatelink-integration.mdx new file mode 100644 index 000000000..d41c06ad1 --- /dev/null +++ b/docs/cloud/integrations/security-and-connectivity/aws-privatelink-integration.mdx @@ -0,0 +1,60 @@ +--- +title: "AWS PrivateLink" +sidebarTitle: "AWS PrivateLink" +--- + +## What is AWS PrivateLink? + +**AWS PrivateLink** is a secure and scalable networking technology that enables private connectivity between Virtual Private Clouds (VPCs), AWS services, and on-premises applications—without exposing traffic to the public internet. By leveraging PrivateLink, organizations can simplify their network architecture, reduce data exposure risks, and ensure secure communication between services. + +With PrivateLink, services are exposed as **private endpoints** within a VPC, allowing consumers to connect to them using private IP addresses. This minimizes the need for complex networking configurations like VPC peering or VPNs, and reduces the risk of data leakage by keeping traffic within the AWS network. + +In the context of our integration, AWS PrivateLink enables Elementary Cloud to securely and privately communicate with supported services, ensuring data privacy, compliance, and a streamlined user experience. We support cross-region PrivateLink and can connect to any region where your cloud is hosted, using VPC peering to link different regions to our production environment securely. Elementary Data maintains a global network of regional VPCs designed for PrivateLink, with strict security controls. + +## Architecture overview + + + +Elementary’s PrivateLink setup consists generally from two parts: + +1. **AWS PrivateLink connection** - + 1. Provider side (Customer / 3rd party) - **A VPC endpoint service** is set up at the customer’s AWS account (or a 3rd party AWS account in the case of Snowflake). This provides access to a particular service in that account. + 2. Consumer side (Elementary) - Elementary sets up a dedicated VPC interface that will connect to the integrated service, in the same AWS region as the service. + This is done through a dedicated regional VPC created for this purpose. +2. **AWS VPC Peering:** + 1. Elementary’s production servers are located in the **eu-central-1** (Frankfurt) region. 
For us to be able to access the service exposed through PrivateLink, we connect our main production VPC with the regional VPC mentioned above. + +## Supported integrations + +### Snowflake + +Snowflake has support for connecting to AWS-hosted Snowflake accounts via PrivateLink. This setup is entirely managed by Snowflake, so Elementary connects with an endpoint service hosted on Snowflake’s AWS account for this purpose. + +In order to set up a PrivateLink connection with Snowflake, please follow the steps below: + +1. **Open a support case to Snowflake Support** + 1. Ask to authorize Elementary’s AWS account for PrivateLink access. + 2. Provide Elementary’s account ID in the request - `743289191656` +2. **Obtain the PrivateLink configuration** + 1. Once Snowflake’s support team approves the request, obtain the PrivateLink configuration by invoking the following commands (admin access is required): + + ```sql + USE ROLE ACCOUNTADMIN; + SELECT SYSTEM$GET_PRIVATELINK_CONFIG(); + ``` + +3. **Provide Elementary with the configuration obtained in the previous step.** + 1. Elementary will then setup the required infrastructure to connect to Snowflake via PrivateLink. +4. **Add a Snowflake environment in Elementary** + 1. Follow the instructions [here](https://docs.elementary-data.com/cloud/onboarding/connect-data-warehouse) to set up a Snowflake environment in Elementary. + 1. When supplying the account, use `.privatelink` , where the account identifier is the result of the following query: + + ```sql + SELECT CURRENT_ORGANIZATION_NAME() || '-' || CURRENT_ACCOUNT_NAME(); + ``` + + 2. In the Snowflake instructions, skip the *Add the Elementary IP to allowlist* section (since the connection is done through PrivateLink, no allowlist is required). + +## Github Enterprise Server + +Coming soon! \ No newline at end of file diff --git a/docs/cloud/integrations/security-and-connectivity/ms-entra.mdx b/docs/cloud/integrations/security-and-connectivity/ms-entra.mdx new file mode 100644 index 000000000..51c1c7169 --- /dev/null +++ b/docs/cloud/integrations/security-and-connectivity/ms-entra.mdx @@ -0,0 +1,60 @@ +--- +title: "Microsoft Entra ID" +sidebarTitle: "Microsoft Entra ID" +--- + +## Enabling SAML + +In order to enable SAML using Microsoft Entra ID (Previously Azure AD SSO), we need the following steps to be taken: + +- Go to the [Microsoft Entra portal](https://entra.microsoft.com/) +- On the left, choose Applications → Enterprise Applications + + + +- Click on “New Application” + + + +- Click on “Create your own application” + + + +- Choose the last option in the side-window that opens and click “Create” + + + +- In the App window that opens, click on “Single Sign-On” + + + +- Choose SAML + + + +- Click on Edit on the “Basic SAML Configuration” section + + + +- Fill the following entries: + - Identifier (Entity ID) - `elementary` + - Reply URL - [`https://elementary-data.frontegg.com/auth/saml/callback`](https://elementary-data.frontegg.com/auth/saml/callback) +- Download the Federation Metadata XML and send it to the Elementary team. + + + +- We recommend setting up a short call where we’ll validate together that the new configuration works. +- After that, if you wish, we can disable until you update your team internally about the new configuration and update when you give us the OK! + + +## Provisioning + +Elementary supports user provisioning via SCIM to automate user management. 
If you want to enable automatic provisioning, follow these steps: +- In the **Microsoft Entra portal**, go to **Enterprise Applications** and select the newly created SAML application. +- Navigate to **Provisioning** and click **Get Started**. +- Set the **Provisioning Mode** to **Automatic**. +- Configure the **Tenant URL** and **Secret Token** (email the Elementary team for a 1password vault with the configuration). +- Click **Test Connection** to validate the setup. +- Enable provisioning and save changes. + +This setup ensures that users are automatically created, updated, and deactivated in Elementary based on their status in Microsoft Entra ID. You can always reach out if you need any help. diff --git a/docs/cloud/integrations/security-and-connectivity/okta.mdx b/docs/cloud/integrations/security-and-connectivity/okta.mdx new file mode 100644 index 000000000..c10b5a9f2 --- /dev/null +++ b/docs/cloud/integrations/security-and-connectivity/okta.mdx @@ -0,0 +1,80 @@ +--- +title: "Okta" +sidebarTitle: "Okta" +--- + +## Authentication & SSO Integration + +### Supported Authentication Protocols + +Elementary Cloud supports **Okta Single Sign-On (SSO)** via multiple authentication protocols: + +- **SAML 2.0** (Security Assertion Markup Language) +- **OIDC (OpenID Connect)** + +These protocols enable seamless authentication, reducing the need for manual credential management. + +### SCIM for Automated Provisioning + +Elementary Cloud supports **SCIM (System for Cross-domain Identity Management)** for automated user provisioning and deprovisioning: + +- **Automated User Creation**: Users added in Okta can be provisioned automatically in Elementary Cloud. +- **Deprovisioning Support**: When a user is removed from Okta, their access to Elementary Cloud is revoked automatically. +- **Group-Based Provisioning**: Okta groups can be mapped to roles in Elementary Cloud by the Elementary team. + +For more details on SCIM setup, refer to Okta’s SCIM integration guide: [Okta SCIM Guide](https://help.okta.com/en-us/content/topics/apps/apps_app_integration_wizard_scim.htm). + +## Security & Access Control + +### Multi-Factor Authentication (MFA) + +Elementary Cloud does not enforce MFA directly, but any MFA policies configured through Okta will automatically apply once Okta SSO is enabled. + +### Role-Based Access Control (RBAC) and Group Sync + +- Supports **RBAC with predefined roles** (**Admin, Can Write, Can Read**). +- **Role mappings for group names** can be pre-defined if sent in advance. +- **Role Assignment**: + - The account creator will have a default **Admin** role. + - For provisioned users, If no configuration is made, the **default role will be Can Read**. + - Manually invited users will have the role defined during the invite process. + - **Custom roles** are currently not supported. + + + +## How to Set Up Okta SSO for Elementary Cloud + +### Step 1: Create a Custom App in Okta + +1. Navigate to **Okta Admin Dashboard** > **Applications**. +2. Click **Create App Integration** and select **SAML 2.0**. +3. Configure the following settings: + - **Single Sign-On URL**: `https://elementary-data.frontegg.com/auth/saml/callback` + - **Audience URI (SP Entity ID)**: `elementary` +4. Obtain the **Okta IdP Metadata and SAML Signing Certificates** as an **XML file**. +5. Share the **XML file** with Elementary Cloud Support to complete the integration. + +### Step 2: Verify Integration with Elementary Cloud + +- Once the XML file is shared, Elementary Cloud will complete the integration setup. 
+- We recommend scheduling a **real-time verification call** to ensure everything is working before making the setting permanent. + + + +## How to Set Up SCIM for Automated Provisioning + +### Step 1: Configure SCIM in Okta + +1. Go to **Okta Admin Dashboard** > **Applications**. +2. Locate the **Elementary Cloud app** and open it. +3. Navigate to the **Provisioning tab** and enable **SCIM provisioning**. +4. Enter the following details: + - **SCIM Provisioning URL**: *(See internal 1Password for details)* + - **Authorization Token**: *(See internal 1Password for details)* +5. Save the settings and test provisioning by adding a test user. + +### Step 2: Define Role Mapping + +- By default, users are assigned the **Can Read** role. +- The default role can be changed to **Can Write** or **Admin**. +- Okta group names can be mapped to specific roles upon request. \ No newline at end of file diff --git a/docs/cloud/introduction.mdx b/docs/cloud/introduction.mdx index 0ced5c12c..3e4a14691 100644 --- a/docs/cloud/introduction.mdx +++ b/docs/cloud/introduction.mdx @@ -1,20 +1,32 @@ --- -title: "Elementary Cloud" +title: "Elementary Cloud Platform" sidebarTitle: "Introduction" icon: "cloud" --- - +**Elementary is a data observability platform tailored for dbt-first data organizations.** - - - Start 30 days free trial, no credit card is required. +The Elementary Cloud Platform integrates into dbt workflows, providing a developer-centric tool for engineers to maintain and govern data quality rules while also offering a cloud-based interface for business users and data stewards. This enables engineers to proactively identify and resolve data issues while consumers can monitor data health, receive notifications, view health scores, and add their own validations in a user-friendly environment. It offers advanced features like automated freshness and volume monitoring, ML-powered anomaly detection, a simple data catalog, and rich integrations. + + + + + + + + - + + + +## Get started with Elementary Cloud + + + + + + + + diff --git a/docs/cloud/manage-team.mdx b/docs/cloud/manage-team.mdx index 5908799e6..95ba22f5b 100644 --- a/docs/cloud/manage-team.mdx +++ b/docs/cloud/manage-team.mdx @@ -9,6 +9,12 @@ icon: "square-4" You can invite team members to join you! 🎉 Click on your initials on the top right of the screen and select `Team` to invite users. +When you invite a user, you can assign them a role. The roles are: +1. "Admin" - Has full access to all assets including team management +2. "Can Edit" - can manage environments and configurations, but cannot manage team members +3. "Can View" - can view data assets, test results, incidents and lineage + +You can also add custom roles with specific permissions to suit your needs. Talk to our support team to set this up. Users you invite will receive an Email saying you invited them, and will need to accept and activate their account. diff --git a/docs/cloud/onboarding/signup.mdx b/docs/cloud/onboarding/signup.mdx index 04b510088..dea505618 100644 --- a/docs/cloud/onboarding/signup.mdx +++ b/docs/cloud/onboarding/signup.mdx @@ -6,7 +6,7 @@ icon: "square-1" - [Signup to Elementary](https://elementary-data.frontegg.com/oauth/account/sign-up) using Google SSO or email. + [Sign up to Elementary](https://elementary-data.frontegg.com/oauth/account/sign-up) using Google SSO or email. 
If you are interested in advanced authentication such as MFA, Okta SSO, Microsoft AD - please contact us at cloud@elementary-data.com diff --git a/docs/data-tests/ai-data-tests/ai_data_validations.mdx b/docs/data-tests/ai-data-tests/ai_data_validations.mdx new file mode 100644 index 000000000..e4bd0c373 --- /dev/null +++ b/docs/data-tests/ai-data-tests/ai_data_validations.mdx @@ -0,0 +1,127 @@ +--- +title: "AI Data Validations" +--- + + + **Beta Feature**: AI data validation tests is currently in beta. The functionality and interface may change in future releases. + + **Version Requirement**: This feature requires Elementary dbt package version 0.18.0 or above. + + +# AI Data Validation with Elementary + +## What is AI Data Validation? + +Elementary's `elementary.ai_data_validation` test allows you to validate any data column using AI and LLM language models. This test is more flexible than traditional tests as it can be applied to any column type and uses natural language to define validation rules. + +With `ai_data_validation`, you can simply describe what you expect from your data in plain English, and Elementary will check if your data meets those expectations. This is particularly useful for complex validation rules that would be difficult to express with traditional SQL or dbt tests. + +## How It Works + +Elementary leverages the AI and LLM capabilities built directly into your data warehouse. When you run a validation test: + +1. Your data stays within your data warehouse +2. The warehouse's built-in AI and LLM functions analyze the data +3. Elementary reports whether each value meets your expectations based on the prompt + +## Required Setup for Each Data Warehouse + +Before you can use Elementary's AI data validations, you need to set up AI and LLM capabilities in your data warehouse: + +### Snowflake +- **Prerequisite**: Enable Snowflake Cortex AI LLM functions +- **Recommended Model**: `claude-3-5-sonnet` +- [View Snowflake's Guide](/data-tests/ai-data-tests/supported-platforms/snowflake) + +### Databricks +- **Prerequisite**: Ensure Databricks AI Functions are available +- **Recommended Model**: `databricks-meta-llama-3-3-70b-instruct` +- [View Databrick's Setup Guide](/data-tests/ai-data-tests/supported-platforms/databricks) + +### BigQuery +- **Prerequisite**: Configure BigQuery to use Vertex AI models +- **Recommended Model**: `gemini-1.5-pro` +- [View BigQuery's Setup Guide](/data-tests/ai-data-tests/supported-platforms/bigquery) + +### Redshift +- Support coming soon + +### Data Lakes +- Currently supported through Snowflake, Databricks, or BigQuery external object tables +- [View Data Lakes Information](/data-tests/ai-data-tests/supported-platforms/data-lakes) + +## Using the AI Data Validation Test + +The test requires one main parameter: +- `expectation_prompt`: Describe what you expect from the data in plain English + +Optionally, you can also specify: +- `llm_model_name`: Specify which AI model to use (see recommendations above for each warehouse) + + + This test works with any column type, as the data will be converted to a string format for validation. This enables natural language data validations for dates, numbers, and other structured data types. 
+ + + + +```yml Models +version: 2 + +models: + - name: < model name > + columns: + - name: < column name > + tests: + - elementary.ai_data_validation: + expectation_prompt: "Description of what the data should satisfy" + llm_model_name: "model_name" # Optional +``` + +```yml Example - Date Validation +version: 2 + +models: + - name: crm + description: "A table containing contract details." + columns: + - name: contract_date + description: "The date when the contract was signed." + tests: + - elementary.ai_data_validation: + expectation_prompt: "There should be no contract date in the future" +``` + +```yml Example - Numeric Validation +version: 2 + +models: + - name: sales + description: "A table containing sales data." + columns: + - name: discount_percentage + description: "The discount percentage applied to the sale." + tests: + - elementary.ai_data_validation: + expectation_prompt: "The discount percentage should be between 0 and 50, and should only be a whole number." + llm_model_name: "claude-3-5-sonnet" + config: + severity: warn +``` + +```yml Example - Complex Validation +version: 2 + +models: + - name: customer_accounts + description: "A table containing customer account information." + columns: + - name: account_status + description: "The current status of the customer account." + tests: + - elementary.ai_data_validation: + expectation_prompt: "The account status should be one of: 'active', 'inactive', 'suspended', or 'pending'. If the account is 'suspended', there should be a reason code in the suspension_reason column." + llm_model_name: "gemini-1.5-pro" +``` + + + diff --git a/docs/data-tests/ai-data-tests/supported-platforms/bigquery.mdx b/docs/data-tests/ai-data-tests/supported-platforms/bigquery.mdx new file mode 100644 index 000000000..37b522033 --- /dev/null +++ b/docs/data-tests/ai-data-tests/supported-platforms/bigquery.mdx @@ -0,0 +1,106 @@ +--- +title: "BigQuery Vertex AI" +description: "Learn how to configure BigQuery to use Vertex AI models for unstructured data validation tests" +--- + +# BigQuery Setup for Unstructured Data Tests + +Elementary's unstructured data validation tests leverage BigQuery ML and Vertex AI models to perform advanced AI-powered validations. This guide will walk you through the setup process. + +## Prerequisites + +Before you begin, ensure you have: +- A Google Cloud account with appropriate permissions +- Access to BigQuery and Vertex AI services +- A BigQuery dataset where you'll create your model, that will be used by Elementary's data validation tests. This is the dataset where you have unstructured data stored and that you want to apply validations on. + +## Step 1: Enable the Vertex AI API + +1. Navigate to the Google Cloud Console +2. Go to **APIs & Services** > **API Library** +3. Search for "Vertex AI API" +4. Click on the API and select **Enable** + +## Step 2: Create a Remote Connection to Vertex AI + +Elementary's unstructured data validation tests use BigQuery ML to access pre-trained Vertex AI models. To establish this connection: + +1. Navigate to the Google Cloud Console > **BigQuery** +2. In the Explorer panel, click the **+** button +3. Select **Connections to external data sources** +4. Change the connection type to **Vertex AI remote models, remote functions and BigLake (Cloud Resource)** +5. Select the appropriate region: + - If your model and dataset are in the same region, select that specific region + - Otherwise, select multi-region + +After creating the connection: +1. 
In the BigQuery Explorer, navigate to **External Connections** +2. Find and click on your newly created connection +3. Copy the **Service Account ID** for the next step + +## Step 3: Grant Vertex AI Access Permissions + +Now you need to give the connection's service account permission to access Vertex AI: + +1. In the Google Cloud Console, go to **IAM & Admin** +2. Click **+ Grant Access** +3. Under "New principals", paste the service account ID you copied +4. Assign the **Vertex AI User** role +5. Click **Save** + +## Step 4: Create an LLM Model Interface in BigQuery + +1. In the BigQuery Explorer, navigate to **External Connections** +2. Find again your newly created connection from previous step and clikc on it +3. Copy the **Connection ID** (format: `projects//locations//connections/`) +4. [Select a model endpoint](https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-remote-model#gemini-api-multimodal-models). You can use `gemini-1.5-pro-002` as a default endpoint. +5. Run the following SQL query to create a model in your dataset: + +```sql +CREATE OR REPLACE MODEL + `..` +REMOTE WITH CONNECTION + `` +OPTIONS ( + endpoint = '' +); +``` + +### Example + +```sql +CREATE OR REPLACE MODEL + `my-project.my-dataset.gemini-1.5-pro` +REMOTE WITH CONNECTION + `projects/my-project/locations/us/connections/my-remote-connection-model-name` +OPTIONS ( + endpoint = 'gemini-1.5-pro-002' +); +``` + +> **Note:** During development, we used `gemini-1.5-pro` and recommend it as the default model for unstructured data tests in BigQuery. + +### Additional Resources + +- [Available models and endpoints](https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-remote-model#gemini-api-multimodal-models) +- [Documentation on creating remote models](https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-remote-model) + +## Step 5: Running an Unstructured Data Test + +Once your model is set up, you can reference it in your Elementary tests: + +```yaml +models: + - name: table_with_unstructured_data + description: "A table containing unstructured text data." + columns: + - name: text_data + description: "Unstructured text data stored as a string." + tests: + - elementary.validate_unstructured_data: + expectation_prompt: "The text data should represent an example of unstructured data." + llm_model_name: "gemini-1.5-pro" +``` + + + diff --git a/docs/data-tests/ai-data-tests/supported-platforms/data-lakes.mdx b/docs/data-tests/ai-data-tests/supported-platforms/data-lakes.mdx new file mode 100644 index 000000000..7d7035c5f --- /dev/null +++ b/docs/data-tests/ai-data-tests/supported-platforms/data-lakes.mdx @@ -0,0 +1,7 @@ +--- +title: "Data lakes" +--- + +Currently, you can apply Elementary's unstructured data validation tests on data lakes using Snowflake, Databricks, or BigQuery external object tables. + +Native and direct support for data lakes is coming soon. Please reach out if you would like to discuss this integration and use case. 
\ No newline at end of file diff --git a/docs/data-tests/ai-data-tests/supported-platforms/databricks.mdx b/docs/data-tests/ai-data-tests/supported-platforms/databricks.mdx new file mode 100644 index 000000000..41211db58 --- /dev/null +++ b/docs/data-tests/ai-data-tests/supported-platforms/databricks.mdx @@ -0,0 +1,35 @@ +--- +title: "Databricks AI Functions" +--- + +# Setting Up Databricks AI Functions + +Elementary unstructured data validation tests run on top of Databricks AI Functions for Databricks users. +This guide provides details on the prerequisites to use Databricks AI Functions. + +## What are Databricks AI Functions? + +Databricks AI Functions are built-in SQL functions that allow you to apply AI capabilities directly to your data using SQL. These functions enable you to leverage large language models and other AI capabilities without complex setup or external dependencies, making them ideal for data validation tests. + +## Availability and Prerequisites + +To use Databricks AI Functions, your environment must meet the following requirements: + +### Runtime Requirements +- **Recommended**: Databricks Runtime 15.3 or above for optimal performance + +### Environment Requirements +- Your workspace must be in a supported Model Serving region. +- For Pro SQL warehouses, AWS PrivateLink must be enabled. +- Databricks SQL does support AI functions but Databricks SQL Classic does not support it. + +### Models +Databricks AI functions can run on foundation models hosted in Databricks, external foundation models (like OpenAI's models) and custom models. +Currently Elementary's unstructured data validations support only foundation models hosted in Databricks. Adding support for external and custom models is coming soon. +> **Note**: While developing the tests we worked with `databricks-meta-llama-3-3-70b-instruct` so we recommend using this model as a default when running unstructured data validation tests in Databricks. + + +## Region Considerations + +When using AI functions, be aware that some models are limited to specific regions (US and EU). Make sure your Databricks workspace is in a supported region for the Databricks AI functions. + diff --git a/docs/data-tests/ai-data-tests/supported-platforms/redshift.mdx b/docs/data-tests/ai-data-tests/supported-platforms/redshift.mdx new file mode 100644 index 000000000..25392a4b2 --- /dev/null +++ b/docs/data-tests/ai-data-tests/supported-platforms/redshift.mdx @@ -0,0 +1,7 @@ +--- +title: "Redshift" +--- + +Elementary's unstructured data validation tests do not currently support Redshift. + +On Redshift setting up LLM functions is more complex and requires deploying a lambda function to call external LLM models. Documentation and support for this integration is coming soon. Please reach out if you'd like to discuss this use case and integration options. \ No newline at end of file diff --git a/docs/data-tests/ai-data-tests/supported-platforms/snowflake.mdx b/docs/data-tests/ai-data-tests/supported-platforms/snowflake.mdx new file mode 100644 index 000000000..c93b1b669 --- /dev/null +++ b/docs/data-tests/ai-data-tests/supported-platforms/snowflake.mdx @@ -0,0 +1,70 @@ +--- +title: "Snowflake Cortex AI" +--- + +# Snowflake Cortex AI LLM Functions + +This guide provides instructions on how to enable Snowflake Cortex AI LLM functions, which is a prerequisite for running Elementary unstructured data validation tests on Snowflake. + +## What is Snowflake Cortex? 
+ +Snowflake Cortex is a fully managed service that brings cutting-edge AI and ML solutions directly into your Snowflake environment. It allows you to leverage the power of large language models (LLMs) without any complex setup or external dependencies. +Snowflake provides LLMs that are fully hosted and managed by Snowflake, using them requires no setup and your data stays within Snowflake. + + +## Cross-Region Model Usage + +> **Important**: It is always better to use models in the same region as your dataset to avoid errors and optimize performance. + +To learn where each model is located we recommend checking this [models list](https://docs.snowflake.com/en/user-guide/snowflake-cortex/llm-functions#availability). +If you encounter a "model not found" error, it may be because the model you're trying to use is not available in your current region. In such cases, you can enable cross-region model access with the following command (requires ACCOUNTADMIN privileges): + +```sql +-- Enable access to models in any region +ALTER ACCOUNT SET CORTEX_ENABLED_CROSS_REGION = 'ANY_REGION'; +``` + +This setting allows your account to use models from any region, which can be helpful when the model you need is not available in your current region. However, be aware that cross-region access may impact performance and could have additional cost implications. + + +## Supported LLM Models + +Snowflake Cortex provides access to various industry-leading LLM models with different capabilities and context lengths. Here are the key models available: + +### Native Snowflake Models + +* **Snowflake Arctic**: An open enterprise-grade model developed by Snowflake, optimized for business use cases. + +### External Models (Hosted within Snowflake) + +* **Claude Models (Anthropic)**: High-capability models for complex reasoning tasks. +* **Mistral Models**: Including mistral-large, mixtral-8x7b, and mistral-7b for various use cases. +* **Llama Models (Meta)**: Including llama3.2-1b, llama3.2-3b, llama3.1-8b, and llama2-70b-chat. +* **Gemma Models (Google)**: Including gemma-7b for code and text completion tasks. + +> **Note**: While developing the tests we worked with `claude-3-5-sonnet` so we recommend using this model as a default when running unstructured data tests in Snowflake. + +## Permissions + +> **Note**: By default, all users in your Snowflake account already have access to Cortex AI LLM functions through the PUBLIC role. In most cases, you don't need to do anything to enable access. + +The `CORTEX_USER` database role in the SNOWFLAKE database includes all the privileges needed to call Snowflake Cortex LLM functions. This role is automatically granted to the PUBLIC role, which all users have by default. + +The following commands are **only needed if** your administrator has revoked the default access from the PUBLIC role or if you need to set up specific access controls. If you can already use Cortex functions, you can skip this section. 
+ +```sql +-- Run as ACCOUNTADMIN +USE ROLE ACCOUNTADMIN; + +-- Create a dedicated role for Cortex users +CREATE ROLE cortex_user_role; + +-- Grant the database role to the custom role +GRANT DATABASE ROLE SNOWFLAKE.CORTEX_USER TO ROLE cortex_user_role; + +-- Grant the role to specific users +GRANT ROLE cortex_user_role TO USER ; + +-- Optionally, grant warehouse access to the role +GRANT USAGE ON WAREHOUSE TO ROLE cortex_user_role; +``` \ No newline at end of file diff --git a/docs/data-tests/ai-data-tests/unstructured_data_validations.mdx b/docs/data-tests/ai-data-tests/unstructured_data_validations.mdx new file mode 100644 index 000000000..95a8046b2 --- /dev/null +++ b/docs/data-tests/ai-data-tests/unstructured_data_validations.mdx @@ -0,0 +1,249 @@ +--- +title: "Unstructured Data Validations" +--- + + + **Beta Feature**: Unstructured data validation tests is currently in beta. The functionality and interface may change in future releases. + + **Version Requirement**: This feature requires Elementary dbt package version 0.18.0 or above. + + +# Validating Unstructured Data with Elementary + +## What is Unstructured Data Validation? + +Elementary's `elementary.unstructured_data_validation` test allows you to validate unstructured data using AI and LLM language models. Instead of writing complex code, you can simply describe what you expect from your data in plain English, and Elementary will check if your data meets those expectations. + +For example, you can verify that customer feedback comments are in English, product descriptions contain required information, or support tickets follow a specific format or a sentiment. + +## How It Works + +Elementary leverages the AI and LLM capabilities built directly into your data warehouse. When you run a validation test: + +1. Your unstructured data stays within your data warehouse +2. The warehouse's built-in AI and LLM functions analyze the data +3. Elementary reports whether each text value meets your expectations + +## Required Setup for Each Data Warehouse + +Before you can use Elementary's unstructured data validations, you need to set up AI and LLM capabilities in your data warehouse: + +### Snowflake +- **Prerequisite**: Enable Snowflake Cortex AI LLM functions +- **Recommended Model**: `claude-3-5-sonnet` +- [View Snowflake's Guide](/data-tests/ai-data-tests/supported-platforms/snowflake) + +### Databricks +- **Prerequisite**: Ensure Databricks AI Functions are available +- **Recommended Model**: `databricks-meta-llama-3-3-70b-instruct` +- [View Databrick's Setup Guide](/data-tests/ai-data-tests/supported-platforms/databricks) + +### BigQuery +- **Prerequisite**: Configure BigQuery to use Vertex AI models +- **Recommended Model**: `gemini-1.5-pro` +- [View BigQuery's Setup Guide](/data-tests/ai-data-tests/supported-platforms/bigquery) + +### Redshift +- Support coming soon + +### Data Lakes +- Currently supported through Snowflake, Databricks, or BigQuery external object tables +- [View Data Lakes Information](/data-tests/ai-data-tests/supported-platforms/data-lakes) + + +## Using the Validation Test + +The test requires two main parameters: +- `expectation_prompt`: Describe what you expect from the text in plain English +- `llm_model_name`: Specify which AI model to use (see recommendations above for each warehouse) + + + This test works with any column containing unstructured text data such as descriptions, comments, or other free-form text fields. 
It can also be applied to structured columns that can be converted to strings, enabling natural language data validations. + + + + +```yml Models +version: 2 + +models: + - name: < model name > + columns: + - name: < column name > + tests: + - elementary.unstructured_data_validation: + expectation_prompt: "Description of what the text should contain or represent" + llm_model_name: "model_name" +``` + +```yml Example +version: 2 + +models: + - name: table_with_unstructured_data + description: "A table containing unstructured text data." + columns: + - name: text_data + description: "Unstructured text data stored as a string." + tests: + - elementary.unstructured_data_validation: + expectation_prompt: "The text data should represent an example of unstructured data." + llm_model_name: "test_model" +``` + +```yml Example - Validating Customer Feedback +version: 2 + +models: + - name: customer_feedback + description: "A table containing customer feedback comments." + columns: + - name: feedback_text + description: "Customer feedback in free text format." + tests: + - elementary.unstructured_data_validation: + expectation_prompt: "The text should be a customer feedback comment in English, it should describe only a bug or a feature request." + llm_model_name: "claude-3-5-sonnet" + config: + severity: warn +``` + + + + +## Usage Examples + +Here are some powerful ways you can apply unstructured data validations: + +### Validating Structure + +```yml +models: + - name: medicine_prescriptions + description: "A table containing medicine prescriptions." + columns: + - name: doctor_notes + description: "A column containing the doctor notes on the prescription" + tests: + - elementary.unstructured_data_validation: + expectation_prompt: "The prescription has to include a limited time period and recommendations to the patient" + llm_model_name: "claude-3-5-sonnet" +``` + +Test fails if: A doctor's note does not specify a time period or lacks recommendations for the patient. + +### Validating Sentiment + +```yml +models: + - name: customer_feedback + description: "A table containing customer feedback." + columns: + - name: negative_feedbacks + description: "A column containing negative feedbacks about our product." + tests: + - elementary.unstructured_data_validation: + expectation_prompt: "The customer feedback's sentiment has to be negative" + llm_model_name: "claude-3-5-sonnet" +``` + +Test fails if: Any feedback in `negative_feedbacks` is not actually negative. + +### Validating Similarities Coming Soon + +```yml +models: + - name: summerized_pdfs + description: "A table containing a summary of our ingested PDFs." + columns: + - name: pdf_summary + description: "A column containing the main PDF's content summary." + tests: + - elementary.validate_similarity: + to: ref('pdf_source_table') + column: pdf_content + match_by: pdf_name +``` + +Test fails if: A PDF summary does not accurately represent the original PDF's content. The validation will use the pdf name as the key to match a summary from the pdf_summary table to the pdf_content in the pdf_source_table. + +```yml +models: + - name: jobs + columns: + - name: job_title + tests: + - elementary.validate_similarity: + column: job_description +``` + +Test fails if: The job title does not align with the job description. + +### Accepted Categories Coming Soon + +```yml +models: + - name: support_tickets + description: "A table containing customer support tickets." 
+ columns: + - name: issue_description + description: "A column containing customer-reported issues." + tests: + - elementary.accepted_categories: + categories: ['billing', 'technical_support', 'account_access', 'other'] +``` + +Test fails if: A support ticket does not fall within the predefined categories. + +### Accepted Entities Coming Soon + +```yml +models: + - name: news_articles + description: "A table containing news articles." + columns: + - name: article_text + description: "A column containing full article text." + tests: + - elementary.extract_and_validate_entities: + entities: + organization: + required: true + accepted_values: ['Google', 'Amazon', 'Microsoft', 'Apple'] + location: + required: false + accepted_values: {{ run_query('select zip_code from locations') }} +``` + +Test fails if: +- The required entity (e.g., `organization`) is missing. +- Extracted entities do not match the expected values. + +### Compare Numeric Values Coming Soon + +```yml +models: + - name: board_meeting_summaries + description: "A table containing board meeting summary texts." + columns: + - name: meeting_notes + description: "A column containing the full summary of the board meeting." + tests: + - elementary.extract_and_validate_numbers: + entities: + revenue: + compare_with: ref('crm_financials') + column: sum(revenue) + required: true + net_profit: + compare_with: ref('crm_financials') + column: sum(net_profit) + customer_count: + compare_with: ref('crm_customers') + column: count(customers) + required: true +``` + +Test fails if: +- Required entities are missing +- The numerical entities do not match the structured CRM data \ No newline at end of file diff --git a/docs/data-tests/anomaly-detection-configuration/anomaly-params.mdx b/docs/data-tests/anomaly-detection-configuration/anomaly-params.mdx index 0765fd48e..1df728a5f 100644 --- a/docs/data-tests/anomaly-detection-configuration/anomaly-params.mdx +++ b/docs/data-tests/anomaly-detection-configuration/anomaly-params.mdx @@ -36,6 +36,9 @@ sidebarTitle: "All configuration params"     period: [hour | day | week | month]     count: int + dimension_anomalies, column_anomalies, all_columns_anomalies tests: + -- dimensions: sql expression + volume_anomalies test: -- fail_on_zero: [true | false] @@ -45,7 +48,7 @@ sidebarTitle: "All configuration params" -- exclude_regexp: regex dimension_anomalies test: - -- dimensions: sql expression + -- exclude_final_results: [SQL where expression on fields value / average] event_freshness_anomalies: -- event_timestamp_column: column name @@ -57,7 +60,7 @@ sidebarTitle: "All configuration params" -```yml properties.yml +```yml Models version: 2 models: @@ -72,7 +75,7 @@ models: tests: ``` -```yml Example +```yml Models example version: 2 models: @@ -93,7 +96,9 @@ models: tags: ["elementary"] ``` -```yml sources_properties.yml +```yml Sources +version: 2 + sources: - name: < some name > database: < database > @@ -107,7 +112,9 @@ sources: tests: ``` -```yml Example +```yml Sources example +version: 2 + sources: - name: "my_non_dbt_table" database: "raw_events" diff --git a/docs/data-tests/anomaly-detection-configuration/column-anomalies.mdx b/docs/data-tests/anomaly-detection-configuration/column-anomalies.mdx index 3614c1d6b..0eb68820e 100644 --- a/docs/data-tests/anomaly-detection-configuration/column-anomalies.mdx +++ b/docs/data-tests/anomaly-detection-configuration/column-anomalies.mdx @@ -8,7 +8,7 @@ sidebarTitle: "column_anomalies" Select which monitors to activate as part of the test. 
- _Default: default monitors_ -- _Relevant tests: `all_column_anomalies`, `column_anomalies`_ +- _Relevant tests: `all_columns_anomalies`, `column_anomalies`_ - _Configuration level: test_ diff --git a/docs/data-tests/anomaly-detection-configuration/dimensions.mdx b/docs/data-tests/anomaly-detection-configuration/dimensions.mdx index d5218e3b6..748ae262f 100644 --- a/docs/data-tests/anomaly-detection-configuration/dimensions.mdx +++ b/docs/data-tests/anomaly-detection-configuration/dimensions.mdx @@ -5,15 +5,18 @@ sidebarTitle: "dimensions" `dimensions: [list of SQL expressions]` -Configuration for the tests `dimension_anomalies`, `column_anomalies` and `all_columns_anomalies`. -The test counts rows grouped by given column / columns / valid select sql expression. +The test will group the results by a given column / columns / valid select sql expression. Under `dimensions` you can configure the group by expression. -This test monitors the frequency of values in the configured dimension over time, and alerts on unexpected changes in the distribution. -It is best to configure it on low-cardinality fields. +Using this param segments the tested data per dimension, and each dimension is monitored separately. + +For example - +A `column_anomalies` test monitoring for `null_rate` with `dimensions` configured will monitor the +`null_rate` of values in the column, grouped by dimension, and will fail if in a specific dimension there is an anomaly in `null_rate`. +It is best to configure low-cardinality fields as `dimensions`. - _Default: None_ -- _Relevant tests: `dimension_anomalies`_ +- _Relevant tests: `dimension_anomalies`, `column_anomalies`, `all_columns_anomalies`_ - _Configuration level: test_ diff --git a/docs/data-tests/anomaly-detection-configuration/exclude-final-results.mdx b/docs/data-tests/anomaly-detection-configuration/exclude-final-results.mdx index fac867d5a..3152cf031 100644 --- a/docs/data-tests/anomaly-detection-configuration/exclude-final-results.mdx +++ b/docs/data-tests/anomaly-detection-configuration/exclude-final-results.mdx @@ -5,15 +5,15 @@ sidebarTitle: "exclude_final_results" `exclude_final_results: [SQL where expression on fields value / average]` -Failures in dimension anomaly tests consist of outliers in row counts across all dimensions during the training period. -Some dimensions may contribute metrics that are considered insignificant compared to others, and you may prefer not to receive alerts for them. -With this parameter, you can disregard such failures. +Failures in dimension anomaly tests consist of outliers in row count of each dimension. +Some dimensions may be considered insignificant compared to others, and you may prefer not to receive alerts for them. +With this parameter, you can exclude these dimensions from the results set and avoid such failures. -1. `value` - Outlier row count of a dimension during the detection period. +1. `value` - Max row count of a dimension during the detection period. 2. `average` - The average rows count of a dimension during the training period. 
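For illustration, a hypothetical `dimension_anomalies` test that drops low-volume dimensions from the results might look like this (the dimension column and threshold are placeholders, not a recommendation):

```yml
tests:
  - elementary.dimension_anomalies:
      dimensions: ["country"]
      exclude_final_results: "average < 200"
```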
- _Supported values: valid SQL where expression on the columns value / average_ -- _Relevant tests: Dimension anomalies _ +- _Relevant tests: Dimension anomalies_ diff --git a/docs/data-tests/anomaly-detection-configuration/exclude_prefix.mdx b/docs/data-tests/anomaly-detection-configuration/exclude_prefix.mdx index ae9b3c2e8..56109822b 100644 --- a/docs/data-tests/anomaly-detection-configuration/exclude_prefix.mdx +++ b/docs/data-tests/anomaly-detection-configuration/exclude_prefix.mdx @@ -8,7 +8,7 @@ sidebarTitle: "exclude_prefix" Param for the `all_columns_anomalies` test only, which enables to exclude a column from the tests based on prefix match. - _Default: None_ -- _Relevant tests: `all_column_anomalies`_ +- _Relevant tests: `all_columns_anomalies`_ - _Configuration level: test_ diff --git a/docs/data-tests/anomaly-detection-configuration/exclude_regexp.mdx b/docs/data-tests/anomaly-detection-configuration/exclude_regexp.mdx index 8bc02fcaf..02f27769b 100644 --- a/docs/data-tests/anomaly-detection-configuration/exclude_regexp.mdx +++ b/docs/data-tests/anomaly-detection-configuration/exclude_regexp.mdx @@ -8,7 +8,7 @@ sidebarTitle: "exclude_regexp" Param for the `all_columns_anomalies` test only, which enables to exclude a column from the tests based on regular expression match. - _Default: None_ -- _Relevant tests: `all_column_anomalies`_ +- _Relevant tests: `all_columns_anomalies`_ - _Configuration level: test_ diff --git a/docs/data-tests/anomaly-detection-tests-oss-vs-cloud.mdx b/docs/data-tests/anomaly-detection-tests-oss-vs-cloud.mdx new file mode 100644 index 000000000..0c33886e0 --- /dev/null +++ b/docs/data-tests/anomaly-detection-tests-oss-vs-cloud.mdx @@ -0,0 +1,37 @@ +--- +title: "Anomaly Detection Tests - OSS vs Cloud" +--- + +Elementary OSS and Elementary Cloud Platform both offer data anomaly detection. However, there are significant differences in implementation. + +There are two types of anomaly detection tests: + +* **Pipeline health monitors** - Monitor the pipeline runs, ensuring timely and complete data ingestion and transformation. These monitors monitor metadata to detect volume and freshness issues. + +* **Data quality metrics tests** - Run as part of the pipeline, collect metrics by querying the data itself. These include various data quality metrics such as nullness, cardinality, average, length, etc. 
+ +Here is a comparison between the implementation of these tests in Elementary Cloud and OSS: + +## Pipeline Health Monitors - Freshness and Volume + +| | OSS | Cloud | +| ----------------------- | --------------------------------------------- | ------------------------------------------------- | +| **Implementation** | dbt tests | Elementary Cloud monitors | +| **Tests execution** | Run in dbt | Run in Cloud | +| **Coverage** | Manually added in code | Automated, out-of-the-box full coverage | +| **Configuration** | Manual, many parameters required for accuracy | No configuration, automated ML models | +| **Detection mechanism** | Z-score, statistical | ML anomaly detection, various models | +| **What is monitored?** | Data | Metadata (query history, information schema) | +| **Time to detection** | Only when dbt runs | As soon as the problem happens, including sources | +| **Cost** | DWH compute | No cost, only metadata is leveraged | + +## Data Quality Metrics + +| | OSS | Cloud | +| ----------------------- | --------------------------------------------- | ---------------------------------------------------- | +| **Implementation** | dbt tests | Metrics collection in dbt, Elementary Cloud monitors | +| **Tests execution** | Run in dbt | Metrics collection in dbt, detection in Cloud | +| **Coverage** | Manually added in code | Opt-in, can be added in bulk in Cloud | +| **Configuration** | Manual, many parameters required for accuracy | Automated ML models | +| **Detection mechanism** | Z-score, statistical | ML anomaly detection, various models | +| **What is monitored?** | Data | Data | \ No newline at end of file diff --git a/docs/data-tests/anomaly-detection-tests/Anomaly-troubleshooting-guide.mdx b/docs/data-tests/anomaly-detection-tests/Anomaly-troubleshooting-guide.mdx new file mode 100644 index 000000000..c740e8c9b --- /dev/null +++ b/docs/data-tests/anomaly-detection-tests/Anomaly-troubleshooting-guide.mdx @@ -0,0 +1,153 @@ +--- +title: "Anomaly Tests Troubleshooting" +sidebarTitle: "Anomaly tests troubleshooting" +--- + + + +First, check if your test uses a timestamp column: + +```yaml +# In your YAML configuration +tests: + - elementary.volume_anomalies: + timestamp_column: created_at# If this is configured, you have a timestamp-based test +``` + + + + - Metrics are calculated by grouping data into time buckets (default: 'day') + - Detection period (default: 2 days) determines how many buckets are being tested + - Training period data (default: 14 days) comes from historical buckets, allowing immediate anomaly detection with sufficient history + + Verify data collection: + + ```sql + -- Check if metrics are being collected in time buckets + SELECT + metric_timestamp, + metric_value, + COUNT(*) as metrics_per_bucket + FROM your_schema.data_monitoring_metrics + WHERE table_name = 'your_table' + GROUP BY metric_timestamp, metric_value + ORDER BY metric_timestamp DESC; + + ``` + + - Each bucket should represent one time bucket (e.g., daily metrics) + - Gaps in `metric_timestamp` might indicate data collection issues + - Training uses historical buckets for anomaly detection + + **Common collection issues:** + + - Missing or null values in timestamp column + - Timestamp column not in expected format + - No data in specified training period + + + + + + - Training period data builds up over multiple test runs, using the test run time as its timestamp column. 
This requires time to collect enough points; for a 14 day training period, the test would need 14 different runs on different days to have a full training set. + - Metrics are calculated for the entire table in each test run + - Detection period (default: 2 days) determines how many buckets are being tested + + Check metric collection across test runs: + + ```sql + -- Check metrics from different test runsSELECT + updated_at, + metric_value + FROM your_schema.data_monitoring_metrics + WHERE table_name = 'your_table' + ORDER BY updated_at DESC; + + ``` + + - Should see one metric per test run and per dimension + - Training requires multiple test runs over time + - Each new test run creates the training point for a time bucket. A second test run within the same bucket will override the first one. + + **Common collection issues:** + + - Test hasn't run enough times + - Previous test runs failed + - Metrics not being saved between runs + + + + + + +Anomaly detection is influenced by: + +- Detection period (default: 2 days) - the time window being tested +- Sensitivity (default: 3.0) - how many standard deviations from normal before flagging +- Training data from previous periods/runs +- `metrics_anomaly_score` calculates the anomaly based on the data in `data_monitoring metrics`. + +Check calculations in `metrics_anomaly_score`: + +```sql +-- Check how anomalies are being calculatedSELECT + metric_name, + metric_value, + training_avg, + training_stddev, + zscore, + severity +FROM your_schema.metrics_anomaly_score +WHERE table_name = 'your_table' +ORDER BY detected_at DESC; +``` + + + + + +This occurs when there are fewer than 7 training data points. To resolve: + +### For timestamp-based tests: + +- Check if your timestamp column has enough historical data +- Verify time buckets are being created correctly in `data_monitoring_metrics` +- Look for gaps in your data that might affect bucket creation + +### For non-timestamp tests: + +- Run your tests multiple times to build up training data. +- Check `data_monitoring_metrics` to verify the data collection. The test will need data for at least 7 time buckets (e.g 7 days) to calculate the anomaly. + + + + + +If your test isn't appearing in `data_monitoring_metrics`: + +Verify test configuration: + +```yaml +tests: + - elementary.volume_anomalies: + timestamp_column: created_at# Check if specified correctly +``` + +### Common causes: + +- Incorrect timestamp column name +- Timestamp column contains null values or is not of type timestamp or date +- For non-timestamp tests: Test hasn't run successfully +- Incorrect test syntax + + + + +If you change it after executing elementary tests, you will need to run a full refresh to the metrics collected. This will make the next tests collect data for the new **`training_period`** timeframe. The steps are: + +1. Change var **`training_period`** in your **`dbt_project.yml`**. +2. Full refresh of the model ‘data_monitoring_metrics’ by running **`dbt run --select data_monitoring_metrics --full-refresh`**. +3. Running the elementary tests again. 
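For step 1, the var change in `dbt_project.yml` might look roughly like the following (the `count` value here is only an example):

```yml
vars:
  training_period:
    period: day
    count: 30
```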
+ +If you want the Elementary UI to show data for a longer period of time, use the days-back option of the CLI: **`edr report --days-back 45`** + diff --git a/docs/data-tests/anomaly-detection-tests/all-columns-anomalies.mdx b/docs/data-tests/anomaly-detection-tests/all-columns-anomalies.mdx index 613a45d2e..546ea6ebb 100644 --- a/docs/data-tests/anomaly-detection-tests/all-columns-anomalies.mdx +++ b/docs/data-tests/anomaly-detection-tests/all-columns-anomalies.mdx @@ -24,7 +24,7 @@ No mandatory configuration, however it is highly recommended to configure a `tim   -- elementary.all_columns_anomalies:     timestamp_column: column name     column_anomalies: column monitors list -     dimensions: list +     dimensions: sql expression     exclude_prefix: string     exclude_regexp: regex     where_expression: sql expression diff --git a/docs/data-tests/anomaly-detection-tests/column-anomalies.mdx b/docs/data-tests/anomaly-detection-tests/column-anomalies.mdx index e48b157fc..fd88dbab9 100644 --- a/docs/data-tests/anomaly-detection-tests/column-anomalies.mdx +++ b/docs/data-tests/anomaly-detection-tests/column-anomalies.mdx @@ -22,7 +22,7 @@ No mandatory configuration, however it is highly recommended to configure a `tim tests:   -- elementary.column_anomalies:     column_anomalies: column monitors list -     dimensions: list +     dimensions: sql expression     timestamp_column: column name     where_expression: sql expression     anomaly_sensitivity: int diff --git a/docs/data-tests/anomaly-detection-tests/volume-anomalies.mdx b/docs/data-tests/anomaly-detection-tests/volume-anomalies.mdx index fffea01c1..b564514fc 100644 --- a/docs/data-tests/anomaly-detection-tests/volume-anomalies.mdx +++ b/docs/data-tests/anomaly-detection-tests/volume-anomalies.mdx @@ -24,7 +24,7 @@ No mandatory configuration, however it is highly recommended to configure a `tim
  
   tests:
-      -- elementary.volume_anomalies:
+      - elementary.volume_anomalies:
           timestamp_column: column name
           where_expression: sql expression
           anomaly_sensitivity: int
diff --git a/docs/data-tests/how-anomaly-detection-works.mdx b/docs/data-tests/how-anomaly-detection-works.mdx
index 4a2817968..f2c2663f1 100644
--- a/docs/data-tests/how-anomaly-detection-works.mdx
+++ b/docs/data-tests/how-anomaly-detection-works.mdx
@@ -54,7 +54,7 @@ If a value in the detection set is an outlier to the expected range, it will be
 ### Expected range
 
 Based on the values in the training set, we calculate an expected range for the monitor.
-Each data point in the detection period will be compared to the expected range calculated based on it’s training set.
+Each data point in the detection period will be compared to the expected range calculated based on its training set.
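+
+For intuition, an illustrative example: if a monitor's training set of daily row counts has a mean of 1,000 and a standard deviation of 50, the default sensitivity of 3 standard deviations gives an expected range of roughly 850 to 1,150; a detection-period value of 1,300 falls outside that range and is flagged as an anomaly.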
 
 ### Training period
 
diff --git a/docs/data-tests/introduction.mdx b/docs/data-tests/introduction.mdx
index 1f28c4238..91235c521 100644
--- a/docs/data-tests/introduction.mdx
+++ b/docs/data-tests/introduction.mdx
@@ -3,12 +3,23 @@ title: "Elementary Data Tests"
 sidebarTitle: "Introduction"
 ---
 
-Elementary provides tests for detection of data quality issues.
+Elementary provides anomaly tests for detection of data quality issues.
 Elementary data tests are configured and executed like native tests in your dbt project.
 
 Elementary tests can be used in addition to dbt tests, packages tests (such as dbt-expectations), and custom tests.
 All of these test results will be presented in the Elementary UI and alerts.
 
+The Elementary dbt package offers two test types:
+
+- **Pipeline tests:** Monitor the health of data pipelines, ensuring timely and smooth data ingestion, transformation, and loading.
+- **Data quality tests:** Validate data accuracy, completeness, and correctness, detect anomalies and schema changes, and ensure the data meets predefined business rules.
+
+Together, these tests ensure reliable pipelines and trusted data.
+
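+For example, an Elementary anomaly detection test is added to a model's properties file like any native dbt test (the model and column names below are illustrative):
+
+```yml
+models:
+  - name: orders                         # illustrative model name
+    tests:
+      - elementary.volume_anomalies:
+          timestamp_column: updated_at   # recommended, not mandatory
+```
+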
+In addition to the previously mentioned tests, the [Elementary Cloud Platform](https://docs.elementary-data.com/cloud/introduction) offers **automated pipeline tests.** While traditional tests query the dbt tables directly, automated pipeline tests analyze **query history metadata**. This method is both **faster and more cost-efficient**, as it eliminates the need to query large datasets, focusing solely on the metadata layer. 
+
+Elementary automatically creates monitors for every model and source in your dbt project once you set up your environment; no configuration is required. Learn more about [automated tests](https://docs.elementary-data.com/features/anomaly-detection/automated-monitors).
+
 
 
 ## Anomaly detection tests
@@ -41,7 +52,7 @@ Tests to detect anomalies in data quality metrics such as volume, freshness, nul
   title="Event freshness anomalies"
   href="/data-tests/anomaly-detection-tests/event-freshness-anomalies"
 >
-  Monitors the gap between the latest event timestamp and it's loading time, to
+  Monitors the gap between the latest event timestamp and its loading time, to
   detect event freshness issues.
 
 
diff --git a/docs/data-tests/no-timestamp-column-tests.mdx b/docs/data-tests/no-timestamp-column-tests.mdx
deleted file mode 100644
index e69de29bb..000000000
diff --git a/docs/dbt/package-models.mdx b/docs/dbt/package-models.mdx
index 2ffc88dc9..be013f41b 100644
--- a/docs/dbt/package-models.mdx
+++ b/docs/dbt/package-models.mdx
@@ -55,10 +55,18 @@ This is a view on `dbt_invocations`.
 _Incremental model_
 
 Run results of all dbt tests, with fields and metadata needed to produce
-the [Elementary report](/features/data-observability-dashboard).
+the [Elementary report](/features/collaboration-and-communication/data-observability-dashboard).
 Each row is the result of a single test, including native dbt tests, packages tests and elementary tests.
 New data is loaded to this model on an on-run-end hook named `elementary.handle_tests_results`.
 
+
+### test_result_rows
+
+_Incremental model_
+
+Failed test row samples. Each row contains a sample of data that caused a test to fail, including the test result ID that links to the parent test result, the actual sample data stored as JSON, and timestamps for detection and creation. By default, up to 5 sample rows are stored per failed test (configurable via [test_sample_row_count](https://docs.elementary-data.com/oss/general/faq#can-i-see-more-result-samples-in-the-report)). 
+
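+For example, a quick way to inspect recent samples (the column names follow the description above and may differ slightly between package versions):
+
+```sql
+-- Latest failed-row samples collected by Elementary
+select
+    elementary_test_results_id,  -- links to the parent test result
+    result_row,                  -- the sampled row, stored as JSON
+    detected_at
+from your_elementary_schema.test_result_rows
+order by detected_at desc
+limit 50
+```
+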
+
 ## dbt artifacts
 
 The dbt artifacts models are created as empty tables, and a post-hook macro inserts data from the dbt graph object to
diff --git a/docs/features/alerts-and-incidents/alert-configuration.mdx b/docs/features/alerts-and-incidents/alert-configuration.mdx
new file mode 100644
index 000000000..71f2ad240
--- /dev/null
+++ b/docs/features/alerts-and-incidents/alert-configuration.mdx
@@ -0,0 +1,3 @@
+---
+title: "Alert configuration"
+---
\ No newline at end of file
diff --git a/docs/features/alerts-and-incidents/alert-rules.mdx b/docs/features/alerts-and-incidents/alert-rules.mdx
new file mode 100644
index 000000000..88f53b85f
--- /dev/null
+++ b/docs/features/alerts-and-incidents/alert-rules.mdx
@@ -0,0 +1,38 @@
+---
+title: "Alert Rules"
+---
+
+
+
+Elementary Cloud allows you to create rules to route your alerts.
+Each rule is a combination of a filter and a destination.
+
+The destination is a messaging app ([Slack](/cloud/integrations/alerts/slack), [Microsoft Teams](/cloud/integrations/alerts/ms-teams), etc.) 
+or incident management ([PagerDuty](/cloud/integrations/alerts/pagerduty), [Opsgenie](/cloud/integrations/alerts/opsgenie), etc.)
+[integration](/features/alerts-and-incidents/alerts-and-incidents-overview#supported-alert-integrations), 
+and, for tools that support it, a specific channel.
+
+
+  The order of rules matters!
+  When alerts are fetched, each alert is evaluated against the rules in order
+  until one matches. The alert is then routed to the destination of the
+  matching rule, and no further rules are evaluated.
+
+
+
+
+
+
+### Default alert rule
+
+The channel you choose when connecting your messaging app ([Slack](/cloud/integrations/alerts/slack), [Microsoft Teams](/cloud/integrations/alerts/ms-teams), etc.)
+is automatically added as a default alert rule that sends all failures to that channel without any filtering.
+By default, warnings do not send alerts. 
+To modify, deactivate or add more rules, simply navigate to the `Alert Rules` page in the menu.
\ No newline at end of file
diff --git a/docs/features/alerts-and-incidents/alerts-and-incidents-overview.mdx b/docs/features/alerts-and-incidents/alerts-and-incidents-overview.mdx
new file mode 100644
index 000000000..5ca4dfa8f
--- /dev/null
+++ b/docs/features/alerts-and-incidents/alerts-and-incidents-overview.mdx
@@ -0,0 +1,49 @@
+---
+title: Alerts and Incidents Overview
+sidebarTitle: Alerts & incidents overview
+---
+
+
+
+Alerts and incidents in Elementary are designed to shorten your time to response and time to resolution when data issues occur.
+
+- **Alert -** Notification about an event that indicates a data issue.
+- **[Incident](/features/alerts-and-incidents/incidents) -** A data issue that starts with a single event but can include multiple events grouped together. An incident includes a start time, status, severity, assignee, and end time. Incident alerts are sent when the incident is opened and when it is resolved.
+
+
+
+Alerts provide information and context for recipients to quickly triage, prioritize and resolve issues. 
+For collaboration and promoting ownership, alerts include owners and tags. 
+You can create distribution rules to route alerts to the relevant people and channels, for faster response. 
+
+An alert will either open a new incident, or be automatically grouped and added to an ongoing incident.
+From the alert itself, you can update the status and assignee of an incident. In the [incidents page](/features/alerts-and-incidents/incident-management),
+you will be able to track all open and historical incidents, and get metrics on the quality of your response.
+
+## Alerts & incidents core functionality 
+
+- **Alerts customization** - Alerts should include relevant context for quick triage such as **owner**, **tags**, **description**. In Elementary, alerts can be customized to include this information.
+- **Alert distribution rules** - Alerts should be sent to relevant recipients. By creating [Alert Rules](/features/alerts-and-incidents/alert-rules), alerts can be distributed to different channels and systems.
+- **Incident management** - When alerts are distributed to different channels, it can become hard to track what is open. Elementary offers a centralized Incidents page to monitor what is open and manage incident properties: **assignee**, **status** and **severity**.
+- **Grouping alerts to incidents** - New failures related to already open incidents will not trigger new alerts, and will be automatically added to the ongoing incident. This reduces noise and alert fatigue.  
+- **Automated resolution** - When a successful run indicates that an open incident is resolved, Elementary will automatically resolve the incident. This helps you manage the state of incidents and communicate it to stakeholders in real time.
+- **Mute test alerts** – Mute your test from the test configuration tab to run tests without triggering alerts, giving you more control over notifications while still monitoring data quality. This is useful when testing new data sets, refining thresholds, or adjusting test logic without unnecessary noise.
+
+
+
+
+
+## Alert types
+
+
+
+## Supported alert integrations
+
+
diff --git a/docs/features/alerts-and-incidents/effective-alerts-setup.mdx b/docs/features/alerts-and-incidents/effective-alerts-setup.mdx
new file mode 100644
index 000000000..8c90c22dd
--- /dev/null
+++ b/docs/features/alerts-and-incidents/effective-alerts-setup.mdx
@@ -0,0 +1,6 @@
+---
+title: Create an Effective Alerts Setup  
+sidebarTitle: "Effective alerts setup"
+---
+
+_🚧 Under construction 🚧_
\ No newline at end of file
diff --git a/docs/features/alerts-and-incidents/incident-management.mdx b/docs/features/alerts-and-incidents/incident-management.mdx
new file mode 100644
index 000000000..e85a80c2d
--- /dev/null
+++ b/docs/features/alerts-and-incidents/incident-management.mdx
@@ -0,0 +1,56 @@
+---
+title: Incident Management
+sidebarTitle: Incident management
+---
+
+
+
+The `Incidents` page is designed to enable your team to stay on top of open incidents and collaborate on resolving them.
+The page gives a comprehensive overview of all current and previous incidents, where users can view the status, prioritize, assign and resolve incidents.
+
+## Incidents view and filters
+
+The page provides a view of all incidents, and useful filters:
+
+- **Quick Filters:** Preset quick filters for all, unresolved and “open and unassigned” incidents.
+- **Filter:** Allows users to filter incidents based on various criteria such as status, severity, model name and assignee.
+- **Time frame:** Filter incidents which were open in a certain timeframe.
+
+
+
+
+## Interacting with Incidents
+
+An incident has a status, assignee and severity.
+These can be set in the Incidents page, or from an alert in integrations that support alert actions.
+
+- **Incident status**: Set to `Open` by default, and can be changed to `Acknowledged` and back to `Open`. When an incident is manually or automatically set to `Resolved`, it is closed and can no longer be modified.
+- **Incident assignee**: An incident can be assigned to any user on the team, and they will be notified.
+    - If you assign an incident to a user, it is recommended to leave the incident `Open` until the user changes status to `Acknowledged`.
+- **Incident severity**: Severity of an incident can be Low, Normal, High or Critical. By default, model errors are set to Critical, test failures to High, and warnings to Normal, but the severity can be changed manually. _Coming soon_: Severity will be automated by an analysis of the impacted assets.
+
+ Incident severity is used to prioritize incidents and to set the urgency of resolving them. It is not the same as dbt test severity. 
+
+## Incidents overview and metrics
+
+The incidents are divided into categories based on their status, and the user can view the number of incidents in each category by severity.
+For resolved incidents, the user can view the average resolution time.
+
+_Coming soon_ : The option to create and share a periodic summary of incidents will be supported in the future.
+
+
+
+ Incidents overview +
+ \ No newline at end of file diff --git a/docs/features/alerts-and-incidents/incidents.mdx b/docs/features/alerts-and-incidents/incidents.mdx new file mode 100644 index 000000000..72304f79e --- /dev/null +++ b/docs/features/alerts-and-incidents/incidents.mdx @@ -0,0 +1,75 @@ +--- +title: Incidents in Elementary +sidebarTitle: Incidents +--- + + + +One of the challenges data teams face is tracking and understand and collaborate on the status of data issues. +Tests fail daily, pipelines are executed frequently, alerts are sent to different channels. +There is a need for a centralized place to track: +- What data issues are open? Which issues were already resolved? +- Who is on it, and what's the latest status? +- Are multiple failures part of the same issue? +- What actions and events happened since the incident started? +- Did such issue happen before? Who resolved it and how? + +In Elementary, these are solved with `Incidents`. + +A comprehensive view of all incidents can be found in the [Incidents page](/features/alerts-and-incidents/incident-management). + +## How incidents work? + +Every failure or warning in Elementary will automatically open a new incident or be added as an event to an ongoing incident. +Based on grouping rules, different failures are grouped to the same incident. + +An incident has a [status, assignee and severity](/features/alerts-and-incidents/incident-management#interacting-with-incidents). +These can be set in the [Incidents page](/features/alerts-and-incidents/incident-management), or from an alert in integrations that support alert actions. + + +
+ Elementary Incidents +
+ + +## How incidents are resolved? + +Each incident starts at the first failure, and ends when the status is changed manually or automatically to `Resolved`. +An incident is **automatically resolved** when the failing tests, monitors and / or models are successful again. + +## Incident grouping rules + +Different failures and warnings are grouped to the same incident by the following grouping rules: + +1. Additional failures of the same test / monitor on a table that has an active incident. +2. _ _Coming soon_ _ Freshness and volume issues that are downstream of an open incident on a model failure. +3. _ _Coming soon_ _ Failures of the same test / monitor that are on downstream tables of an active incident. + +## Incident deep dive + +Clicking on an incident will open the test overview side panel, showing the following information: +1. Test owner, tags and subscribers (if the incident is a model failure, the model owner, tags and subscribers will be shown). +2. The execution history of the test / model, including the following information on each execution: + - Execution time + - Result (pass / fail / warning, etc) + - If the test failed - + - a sample of the failed rows + - The Slack channel where the alert was sent + - For anomaly tests - the result chart + - Compiled query +3. Configuration of the test / model - the Yaml or SQL code of the test / model. For cloud tests, the configuration is also editable. + + +You can also see the list of upstream and downstream assets - if the test is a column test you can see the upstream and downstream columns, if it's a table test you can see the upstream and downstream tables. + + +
+ Elementary Test Overview side panel +
+ diff --git a/docs/features/alerts-and-incidents/owners-and-subscribers.mdx b/docs/features/alerts-and-incidents/owners-and-subscribers.mdx new file mode 100644 index 000000000..9352c400e --- /dev/null +++ b/docs/features/alerts-and-incidents/owners-and-subscribers.mdx @@ -0,0 +1,20 @@ +--- +title: "Owners and subscribers" +--- + +We highly recommend configuring owners and subscribers for your models and / or tests. +an Owner is the person responsible for the model, and subscribers are the people who are interested in getting the alerts on the model or test. +Owners and subscribers will be mentioned (tagged) in the Slack alerts. Also their name will appear in the alerts and the UI. + + +In addition, Elementary UI has filters by owner and even the side tree can be grouped by owner. + +Owners and subscribers are configured in the dbt code: + +#### Owners + + + +#### Subscribers + + diff --git a/docs/features/anomaly-detection/automated-freshness.mdx b/docs/features/anomaly-detection/automated-freshness.mdx new file mode 100644 index 000000000..064e48b18 --- /dev/null +++ b/docs/features/anomaly-detection/automated-freshness.mdx @@ -0,0 +1,63 @@ +--- +title: Automated Freshness Monitor +sidebarTitle: "Automated freshness" +--- + +The purpose of the Freshness monitor is to alert when a data asset hasn't been updated in a period of time that exceeds the update SLA of that table. +Freshness monitors are by default created for all sources in your dbt project. They can be created for additional tables upon request. + +The freshness monitoring has 2 operation modes, it's possible to choose the desired one from the Test Overview side panel: + + +
+ Automated freshness result +
+ + + +### Anomaly detection based +The default operation mode of the freshness monitors. +It learns the update frequency of your tables and consistently checks if the table is **currently** fresh based on our model's forecast. + +By default we use 21 days of training data to understand the intervals between table updates. +The only condition that determines the status of the monitor is the time that has passed since the last update, which is compared to the model's prediction. + +The model takes into account seasonality, and supports cases such as tables that update on weekdays and not weekends. + +### SLA based +Sometimes you might want to monitor a table based on a fixed SLA, in order to have full control over when the monitor will alert. +This mode is based on a fixed SLA that you define for each table. +The monitor will alert if the table hasn't been updated in the defined SLA period. + + +## Understand the monitor result + + +
+ Automated freshness result +
+ + +The test result is a timeline of updates. + +The right end of the timeline, marked with a black triangle ▽, is the timestamp of the test result, which is near real time (can be considered as "now"). +Each update to the table is presented as a line in the timeline. +Hovering on the gaps between updates will show the updates time and time gap. + +To understand the test result, focus on the gap between the last update and now (▽): + +- Green - The gap between latest update and now is still within the expected range. +- Yellow / Red - The gap between latest update and now is above the expected range, a dotted line will show what was the expected gap limit. The color represents if this is a warning or failure. + +Use the `Anomaly settings` and `result feedback` buttons to impact the monitor. + +### Anonmaly settings + + + diff --git a/docs/features/anomaly-detection/automated-monitors.mdx b/docs/features/anomaly-detection/automated-monitors.mdx new file mode 100644 index 000000000..7a6b57474 --- /dev/null +++ b/docs/features/anomaly-detection/automated-monitors.mdx @@ -0,0 +1,57 @@ +--- +title: Automated Freshness & Volume Monitors +sidebarTitle: "Introduction" +--- + + + + + +Once your environment is set up, we automatically collect metadata from your warehouse, which our ML models run on. +The models are operational when the initial backfill is completed, there is no "loading / training period" - Elementary will collect enough historical data after setup to train the models. + +Monitors are automatically created according to your preferences, and their results are displayed in the application in the same way as package tests. + +#### Benefits of automated monitors + +1. **Zero configuration** - Our machine learning models learn data behavior, eliminating the need for manual configuration. +2. **Out-of-the-box coverage** - Rather than manually configuring a test for each model, Elementary automatically creates monitors for every source in your dbt project once you set up your environment. +3. **Metadata only, minimal cost** - The monitors rely on data warehouse metadata, and don't consume compute resources. + + +### How it works? + +The monitors collect metadata, and the [anomaly detection model](/features/anomaly-detection/monitors-overview#how-anomaly-detection-works?) adjusts based on updates frequency, seasonality and trends. + +As soon as you connect Elementary Cloud Platform to your data warehouse, a backfill process will begin to collect historical metadata. +Within an average of a few hours, your automated monitors will be operational. +By default, Elementary collects at least 21 days of historical metadata. + +The automated monitors are created for all the sources in your dbt project. +If you would like a different configuration of which tables to create the monitors on, or adding monitors for all models, you can reach out to us. + +You can fine tune the [configuration](/features/anomaly-detection/monitors-configuration) and [provide feedback](/features/anomaly-detection/monitors-feedback) to adjust the detection to your needs. + +As views are stateless, automated volume and freshness monitors only apply on tables. + +## Automated Monitors + + + +## Alerts on Failures + +By default, automated monitors failures **don't create alerts**. + +To activate alerts on automated monitors, navigate to `Setup > Alert Rules`. +- To alert on all automated monitors failures - Change the default rule (#1) alert categories to include automated monitors. 
+- To alert on specific datasets - Change / Create alert rules for these specific datasets, and include automated monitors in their alert categories. + + +
+ Alert categories in alert rules +
+ diff --git a/docs/features/anomaly-detection/automated-volume.mdx b/docs/features/anomaly-detection/automated-volume.mdx new file mode 100644 index 000000000..15b66da75 --- /dev/null +++ b/docs/features/anomaly-detection/automated-volume.mdx @@ -0,0 +1,45 @@ +--- +title: Automated Volume Monitor +sidebarTitle: "Automated volume" +--- + +The volume monitor tracks the **total row count** of a table over time, rather than individual table updates. +This means that Elementary will not consider a single update as anomalous, but rather a continuous anomalous trend occurring over a period of time. + + +
+ Automated volume result +
+ + +### Understand the monitor result + +The test data set is divided into two periods - + +1. Training Period - The historical behavior of the table's volume, patterns, and so forth. By default it will include 21 days. +2. Detection Period - This is the period within which we look for anomalies. By default it’s set to the last 48 hours. + +Data points and expected range - +- Data points within the training period are dark grey, and data points within the detection period are colored. +- The light grey area around the data points represents the model expected range. Data points outside this range are considered anomalous. +- Hovering over a data point will detail the row count timestamp, row count and expected range. + +Use the `Anomaly settings` and `result feedback` buttons to impact the monitor. + +### Anonmaly settings + + + + + \ No newline at end of file diff --git a/docs/features/anomaly-detection/metrics.mdx b/docs/features/anomaly-detection/metrics.mdx new file mode 100644 index 000000000..f0e9651e2 --- /dev/null +++ b/docs/features/anomaly-detection/metrics.mdx @@ -0,0 +1,103 @@ +--- +title: Metrics +sidebarTitle: "Metrics" +--- + +In Elementary, you can monitor any metric you want on the content of your data, view the metric and set up anomaly detection tests on it! + +## How does it work? + +Elementary uses a type of dbt test to collect metrics on your data and sync them up to Elementary Cloud. +The metrics will be collected each time you run `dbt test`, in a way that is similar to how training data for anomaly tests is collected. +A metric can be collected with or without an anomaly detection test configured on it. + +Metrics screen in Elementary +Metrics screen in Elementary + + +## How to set up a data content metric? + +The monitored metrics are set up in the code, in a way similar to dbt tests. + + + + +No mandatory configuration, however it is highly recommended to configure a `timestamp_column`. + +{/* prettier-ignore */} +
+ 
+  tests:
+      - elementary.collect_metrics:
+          timestamp_column: column name
+          time_bucket:
+            period: [hour | day]
+            count: int
+          dimensions: sql expression
+          metrics: monitors list
+            - name: string
+              type: monitor type
+              columns: list
+          where_expression: sql expression
+ 
+
+ + + +```yml Models +models: + - name: < model name > + tests: + - elementary.collect_metrics: + timestamp_column: < timestamp column > + time_bucket: # Daily by default + period: < time period > + count: < number of periods > + dimensions: < list of dimensions to group by > + metrics: < list of metrics > + - name: < user defined name for metric > + type: < which metric to calculate > + columns: < which columns to calculate the metric on- for column metrics > + where_expression: < sql expression > + cloud_monitored: < boolean, should Elementary automatically create anomaly tests for the collected metrics? > +``` + +```yml Models example + +models: + - name: login_events + tests: + - elementary.collect_metrics: + timestamp_column: 'loaded_at' + time_bucket: + period: hour + count: 1 + dimensions: + - country_id + - platform_id + metrics: + - name: row_count + type: row_count + - name: filtered_row_count + type: row_count + - name: null_count + type: null_count + columns: ["hello", "world"] + where_expression: "country = 'USA'" + cloud_monitored: true + +``` + + + +Upon running `dbt test`, your data is split into buckets: +- The size of each bucket is configured by the `time bucket` field +- Each row in the table is assigned to a bucket based on the timestamp of the `timestamp_column` selected +- If dimensions were selected, each combination of dimensions will create a separate metric +and then we compute the metric (or metrics) of choice for each bucket. +We save the metrics in the Elementary schema and sync it to Elementary Cloud whenever you sync your environment. + +The metric chart will be visible in the Metrics screen, along with all the metrics Elementary has collected. +If a test was created for the metric, it will be visible in Elementary just like any other test. + +To include alerts on Metrics Tests in your alert rules, simply edit your alert rule and check the box "Cloud content tests" under "Test Categories". diff --git a/docs/features/anomaly-detection/monitors-configuration.mdx b/docs/features/anomaly-detection/monitors-configuration.mdx new file mode 100644 index 000000000..b3fd30e2f --- /dev/null +++ b/docs/features/anomaly-detection/monitors-configuration.mdx @@ -0,0 +1,55 @@ +--- +title: Monitors Configuration +sidebarTitle: "Monitors configuration" +--- + +You can change the default settings and finetune the monitors to your needs using the `Anomaly settings` on each test. + +In general, users will rely on the automated machine learning model anomaly settings. +However, in some cases, an anomaly in the data is not relevant to your business. For this cases, the custom settings are useful. + +## Settings simulator + +For some supported settings, Elementary offers a simulation of the change impact on latest results. +You can use the `Simulate Configuration` button after the change and before saving. + + + +## Supported settings + +#### All monitors + + + +#### Volume monitor + + + +#### Freshness monitor + + + + +## Disable monitor + +To disable a monitor from running, press the `...` button on the top right of the test result and then `Delete test`. +This will disable the monitor. To re-activate deleted monitors, reach out to support. 
+ + + diff --git a/docs/features/anomaly-detection/monitors-feedback.mdx b/docs/features/anomaly-detection/monitors-feedback.mdx new file mode 100644 index 000000000..6df14f74e --- /dev/null +++ b/docs/features/anomaly-detection/monitors-feedback.mdx @@ -0,0 +1,41 @@ +--- +title: Monitors Feedback +sidebarTitle: "Monitors feedback" +--- + +Using the `Result feedback` button, you can mark results as true or false positives. +This feedback can significantly improve the accuracy of detection. + +Some results trigger an automated workflow, and all are manually reveiewd by the Elementary team. + +Just so you know - Our machine learning models thrive on your feedback! +We're always hustling to make them even better, and your feedback play a huge role in helping us achieve that. So keep those comments coming! + + +
+ Anomaly result feedback +
+ + +### False positive feedback + +To get context on your false positive result feedback and trigger a response, we ask you to select a reason: + +- **Insignificant change** - The anomaly is not drastic enough for me to care about it. Usually the action item is to relax anomaly detection sensitivity. +- **Expected outlier** - This isn't an anomaly and should be within the expected range. The action item will be to re-train the model, sometimes with a wider training set. +- **Business anomaly** - This is an anomaly, but one we expected to happen due to intentional change or business event. The action item will be to exclude the anomaly from the training set. +- **Not an interesting table** - I don't want to monitor this table. The action item is to delete the monitor. +- **Other** - Non of the other reasons are a fit. Please add a comment to describe the use case. + + +
+ False positive result feedback +
+ \ No newline at end of file diff --git a/docs/features/anomaly-detection/monitors-overview.mdx b/docs/features/anomaly-detection/monitors-overview.mdx new file mode 100644 index 000000000..f972e060a --- /dev/null +++ b/docs/features/anomaly-detection/monitors-overview.mdx @@ -0,0 +1,34 @@ +--- +title: Anomaly Detection Monitors +sidebarTitle: "Monitors overview" +--- + + + +ML-powered anomaly detection monitors automatically identify outliers and unexpected patterns in your data. +These are useful to detect issues such as incomplete data, delays, a drop in a specific dimension or a spike in null values. + +Elementary offers two types of monitors: + +- **Automated Monitors** - Out-of-the-box monitors activated automatically, that query metadata only. +- **Opt-in Monitors** - Monitors that query raw data and require configuration. + +## [Automated monitors](/features/anomaly-detection/automated-monitors) + + + + + +## Opt-in monitors + +_Coming soon_ + + +## Monitor test results + +Each monitor returns a test result, that is one of the following four results: + +- **Passed** - The test passed, no anomaly was detected. +- **Warning** - An anomaly was detected, and the test is configured to `warn` severity. +- **Fail** - An anomaly was detected, and the test is configured to `fail` severity. +- **No data** - The monitor does not have enough data or an accurate model to monitor. Reach out to our support team to fix this. diff --git a/docs/features/automated-monitors.mdx b/docs/features/automated-monitors.mdx deleted file mode 100644 index c963763a2..000000000 --- a/docs/features/automated-monitors.mdx +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: Automated freshness, volume and schema monitoring -sidebarTitle: "Automated Monitors" -icon: "wand-magic-sparkles" ---- - - - -Elementary offers out-of-the-box automated monitors to detect freshness, volume and schema issues. -This provides broad coverage and a basic level of observability, without any configuration effort. - -Additionally, these monitors will not increase compute costs as they leverage only warehouse metadata (information schema, query history). - -The monitors are trained on historical metadata, and adjust based on updates frequency, seasonality and trends. - -As views are stateless, automated volume and freshness monitors only apply on tables. - - - Elementary Automated Monitors - - -## Supported automated monitors - -### Volume - -Monitors how much data was added / removed / updated to the table with each update. -The monitor alerts you if there is an unexpected drop or spike in rows. - -### Freshness - -Monitors how frequently a table is updated, and alerts you if there is an unexpected delay. - -### Schema changes - -_Coming soon_ diff --git a/docs/features/catalog.mdx b/docs/features/catalog.mdx deleted file mode 100644 index 5f61a58b4..000000000 --- a/docs/features/catalog.mdx +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: "Data Catalog" -icon: "folder-tree" -iconType: "solid" ---- - - - -On the Catalog tab you can now explore your datasets information - descriptions, columns, columns descriptions, latest update time and datasets health. -From the dataset you can navigate directly to it’s lineage and test results. - -The catalog content is generated from the descriptions you maintain in your dbt project YML files. - -The goal is to make Elementary useful for more members of the data team, and to prevent the need to navigate between Elementary and other interfaces. 
- - - Elementary Catalog - diff --git a/docs/features/ci.mdx b/docs/features/ci.mdx index c17753e90..236a35c5c 100644 --- a/docs/features/ci.mdx +++ b/docs/features/ci.mdx @@ -1,7 +1,6 @@ --- title: "Elementary CI" sidebarTitle: "Elementary CI" -icon: "code-pull-request" --- @@ -13,17 +12,12 @@ When making changes to your data project, it can sometimes be hard to fully unde Our impact analysis will run on every pull request in your dbt project, so that you can see the downstream impact of your changes. You'll also be able to see if any of your dbt tests are failing or your models aren't being built successfully. - +![](/pics/cloud/pr_impact_example.png) -Elementary CI automations will help you make changes with confidence and seeing the full picture before merging your pull request. +Elementary CI automations help you make changes with confidence by providing a comprehensive view before merging your pull request. ## Want to join the beta? - - + + + \ No newline at end of file diff --git a/docs/features/collaboration-and-communication/catalog.mdx b/docs/features/collaboration-and-communication/catalog.mdx new file mode 100644 index 000000000..281edd20c --- /dev/null +++ b/docs/features/collaboration-and-communication/catalog.mdx @@ -0,0 +1,56 @@ +--- +title: "Data Catalog" +--- + + + +The Elementary Data Catalog is designed to make a curated collection of datasets easily discoverable by data consumers, along with their dependencies, health status and more. + +The Catalog tab provides a comprehensive view of the datasets created for the various analytics teams, enabling easy discovery and access to key metadata for data consumers within your team. It centralizes dataset information, including descriptions, columns, column descriptions, latest update times, and dataset health. + +From each dataset, you can navigate directly to its lineage and test results, ensuring seamless exploration of data dependencies and quality insights. + + + +## Key Features +### Easily Discoverable Data Assets +Quickly and easily navigate the catalog using the side tree and its search bar. Easily find tables, views, BI dashboard and more. +You can view the data assets by their location in the DWH or in the dbt project, and group them by tags, owners or path. + +Catalog tree + +### Custom Metadata Support +Need to display additional metadata fields from your dbt `meta` config? Reach out to our team to customize the catalog with fields that best fit your data workflows. + +### Critical Asset Tagging +Mark key datasets as critical to highlight their importance and prioritize monitoring efforts. Read more [here](/features/data-governance/critical_assets). + +### Lineage & Dependency Export +View upstream and downstream dependencies for each dataset, and export the full dependency list as a CSV for further analysis or documentation. +Lineage export + +## Data Source +The catalog content is automatically generated from the descriptions maintained in your dbt project YML files, ensuring consistency with your existing documentation. +In addition, Elementary enriches the catalog with health scores, test coverage status and results, as well as metadata from your DWH information schema, providing a comprehensive view of your datasets. + +## Coming soon +Click on the links below to be notified when these features are released! 
+ + + + Edit asset descriptions, tags and owners, for a single asset or in bulk + + + Easily find gaps in your data governance coverage such as missing descriptions, owners or tags + + + Discover datasets by smart AI-based search, from Slack or the UI + + \ No newline at end of file diff --git a/docs/features/collaboration-and-communication/data-health.mdx b/docs/features/collaboration-and-communication/data-health.mdx new file mode 100644 index 000000000..f8eef3465 --- /dev/null +++ b/docs/features/collaboration-and-communication/data-health.mdx @@ -0,0 +1,68 @@ +--- +title: Data Health Dashboard +sidebarTitle: Data Health Dashboard +--- + + + + + +### Data Health Dashboard + +The Data Health Dashboard is intended for your data consumers and stakeholders, that want to get a summary of what is happening with the data in your organization. + +It gives a high-level overview that doesn't require deep technical knowledge or going into specific test results. +the dashboard presents the data health in a simple way, by giving a health score, and using a color code to indicate if this score is healthy. +Filters are available at the top of the page, making it easy to see the data health in different contexts. + +Data Health Score + +The dashboard is based on the 6 [Data Quality Dimensions](/features/collaboration-and-communication/data-quality-dimensions#data-quality-dimensions): + + + +### How is the data health score calculated? + +Each test you run in either dbt or Elementary is mapped to one of these pillars, and given a score. +The scoring method is very simple: +- If the test passes, the score is 100 +- If the test is in `warn` status, the score is 50 +- If the test is in `fail` status, the score is 0 + +The results are aggregated to give a health score for each pillar. +The total score is a weighted average of the 6 pillars, where the weight is configurable. +The thresholds for the color coding (green, yellow and red) are also configurable. + +Score weight and threshold configuration + +### Can I customize the quality dimension mapping of my tests? + +Of course! +Each test you run, whether it's a generic or a custom test, can be mapped to one of the 6 quality dimensions. +The way to do so is to add `quality_dimension` to the test definition in your dbt project: + + + +```yml test +tests: + - not_null: + meta: + quality_dimension: completeness +``` + +```yml test/model config block +{{ config( + meta={ + "quality_dimension": "completeness", + } +) }} +``` + + + + +## Coming soon + +- **Send a daily report** of the data health to your stakeholders +- **Compare the data health** of different domains +- **Set up alerts** for when the data health is below a certain threshold \ No newline at end of file diff --git a/docs/features/data-observability-dashboard.mdx b/docs/features/collaboration-and-communication/data-observability-dashboard.mdx similarity index 65% rename from docs/features/data-observability-dashboard.mdx rename to docs/features/collaboration-and-communication/data-observability-dashboard.mdx index 7dd10b357..7a2589e5a 100644 --- a/docs/features/data-observability-dashboard.mdx +++ b/docs/features/collaboration-and-communication/data-observability-dashboard.mdx @@ -1,15 +1,11 @@ --- title: Data Observability Dashboard -icon: "browsers" --- -Managing data systems can be a complex task, especially when there are hundreds (or even thousands) of models being orchestrated separately across multiple DAGs. 
These models serve different data consumers, including internal stakeholders, clients, and reverse-ETL pipelines. +Managing data systems can be a complex task, especially when there are hundreds (or even thousands) of models being orchestrated separately across multiple DAGs. These models serve different data consumers, including internal stakeholders, clients, and reverse-ETL pipelines. Our Data Observability Dashboard provides an easy-to-use control panel for data teams to monitor the quality and performance of their data warehouse. - Elementary Data Observability Dashboard - + Elementary Data Observability Dashboard + \ No newline at end of file diff --git a/docs/features/collaboration-and-communication/data-quality-dimensions.mdx b/docs/features/collaboration-and-communication/data-quality-dimensions.mdx new file mode 100644 index 000000000..f62bdce56 --- /dev/null +++ b/docs/features/collaboration-and-communication/data-quality-dimensions.mdx @@ -0,0 +1,65 @@ +--- +title: Data Quality Dimensions +sidebarTitle: Data Quality Dimensions +--- + +## Measuring data quality + + + + +## Data quality dimensions + +The 6 Data Quality Dimensions are: + + + +## Data quality dimensions example + +To help understand different aspects of data quality, let's explore these concepts using a familiar example - the IMDb movie database. +IMDb is a comprehensive database of movies, TV shows, cast members, ratings, and more. +Through this example, we'll see how different data quality issues could affect user experience and data reliability. + + +
+ IMDB banner +
+ + +#### Freshness +- **Definition**: Ensures that data is up to date and reflects the latest information. +- **Example**: Consider The Godfather's IMDb rating. If the rating hasn't been updated since 2000, despite users continuing to submit reviews every year, the displayed rating would be stale. This outdated information could mislead users about the current audience sentiment toward the movie. + +#### Completeness +- **Definition**: Ensures all required fields are filled in, without missing values. +- **Example**: Imagine the IMDb record for Pulp Fiction missing key cast members, such as Uma Thurman. This incomplete data would provide users with an inadequate picture of the movie's legendary cast, significantly reducing the dataset's usefulness. + +#### Uniqueness +- **Definition**: Ensures that each entity is represented only once in the system. +- **Example**: Consider having two separate records for The Matrix with the same primary key but different details - one showing a release year of 1999, another showing 1998. This duplication creates confusion about the correct information and could cause problems in downstream processes, like reporting or website display. + +#### Consistency +- **Definition**: Ensures data remains uniform across multiple datasets and sources. +- **Example**: If IMDb's Top 250 Movies page displays 254 movies due to a backend error, while the Ratings Summary page correctly shows 250 movies, this inconsistency would confuse users and diminish trust in the platform's data. + +#### Validity +- **Definition**: Ensures that data conforms to rules or expectations, such as acceptable ranges or formats. +- **Example**: If a movie's runtime is listed as 1500 minutes when the longest movie ever made was 873 minutes, this would be an invalid value. The runtime clearly doesn't conform to expected movie length ranges and would be considered invalid data. + +#### Accuracy +- **Definition**: Ensures that data represents the real-world scenario correctly. +- **Example**: If an IMDb record listed Leonardo DiCaprio as the director of Inception instead of Christopher Nolan, this would be inaccurate. While DiCaprio starred in the movie, he didn't direct it - this kind of error misrepresents the real-world facts. + + +## Implementation in Elementary + +In Elementary, all the dbt tests and Elementary monitors are automatically attributed to the relvant data quality dimension. +Based on the results of tests and monitors, a data health score is calculated for each dimension, and a total score for the data set. + +The data quality scores are presented in a [data health dashboard](/features/collaboration-and-communication/data-health#data-health-dashboard), data catalog integrations, and more. + +To learn more, **watch the webinar** [**Measuring Data Health with Elementary**](https://www.elementary-data.com/webinar/measuring-data-health-with-elementary) \ No newline at end of file diff --git a/docs/features/config-as-code.mdx b/docs/features/config-as-code.mdx index 804899bdf..de7526a49 100644 --- a/docs/features/config-as-code.mdx +++ b/docs/features/config-as-code.mdx @@ -1,11 +1,10 @@ --- -title: "Configuration as Code" -icon: "code" +title: "Configuration-as-Code" --- -All Elementary configuration is managed in your dbt code. +All Elementary configurations are managed in your dbt code. Configuring observability becomes a part of the development process that includes version control, continuous integration, and a review process. 
-In Elementary Cloud, you can save time by adding tests in bulk from the UI that will be added to your code. Additionally, you can allow data analysts to create quality tests without writing any code. Elementary will take care of it for them and open pull requests for them. +In Elementary Cloud, you can save time by adding tests in bulk from the UI that will be added to your code. Additionally, you can allow data analysts to create quality tests without writing any code. Elementary will take care of it for them and open pull requests on their behalf. diff --git a/docs/features/data-governance/critical_assets.mdx b/docs/features/data-governance/critical_assets.mdx new file mode 100644 index 000000000..127e6bd4a --- /dev/null +++ b/docs/features/data-governance/critical_assets.mdx @@ -0,0 +1,66 @@ +--- +title: "Critical Assets" +--- + +### **What is a Critical Asset?** + +A critical asset is **any data asset (such as a model, exposure, or report) that plays a crucial role in your company's data ecosystem**. Issues affecting these assets can have a significant impact on business operations, dashboards, and decision-making. + +Marking an asset as **critical** ensures it receives **higher priority in monitoring and alerting**, helping you quickly identify and respond to issues that may impact it. + + + + +## **What Should Be Set as a Critical Asset?** + +You should mark an asset as **critical** if: + +- It directly impacts key **business reports, dashboards, or decision-making tools**. +- It serves as an essential **upstream dependency** for other important data models. +- It is frequently used by **multiple teams or stakeholders**. +- Its failure or inaccuracy could cause **significant business or operational risks** + +## **Why Should I Define My Critical Assets?** + +Defining your **critical assets** helps you: + +- **Quickly identify and respond to issues**– Get notified when upstream problems may impact your critical assets, ensuring faster resolution and minimal disruption. +- **Prioritize issue resolution**– Focus on addressing incidents that have the greatest impact on business operations, dashboards, and decision-making. +- **Improve data reliability**– Ensure key stakeholders have access to accurate and up-to-date data by monitoring critical assets more effectively. +- **Enhance observability**– Gain better visibility into the health of your most important assets through prioritized monitoring and alerting. + +## **How to Set a Critical Asset?** + +You can mark an asset as **critical** directly in the UI: + +- **From the Catalog Page** – Navigate to the asset in the catalog and click the **diamond icon** to **"Set as Critical Asset."** +- **From the Lineage View** – Right-click on the node representing the asset and select **"Set as Critical Asset"** from the list. + +Once an asset is marked as critical, **alerts will now highlight any issues that may impact this asset or its upstream dependencies, ensuring prioritization.** + + + +## **Where Can You See Critical Assets?** + +Once an asset is marked as **critical**, you will be able to: + +- **Identify it in the UI**, where it will be visually highlighted. +- **Receive alerts** when upstream issues may impact the critical asset. +- **Filter incidents** by their impact on critical assets. +- **Track the health of critical data assets** over time through dashboard monitoring (Coming soon). 
+ +## **Test Coverage Analysis (Coming Soon)** + +In upcoming releases, we will introduce **test coverage analysis** based on the monitoring state of your assets across key data quality dimensions. This feature will help you: + +- **Identify gaps** in monitoring and testing coverage for your most important assets and their upstream dependencies. +- **Ensure that key data components** are properly monitored and continuously observed to maintain data reliability. + +By carefully selecting **which assets to mark as critical**, you can quickly detect and prioritize issues that impact your most important data, reducing disruptions, improving reliability, and keeping key stakeholders informed. \ No newline at end of file diff --git a/docs/features/column-level-lineage.mdx b/docs/features/data-lineage/column-level-lineage.mdx similarity index 51% rename from docs/features/column-level-lineage.mdx rename to docs/features/data-lineage/column-level-lineage.mdx index d94b067ed..7b8a7239e 100644 --- a/docs/features/column-level-lineage.mdx +++ b/docs/features/data-lineage/column-level-lineage.mdx @@ -1,38 +1,37 @@ --- -title: Column Level Lineage -sidebarTitle: Column Level Lineage +title: Column-Level Lineage +sidebarTitle: Column level lineage --- + The table nodes in Elementary lineage can be expanded to show the columns. When you select a column, the lineage of that specific column will be highlighted. -Column level lineage is useful for answering questions such as: +Column-level lineage is useful for answering questions such as: + +* Which downstream columns are actually impacted by a data quality issue? -- Which downstream columns are actually impacted by a data quality issue? -- Can we deprecate or rename a column? -- Will changing this column impact a dashboard? +* Can we deprecate or rename a column? - - Elementary Column Level Lineage - +* Will changing this column impact a dashboard? ### Filter and highlight columns path -To help navigate graphs with large amount of columns per table, use the `...` menu right to the column: +To help navigate graphs with large amount of columns per table, use the `...` menu to the right of the column: + +* **Filter**: Will show a graph of only the selected column and its dependencies. + +* **Highlight**: Will highlight only the selected column and its dependencies. -- **Filter**: Will show a graph of only the selected column and it's dependencies. -- **Highlight**: Will highlight only the selected column and it's dependencies. + ### Supported BI tools: - - -### Why is lineage to exposures useful? - -- **Incidents impact analysis**: You could explore which exposures are impacted by each data issue. -- **Exposure health**: By selecting an exposure and filtering on upstream nodes, you could see the status of all it’s upstream datasets. -- **Prioritize data issues**: Prioritize the triage and resolution of issues that are impacting your critical downstream assets. -- **Change impact**: Analyze which exposures will be impacted by a planned change. -- **Unused datasets**: Detect datasets that no exposure consumes, that could be removed to save costs. 
+ \ No newline at end of file diff --git a/docs/features/data-lineage/lineage.mdx b/docs/features/data-lineage/lineage.mdx new file mode 100644 index 000000000..8a0a1e256 --- /dev/null +++ b/docs/features/data-lineage/lineage.mdx @@ -0,0 +1,42 @@ +--- +title: End-to-End Data Lineage +sidebarTitle: Lineage overview +--- + + + +Elementary offers automated [Column-Level Lineage](/features/data-lineage/column-level-lineage) functionality, enriched with the latest test and monitors results. +It is built with usability and performance in mind. +The column-level lineage is built from the metadata of your data warehouse, and integrations with [BI tools](features/data-lineage/exposures-lineage#automated-lineage-to-the-bi) such as Looker and Tableau. + +Elementary updates your lineage view frequently, ensuring it is always current. +This up-to-date lineage data is essential for supporting several critical workflows, including: + +- **Effective Data Issue Debugging**: Identify and trace data issues back to their sources. +- **Incidents impact analysis**: You could explore which downstream assets are impacted by each data issue. +- **Prioritize data issues**: Prioritize the triage and resolution of issues that are impacting your critical downstream assets. +- **Public assets health**: By selecting an exposure and filtering on upstream nodes, you can see the status of all its upstream datasets. +- **Change impact**: Analyze which exposures will be impacted by a planned change. +- **Unused datasets**: Detect datasets that are not consumed downstrean, and could be removed to reduce costs. + + + +## Node info and test results + +To view additional information in the lineage view, use the `...` menu to the right of the column: + +- **Test results**: Access the table's latest test results in the lineage view. +- **Node info**: See details such as description, owner and tags. If collected, it will include the latest job info. + + +## Job info in lineage + +You can [configure Elementary to collect jobs information](/cloud/guides/collect-job-data) to present in the lineage _Node info_ tab. Job names can also be used to filter the lineage graph. diff --git a/docs/features/data-tests.mdx b/docs/features/data-tests.mdx deleted file mode 100644 index 0c3ef16d8..000000000 --- a/docs/features/data-tests.mdx +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: "Elementary Data Tests" -icon: "monitor-waveform" -sidebarTitle: "Data Tests" ---- - -Elementary provides tests for detection of data quality issues. -Elementary data tests are configured and executed like native tests in your dbt project. - -Elementary tests can be used in addition to dbt tests, packages tests (such as dbt-expectations), and custom tests. -All of these test results will be presented in the Elementary UI and alerts. - - diff --git a/docs/features/data-tests/custom-sql-tests.mdx b/docs/features/data-tests/custom-sql-tests.mdx new file mode 100644 index 000000000..f901d5dba --- /dev/null +++ b/docs/features/data-tests/custom-sql-tests.mdx @@ -0,0 +1,69 @@ +--- +title: Custom SQL Tests +sidebarTitle: Custom SQL test +--- + +Custom SQL queries enable you to create tailored tests that align with your specific business needs. +These tests can be executed against any of the tables in your connected data warehouse. + +### When to use custom SQL tests? + +A custom SQL test is easier to write than a new generic dbt test, but it can't be leveraged across different data sets. 
+On the other hand, writing custom SQL tests enables testing complex custom calculation logic, relationships between many tables, and more.
+
+This is why most Elementary users write custom SQL tests when the behavior to be tested is complex, specific, and not covered by any out-of-the-box test.
+A common use case is for data analysts to add validations as custom SQL tests.
+
+As non-technical users are often not familiar with dbt,
+Elementary provides an interface for adding a custom SQL test, which is converted into a pull request that adds a singular dbt test.
+
+### Adding a custom SQL test
+
+1. In `Test Configuration` choose `New test` → `custom query test`.
+2. Add your query. It can be a regular SQL query on any of the tables in your environment. The query should only return results if something is wrong, meaning the test passes when there are no results and fails on any results. Be sure to use the full name of the table, including the database and schema. The query will then be validated and formatted, and table names will be replaced with dbt model references. This can take a few seconds.
+
+
+
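+For example, a minimal query for such a test might look like the sketch below (the database, schema, table, and column names are illustrative placeholders, not taken from your project):
+
+```sql
+-- Returns rows only when something is wrong: here, orders with a negative amount.
+-- No results means the test passes; any returned row makes it fail.
+select order_id, amount
+from analytics_db.core_schema.orders
+where amount < 0
+```
+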
+ +
+
+
+3. Add your test configuration:
+
+- Test name (should be a valid file name, as it will be used as the name of your singular test).
+- Description (recommended).
+- Location - this will be the directory in your dbt project where the test will be stored.
+- Severity - Failure or Warning.
+- Tested table (optional) - Adding the table the test is checking will link this test to that table, showing it in the table’s lineage, catalog page, and more.
+- Tags and Owners (optional)
+  - Some users use dbt tags to help with scheduling. Creating tags such as “daily” or “hourly”, and having scheduled jobs that select those tags, can help determine the test’s schedule at the time of creation.
+  - It is recommended to add a tag that will later be used to route the alert on failure to the right recipient.
+
+4. Review & Submit. In this stage you’ll be able to see the translated test query and configuration.
+
+
+
+ +
+
+
+5. Clicking “Submit” will create a pull request.
+6. After the pull request is merged to production, the test will run as part of a `dbt test` job.
+
+### Custom SQL test results
+
+The results of all custom SQL tests can be found under a `tests` folder in the test results sidebar.
+Additionally, if you configured a `tested table`, `tag`, or `owner`, the test result will be visible under the relevant path.
+
+### Alerts on custom SQL tests
+
+It's recommended to use tags and owners to create an [alert rule](/features/alerts-and-incidents/alert-rules) that routes alerts on these tests to the relevant recipient.
+
+### Scheduling custom SQL tests
+
+It's common to not want these tests to run as part of your main job, or at the same frequency.
+We recommend tagging all of these tests, using dbt tag selectors to exclude them from the main job, and running a dedicated test job that includes only these tests at the required frequency.
\ No newline at end of file
diff --git a/docs/features/data-tests/data-tests-overview.mdx b/docs/features/data-tests/data-tests-overview.mdx
new file mode 100644
index 000000000..ab85adf34
--- /dev/null
+++ b/docs/features/data-tests/data-tests-overview.mdx
@@ -0,0 +1,59 @@
+---
+title: Data Tests Overview
+sidebarTitle: Overview and configuration
+---
+
+Data tests are useful for validating and enforcing explicit expectations on your data.
+
+Elementary enables data validation and result tracking by leveraging dbt tests and dbt packages such as dbt-utils, dbt-expectations, and Elementary.
+This rich ecosystem of tests covers various use cases, and is widely adopted as a standard for data validations.
+Any custom dbt generic or singular test you develop will also be included.
+Additionally, users can create custom SQL tests in Elementary, and add tests in bulk from the UI.
+
+The combination of dbt tests, Elementary monitors, custom SQL tests, and the rich dbt testing ecosystem makes it possible to achieve wide, comprehensive coverage.
+
+### Supported data tests
+
+
+
+## Test configuration
+
+One of the design principles in Elementary is that users should manage configuration in code.
+This maintains the same workflow for building the pipeline and configuring coverage, makes observability and governance part of the development cycle,
+and provides control through code review and version management.
+
+However, adding many tests in code is tedious, and configuration in code isn't usable for everyone.
+
+In Elementary, we designed a flow that incorporates the best of both worlds:
+
+- **Configuration in code or in UI** - The UI test configuration flow opens pull requests to the code base through the [code repository](/cloud/integrations/code-repo/connect-code-repo) integration.
+- **The code is the single source of truth** - As configuration from the UI goes to code, the code remains the place where configuration is managed and maintained.
+
+### Create new tests from the UI
+
+
+
+### Edit existing tests from the UI
+Tests can be edited from the UI, and the changes will create a pull request in the code repository.
+The pull request will be reviewed and merged by the team, and the changes will be applied to the tests after the dbt pipeline runs and Elementary is synced.
+Simply open the test side panel from the test results / incidents page, and navigate to the Configuration tab.
+Then click "Edit test" and make the necessary changes.
+
+
+
+A pull request will be opened in the code repository, and a link to the PR will be provided in the UI.
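+For illustration, the change in such a pull request is usually an ordinary dbt YAML edit; a sketch with placeholder model and column names might look like this:
+
+```yml
+models:
+  - name: my_model_name
+    columns:
+      - name: order_id
+        tests:
+          - not_null
+          - unique
+```
+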
+ +### Benefits of leveraging dbt tests + + + +### dbt Test Hub + + \ No newline at end of file diff --git a/docs/features/data-tests/dbt-tests.mdx b/docs/features/data-tests/dbt-tests.mdx new file mode 100644 index 000000000..3fb27d027 --- /dev/null +++ b/docs/features/data-tests/dbt-tests.mdx @@ -0,0 +1,30 @@ +--- +title: dbt, Packages and Elementary Tests +sidebarTitle: dbt tests +--- + + + +### dbt Test Hub + + + +### Supported dbt tests and packages + +Elementary collects and monitors the results of all dbt tests. + +The following packages are supported in the tests configuration wizard: + +- [dbt expectations](https://github.com/calogica/dbt-expectations) - A dbt package inspired by the Great Expectations package for Python. The intent is to allow dbt users to deploy GE-like tests in their data warehouse directly from dbt. +- [dbt utils](https://github.com/dbt-labs/dbt-utils) - A package by dbt labs that offers useful generic tests. + +Note that you need to import these packages to your dbt project to use them. + +### Elementary dbt package tests + +The Elementary dbt package also provides tests for detection of data quality issues. +Elementary data tests are configured and executed like native tests in your dbt project. + + + + diff --git a/docs/features/data-tests/schema-validation-test.mdx b/docs/features/data-tests/schema-validation-test.mdx new file mode 100644 index 000000000..80f8fe2cb --- /dev/null +++ b/docs/features/data-tests/schema-validation-test.mdx @@ -0,0 +1,29 @@ +--- +title: Schema Validation Tests +sidebarTitle: Schema validation +--- + +The Elementary dbt package includes the following schema validation tests: + + + Fails on changes in schema: deleted or added columns, or change of data type + of a column. + + + +Fails if the table schema is different in columns names or column types than a +configured baseline (can be generated with a macro). + + + + Monitors a JSON type column and fails if there are JSON events that don't + match a configured JSON schema (can be generated with a macro). + + + + Monitors changes in your models' columns that break schema for downstream + exposures, such as BI dashboards. + \ No newline at end of file diff --git a/docs/features/elementary-alerts.mdx b/docs/features/elementary-alerts.mdx index 4e6f06cc7..e69de29bb 100644 --- a/docs/features/elementary-alerts.mdx +++ b/docs/features/elementary-alerts.mdx @@ -1,14 +0,0 @@ ---- -title: "Alerts" -icon: "bell-exclamation" ---- - - - -## Alerts destinations - - - -## Alerts configuration - - diff --git a/docs/features/lineage.mdx b/docs/features/lineage.mdx deleted file mode 100644 index db015dc93..000000000 --- a/docs/features/lineage.mdx +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: End-to-End Data Lineage -sidebarTitle: Data Lineage ---- - -Elementary Cloud UI and Elementary OSS Report include a rich data lineage graph. -The graph is enriched with the latest test results, to enable easy impact and root cause analysis of data issues. - -In Elementary Cloud lineage includes [Column Level Lineage](/features/column-level-lineage) and [BI integrations](/features/exposures-lineage#automated-bi-lineage). - -## Node info and test results - -To see additional information in the lineage view, use the `...` menu right to the column: - -- **Test results**: Access the table latest test results in the lineage view. -- **Node info**: See details such as description, owner and tags. If collected, it will include the latest job info. 
- - - -## Job info in lineage - -You can configure Elementary to collect jobs names and information to present in the lineage _Node info_ tab. Job names can also be used to filter the lineage graph. - -Read how to configure jobs info collection for [Elementary Cloud](/cloud/guides/collect-job-data) or [OSS](/oss/guides/collect-job-data). diff --git a/docs/features/multi-env.mdx b/docs/features/multi-env.mdx index fb6d55921..4fb533ac2 100644 --- a/docs/features/multi-env.mdx +++ b/docs/features/multi-env.mdx @@ -1,11 +1,10 @@ --- title: "Multiple Environments" -icon: "rectangle-history-circle-plus" --- -An environment in Elementary is a combination of dbt project and target. -For example: If you have a single dbt project with three targets, prod, staging and dev, you could create 3 environments in Elementary and monitor these envs. +An environment in Elementary is a combination of a dbt project and a target. +For example: If you have a single dbt project with three targets, prod, staging and dev, you can create 3 environments in Elementary and monitor these environments. If you have several dbt projects and even different data warehouses, Elementary enables monitoring the data quality of all these environments in a single interface. diff --git a/docs/features/performance-monitoring/performance-alerts.mdx b/docs/features/performance-monitoring/performance-alerts.mdx new file mode 100644 index 000000000..8ae3e905e --- /dev/null +++ b/docs/features/performance-monitoring/performance-alerts.mdx @@ -0,0 +1,97 @@ +--- +title: Performance Alerts +sidebarTitle: Performance alerts +--- + +Monitoring the performance of your dbt models is crucial for maintaining an efficient data pipeline. Elementary provides capabilities to set up alerts for long-running queries, helping you identify performance bottlenecks and optimize your data pipeline. + +There are two main approaches to creating alerts for long-running model queries: + +1. **Static Threshold Alerts**: Define specific time thresholds that, when exceeded, trigger an alert +2. **Anomaly Detection Alerts**: Use Elementary's anomaly detection to identify unusual increases in query execution time + +## Static Threshold Alerts + +You can define tests that fail when model execution times exceed predefined thresholds. This approach is straightforward and ideal when you have clear performance requirements. + +### Implementation Steps + +1. Create a singular test SQL file in your dbt project (e.g., `tests/test_models_run_under_30m.sql`): + +```sql +{{ config( + tags=["model_performance"], + meta={ + "description": "This test will fail on any models running over 30 minutes within the last 24 hours" + } +) }} + +select name, package_name, status, generated_at, execution_time +from {{ ref('elementary', 'model_run_results') }} +where CAST(generated_at AS timestamp) >= TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 day) + AND execution_time >= 30 * 60 +order by execution_time desc +``` + +In this example: +- The test monitors model runs over the past 24 hours +- It fails if any model takes longer than 30 minutes to run (1800 seconds) +- The test is tagged with "model_performance" for easy identification +- Results are ordered by execution time in descending order + +When this test fails, Elementary will generate an alert based on your alert configurations. The test results will also be visible in the Elementary UI, showing the 5 worst-performing model runs. 
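+Because the test above is tagged `model_performance`, you can run it on its own schedule with standard dbt selection syntax; for example (a sketch, assuming the tag is applied as shown):
+
+```shell
+# Run only the performance tests, e.g. in a dedicated job
+dbt test --select tag:model_performance
+
+# Optionally exclude them from your main job
+dbt build --exclude tag:model_performance
+```
+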
+
+## Anomaly Detection Alerts
+
+Instead of using fixed thresholds, you can leverage Elementary's anomaly detection to identify unusual increases in execution time. This approach is more dynamic and can adapt to your evolving data pipeline.
+
+### Implementation Steps
+
+1. Define a source on the `model_run_results` view in your `schema.yml` file (or another YAML file):
+
+```yaml
+sources:
+  - name: elementary_models
+    schema: "your_elementary_schema" # Replace with your Elementary schema name
+    tables:
+      - name: model_run_results
+        columns:
+          - name: execution_time
+            tests:
+              - elementary.column_anomalies:
+                  config:
+                    severity: warn
+                  tags: ["model_performance"]
+                  column_anomalies:
+                    - max
+                  dimensions: ["package_name", "name"]
+                  timestamp_column: generated_at
+                  anomaly_direction: spike
+                  ignore_small_changes:
+                    spike_failure_percent_threshold: 10
+```
+
+In this configuration:
+- Elementary monitors the `execution_time` column for anomalies
+- Dimensions are set to `package_name` and `name` to analyze each model individually
+- The test only detects spikes in execution time (`anomaly_direction: spike`)
+- Small changes under 10% are ignored (`spike_failure_percent_threshold: 10`)
+- The severity is set to "warn" but can be adjusted as needed
+
+This test will detect when a model's execution time increases significantly compared to its historical performance, triggering an alert when the increase exceeds the normal baseline.
+
+## Choosing the Right Approach
+
+Both methods have their strengths:
+
+- **Static Threshold**: Simple to implement and understand. Ideal when you have clear performance requirements (e.g., "models must run in under 30 minutes").
+
+- **Anomaly Detection**: More adaptive to your specific environment. Better at detecting relative changes in performance rather than absolute thresholds. Useful when normal execution times vary across different models.
+
+You can implement both approaches simultaneously for comprehensive performance monitoring.
+
+## Viewing Performance Alerts
+
+Performance alerts appear in your regular Elementary alert channels (Slack, email, etc.) based on your alert configuration.
+
+You can also view performance test results in the Elementary UI under the Tests tab, filtered by the "model_performance" tag that we added to both test types.
diff --git a/docs/features/performance-monitoring/performance-monitoring.mdx b/docs/features/performance-monitoring/performance-monitoring.mdx
new file mode 100644
index 000000000..77ce069ba
--- /dev/null
+++ b/docs/features/performance-monitoring/performance-monitoring.mdx
@@ -0,0 +1,39 @@
+---
+title: Performance Monitoring
+sidebarTitle: Performance monitoring
+---
+
+Monitoring the performance of your data pipeline is critical for maintaining data quality, reliability, and operational efficiency.
+Proactively monitoring performance helps you detect bottlenecks and opportunities for optimization, prevent data delays, and avoid unnecessary costs.
+
+Elementary monitors and logs the execution times of:
+- dbt models
+- dbt tests
+
+## Models performance
+
+Navigate to the `Model Duration` tab.
+
+The table displays the latest execution time, median execution time, and execution time trend for each model. You can sort the table by these metrics and explore the execution times over time for the models with the longest durations.
+
+It is also useful to use the navigation bar to filter the results and see run times per tag/owner/folder.
+ + + +## Tests performance + +Navigate to the `Test Execution History` tab. + +On the table you can see the median execution time and fail rate per test. +You can sort the table by this time column, and detect tests that are compute heavy. + +It is also useful to use the navigation bar to filter the results, and see run times per tag/owner/folder. \ No newline at end of file diff --git a/docs/guides/modules-overview/dbt-package.mdx b/docs/guides/modules-overview/dbt-package.mdx index 1680a7999..1b21bbe7d 100644 --- a/docs/guides/modules-overview/dbt-package.mdx +++ b/docs/guides/modules-overview/dbt-package.mdx @@ -3,33 +3,44 @@ title: "Elementary dbt package" sidebarTitle: "Introduction" --- -For data monitoring and dbt artifacts collection, Elementary uses a dbt package. -The package adds models, macros, and Elementary tests as dbt tests to your project. +The Elementary dbt package serves as a collector of logs and metadata from your dbt project and offers a set of data anomaly detection and schema tests. +To gain the most value from the dbt package, we recommend using it with the [Elementary Cloud Platform](/cloud/introduction) or with [Elementary open-source CLI tool](/oss/oss-introduction). -The impact of the package on `dbt run` is minimal, and most of the processing happens as part of the data tests that are executed on `dbt test`. +**What brings you here?** - -A dbt package is additional Jinja and SQL code that is added to your project, for additional functionality. In fact, each package is a dbt project. By adding a package to your project, you are adding the package code to be part of your project, you can reference its macros, execute its models, and so on. - -Add packages to your project by creating a `packages.yml` file under the main project directory (where your ` -dbt_project.yml` is), and adding the relevant package. After you add a new package, run `dbt deps` to actually pull its -code to your project. This is also how you update packages. -Some packages we recommend you check out: [dbt_utils](https://github.com/dbt-labs/dbt-utils/tree/0.8.2/) -, [dbt_date](https://github.com/calogica/dbt-date/tree/0.5.4/) -, [codegen](https://github.com/dbt-labs/dbt-codegen/tree/0.5.0/). +* **An organizational data quality initiative** (e.g., platform refactoring, governance projects, or AI/ML adoption): The Elementary Cloud Platform offers pipeline monitoring, incident management, lineage tracking, dashboards, health scores, and alerts, empowering data engineers, analytics engineers, and data analysts to resolve issues efficiently and deliver trusted data products. Learn more about the [Elementary Cloud Platform](/cloud/introduction) or [book a demo](https://meetings-eu1.hubspot.com/joost-boonzajer-flaes/intro-call-docs). - +* **Individual-use observability:** If you’ll be the primary user, try the open-source CLI tool to send Slack alerts, and self-host the Elementary report. The repository and source code of the Elementary dbt package can be [found here](https://github.com/elementary-data/dbt-data-reliability). ## Package Features +The Elementary dbt package is designed to power data observability use cases for dbt pipelines. +This package will upload logs and metadata generated from your runs as dbt artifacts into tables in your datawarehouse. +Additionally, it offers a wide range of tests, including anomalies in volume, freshness, columns and different dimensions of your data. 
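+As a quick reference, adding the package follows the standard dbt package workflow described in more detail below; a minimal `packages.yml` sketch (the version range shown is illustrative, check the package docs for the latest release):
+
+```yml
+packages:
+  - package: elementary-data/elementary
+    version: [">=0.16.0", "<0.17.0"]
+```
+
+After adding it, run `dbt deps` to pull the package into your project.
+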
+ +The impact of the package on **`dbt run`** is minimal, and most of the processing happens as part of the data tests that are executed on **`dbt test`**. + + + A dbt package is additional Jinja and SQL code that is added to your project, for additional functionality. In fact, each package is a dbt project. By adding a package to your project, you are adding the package code to be part of your project, you can reference its macros, execute its models, and so on. + + Add packages to your project by creating a `packages.yml` file under the main project directory (where your ` + dbt_project.yml` is), and adding the relevant package. After you add a new package, run `dbt deps` to actually pull its + code to your project. This is also how you update packages. + Some packages we recommend you check out: [dbt\_utils](https://github.com/dbt-labs/dbt-utils/tree/0.8.2/) + , [dbt\_date](https://github.com/calogica/dbt-date/tree/0.5.4/) + , [codegen](https://github.com/dbt-labs/dbt-codegen/tree/0.5.0/). + + After you deploy the dbt package, you can use Elementary tests, and your dbt artifacts will be uploaded automatically with on-run-end hooks: - - - + + + + + Although you can use the package only, we recommend using it with Elementary Cloud or OSS. @@ -52,4 +63,4 @@ the following var: ```yaml vars: elementary_full_refresh: true -``` +``` \ No newline at end of file diff --git a/docs/introduction.mdx b/docs/introduction.mdx index 1fc128423..c58bdb49c 100644 --- a/docs/introduction.mdx +++ b/docs/introduction.mdx @@ -5,14 +5,10 @@ description: "dbt-native data observability platform built for data and analytic icon: "fire" --- -
- Elementary banner + Elementary banner
- + Elementary includes two products: @@ -27,16 +23,14 @@ See the [detailed comparison](/overview/cloud-vs-oss) between Elementary and Ele Or - Start with [Elementary Community](/oss/quickstart/quickstart-cli-package), open-source CLI tool you can deploy and orchestrate to send Slack alerts and self-host the Elementary report. - ## Why choose Elementary? - Elementary configuration is managed in your dbt code. + Elementary configuration is managed in your dbt code. Elementary Cloud syncs configuration changes from the UI back to the dbt project code repository. You won't need to duplicate configuration - all your existing tests, owners, tags, and descriptions are leveraged. - @@ -45,26 +39,17 @@ Or - Start with [Elementary Community](/oss/quickstart/quickstart-cli-package), The dbt package creates a schema for logs, results and metadata, and Elementary only requires access to the Elementary schema. [Read about Security and Privacy >>>](/cloud/general/security-and-privacy) - Elementary dbt package automatically collects results and artifacts from your dbt project. All of your Elementary configuration is managed in your dbt code. By combining the package and Elementary Cloud, you get full dbt observability. All your tests and results in one dashboard and interface. - - - ## Want to know more? - - + + + \ No newline at end of file diff --git a/docs/key-features.mdx b/docs/key-features.mdx index 581b8d25d..d441581ea 100644 --- a/docs/key-features.mdx +++ b/docs/key-features.mdx @@ -33,7 +33,7 @@ icon: "stars" title="Data observability dashboard" icon="browsers" iconType="solid" - href="/features/data-observability-dashboard" + href="/features/collaboration-and-communication/data-observability-dashboard" > Inspect your data health overview, test results, models performance and data lineage. @@ -42,14 +42,14 @@ icon: "stars" title="End-to-end data lineage" icon="arrow-progress" iconType="solid" - href="/features/lineage" + href="/features/data-lineage/lineage" > Inspect dependencies including Column Level Lineage and integration with BI tools. @@ -67,8 +67,48 @@ icon: "stars" title="Data Catalog" icon="folder-tree" iconType="solid" - href="/features/catalog" + href="/features/collaboration-and-communication/catalog" > Explore and discover data sets, manage your documentation in code. + + + +#### Anomaly Detection + + + Out-of-the-box ML-powered monitoring for freshness and volume issues on all production tables. + The monitors track updates to tables, and will detect data delays, incomplete updates, and significant volume changes. + By qurying only metadata (e.g. information schema, query history), the monitors don't add compute costs. + + + + ML-powered anomaly detection on data quality metrics such as null rate, empty values, string length, numeric metrics (sum, max, min, avg), etc. + Elementary also supports monitoring for anomalies by dimensions. + The monitors are activated for specific data sets, and require minimal configuration (e.g. timestamp column, dimensions). + + +#### Schema Validation + + + Elementary offers a set of schema tests for validating there are no breaking changes. + The tests support detecting any schema changes, only detecting changes from a configured baseline, JSON schema validation, + and schema changes that break downstream exposures such as dashboards. + + + + Coming soon! 
+ + +#### Data Tests + +Custom SQL Tests + +dbt tests + +Python tests + +#### Tests Coverage + +#### Performance monitoring diff --git a/docs/mint.json b/docs/mint.json index 9d9bb9820..556da1312 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -29,7 +29,7 @@ }, "tabs": [ { - "name": "Data tests", + "name": "Elementary Tests", "url": "data-tests" }, { @@ -47,21 +47,31 @@ { "name": "Book a Demo", "icon": "calendar-check", - "url": "https://cal.com/maayansa/elementary-intro-docs" + "url": "https://meetings-eu1.hubspot.com/joost-boonzajer-flaes/intro-call-docs" }, { "name": "Join Slack", "icon": "slack", "url": "https://elementary-data.com/community" + }, + { + "name": "Changelog", + "icon": "list", + "url": "https://docs.elementary-data.com/changelog" + }, + { + "name": "Best Practices Guide", + "icon": "check", + "url": "https://docs.elementary-data.com/best-practices/introduction" } ], + "navigation": [ { "group": "Getting Started", "pages": [ "introduction", "quickstart", - "cloud/general/security-and-privacy", { "group": "dbt package", "icon": "cube", @@ -75,27 +85,104 @@ ] }, { - "group": "Features", + "group": "Cloud Platform", + "pages": [ + "cloud/introduction", + "cloud/features", + "features/integrations", + "cloud/general/security-and-privacy" + ] + }, + { + "group": "Anomaly Detection Monitors", + "pages": [ + "features/anomaly-detection/monitors-overview", + { + "group": "Automated monitors", + "pages": [ + "features/anomaly-detection/automated-monitors", + "features/anomaly-detection/automated-freshness", + "features/anomaly-detection/automated-volume" + ] + }, + "features/anomaly-detection/metrics", + { + "group": "Configuration and Feedback", + "pages": [ + "features/anomaly-detection/monitors-configuration", + "features/anomaly-detection/monitors-feedback" + ] + } + ] + }, + { + "group": "Data Tests", + "pages": [ + "features/data-tests/data-tests-overview", + "features/data-tests/dbt-tests", + "features/data-tests/custom-sql-tests", + "features/data-tests/schema-validation-test" + ] + }, + { + "group": "Data Lineage", + "pages": [ + "features/data-lineage/lineage", + "features/data-lineage/column-level-lineage", + "features/data-lineage/exposures-lineage" + ] + }, + { + "group": "Alerts and Incidents", "pages": [ - "features/data-tests", - "features/automated-monitors", - "features/elementary-alerts", - "features/data-observability-dashboard", + "features/alerts-and-incidents/alerts-and-incidents-overview", { - "group": "End-to-End Lineage", - "icon": "arrow-progress", - "iconType": "solid", + "group": "Setup & configure alerts", "pages": [ - "features/lineage", - "features/exposures-lineage", - "features/column-level-lineage" + "features/alerts-and-incidents/effective-alerts-setup", + "features/alerts-and-incidents/alert-rules", + "features/alerts-and-incidents/owners-and-subscribers", + "features/alerts-and-incidents/alert-configuration" ] }, + "features/alerts-and-incidents/incidents", + "features/alerts-and-incidents/incident-management" + ] + }, + { + "group": "Performance & Cost", + "pages": [ + "features/performance-monitoring/performance-monitoring", + "features/performance-monitoring/performance-alerts" + ] + }, + { + "group": "Data Governance", + "pages": [ + "best-practices/governance-for-observability", + "features/data-governance/critical_assets" + ] + }, + { + "group": "Collaboration & Communication", + "pages": [ + "features/collaboration-and-communication/data-observability-dashboard", + { + "group": "Data Health Scores", + "pages": [ + 
"features/collaboration-and-communication/data-quality-dimensions", + "features/collaboration-and-communication/data-health" + ] + }, + "features/collaboration-and-communication/catalog" + ] + }, + { + "group": "Additional features", + "pages": [ "features/config-as-code", - "features/catalog", "features/multi-env", - "features/ci", - "features/integrations" + "features/ci" ] }, { @@ -111,10 +198,9 @@ ] }, { - "group": "Send Slack alerts", + "group": "Send alerts", "pages": [ "cloud/guides/enable-slack-alerts", - "cloud/guides/alert-rules", "cloud/guides/alerts-configuration" ] }, @@ -156,9 +242,9 @@ "cloud/integrations/bi/connect-bi-tool", "cloud/integrations/bi/looker", "cloud/integrations/bi/tableau", - "cloud/integrations/bi/metabase", "cloud/integrations/bi/power-bi", "cloud/integrations/bi/sigma", + "cloud/integrations/bi/metabase", "cloud/integrations/bi/thoughtspot", "cloud/integrations/bi/mode", "cloud/integrations/bi/hex", @@ -177,11 +263,13 @@ "pages": [ "cloud/integrations/code-repo/connect-code-repo", "cloud/integrations/code-repo/github", - "cloud/integrations/code-repo/gitlab" + "cloud/integrations/code-repo/gitlab", + "cloud/integrations/code-repo/bitbucket", + "cloud/integrations/code-repo/azure-devops" ] }, { - "group": "Communication & collaboration", + "group": "Alerts & Incidents", "pages": [ "cloud/integrations/alerts/slack", "cloud/integrations/alerts/ms-teams", @@ -191,13 +279,39 @@ "cloud/integrations/alerts/linear", "cloud/integrations/alerts/webhooks" ] + }, + { + "group": "Data Catalog", + "pages": [ + "cloud/integrations/catalog/atlan" + ] + }, + { + "group": "Security and Connectivity", + "pages": [ + "cloud/integrations/security-and-connectivity/aws-privatelink-integration", + "cloud/integrations/security-and-connectivity/okta", + "cloud/integrations/security-and-connectivity/ms-entra" + ] } ] + }, { "group": "Resources", "pages": [ + "resources/business-case-data-observability-platform", "overview/cloud-vs-oss", + { + "group": "Best Practices Guide", + "icon": "check", + "pages": [ + "best-practices/introduction", + "best-practices/governance-for-observability", + "best-practices/detection-and-coverage", + "best-practices/triage-and-response" + ] + }, "resources/pricing", "resources/community" ] @@ -206,6 +320,7 @@ "group": "Elementary Data tests", "pages": [ "data-tests/introduction", + "data-tests/anomaly-detection-tests-oss-vs-cloud", "data-tests/elementary-tests-configuration" ] }, @@ -240,7 +355,8 @@ "data-tests/anomaly-detection-configuration/ignore_small_changes", "data-tests/anomaly-detection-configuration/fail_on_zero", "data-tests/anomaly-detection-configuration/detection-delay", - "data-tests/anomaly-detection-configuration/anomaly-exclude-metrics" + "data-tests/anomaly-detection-configuration/anomaly-exclude-metrics", + "data-tests/anomaly-detection-configuration/exclude-final-results" ] }, "data-tests/anomaly-detection-tests/volume-anomalies", @@ -248,7 +364,8 @@ "data-tests/anomaly-detection-tests/event-freshness-anomalies", "data-tests/anomaly-detection-tests/dimension-anomalies", "data-tests/anomaly-detection-tests/all-columns-anomalies", - "data-tests/anomaly-detection-tests/column-anomalies" + "data-tests/anomaly-detection-tests/column-anomalies", + "data-tests/anomaly-detection-tests/Anomaly-troubleshooting-guide" ] }, { @@ -260,9 +377,28 @@ "data-tests/schema-tests/exposure-tests" ] }, + { + "group": "AI Data Tests (Beta)", + "pages": [ + "data-tests/ai-data-tests/ai_data_validations", + 
"data-tests/ai-data-tests/unstructured_data_validations", + { + "group": "Supported Platforms", + "pages": [ + "data-tests/ai-data-tests/supported-platforms/snowflake", + "data-tests/ai-data-tests/supported-platforms/databricks", + "data-tests/ai-data-tests/supported-platforms/bigquery", + "data-tests/ai-data-tests/supported-platforms/redshift", + "data-tests/ai-data-tests/supported-platforms/data-lakes" + ] + } + ] + }, { "group": "Other Tests", - "pages": ["data-tests/python-tests"] + "pages": [ + "data-tests/python-tests" + ] }, { "group": "Elementary OSS", @@ -290,8 +426,7 @@ "oss/guides/share-observability-report/host-on-s3", "oss/guides/share-observability-report/host-on-gcs", "oss/guides/share-observability-report/host-on-azure", - "oss/guides/share-observability-report/send-report-summary", - "oss/guides/collect-dbt-source-freshness" + "oss/guides/share-observability-report/send-report-summary" ] }, { @@ -303,12 +438,16 @@ "oss/guides/alerts/alerts-configuration" ] }, - "oss/guides/collect-job-data" + "oss/guides/collect-job-data", + "oss/guides/collect-dbt-source-freshness" ] }, { "group": "Configuration & usage", - "pages": ["oss/cli-install", "oss/cli-commands"] + "pages": [ + "oss/cli-install", + "oss/cli-commands" + ] }, { "group": "Deployment", @@ -348,6 +487,8 @@ { "group": "Releases", "pages": [ + "oss/release-notes/releases/0.17.0", + "oss/release-notes/releases/0.16.2", "oss/release-notes/releases/0.11.2", "oss/release-notes/releases/0.10.0", "oss/release-notes/releases/0.9.1", @@ -369,19 +510,51 @@ ] } ], - "footerSocials": { + "footerSocials": + { "website": "https://www.elementary-data.com", "slack": "https://elementary-data.com/community" }, "analytics": { - "ga4": { - "measurementId": "G-4DQN8YR4NH" - }, "posthog": { "apiKey": "phc_56XBEzZmh02mGkadqLiYW51eECyYKWPyecVwkGdGUfg" }, "gtm": { - "tagId": "GTM-TKR4HS3Q" + "tagId": "GTM-WCKCG3D4" + } + }, + "redirects": [ + { + "source": "/features/lineage", + "destination": "/features/data-lineage/lineage" + }, + { + "source": "/features/exposures-lineage", + "destination": "/features/data-lineage/exposures-lineage" + }, + { + "source": "/features/column-level-lineage", + "destination": "/features/data-lineage/column-level-lineage" + }, + { + "source": "/features/automated-monitors", + "destination": "/features/anomaly-detection/automated-monitors" + }, + { + "source": "/features/data-tests", + "destination": "/features/data-tests/dbt-tests" + }, + { + "source": "/features/elementary-alerts", + "destination": "/features/alerts-and-incidents/alerts-and-incidents-overview" + }, + { + "source": "/features/catalog", + "destination": "/features/collaboration-and-communication/catalog" + }, + { + "source": "/features/data-observability-dashboard", + "destination": "/features/collaboration-and-communication/data-observability-dashboard" } - } -} + ] +} \ No newline at end of file diff --git a/docs/oss/cli-commands.mdx b/docs/oss/cli-commands.mdx index 748315251..1ae7eec3e 100644 --- a/docs/oss/cli-commands.mdx +++ b/docs/oss/cli-commands.mdx @@ -16,13 +16,13 @@ Read from the test results table and [send new alerts](/oss/guides/alerts/alerts edr monitor ``` -Read from the test results table and generate the [Elementary UI](/features/data-observability-dashboard): +Read from the test results table and generate the [Elementary UI](/features/collaboration-and-communication/data-observability-dashboard): ```shell edr report ``` -Read from the test results table and generate the [Elementary UI](/features/data-observability-dashboard) 
and send +Read from the test results table and generate the [Elementary UI](/features/collaboration-and-communication/data-observability-dashboard) and send it to external platforms such as Slack, S3, GCS: diff --git a/docs/oss/deployment-and-configuration/teams.mdx b/docs/oss/deployment-and-configuration/teams.mdx index a1b331751..802dec1b3 100644 --- a/docs/oss/deployment-and-configuration/teams.mdx +++ b/docs/oss/deployment-and-configuration/teams.mdx @@ -3,17 +3,20 @@ title: "Teams setup for Elementary CLI" sidebarTitle: "Teams" --- -Elementary Teams integration includes sending [Teams alerts](/oss/guides/alerts/send-teams-alerts) on failures in dbt tests and models. +Elementary Teams integration includes sending [Teams alerts](/oss/guides/alerts/send-teams-alerts) on failures in dbt tests and models. The alerts are sent using Microsoft Teams Adaptive Cards format, which provides rich formatting and interactive capabilities. + +MS Teams supports Elementary Alerts, but unlike Slack, it does not support the Elementary report or multiple channels. ## Integration options -There is one integration option for Microsoft Teams: a Webhook. This method let you receive alerts from Elementary, but lacks -some support that is available in the Slack integration solution. -Below is features support comparison table (with Slack), to help you select the integration method. +There are two ways to create a webhook for Microsoft Teams: + +1. **Microsoft Teams Connectors (Legacy)**: The traditional way of creating webhooks, but this method is being deprecated by Microsoft. +2. **Power Automate Workflows (Recommended)**: The newer, more flexible way to create webhooks. Note that when using this method, Elementary CLI cannot directly verify if messages were delivered - you'll need to monitor your workflow runs in Power Automate. + + -| Integration | Elementary alerts | Elementary report | Multiple channels | Slack workflows | -| ------------- | ----------------- | ----------------- | ----------------- | --------------- | -| Teams Webhook | ✅ | ❌ | ❌ | ❌ | +Microsoft 365 Connectors (previously called Office 365 Connectors) are nearing deprecation. We recommend using Power Automate Workflows for new integrations. ## Teams integration setup diff --git a/docs/oss/general/troubleshooting.mdx b/docs/oss/general/troubleshooting.mdx index a0b12c658..1a8fdf5d6 100644 --- a/docs/oss/general/troubleshooting.mdx +++ b/docs/oss/general/troubleshooting.mdx @@ -32,12 +32,22 @@ If you get an empty report, there are several steps to understand what went wron - Run the CLI with the flag for force updating the packages: `edr report -u true` -**3. Validate that the CLI has a working connection profile** +**3. Validate that the CLI has a working connection profile in the [right path and format](https://docs.elementary-data.com/oss/cli-install#how-to-create-profiles-yml)** -- **Check that the connection profile exists in the right path and format** -- **Check that the connection profile points to the elementary package schema** +- Default path: `HOME_DIR/.dbt/profiles.yml` . If saved elsewhere, make sure to run dbt run and dbt test with `—profiles-dir ` +- Profile name: `elementary` +- Make sure that the elementary profile is a top-level profile, with the same indentation of the profiles you have already set up +- Schema name: The schema of the elementary models. The default name is  `_elementary` -**4. Still not working? 
Collect the following logs and reach our to the elementary team at [#support](https://elementary-data.com/community) on Slack** +**4. Validate the schema configuration for elementary models in your dbt_project.yml** +```yaml +models: + jaffle_shop: + +materialized: table + elementary: + +schema: 'elementary' +``` +**5. Still not working? Collect the following logs and reach our to the elementary team at [#community-support](https://elementary-data.com/community) on Slack** - **edr.log** - Created on the execution folder of the CLI. - **dbt.log** - Created under the package location at @@ -179,23 +189,24 @@ If you want the Elementary UI to show data for a longer period of time, use the - - -If you want to prevent elementary tests from running the simplest way is to exclude the tag that marks all of them in your dbt command: -```shell -dbt test --exclude tag:elementary-tests -``` -If you add the following to your dbt_project.yml, elementary models will not run and elementary tests will be executed but will do nothing and always pass. -```shell -models: - elementary: - enabled: false + + +When writing to the `dbt_artifacts` tables in the Elementary schema, data is deleted and reinserted. Running parallel jobs through an orchestrator can lead to errors, as multiple jobs may attempt to modify the same tables simultaneously. +To prevent this, you should: +1. [Disable the on-run-end hooks](https://docs.elementary-data.com/oss/general/faq#can-i-disable-the-on-run-end-hooks-or-results-uploading) +2. [Exclude the Elementary models](https://docs.elementary-data.com/oss/general/faq#can-i-disable-exclude-the-elementary-models) + +For scheduled updates to `dbt_artifacts` (e.g., a daily job), run: + +``` +dbt run --select elementary --vars '{"enable_elementary_models": true}' ``` + If you're experiencing issues of any kind, please contact us on the [#support](https://elementary-community.slack.com/archives/C02CTC89LAX) channel. diff --git a/docs/oss/guides/alerts/alerts-configuration.mdx b/docs/oss/guides/alerts/alerts-configuration.mdx index dcc4d08cb..e10e3427e 100644 --- a/docs/oss/guides/alerts/alerts-configuration.mdx +++ b/docs/oss/guides/alerts/alerts-configuration.mdx @@ -7,15 +7,22 @@ sidebarTitle: "Alerts configuration" ## Alerts CLI flags + + Alert vars are deprecated! We recommend filtering the alerts + using CLI selectors instead. + + + + #### Filter alerts Elementary supports filtering the alerts by tag, owner, model, status or resource type. Using filters, you can send alerts to the relevant people and teams by running `edr` multiple times with different filters on each run. - -alerts on skipped tests and models are filtered out by default. if you want to receive those alerts, apply the statuses filter and include them explicitly. - + +Alerts on skipped tests and models are filtered out by default. if you want to receive those alerts, apply the statuses filter and include them explicitly. + @@ -87,3 +94,11 @@ If configured otherwise in the dbt project config block or meta, the CLI value w ```shell edr monitor --suppression-interval 24 ``` + +#### Alert group threshold + +Set a minimum alert threshold before grouping notifications into a summary alert — keeping noise low while ensuring nothing gets missed. 
+ +```shell +edr monitor --group-alerts-threshold 5 +``` \ No newline at end of file diff --git a/docs/oss/guides/alerts/send-slack-alerts.mdx b/docs/oss/guides/alerts/send-slack-alerts.mdx index 79cd4e635..1c231df9b 100644 --- a/docs/oss/guides/alerts/send-slack-alerts.mdx +++ b/docs/oss/guides/alerts/send-slack-alerts.mdx @@ -34,13 +34,7 @@ be sent to the wrong one due to the overlap accessing the backend table of eleme ## Alert on source freshness failures -_Not supported in dbt cloud_ - -To alert on source freshness, you will need to run `edr run-operation upload-source-freshness` right after each execution of `dbt source freshness`. -This operation will upload the results to a table, and the execution of `edr monitor` will send the actual alert. - -- Note that `dbt source freshness` and `upload-source-freshness` needs to run from the same machine. -- Note that `upload-source-freshness` requires passing `--project-dir` argument. +To alert on source freshness, follow [this guide](/oss/guides/collect-dbt-source-freshness). ## Continuous alerting diff --git a/docs/oss/guides/alerts/send-teams-alerts.mdx b/docs/oss/guides/alerts/send-teams-alerts.mdx index cbc0c630b..fe2665983 100644 --- a/docs/oss/guides/alerts/send-teams-alerts.mdx +++ b/docs/oss/guides/alerts/send-teams-alerts.mdx @@ -9,6 +9,8 @@ title: "Setup Teams alerts" Before you can start using the alerts, make sure to [install the dbt package](/oss/quickstart/quickstart-cli-package), [configure a profile and install the CLI](/oss/quickstart/quickstart-cli). This is **required for the alerts to work.** +Elementary sends alerts using Microsoft Teams Adaptive Cards format, which provides rich formatting and interactive capabilities. You can create a webhook URL using either Microsoft Teams Connectors (legacy, being deprecated) or Power Automate Workflows (recommended). +
diff --git a/docs/oss/guides/generate-report-ui.mdx b/docs/oss/guides/generate-report-ui.mdx index b251ae0fe..eb45b1db5 100644 --- a/docs/oss/guides/generate-report-ui.mdx +++ b/docs/oss/guides/generate-report-ui.mdx @@ -2,7 +2,7 @@ title: "Generate observability report" --- -Elementary [data observability report](/features/data-observability-dashboard) can be used for visualization and exploration of data from the dbt-package tables. That includes dbt test results, Elementary anomaly detection results, dbt artifacts, tests runs, etc. +Elementary [data observability report](/features/collaboration-and-communication/data-observability-dashboard) can be used for visualization and exploration of data from the dbt-package tables. That includes dbt test results, Elementary anomaly detection results, dbt artifacts, tests runs, etc. + + + + + + + + + + + + +
+ + + Demo + + +### Supported adapters + + - - + Install the Elementary dbt package and CLI tool. - - + + Watch the webinar to learn how to get started with the Elementary dbt package and CLI tool. + + diff --git a/docs/oss/quickstart/quickstart-cli.mdx b/docs/oss/quickstart/quickstart-cli.mdx index 7e72452ba..2e32c6d64 100644 --- a/docs/oss/quickstart/quickstart-cli.mdx +++ b/docs/oss/quickstart/quickstart-cli.mdx @@ -3,13 +3,12 @@ title: "Quickstart: Elementary CLI" sidebarTitle: "Install Elementary CLI" icon: "square-2" --- + Elementary supports Python versions 3.9 - 3.12, aligning with the [versions supported by dbt](https://docs.getdbt.com/faqs/Core/install-python-compatibility#python-compatibility-matrix). Before installing the CLI, make sure to complete the steps dbt package installation, including executing `dbt run` with the Elementary package models. - - - + @@ -23,7 +22,11 @@ Before installing the CLI, make sure to complete the steps dbt package installat ## What's next? 1. Use the CLI to: - - [Visualize all dbt test results and runs in a report](/oss/guides/generate-report-ui) ✨ - - [Send informative alerts on failures](/oss/guides/alerts/elementary-alerts) 📣 + + * [Visualize all dbt test results and runs in a report](/oss/guides/generate-report-ui) ✨ + + * [Send informative alerts on failures](/oss/guides/alerts/elementary-alerts) 📣 + 2. [Add data anomaly detection dbt tests](/data-tests/add-elementary-tests) 📈 + 3. [Deploy Elementary in production](/oss/deployment-and-configuration/elementary-in-production) 🚀 diff --git a/docs/oss/release-notes/releases/0.16.2.mdx b/docs/oss/release-notes/releases/0.16.2.mdx new file mode 100644 index 000000000..d6c741f74 --- /dev/null +++ b/docs/oss/release-notes/releases/0.16.2.mdx @@ -0,0 +1,20 @@ +--- +title: "Elementary 0.16.2" +sidebarTitle: "0.16.2" +--- + +🚀 **Elementary v0.16.2 is Here!** + +We’re happy to share the latest update to our **open-source package**, bringing **smarter alerts, improved reporting, and enhanced infrastructure** to help **small data teams** + +monitor their data more effectively. + +**What’s New?** + +📊 **Report Enhancements** – Improved navigation, lineage visualization, and full test visibility. + +🚨**Smarter Alerts** – Grouped notifications, **Microsoft Teams Adaptive Cards**, and **seed alerts**. + +🛠️**General Improvements** – DBT package lock files, **better error logging**, and **Google Cloud Storage updates.** + +Check out the full details here: https://github.com/elementary-data/elementary/releases/tag/v0.16.2 . \ No newline at end of file diff --git a/docs/oss/release-notes/releases/0.17.0.mdx b/docs/oss/release-notes/releases/0.17.0.mdx new file mode 100644 index 000000000..e0217c9f2 --- /dev/null +++ b/docs/oss/release-notes/releases/0.17.0.mdx @@ -0,0 +1,12 @@ +--- +title: "Elementary 0.17.0" +sidebarTitle: "0.17.0" +--- + +This update includes: + +- 🔧 UI bug fixes in the CLI +- 🔔 Some improvements to alerts +- 🔄 Version alignment between the CLI and dbt package + +Nothing major—just some refinements to keep things running smoothly. Check out the full details here: https://github.com/elementary-data/elementary/releases/tag/v0.17.0 \ No newline at end of file diff --git a/docs/overview/cloud-vs-oss.mdx b/docs/overview/cloud-vs-oss.mdx index 1ca3094d7..4266f5744 100644 --- a/docs/overview/cloud-vs-oss.mdx +++ b/docs/overview/cloud-vs-oss.mdx @@ -1,61 +1,56 @@ --- -title: "Elementary: Community vs Cloud" +title: "Elementary OSS vs. Elementary Cloud" +'og:title': "Elementary OSS vs. 
Elementary Cloud" sidebarTitle: "Cloud vs OSS" -description: Detailed comparison of Elementary product offerings. +description: "Detailed comparison of Elementary product offerings." icon: "list-check" --- -### Elementary Cloud -Ideal for teams monitoring mission-critical data pipelines, requiring guaranteed uptime and reliability, short-time-to-value, advanced features, collaboration, and professional support. The solution is secure by design, and requires no access to your data from cloud. - -### Elementary Community -An open-source CLI tool you can deploy and orchestrate to send Slack alerts and self-host the Elementary report. It is best for data and analytics engineers that require basic observability capabilities or for evaluating features without vendor approval. - -### Features comparison - -| | Community (OSS) | Elmentary Cloud | -|-----------------------------------------------------------------------|:-------------------:|:-----------------:| -| **Data monitoring and detection** | | | -| Automated freshness, volume
and schema monitors | ❌ | ✅ | -| In-pipeline data tests (via dbt) | ✅ | ✅ | -| Data anomaly detection | ✅ | ✅ | -| Custom SQL tests | ✅ | ✅ | -| Jobs performance and run results | ✅ | ✅ | -| Monitored data sets | dbt only | All tables | -| **Root cause and impact analysis** | | | -| Table-level lineage | ✅ | ✅ | -| Column-level lineage | ❌ | ✅ | -| Lineage to BI tools | ❌ | ✅ | -| **Coverage and configuration** | | | -| Configuration as code (dbt YAML) | ✅ | ✅ | -| Add tests from UI | ❌ | ✅ | -| 🚧 *Auto-tune configurations
for improved accuracy* | ❌ | ✅ | -| 🚧 *Automated test recommendations* | ❌ | ✅ | -| **Communication and collaboration** | | | -| Slack and MS Teams alerts | ✅ | ✅ | -| Alert distribution rules, multiple
destinations, custom formats | ❌ | ✅ | -| Additional alert destinations:
PagerDuty, OpsGenie, Webhook | ❌ | ✅ | -| Alert actions | ❌ | ✅ | -| Data catalog | ❌ | ✅ | -| **Deployment, security and service** | | | -| Deployment | Self hosted, CLI | Cloud service | -| Multiple environments and data warehouses | ❌ | ✅ | -| Secure by design -
Elementary has no access to raw data | ❌ | ✅ | -| Managed service - 99.9% uptime,
no maintenance | ❌ | ✅ | -| Social login with Google | ❌ | ✅ | -| SSO using OKTA / OneLogin / AD | ❌ | ✅ | -| Custom deployment options | ❌ | ✅ | -| Role based access control | ❌ | ✅ | -| Support SLA | Community support | 24 hours SLA | + +If you’re just beginning your data quality journey, the decision between OSS and Cloud depends on your goals and team setup: + +- **Start with OSS** if you have a small data team focused primarily on detection. The OSS package integrates seamlessly into dbt workflows but operates as a local solution, requiring dedicated resources for setup and ongoing maintenance. It’s ideal for small teams of data and/or analytics engineers seeking a straightforward, non-collaborative setup. + + + +- **Choose Cloud** if you’re looking to set up a scalable, collaborative data quality program. The Elementary Cloud Platform integrates into dbt workflows, providing a developer-centric tool for engineers to maintain and govern data quality rules while also offering a cloud-based interface for business users and data stewards. This enables engineers to proactively identify and resolve data issues while consumers can monitor data health, receive notifications, view health scores, and add their own validations in a user-friendly environment. It offers advanced features like automated freshness and volume monitoring, ML-powered anomaly detection, a simple data catalog, and rich integrations. + +This short video covers the difference between OSS and Cloud: + + +
+ +
+ + +### Comparing Elementary OSS to Elementary Cloud + +Below is a detailed comparison between the OSS and Cloud features: + +| Feature | **OSS** | **Cloud** | +|------------------------|------------------------------------------|---------------------------------------------------------------------------------------------------| +| **Detection** |
  • Anomaly detection and dbt tests
| • Automated freshness & volume monitors <br/> • ML-powered anomaly detection <br/> • dbt and cloud tests <br/> • Bulk add/edit for tests |
+| **Triage & Response** | • Basic alerts <br/> • Table-level lineage | • Interactive alerts <br/> • Column-level lineage up to BI <br/> • BI integrations <br/> • Test results history <br/> • Incident management |
+| **Performance Monitoring** | • Model and test performance | • Model and test performance <br/> • Performance alerts |
+| **Enabling Non-Tech Users** | X | • No-code test editor <br/> • Data health scores <br/> • External catalog integrations <br/> • Ticketing system integrations |
+| **Governance** | X | • Catalog <br/> • Metadata in code and UI
| ### Want to know more? - - + + + \ No newline at end of file diff --git a/docs/overview/elementary-oss.mdx b/docs/overview/elementary-oss.mdx deleted file mode 100644 index f76ddc729..000000000 --- a/docs/overview/elementary-oss.mdx +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: "Elementary OSS" -sidebarTitle: "Elementary OSS" -icon: "square-terminal" ---- - - - - - - Install and configure the Elementary dbt package and CLI tool. - - - Read more about Elementary OSS usage, guides and deployment. - - - - diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index ee24a1777..2b4799135 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -1,5 +1,5 @@ --- -title: "Elementary Quickstart" +title: "Elementary Cloud Platform Quickstart" sidebarTitle: "Quickstart" icon: "circle-play" --- @@ -8,10 +8,16 @@ icon: "circle-play" ### Need help with onboarding? -We can provide [support on Slack](https://elementary-data.com/community) or hop on a [guided onboarding call](https://savvycal.com/MaayanSa/df29881c). +We can provide [support on Slack](https://elementary-data.com/community) or hop on a [guided onboarding call](https://cal.com/maayansa/elementary-intro-docs). + +### Demo Videos + +🎥 Learn more about features available on Elementary Cloud by watching the [demo videos](https://www.elementary-data.com/elementary-cloud-demo-videos). ## What's next? 1. [Connect Slack or MS Teams](/cloud/guides/enable-slack-alerts) for alerting 🔔 + 2. [Connect your code repository](/cloud/integrations/code-repo/connect-code-repo) to add tests configuration from the cloud 🔌 + 3. [Connect your BI tool](/cloud/integrations/bi/connect-bi-tool) to automatically extend the lineage to dashboards 🚀 \ No newline at end of file diff --git a/docs/resources/business-case-data-observability-platform.mdx b/docs/resources/business-case-data-observability-platform.mdx new file mode 100644 index 000000000..a78f11580 --- /dev/null +++ b/docs/resources/business-case-data-observability-platform.mdx @@ -0,0 +1,25 @@ +--- +title: "When do I need a data observability platform?" +sidebarTitle: "When to add data observability" +--- + + +### If the consequences of data issues are high +If you are running performance marketing budgets of $millions, a data issue can result in a loss of hundreds of thousands of dollars. +In these cases, the ability to detect and resolve issues fast is business-critical. It typically involves multiple teams and the ability to measure, track, and report on data quality. + +### If data is scaling faster than the data team +The scale and complexity of modern data environments make it impossible for teams to manually manage quality without expanding the team. A data observability platform enables automation and collaboration, ensuring data quality is maintained as data continues to grow, without impacting team efficiency. + +### Common use cases +If your data is being used in one of the following use cases, you should consider adding a data observability platform: +- Self-service analytics +- Data activation +- Powering AI & ML products +- Embedded analytics +- Performance marketing +- Regulatory reporting +- A/B testing and experiments + +## Why isn't the open-source package enough? +The open-source package was designed for engineers that want to monitor their dbt project. The Cloud Platform was designed to support the complex, multifaceted requirements of larger teams and organizations, providing a holistic observability solution. 
\ No newline at end of file
diff --git a/docs/resources/how-does-elementary-work b/docs/resources/how-does-elementary-work
new file mode 100644
index 000000000..937fb3500
--- /dev/null
+++ b/docs/resources/how-does-elementary-work
@@ -0,0 +1,28 @@
+---
+title: "How does Elementary work"
+sidebarTitle: "Elementary Cloud Platform"
+---
+## Cloud platform architecture
+The Elementary open-source package creates a schema that collects the test results and the models from your dbt projects. The package runs as part of your dbt pipeline and writes to its own dataset in the data warehouse, and the platform then syncs that dataset to the cloud. The platform also integrates directly with your data warehouse, so it has access to the information schema, the query history, and the metadata.
+
+We also integrate with your dbt code repository, so we understand how it’s built, including tags, owners, and which tables are and aren't part of your dbt project, and we see daily usage by connecting to your BI tool.
+
+
+  Elementary Cloud Platform Architecture
+
+
+## How it works
+1. You install the Elementary dbt package in your dbt project and configure it to write to its own schema, the Elementary schema.
+2. The package writes test results, run results, logs, and metadata to the Elementary schema.
+3. The cloud service only requires `read access` to the Elementary schema, not to schemas where your sensitive data is stored.
+4. The cloud service connects to sync the Elementary schema using an **encrypted connection** and a **static IP address** that you will need to add to your allowlist.
+
+
+[Read about Security and Privacy](/cloud/general/security-and-privacy)
\ No newline at end of file
diff --git a/docs/x_old/understand-elementary/elementary-report-ui.mdx b/docs/x_old/understand-elementary/elementary-report-ui.mdx
index 2498bb3a9..ae49f10fa 100644
--- a/docs/x_old/understand-elementary/elementary-report-ui.mdx
+++ b/docs/x_old/understand-elementary/elementary-report-ui.mdx
@@ -7,8 +7,8 @@ the [dbt-package](/guides/modules-overview/dbt-package) tables, which includes d
 test results, Elementary anomaly detection results, dbt artifacts, etc.
 
 In order to visualize the data from
-the [dbt-package](/general/contributions#contributing-to-the-dbt-package) tables, use
-the [CLI](/understand-elementary/cli-install) you can generate the Elementary UI.
+the [dbt-package](/oss/general/contributions#contributing-to-the-dbt-package) tables, use
+the [CLI](/oss/cli-install) to generate the Elementary UI.
 After installing and configuring the CLI, execute the command:
 
 ```shell
@@ -111,4 +111,4 @@ test failure, for example on an exposure or a dashboard in the data stack.
 The data tests report UI can be sent via Slack, Google Cloud Storage, or Amazon S3 when you run `edr send-report`.
 
-Refer to [this guide](/quickstart/share-report-ui) for detailed instructions.
+Refer to [this guide](/oss/guides/share-report-ui) for detailed instructions.
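+For quick reference, sending the report to a Slack channel typically looks like the sketch below (the token and channel values are placeholders; run `edr send-report --help` to see all available destinations and options):
+
+```shell
+edr send-report --slack-token YOUR_SLACK_TOKEN --slack-channel-name data-observability
+```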