diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml deleted file mode 100644 index 9e72dca0b..000000000 --- a/.github/workflows/bump-version.yml +++ /dev/null @@ -1,70 +0,0 @@ -name: Bump version -on: - workflow_dispatch: - inputs: - dbt-package-version: - type: string - required: true - description: New elementary package version - - workflow_call: - inputs: - dbt-package-version: - type: string - required: true - -jobs: - validate-version: - runs-on: ubuntu-latest - outputs: - validated-dbt-package-version: ${{ steps.validate-dbt-package-input.outputs.dbt-package-validation }} - steps: - - name: validate dbt package version - id: validate-dbt-package-input - run: echo "dbt-package-validation=$(echo ${{ inputs.dbt-package-version }} | sed -n '/^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$/p')" >> $GITHUB_OUTPUT - - name: echo versions - run: | - echo "dbt package version: ${{ steps.validate-dbt-package-input.outputs.dbt-package-validation }}" - - name: fail on invalid input - if: ${{ steps.validate-dbt-package-input.outputs.dbt-package-validation == '' }} - uses: actions/github-script@v6 - with: - script: | - core.setFailed("Invalid version input - ${{ inputs.dbt-package-version }}") - - bump-version: - needs: validate-version - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Create release branch - run: git checkout -b release/${{ inputs.dbt-package-version }} - - name: Initial config - run: | - git config user.name "GitHub Actions" - git config user.email noreply@github.com - - name: Bump package version - run: | - sed -i 's/version: "[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*"$/version: "${{ inputs.dbt-package-version }}"/' ./dbt_project.yml - - name: Bump readme package version - run: | - sed -i 's/version: [0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$/version: ${{ inputs.dbt-package-version }}/' ./README.md - - name: Commit changes - run: git commit -am "release ${{ inputs.dbt-package-version }}" - - name: Push code - run: git push origin release/${{ inputs.dbt-package-version }} - - create-pr: - needs: bump-version - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: create pull request - uses: repo-sync/pull-request@v2 - with: - source_branch: "release/${{ inputs.dbt-package-version }}" - destination_branch: "master" - pr_title: "release/${{ inputs.dbt-package-version }}" - pr_body: "Open automatically using bump version workflow" - github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/remind-docs-and-tests.yml b/.github/workflows/remind-docs-and-tests.yml deleted file mode 100644 index 2712b3fb3..000000000 --- a/.github/workflows/remind-docs-and-tests.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: Remind docs and tests -on: - pull_request: - branches: ["master"] -jobs: - run: - runs-on: ubuntu-latest - steps: - - uses: wow-actions/auto-comment@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - pullRequestOpened: | - đź‘‹ @{{ author }} - Thank you for raising your pull request. - Please make sure to add tests and document all user-facing changes. - You can do this by editing the `docs` files in the [`elementary`](https://github.com/elementary-data/elementary) repository. 
diff --git a/.github/workflows/run-precommit.yml b/.github/workflows/run-precommit.yml deleted file mode 100644 index d46c18cb2..000000000 --- a/.github/workflows/run-precommit.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Run pre-commit hooks -on: - workflow_dispatch: - pull_request: - -jobs: - code-quality: - runs-on: ubuntu-latest - steps: - - name: Checkout Elementary - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4.3.0 - with: - python-version: "3.8" - - - name: Install dev requirements - run: pip install -r dev-requirements.txt - - - name: Run pre-commit hooks - run: pre-commit run --all-files --show-diff-on-failure diff --git a/.github/workflows/test-all-warehouses-dbt-pre-releases.yml b/.github/workflows/test-all-warehouses-dbt-pre-releases.yml deleted file mode 100644 index e1cad4283..000000000 --- a/.github/workflows/test-all-warehouses-dbt-pre-releases.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: Test all warehouse platforms on dbt pre-releases -on: - workflow_dispatch: - -jobs: - test: - uses: ./.github/workflows/test-all-warehouses.yml - secrets: inherit - with: - dbt-version: latest_pre diff --git a/.github/workflows/test-all-warehouses.yml b/.github/workflows/test-all-warehouses.yml deleted file mode 100644 index c2a4a086a..000000000 --- a/.github/workflows/test-all-warehouses.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: Test all warehouse platforms -on: - pull_request: - branches: ["master"] - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - inputs: - dbt-version: - type: string - required: false - description: dbt's version to test with - elementary-ref: - type: string - required: false - description: Branch or tag to checkout for 'elementary' repository - dbt-data-reliability-ref: - type: string - required: false - description: Branch or tag to checkout for 'dbt-data-reliability' repository - - workflow_call: - inputs: - dbt-version: - type: string - required: false - elementary-ref: - type: string - required: false - dbt-data-reliability-ref: - type: string - required: false - -jobs: - test: - strategy: - fail-fast: false - matrix: - dbt-version: - ${{ inputs.dbt-version && fromJSON(format('["{0}"]', inputs.dbt-version)) || - ! contains(github.event_name, 'pull_request') && fromJSON('["1.3.0", "latest_official"]') || - fromJSON('["latest_official"]') }} - warehouse-type: - [ - postgres, - snowflake, - bigquery, - redshift, - databricks, - databricks_catalog, - athena, - trino, - ] - include: - # If we're not running on a specific dbt version, then always add postgres on 1.3.0 - - dbt-version: "${{ inputs.dbt-version || '1.3.0' }}" - warehouse-type: postgres - - dbt-version: "${{ inputs.dbt-version || 'latest_pre' }}" - warehouse-type: postgres - exclude: - - dbt-version: "1.3.0" - warehouse-type: athena - - dbt-version: "1.3.0" - warehouse-type: trino - uses: ./.github/workflows/test-warehouse.yml - with: - warehouse-type: ${{ matrix.warehouse-type }} - dbt-version: ${{ matrix.dbt-version }} - elementary-ref: ${{ inputs.elementary-ref }} - dbt-data-reliability-ref: ${{ inputs.dbt-data-reliability-ref }} - secrets: inherit - - notify_failures: - name: Notify Slack - secrets: inherit - needs: [test] - if: | - always() && - ! contains(needs.test.result, 'success') && - ! contains(needs.test.result, 'cancelled') && - contains(github.event_name, 'schedule') && - ! 
cancelled() - uses: elementary-data/elementary/.github/workflows/notify_slack.yml@master - with: - result: "failure" - run_id: ${{ github.run_id }} - workflow_name: ${{ github.workflow }} diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml deleted file mode 100644 index cdddc2ed1..000000000 --- a/.github/workflows/test-warehouse.yml +++ /dev/null @@ -1,145 +0,0 @@ -name: Test warehouse platform -on: - workflow_dispatch: - inputs: - warehouse-type: - type: choice - required: true - description: Type of warehouse platform - options: - - postgres - - snowflake - - bigquery - - redshift - - databricks - - databricks_catalog - - spark - - athena - - trino - elementary-ref: - type: string - required: false - description: Branch or tag to checkout for 'elementary' repository - dbt-data-reliability-ref: - type: string - required: false - description: Branch or tag to checkout for 'dbt-data-reliability' repository - dbt-version: - type: string - required: false - default: "latest_official" - description: dbt's version to test with - - workflow_call: - inputs: - warehouse-type: - type: string - required: true - elementary-ref: - type: string - required: false - dbt-data-reliability-ref: - type: string - required: false - dbt-version: - type: string - default: "latest_official" - required: false - -env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - TESTS_DIR: ${{ github.workspace }}/dbt-data-reliability/integration_tests - -jobs: - test: - runs-on: ubuntu-latest - concurrency: - # This is what eventually defines the schema name in the data platform. - group: tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${{ github.head_ref || github.ref_name }} - cancel-in-progress: true - steps: - - name: Checkout Elementary - uses: actions/checkout@v4 - with: - repository: elementary-data/elementary - path: elementary - ref: ${{ inputs.elementary-ref }} - - - name: Checkout dbt package - uses: actions/checkout@v4 - with: - path: dbt-data-reliability - ref: ${{ inputs.dbt-data-reliability-ref }} - - - name: Start Postgres - if: inputs.warehouse-type == 'postgres' - working-directory: ${{ env.TESTS_DIR }} - run: docker compose up -d postgres - - - name: Start Trino - if: inputs.warehouse-type == 'trino' - working-directory: ${{ env.TESTS_DIR }} - run: docker compose -f docker-compose-trino.yml up -d - - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: "3.9" - cache: "pip" - - - name: Install Spark requirements - if: inputs.warehouse-type == 'spark' - run: sudo apt-get install python-dev libsasl2-dev gcc - - - name: Install compatible databricks connector (not limited in older dbt-databricks versions) - if: startsWith(inputs.warehouse-type, 'databricks') && inputs.dbt-version < '1.7.0' - run: pip install databricks-sql-connector==2.9.3 - - - name: Install dbt - run: - pip install${{ (inputs.dbt-version == 'latest_pre' && ' --pre') || '' }} - "dbt-core${{ (!startsWith(inputs.dbt-version, 'latest') && format('=={0}', inputs.dbt-version)) || '' }}" - "dbt-${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || (inputs.warehouse-type == 'spark' && 'spark[PyHive]') || (inputs.warehouse-type == 'athena' && 'athena-community') || inputs.warehouse-type }}${{ (!startsWith(inputs.dbt-version, 'latest') && format('<={0}', inputs.dbt-version)) || '' }}" - - - name: Install Elementary - run: pip install "./elementary[${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || inputs.warehouse-type }}]" - - - 
name: Install dependencies - working-directory: ${{ env.TESTS_DIR }} - run: | - dbt deps --project-dir dbt_project - pip install -r requirements.txt - - - name: Write dbt profiles - env: - PROFILES_YML: ${{ secrets.CI_PROFILES_YML }} - run: | - mkdir -p ~/.dbt - DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g') - UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/-/_/g") - echo "$PROFILES_YML" | base64 -d | sed "s//dbt_pkg_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml - - - name: Check DWH connection - working-directory: ${{ env.TESTS_DIR }} - run: | - dbt debug -t "${{ inputs.warehouse-type }}" - - - name: Test - working-directory: "${{ env.TESTS_DIR }}/tests" - run: py.test -n8 -vvv --target "${{ inputs.warehouse-type }}" --junit-xml=test-results.xml --html=detailed_report_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}.html --self-contained-html - - - name: Upload test results - if: always() - uses: pmeier/pytest-results-action@main - with: - path: ${{ env.TESTS_DIR }}/tests/test-results.xml - summary: true - display-options: fEX - fail-on-empty: true - - - name: Upload HTML report - if: always() - uses: actions/upload-artifact@v4 - with: - name: detailed_report_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }} - path: ${{ env.TESTS_DIR }}/tests/detailed_report_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}.html diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 10e069ea9..a3e6b862d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,102 +1,4 @@ -# Contribution guidelines +# Tuva Fork -**Note**: This document contains contribution guidelines for the Elementary dbt package. If you wish to contribute -to the Elementary CLI (`edr`), please refer to the [CLI contribution guidelines](https://github.com/elementary-data/elementary/blob/master/CONTRIBUTING.md). - -## Getting started with development - -### Setup - -#### (1) Clone the repository - -``` -git clone https://github.com/elementary-data/dbt-data-reliability.git -cd dbt-data-reliability -``` - -#### (2) Edit `packages.yml` in your dbt project - -```yaml -packages: - - local: /path/to/dbt-data-reliability -``` - -#### (3) Install the package - -``` -dbt deps -``` - -You're done. Running `dbt` will now run the code in your local repository. - -## First time contributors - -If you're looking for things to help with, browse -our [issue tracker](https://github.com/elementary-data/elementary/issues)! - -In particular, look for: - -- [Open to contribution issues](https://github.com/elementary-data/elementary/labels/Open%20to%20contribution%20%F0%9F%A7%A1) -- [good first issues](https://github.com/elementary-data/elementary/labels/Good%20first%20issue%20%F0%9F%A5%87) -- [documentation issues](https://github.com/elementary-data/elementary/labels/documentation) - -You do not need to ask for permission to work on any of these issues. -Just fix the issue yourself and [open a pull request](#submitting-changes). - -To get help fixing a specific issue, it's often best to comment on the issue -itself. You're much more likely to get help if you provide details about what -you've tried and where you've -looked. [Slack](https://join.slack.com/t/elementary-community/shared_invite/zt-uehfrq2f-zXeVTtXrjYRbdE_V6xq4Rg) can also -be a good place -to ask for help. 
- -## Submitting changes - -Even more excellent than a good bug report is a fix for a bug, or the -implementation of a much-needed new feature. We'd love to have -your contributions. - -We use the usual GitHub pull-request flow, which may be familiar to -you if you've contributed to other projects on GitHub. For the mechanics, -view [this guide](https://help.github.com/articles/using-pull-requests/). - -If your change will be a significant amount of work -to write, we highly recommend starting by opening an issue laying out -what you want to do. That lets a conversation happen early in case -other contributors disagree with what you'd like to do or have ideas -that will help you do it. - -The best pull requests are focused, clearly describe what they're for -and why they're correct, and contain tests for whatever changes they -make to the code's behavior. As a bonus these are easiest for someone -to review, which helps your pull request get merged quickly! - -## Running integration tests - -For every PR we merge, we require integration tests to pass successfully -on all supported database platforms (Snowflake, Bigquery, Redshift, Databricks and Postgres). - -Clearly you might not have a setup for all of these, so the expectation is that you'll run -the tests on the platform you're using and make sure everything passes. We also encourage you to add new tests for any new non-trivial functionality. - -Our tests are located under the `integration_tests` directory, and written using the -[py-test](https://docs.pytest.org/en/stable/) framework. -In order to run them, please follow these steps: - -1. Install dependencies: - -```bash -cd integration_tests -pip install -r requirements.txt -dbt deps --project-dir dbt_project -``` - -2. Create a dbt profile named `elementary_tests`, with a target corresponding to the database you are using. - For more details on how to set a dbt profile please click [here](https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles). - -3. Run the tests: - -```bash -cd tests -py.test -vvv --target -``` +This is a fork of the [Elementary](https://github.com/elementary-data/dbt-data-reliability) open source project. +Contributions will not be accepted in this fork of the repository. diff --git a/README.md index d94601dd2..7f68b0c38 100644 --- a/README.md +++ b/README.md @@ -1,198 +1,4 @@ -
-[image: Elementary logo]
+# Tuva Fork of [Elementary](https://github.com/elementary-data/dbt-data-reliability) -# [dbt-native data observability](https://www.elementary-data.com/) - -
-[badges: License, Downloads]
- -## What is Elementary? - -This dbt-native package powers **Elementary**, helping data and analytics engineers **detect data anomalies** and build **rich metadata tables** from their dbt runs and tests. Gain immediate visibility into data quality trend and uncover potential issues, all within dbt. - -Choose the observability tool that fits your needs: - -✅ [**Elementary Open Source**](https://docs.elementary-data.com/oss/oss-introduction) – A powerful, self-hosted tool for teams that want full control. - -✅ [**Elementary Cloud Platform**](https://docs.elementary-data.com/cloud/introduction) – A fully managed, enterprise-ready solution with **automated ML-powered anomaly detection, flexible data discovery, integrated incident management, and collaboration tools**—all with minimal setup and infrastructure maintenance. - -### Table of Contents - -- [What's Inside the Elementary dbt Package?](#whats-inside-the-elementary-dbt-package) -- [Get more out of Elementary dbt package](#get-more-out-of-elementary-dbt-package) -- [Data Anomaly Detection & Schema changes as dbt Tests](#data-anomaly-detection--schema-changes-as-dbt-tests) -- [Elementary Tables - Run Results and dbt Artifacts](#elementary-tables---run-results-and-dbt-artifacts) -- [AI-powered data validation and unstructured data tests](#ai-powered-data-validation-and-unstructured-data-tests) -- [Quickstart - dbt Package](#quickstart---dbt-package) -- [Community & Support](#community--support) -- [Contributions](#contributions) - -### **What's Inside the Elementary dbt Package?** - -The **Elementary dbt package** is designed to enhance data observability within your dbt workflows. It includes two core components: - -- **Elementary Tests** – A collection of **anomaly detection tests** and other data quality checks that help identify unexpected trends, missing data, or schema changes directly within your dbt runs. -- **Metadata & Test Results Tables** – The package automatically generates and updates **metadata tables** in your data warehouse, capturing valuable information from your dbt runs and test results. These tables act as the backbone of your **observability setup**, enabling **alerts and reports** when connected to an Elementary observability platform. - -## Get more out of Elementary dbt package - -The **Elementary dbt package** helps you find anomalies in your data and build metadata tables from your dbt runs and tests—but there's even more you can do. - -To generate observability reports, send alerts, and govern your data quality effectively, connect your dbt package to one of the following options: - -- **Elementary OSS** -- **A self-hosted, open-source CLI** that integrates seamlessly with your dbt project and the Elementary dbt package. It **enables alerting and provides the basic Elementary data observability report**, offering a comprehensive view of your dbt runs, all dbt test results, data lineage, and test coverage. It’s ideal for small teams of data and/or analytics engineers seeking a straightforward, non-collaborative setup for data observability. Quickstart [here](https://docs.elementary-data.com/oss/quickstart/quickstart-cli), and our team and community can provide great support on [Slack](https://www.elementary-data.com/community) if needed. -- **Elementary Cloud** - - A **fully managed, enterprise-ready** solution designed for **scalability and automation**. 
It offers automated **ML-powered anomaly detection**, flexible **data discovery**, an integrated **incident management system**, and **collaboration features.** Delivering **high value with minimal setup and infrastructure maintenance**, it's ideal for teams looking to enhance data reliability without operational overhead. To learn more, [book a demo](https://cal.com/maayansa/elementary-intro-github-package) or [start a trial](https://www.elementary-data.com/signup). - - - - - -## Data Anomaly Detection & Schema changes as dbt Tests - -**Elementary tests are configured and executed like native tests in your project!** - -Elementary dbt tests help track and alert on schema changes as well as key metrics and metadata over time, including freshness, volume, distribution, cardinality, and more. - -**Seamlessly configured and run like native dbt tests,** Elementary tests detect anomalies and outliers, helping you catch data issues early. - -Example of an Elementary test config in `schema.yml`: - -``` - -models: - - name: all_events - config: - elementary: - timestamp_column: 'loaded_at' - columns: - - name: event_count - tests: - - elementary.column_anomalies: - column_anomalies: - - average - where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'" - anomaly_sensitivity: 2 - time_bucket: - period: day - count:1 - -``` - -Elementary tests include: - -### **Anomaly Detection Tests** - -- **Volume anomalies -** Monitors the row count of your table over time per time bucket. -- **Freshness anomalies -** Monitors the freshness of your table over time, as the expected time between data updates. -- **Event freshness anomalies -** Monitors the freshness of event data over time, as the expected time it takes each event to load - that is, the time between when the event actually occurs (the **`event timestamp`**), and when it is loaded to the database (the **`update timestamp`**). -- **Dimension anomalies -** Monitors the count of rows grouped by given **`dimensions`** (columns/expressions). -- **Column anomalies -** Executes column level monitors on a certain column, with a chosen metric. -- **All columns anomalies** - Executes column level monitors and anomaly detection on all the columns of the table. - -### **Schema Tests** - -- **Schema changes -** Alerts on a deleted table, deleted or added columns, or change of data type of a column. -- **Schema changes from baseline** - Checks for schema changes against baseline columns defined in a source’s or model’s configuration. -- **JSON schema** - Allows validating that a string column matches a given JSON schema. -- **Exposure validation test -** Detects changes in your models’ columns that break downstream exposure. - -Read more about the available [Elementary tests and configuration](https://docs.elementary-data.com/data-tests/introduction). - -## Elementary Tables - Run Results and dbt Artifacts - -The **Elementary dbt package** automatically stores **dbt artifacts and run results** in your data warehouse, creating structured tables that provide visibility into your dbt runs and metadata. - -### **Metadata Tables - dbt Artifacts** - -These tables provide a comprehensive view of your dbt project structure and configurations: - -- **dbt_models** – Details on all dbt models. -- **dbt_tests** – Stores information about dbt tests. -- **dbt_sources** – Tracks source tables and freshness checks. -- **dbt_exposures** – Logs downstream data usage. -- **dbt_metrics** – Captures dbt-defined metrics. 
-- **dbt_snapshots** – Stores historical snapshot data. -- **dbt_seeds -** Stores current metadata about seed files in the dbt project. -- **dbt_columns** - Stores detailed information about columns across the dbt project. - -### **Run Results Tables** - -These tables track execution details, test outcomes, and performance metrics from your dbt runs: - -- **dbt_run_results** – Captures high-level details of each dbt run. -- **model_run_results** – Stores execution data for dbt models. -- **snapshot_run_results** – Logs results from dbt snapshots. -- **dbt_invocations** – Tracks each instance of dbt being run. -- **elementary_test_results** – Consolidates all dbt test results, including Elementary anomaly tests. - -For a full breakdown of these tables, see the [documentation](https://docs.elementary-data.com/dbt/package-models). - -## AI-powered data validation and unstructured data tests - -Elementary leverages AI to enhance data reliability with natural language test definitions: - -- **AI data validation**: Define expectations in plain English to validate structured data -- **Unstructured data validation**: Validate text, JSON, and other non-tabular data types - -Example: - -```yml -# AI data validation example -models: - - name: crm - description: "A table containing contract details." - columns: - - name: contract_date - description: "The date when the contract was signed." - tests: - - elementary.ai_data_validation: - expectation_prompt: "There should be no contract date in the future" -``` - -Learn more in our [AI data validations documentation](https://docs.elementary-data.com/data-tests/ai-data-tests/ai_data_validations). - -## Quickstart - dbt Package - -1. Add to your `packages.yml`: - -``` -packages: - - package: elementary-data/elementary - version: 0.18.0 - ## Docs: - -``` - -2. Run `dbt deps` -3. Add to your `dbt_project.yml`: - -``` -models: - ## elementary models will be created in the schema '_elementary' - ## for details, see docs: - elementary: - +schema: "elementary" - -``` - -4. Run `dbt run --select elementary` - -Check out the [full documentation](https://docs.elementary-data.com/). - -## Community & Support - -- [Slack](https://join.slack.com/t/elementary-community/shared_invite/zt-uehfrq2f-zXeVTtXrjYRbdE_V6xq4Rg) (Talk to us, support, etc.) -- [GitHub issues](https://github.com/elementary-data/elementary/issues) (Bug reports, feature requests) - -## Contributions - -Thank you :orange_heart: Whether it's a bug fix, new feature, or additional documentation - we greatly appreciate contributions! - -Check out the [contributions guide](https://docs.elementary-data.com/oss/general/contributions) and [open issues](https://github.com/elementary-data/elementary/issues) in the main repo. 
+## Changes from Main Repo +* Disables this package when the dbt target is Microsoft Fabric (`target.type == 'fabric'`) diff --git a/dbt_project.yml b/dbt_project.yml index b63e4556d..89248d2a8 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: "elementary" -version: "0.18.0" +version: "0.18.2-tuva.1" require-dbt-version: [">=1.0.0", "<2.0.0"] @@ -13,6 +13,13 @@ seed-paths: ["data"] macro-paths: ["macros"] snapshot-paths: ["snapshots"] +vars: + mute_ensure_materialization_override: true + + +models: + +enabled: "{{ target.type != 'fabric' }}" + target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` - "target" @@ -20,6 +27,6 @@ clean-targets: # directories to be removed by `dbt clean` - "dbt_modules" on-run-start: - - "{{ elementary.on_run_start() }}" + - "{{ elementary.on_run_start() if target.type != 'fabric' }}" on-run-end: - - "{{ elementary.on_run_end() }}" + - "{{ elementary.on_run_end() if target.type != 'fabric' }}" diff --git a/integration_tests/deprecated_tests/models/schema.yml b/integration_tests/deprecated_tests/models/schema.yml index 647bbfce6..31f376284 100644 --- a/integration_tests/deprecated_tests/models/schema.yml +++ b/integration_tests/deprecated_tests/models/schema.yml @@ -347,7 +347,7 @@ models: - name: groups columns: - name: group_a - data_type: "{{ 'strIng' if (target.type == 'bigquery' or target.type == 'databricks') else 'CHArACTER varying' if target.type == 'redshift' else 'teXt' }}" + data_type: "{{ 'strIng' if (target.type == 'bigquery' or target.type == 'databricks' or target.type == 'athena') else 'CHArACTER varying' if target.type == 'redshift' else 'teXt' }}" - name: group_b data_type: double - name: group_c @@ -365,7 +365,7 @@ models: - name: stats_players columns: - name: player - data_type: "{{ 'STRING' if (target.type == 'bigquery' or target.type == 'databricks') else 'character varying' if target.type == 'redshift' else 'TEXT' }}" + data_type: "{{ 'STRING' if (target.type == 'bigquery' or target.type == 'databricks' or target.type == 'athena') else 'character varying' if target.type == 'redshift' else 'TEXT' }}" - name: goals data_type: BOOLEAN - name: coffee_cups_consumed diff --git a/integration_tests/tests/conftest.py b/integration_tests/tests/conftest.py index b9c0ab7ed..56a34ca43 100644 --- a/integration_tests/tests/conftest.py +++ b/integration_tests/tests/conftest.py @@ -2,18 +2,22 @@ from pathlib import Path from tempfile import mkdtemp -import env import pytest from dbt.version import __version__ as dbt_version from dbt_project import DbtProject +from env import Environment +from logger import get_logger from packaging import version DBT_PROJECT_PATH = Path(__file__).parent.parent / "dbt_project" +logger = get_logger(__name__) + def pytest_addoption(parser): parser.addoption("--target", action="store", default="postgres") parser.addoption("--skip-init", action="store_true", default=False) + parser.addoption("--clear-on-end", action="store_true", default=False) @pytest.fixture(scope="session") @@ -32,9 +36,22 @@ def project_dir_copy(): @pytest.fixture(scope="session", autouse=True) -def init_tests_env(target, skip_init, project_dir_copy: str): +def init_tests_env( + target: str, skip_init: bool, clear_on_end: bool, project_dir_copy: str +): + env = Environment(target, project_dir_copy) if not skip_init: - env.init(target, project_dir_copy) + logger.info("Initializing test environment") + env.clear() + env.init() + logger.info("Initialization complete") + + yield + + if clear_on_end: +
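# Optional teardown: only runs when pytest is invoked with --clear-on-end. +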
logger.info("Clearing tests environment") + env.clear() + logger.info("Clearing complete") @pytest.fixture(autouse=True) @@ -78,10 +95,15 @@ def target(request) -> str: @pytest.fixture(scope="session") -def skip_init(request) -> str: +def skip_init(request) -> bool: return request.config.getoption("--skip-init") +@pytest.fixture(scope="session") +def clear_on_end(request) -> bool: + return request.config.getoption("--clear-on-end") + + @pytest.fixture def test_id(request) -> str: if request.cls: diff --git a/integration_tests/tests/env.py b/integration_tests/tests/env.py index f60cda7ed..27b78641e 100644 --- a/integration_tests/tests/env.py +++ b/integration_tests/tests/env.py @@ -1,12 +1,6 @@ import dbt_project -def init(target: str, project_dir: str): - tests_env = Environment(target, project_dir) - tests_env.clear() - tests_env.init() - - class Environment: def __init__(self, target: str, project_dir: str): self.dbt_runner = dbt_project.get_dbt_runner(target, project_dir) diff --git a/integration_tests/tests/test_failed_row_count.py b/integration_tests/tests/test_failed_row_count.py index 21df0c5aa..5ed41d7d9 100644 --- a/integration_tests/tests/test_failed_row_count.py +++ b/integration_tests/tests/test_failed_row_count.py @@ -54,3 +54,19 @@ def test_custom_failed_row_count(test_id: str, dbt_project: DbtProject): ) assert test_result["status"] == "fail" assert test_result["failed_row_count"] == overwrite_failed_row_count + + +def test_warn_if_0(test_id: str, dbt_project: DbtProject): + # Edge case that we want to verify + + null_count = 50 + data = [{COLUMN_NAME: "pasten"} for _ in range(null_count)] + test_result = dbt_project.test( + test_id, + "not_null", + dict(column_name=COLUMN_NAME, warn_if="=0"), + data=data, + test_vars={"enable_elementary_test_materialization": True}, + ) + assert test_result["status"] == "warn" + assert test_result["failed_row_count"] == 0 diff --git a/macros/commands/generate_schema_baseline_test.sql b/macros/commands/generate_schema_baseline_test.sql index 99053625f..f02b1ecac 100644 --- a/macros/commands/generate_schema_baseline_test.sql +++ b/macros/commands/generate_schema_baseline_test.sql @@ -1,6 +1,6 @@ -{% macro generate_schema_baseline_test(name=none, include_sources=True, include_models=False, fail_on_added=False, enforce_types=False, convert_to_lower=False) %} +{% macro generate_schema_baseline_test(name=none, include_sources=True, include_models=False, fail_on_added=False, enforce_types=False, convert_to_lower=False, resource_type=none) %} {% if name %} - {{ generate_schema_baseline_test_for_node(name, fail_on_added=fail_on_added, enforce_types=enforce_types, convert_to_lower=convert_to_lower) }} + {{ generate_schema_baseline_test_for_node(name, fail_on_added=fail_on_added, enforce_types=enforce_types, convert_to_lower=convert_to_lower, resource_type=resource_type) }} {% else %} {{ generate_schema_baseline_test_for_all_nodes(include_sources=include_sources, include_models=include_models, fail_on_added=fail_on_added, enforce_types=enforce_types, convert_to_lower=convert_to_lower) }} @@ -20,11 +20,10 @@ {% endfor %} {% endmacro %} -{% macro generate_schema_baseline_test_for_node(node, fail_on_added=False, enforce_types=False, convert_to_lower=False) %} +{% macro generate_schema_baseline_test_for_node(node, fail_on_added=False, enforce_types=False, convert_to_lower=False, resource_type=none) %} {% if node is string %} {% set node_name = node %} - {% set node = elementary.get_node_by_name(node_name) %} - + {% set node = 
elementary.get_node_by_name(node_name, resource_type) %} {% if not node %} {% do print("Could not find any model or source by the name '{}'!".format(node_name)) %} {% do return(none) %} @@ -84,7 +83,7 @@ sources: {%- for param, param_val in test_params.items() %} {{param}}: {{param_val}} {%- endfor -%} - {% endif -%} + {% endif -%} {% endmacro %} {% macro generate_schema_baseline_test_for_model(node, columns, test_params, convert_to_lower) %} diff --git a/macros/edr/system/system_utils/full_names.sql b/macros/edr/system/system_utils/full_names.sql index 6e579596f..cb5aef445 100644 --- a/macros/edr/system/system_utils/full_names.sql +++ b/macros/edr/system/system_utils/full_names.sql @@ -60,6 +60,19 @@ trim(split_part(full_table_name,'.',{{ part_index }}),'"') as {{ part_name }} {% endmacro %} +{% macro athena__full_name_split(part_name) %} + {%- if part_name == 'database_name' -%} + {%- set part_index = 1 -%} + {%- elif part_name == 'schema_name' -%} + {%- set part_index = 2 -%} + {%- elif part_name == 'table_name' -%} + {%- set part_index = 3 -%} + {%- else -%} + {{ return('') }} + {%- endif -%} + trim(split_part(full_table_name,'.',{{ part_index }}),'"') as {{ part_name }} +{% endmacro %} + {% macro databricks__full_name_split(part_name) %} {%- if part_name == 'database_name' -%} diff --git a/macros/edr/tests/on_run_end/handle_tests_results.sql b/macros/edr/tests/on_run_end/handle_tests_results.sql index 98e41f082..98860c606 100644 --- a/macros/edr/tests/on_run_end/handle_tests_results.sql +++ b/macros/edr/tests/on_run_end/handle_tests_results.sql @@ -41,7 +41,13 @@ {% for elementary_test_results_row in elementary_test_results_rows %} {% set failures = elementary_test_results_row.get("failures", result.failures) %} - {% set status = "pass" if failures == 0 else result.status %} + + {# For Elementary anomaly tests, we actually save more than one result per test; in that case the dbt status will be "fail" + even if only one such result failed and the rest succeeded. To handle this, we make sure to mark the status as "pass" for these + results when the number of failed rows is 0. 
+ We don't want to do this for every test though, because otherwise it can break configurations like warn_if=0 #} + {% set status = "pass" if failures == 0 and elementary_test_results_row.get("test_type") == "anomaly_detection" else result.status %} + {% do elementary_test_results_row.update({'status': status, 'failures': failures, 'invocation_id': invocation_id, 'failed_row_count': elementary_test_failed_row_count}) %} {% do elementary_test_results_row.setdefault('test_results_description', result.message) %} diff --git a/macros/utils/cross_db_utils/contains.sql b/macros/utils/cross_db_utils/contains.sql index f56d941ae..e14ee0ab9 100644 --- a/macros/utils/cross_db_utils/contains.sql +++ b/macros/utils/cross_db_utils/contains.sql @@ -30,4 +30,16 @@ lower({{ string }}) like lower('%{{ string_to_search }}%') then true else false end {%- endif %} +{% endmacro %} + +{% macro athena__contains(string, string_to_search, case_sensitive) %} + {%- if case_sensitive %} + case when + {{ string }} like '%{{ string_to_search }}%' then true + else false end + {%- else %} + case when + lower({{ string }}) like lower('%{{ string_to_search }}%') then true + else false end + {%- endif %} {% endmacro %} \ No newline at end of file diff --git a/macros/utils/cross_db_utils/generate_elementary_profile_args.sql b/macros/utils/cross_db_utils/generate_elementary_profile_args.sql index e0c7b9546..fff758061 100644 --- a/macros/utils/cross_db_utils/generate_elementary_profile_args.sql +++ b/macros/utils/cross_db_utils/generate_elementary_profile_args.sql @@ -171,3 +171,13 @@ {% macro default__generate_elementary_profile_args(method, elementary_database, elementary_schema) %} Adapter "{{ target.type }}" is not supported on Elementary. {% endmacro %} + +{# FIX: DuckDB profile arguments #} +{% macro duckdb__generate_elementary_profile_args(method, elementary_database, elementary_schema) %} + {% do return([ + _parameter("type", target.type), + _parameter("path", target.path), + _parameter("schema", elementary_schema), + _parameter("threads", target.threads), + ]) %} +{% endmacro %} \ No newline at end of file diff --git a/macros/utils/cross_db_utils/timeadd.sql b/macros/utils/cross_db_utils/timeadd.sql index ca3a9260f..a9ec22cc8 100644 --- a/macros/utils/cross_db_utils/timeadd.sql +++ b/macros/utils/cross_db_utils/timeadd.sql @@ -33,3 +33,8 @@ {% macro trino__edr_timeadd(date_part, number, timestamp_expression) %} date_add('{{ date_part }}', {{ elementary.edr_cast_as_int(number) }}, {{ elementary.edr_cast_as_timestamp(timestamp_expression) }}) {% endmacro %} + +{# FIX: Add adapter for DuckDB #} +{% macro duckdb__edr_timeadd(date_part, number, timestamp_expression) %} + {{ elementary.edr_cast_as_timestamp(timestamp_expression) }} + {{ elementary.edr_cast_as_int(number) }} * INTERVAL '1 {{ date_part }}' +{% endmacro %} \ No newline at end of file diff --git a/macros/utils/graph/get_node_by_name.sql b/macros/utils/graph/get_node_by_name.sql index d5c1513bd..58d350db9 100644 --- a/macros/utils/graph/get_node_by_name.sql +++ b/macros/utils/graph/get_node_by_name.sql @@ -1,7 +1,7 @@ -{% macro get_node_by_name(name) %} +{% macro get_node_by_name(name, resource_type=none) %} {%- set nodes = elementary.get_nodes_from_graph() -%} {% for node in nodes %} - {% if node.name == name %} + {% if node.name == name and (resource_type is none or node.resource_type == resource_type) %} {% do return(node) %} {% endif %} {% endfor %} diff --git a/macros/utils/table_operations/create_temp_table.sql 
b/macros/utils/table_operations/create_temp_table.sql index a846e41ab..00de501d6 100644 --- a/macros/utils/table_operations/create_temp_table.sql +++ b/macros/utils/table_operations/create_temp_table.sql @@ -1,14 +1,36 @@ {% macro create_temp_table(database_name, schema_name, table_name, sql_query) %} + {% do return(adapter.dispatch('create_temp_table','elementary')(database_name, schema_name, table_name, sql_query)) %} +{%- endmacro %} + +{% macro default__create_temp_table(database_name, schema_name, table_name, sql_query) %} {% set temp_table_exists, temp_table_relation = dbt.get_or_create_relation(database=database_name, schema=schema_name, identifier=table_name, type='table') -%} - {% set temp_table_relation = elementary.edr_make_temp_relation(temp_table_relation) %} + {% set temp_table_relation = elementary.make_temp_table_relation(temp_table_relation) %} {% if temp_table_exists %} {% do adapter.drop_relation(temp_table_relation) %} {% do elementary.run_query(dbt.create_table_as(True, temp_table_relation, sql_query)) %} {% else %} {% do elementary.run_query(dbt.create_table_as(True, temp_table_relation, sql_query)) %} {% endif %} + {{ return(temp_table_relation) }}{% endmacro %} + +{% macro snowflake__create_temp_table(database_name, schema_name, table_name, sql_query) %} + {% set temp_table_exists, temp_table_relation = dbt.get_or_create_relation(database=database_name, + schema=schema_name, + identifier=table_name, + type='table') -%} + {% set temp_table_relation = elementary.make_temp_table_relation(temp_table_relation) %} + {% set create_query %} + create or replace temporary table {{ temp_table_relation }} + as ( + {{ sql_query }} + ); + + {% endset %} + + {% do elementary.run_query(create_query) %} + {{ return(temp_table_relation) }} {% endmacro %} \ No newline at end of file diff --git a/macros/utils/table_operations/delete_and_insert.sql b/macros/utils/table_operations/delete_and_insert.sql index 4672ef116..cf9226180 100644 --- a/macros/utils/table_operations/delete_and_insert.sql +++ b/macros/utils/table_operations/delete_and_insert.sql @@ -132,3 +132,19 @@ {% do return(queries) %} {% endmacro %} + +{# FIX: DuckDB adapter; removed begin/end transaction behavior #} +{% macro duckdb__get_delete_and_insert_queries(relation, insert_relation, delete_relation, delete_column_key) %} + {% set query %} + {% if delete_relation %} + delete from {{ relation }} + where + {{ delete_column_key }} is null + or {{ delete_column_key }} in (select {{ delete_column_key }} from {{ delete_relation }}); + {% endif %} + {% if insert_relation %} + insert into {{ relation }} select * from {{ insert_relation }}; + {% endif %} + {% endset %} + {% do return([query]) %} +{% endmacro %} \ No newline at end of file diff --git a/macros/utils/table_operations/insert_rows.sql b/macros/utils/table_operations/insert_rows.sql index 19826d762..1240f45c9 100644 --- a/macros/utils/table_operations/insert_rows.sql +++ b/macros/utils/table_operations/insert_rows.sql @@ -174,3 +174,8 @@ NULL {%- endif -%} {%- endmacro -%} + +{# FIX: DuckDB escaping for single quotes, backslashes and newlines #} +{%- macro duckdb__escape_special_chars(string_value) -%} + {{- return(string_value | replace("\\", "\\\\") | replace("'", "''") | replace("\n", "\\n") | replace("\r", "\\r")) -}} +{%- endmacro -%} \ No newline at end of file
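The `handle_tests_results.sql` change above is subtle but behavior-relevant: the old `{% set status = "pass" if failures == 0 else result.status %}` rewrote every zero-failure result to `pass` in the on-run-end hook, which swallowed `warn` statuses produced by `warn_if: "=0"` style configurations; the override now applies only to anomaly-detection results, and the new `test_warn_if_0` test pins that behavior down. A minimal sketch of a dbt test configuration that exercises this edge case (the model and column names are hypothetical, chosen only for illustration):

```yaml
# schema.yml sketch -- "orders" and "order_id" are hypothetical names
models:
  - name: orders
    columns:
      - name: order_id
        tests:
          - not_null:
              config:
                # Warn when the test finds exactly zero failing rows.
                # With the change above, Elementary's on-run-end handler
                # no longer rewrites this zero-failure result to "pass".
                warn_if: "=0"
```

With this configuration, a fully populated column should surface as `warn` with `failed_row_count = 0`, matching the assertions in `test_warn_if_0`.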