From 5f5d97a6c44c8a04d3ce026c33ac66be4b102c86 Mon Sep 17 00:00:00 2001
From: Emily Rockman
Date: Thu, 10 Oct 2024 13:07:58 -0500
Subject: [PATCH 1/3] add setup for dbt supported testing

---
 .github/workflows/ci.yml                      |  33 ++++++
 dev-requirements.txt                          |   1 +
 .../monitor/dbt_project/dbt_project.yml       |   2 +-
 integration_tests/profiles.yml                | 111 ++++++++++++++++++
 supported_adapters.env                        |   1 +
 .../tests_with_db/dbt_project/dbt_project.yml |   2 +-
 tox.ini                                       |  44 +++++++
 7 files changed, 192 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 integration_tests/profiles.yml
 create mode 100644 supported_adapters.env
 create mode 100644 tox.ini

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 000000000..a6a096db6
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,33 @@
+# **what?**
+# Run tests against supported adapters
+
+# **why?**
+# To ensure that the package works as expected with all supported adapters
+
+# **when?**
+# On every PR, every push to main, and when manually triggered
+
+name: Package Integration Tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  run-tests:
+    uses: dbt-labs/dbt-package-testing/.github/workflows/run_tox.yml@v1
+    # this just tests with postgres so no variables need to be passed through.
+    # When it's time to add more adapters you will need to pass through inputs for
+    # the other adapters as shown in the below example for redshift
+    # with:
+    #   # redshift
+    #   REDSHIFT_HOST: ${{ vars.REDSHIFT_HOST }}
+    #   REDSHIFT_USER: ${{ vars.REDSHIFT_USER }}
+    #   REDSHIFT_DATABASE: ${{ vars.REDSHIFT_DATABASE }}
+    #   REDSHIFT_SCHEMA: "integration_tests_redshift_${{ github.run_number }}"
+    #   REDSHIFT_PORT: ${{ vars.REDSHIFT_PORT }}
+    # secrets:
+    #   DBT_ENV_SECRET_REDSHIFT_PASS: ${{ secrets.DBT_ENV_SECRET_REDSHIFT_PASS }}
diff --git a/dev-requirements.txt b/dev-requirements.txt
index f41268a0d..a24f4d4f3 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -2,6 +2,7 @@ pytest
 pytest-parametrization>=2022.2.1
 pre-commit
 mypy
+tox

 # MyPy stubs
 types-requests
diff --git a/elementary/monitor/dbt_project/dbt_project.yml b/elementary/monitor/dbt_project/dbt_project.yml
index ce4e3dbe0..ee2eb6a2b 100644
--- a/elementary/monitor/dbt_project/dbt_project.yml
+++ b/elementary/monitor/dbt_project/dbt_project.yml
@@ -6,7 +6,7 @@ version: "1.0.0"
 config-version: 2

 # This setting configures which "profile" dbt uses for this project.
-profile: "elementary"
+profile: "integration_tests"

 # These configurations specify where dbt should look for different types of files.
 # The `model-paths` config, for example, states that models in this project can be
diff --git a/integration_tests/profiles.yml b/integration_tests/profiles.yml
new file mode 100644
index 000000000..277596c0c
--- /dev/null
+++ b/integration_tests/profiles.yml
@@ -0,0 +1,111 @@
+integration_tests:
+  target: postgres
+  outputs:
+    postgres:
+      type: "postgres"
+      host: "{{ env_var('POSTGRES_HOST') }}"
+      user: "{{ env_var('POSTGRES_USER') }}"
+      pass: "{{ env_var('DBT_ENV_SECRET_POSTGRES_PASS') }}"
+      port: "{{ env_var('POSTGRES_PORT') | as_number }}"
+      dbname: "{{ env_var('POSTGRES_DATABASE') }}"
+      schema: "{{ env_var('POSTGRES_SCHEMA') }}"
+      threads: 5
+
+    redshift:
+      type: "redshift"
+      host: "{{ env_var('REDSHIFT_HOST') }}"
+      user: "{{ env_var('REDSHIFT_USER') }}"
+      pass: "{{ env_var('DBT_ENV_SECRET_REDSHIFT_PASS') }}"
+      dbname: "{{ env_var('REDSHIFT_DATABASE') }}"
+      port: "{{ env_var('REDSHIFT_PORT') | as_number }}"
+      schema: "{{ env_var('REDSHIFT_SCHEMA') }}"
+      threads: 5
+
+    bigquery:
+      type: "bigquery"
+      method: "service-account-json"
+      project: "{{ env_var('BIGQUERY_PROJECT') }}"
+      dataset: "{{ env_var('BIGQUERY_SCHEMA') }}"
+      threads: 10
+      keyfile_json:
+        "{{ env_var('BIGQUERY_KEYFILE_JSON') | as_native}}"
+      job_retries: 3
+
+    snowflake:
+      type: "snowflake"
+      account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}"
+      user: "{{ env_var('SNOWFLAKE_USER') }}"
+      password: "{{ env_var('DBT_ENV_SECRET_SNOWFLAKE_PASS') }}"
+      role: "{{ env_var('SNOWFLAKE_ROLE') }}"
+      database: "{{ env_var('SNOWFLAKE_DATABASE') }}"
+      warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}"
+      schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}"
+      threads: 10
+
+    trino:
+      type: "trino"
+      method: "{{ env_var('TRINO_METHOD') }}"
+      user: "{{ env_var('TRINO_USER') }}"
+      password: "{{ env_var('DBT_ENV_SECRET_TRINO_PASS') }}"
+      host: "{{ env_var('TRINO_HOST') }}"
+      port: "{{ env_var('TRINO_PORT') | as_number}}"
+      catalog: "{{ env_var('TRINO_CATALOG') }}"
+      schema: "{{ env_var('TRINO_SCHEMA') }}"
+      timezone: "{{ env_var('TRINO_TIMEZONE') }}"
+      threads: 12
+
+    databricks:
+      type: "databricks"
+      schema: "{{ env_var('DATABRICKS_SCHEMA') }}"
+      host: "{{ env_var('DATABRICKS_HOST') }}"
+      http_path: "{{ env_var('DATABRICKS_HTTP_PATH') }}"
+      token: "{{ env_var('DBT_SECRET_ENV_DATABRICKS_TOKEN') }}"
+      threads: 8
+      connect_retries: 5
+      connect_timeout: 300
+
+    spark:
+      type: spark
+      host: "{{ env_var('SPARK_HOST') }}"
+      schema: "{{ env_var('SPARK_SCHEMA') }}"
+      user: "{{ env_var('SPARK_USER') }}"
+      method: "{{ env_var('SPARK_METHOD') }}"
+      port: "{{ env_var('SPARK_PORT') | as_number}}"
+      connect_retries: 3
+      connect_timeout: 5
+
+    fabric:
+      type: fabric
+      driver: "{{ env_var('FABRIC_DRIVER') }}"
+      server: "{{ env_var('FABRIC_HOST') }}"
+      port: "{{ env_var('FABRIC_PORT') | as_number}}"
+      database: "{{ env_var('FABRIC_DATABASE') }}"
+      schema: "{{ env_var('FABRIC_SCHEMA') }}"
+      authentication: "{{ env_var('FABRIC_AUTHENTICATION') }}"
+      tenant_id: "{{ env_var('FABRIC_TENANT') }}"
+      client_id: "{{ env_var('FABRIC_CLIENT') }}"
+      client_secret: "{{ env_var('DBT_ENV_SECRET_FABRIC_CLIENT_SECRET') }}"
+
+    synapse:
+      type: synapse
+      driver: "{{ env_var('SYNAPSE_DRIVER') }}"
+      server: "{{ env_var('SYNAPSE_HOST') }}"
+      port: "{{ env_var('SYNAPSE_PORT') | as_number}}"
+      database: "{{ env_var('SYNAPSE_DATABASE') }}"
+      schema: "{{ env_var('SYNAPSE_SCHEMA') }}"
+      authentication: "{{ env_var('SYNAPSE_AUTHENTICATION') }}"
+      tenant_id: "{{ env_var('SYNAPSE_TENANT_ID') }}"
+      client_id: "{{ env_var('SYNAPSE_CLIENT_ID') }}"
+      client_secret: "{{ env_var('DBT_ENV_SECRET_SYNAPSE_CLIENT_SECRET') }}"
+
+    athena:
+      type: athena
+      s3_staging_dir: "{{ env_var('ATHENA_S3_STAGING_DIR') }}"
+      s3_data_dir: "{{ env_var('ATHENA_S3_DATA_DIR') }}"
+      s3_data_naming: "{{ env_var('ATHENA_S3_DATA_NAMING') }}"
+      region_name: "{{ env_var('ATHENA_REGION_NAME') }}"
+      schema: "{{ env_var('ATHENA_SCHEMA') }}"
+      database: "{{ env_var('ATHENA_DATABASE') }}"
+      threads: 4
+      aws_access_key_id: "{{ env_var('DBT_ENV_SECRET_ATHENA_AWS_ACCESS_KEY_ID') }}"
+      aws_secret_access_key: "{{ env_var('DBT_ENV_SECRET_ATHENA_AWS_SECRET_ACCESS_KEY') }}"
diff --git a/supported_adapters.env b/supported_adapters.env
new file mode 100644
index 000000000..14c965c93
--- /dev/null
+++ b/supported_adapters.env
@@ -0,0 +1 @@
+SUPPORTED_ADAPTERS=postgres
\ No newline at end of file
diff --git a/tests/tests_with_db/dbt_project/dbt_project.yml b/tests/tests_with_db/dbt_project/dbt_project.yml
index 34cfa5204..00b7fe0c5 100644
--- a/tests/tests_with_db/dbt_project/dbt_project.yml
+++ b/tests/tests_with_db/dbt_project/dbt_project.yml
@@ -1,7 +1,7 @@
 name: "elementary_tests"
 version: "1.0.0"
 config-version: 2
-profile: "elementary_tests"
+profile: "integration_tests"

 model-paths: ["models"]
 analysis-paths: ["analyses"]
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 000000000..38f340a14
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,44 @@
+[tox]
+skipsdist = True
+envlist = lint_all, testenv, copyfile
+
+[testenv]
+passenv =
+    # postgres env vars
+    POSTGRES_HOST
+    POSTGRES_USER
+    DBT_ENV_SECRET_POSTGRES_PASS
+    POSTGRES_PORT
+    POSTGRES_DATABASE
+    POSTGRES_SCHEMA
+allowlist_externals =
+    edr
+    cp
+    pip
+deps =
+    -rdev-requirements.txt
+    -e .
+commands =
+    edr
+    # Create the destination folder if it doesn't exist
+    mkdir -p ~/.dbt
+    # Copy the file to the home directory
+    cp integration_tests/profiles.yml ~/.dbt/profiles.yml
+
+# Postgres integration tests for centralized dbt testing
+# run pytest but skip e2e tests with reports
+[testenv:dbt_integration_postgres]
+changedir = tests
+allowlist_externals =
+    pytest
+    cp
+    edr
+    mkdir
+skip_install = true
+commands =
+    edr
+    # Create the destination folder if it doesn't exist
+    mkdir -p ~/.dbt
+    # Copy the file to the home directory
+    cp ../integration_tests/profiles.yml ~/.dbt/profiles.yml
+    pytest -v --target postgres --ignore e2e
\ No newline at end of file

From 22f8d893713a97b235b61c072ab7a59a52ea18c4 Mon Sep 17 00:00:00 2001
From: Emily Rockman
Date: Thu, 10 Oct 2024 14:14:09 -0500
Subject: [PATCH 2/3] use master not main

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a6a096db6..b5cf5ca7f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,14 +5,14 @@
 # To ensure that the package works as expected with all supported adapters

 # **when?**
-# On every PR, every push to main, and when manually triggered
+# On every PR, every push to master, and when manually triggered

 name: Package Integration Tests

 on:
   push:
     branches:
-      - main
+      - master
   pull_request:
   workflow_dispatch:


From e3eb61252e01aec6ceae775e95b2182d2635acd2 Mon Sep 17 00:00:00 2001
From: Emily Rockman
Date: Fri, 11 Oct 2024 07:47:16 -0500
Subject: [PATCH 3/3] remove unused profile targets

---
 integration_tests/profiles.yml | 99 +---------------------------------
 1 file changed, 1 insertion(+), 98 deletions(-)

diff --git a/integration_tests/profiles.yml b/integration_tests/profiles.yml
index 277596c0c..043b9f39e 100644
--- a/integration_tests/profiles.yml
+++ b/integration_tests/profiles.yml
@@ -11,101 +11,4 @@ integration_tests:
       schema: "{{ env_var('POSTGRES_SCHEMA') }}"
       threads: 5

-    redshift:
-      type: "redshift"
-      host: "{{ env_var('REDSHIFT_HOST') }}"
-      user: "{{ env_var('REDSHIFT_USER') }}"
-      pass: "{{ env_var('DBT_ENV_SECRET_REDSHIFT_PASS') }}"
-      dbname: "{{ env_var('REDSHIFT_DATABASE') }}"
-      port: "{{ env_var('REDSHIFT_PORT') | as_number }}"
-      schema: "{{ env_var('REDSHIFT_SCHEMA') }}"
-      threads: 5
-
-    bigquery:
-      type: "bigquery"
-      method: "service-account-json"
-      project: "{{ env_var('BIGQUERY_PROJECT') }}"
-      dataset: "{{ env_var('BIGQUERY_SCHEMA') }}"
-      threads: 10
-      keyfile_json:
-        "{{ env_var('BIGQUERY_KEYFILE_JSON') | as_native}}"
-      job_retries: 3
-
-    snowflake:
-      type: "snowflake"
-      account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}"
-      user: "{{ env_var('SNOWFLAKE_USER') }}"
-      password: "{{ env_var('DBT_ENV_SECRET_SNOWFLAKE_PASS') }}"
-      role: "{{ env_var('SNOWFLAKE_ROLE') }}"
-      database: "{{ env_var('SNOWFLAKE_DATABASE') }}"
-      warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}"
-      schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}"
-      threads: 10
-
-    trino:
-      type: "trino"
-      method: "{{ env_var('TRINO_METHOD') }}"
-      user: "{{ env_var('TRINO_USER') }}"
-      password: "{{ env_var('DBT_ENV_SECRET_TRINO_PASS') }}"
-      host: "{{ env_var('TRINO_HOST') }}"
-      port: "{{ env_var('TRINO_PORT') | as_number}}"
-      catalog: "{{ env_var('TRINO_CATALOG') }}"
-      schema: "{{ env_var('TRINO_SCHEMA') }}"
-      timezone: "{{ env_var('TRINO_TIMEZONE') }}"
-      threads: 12
-
-    databricks:
-      type: "databricks"
-      schema: "{{ env_var('DATABRICKS_SCHEMA') }}"
-      host: "{{ env_var('DATABRICKS_HOST') }}"
-      http_path: "{{ env_var('DATABRICKS_HTTP_PATH') }}"
-      token: "{{ env_var('DBT_SECRET_ENV_DATABRICKS_TOKEN') }}"
-      threads: 8
-      connect_retries: 5
-      connect_timeout: 300
-
-    spark:
-      type: spark
-      host: "{{ env_var('SPARK_HOST') }}"
-      schema: "{{ env_var('SPARK_SCHEMA') }}"
-      user: "{{ env_var('SPARK_USER') }}"
-      method: "{{ env_var('SPARK_METHOD') }}"
-      port: "{{ env_var('SPARK_PORT') | as_number}}"
-      connect_retries: 3
-      connect_timeout: 5
-
-    fabric:
-      type: fabric
-      driver: "{{ env_var('FABRIC_DRIVER') }}"
-      server: "{{ env_var('FABRIC_HOST') }}"
-      port: "{{ env_var('FABRIC_PORT') | as_number}}"
-      database: "{{ env_var('FABRIC_DATABASE') }}"
-      schema: "{{ env_var('FABRIC_SCHEMA') }}"
-      authentication: "{{ env_var('FABRIC_AUTHENTICATION') }}"
-      tenant_id: "{{ env_var('FABRIC_TENANT') }}"
-      client_id: "{{ env_var('FABRIC_CLIENT') }}"
-      client_secret: "{{ env_var('DBT_ENV_SECRET_FABRIC_CLIENT_SECRET') }}"
-
-    synapse:
-      type: synapse
-      driver: "{{ env_var('SYNAPSE_DRIVER') }}"
-      server: "{{ env_var('SYNAPSE_HOST') }}"
-      port: "{{ env_var('SYNAPSE_PORT') | as_number}}"
-      database: "{{ env_var('SYNAPSE_DATABASE') }}"
-      schema: "{{ env_var('SYNAPSE_SCHEMA') }}"
-      authentication: "{{ env_var('SYNAPSE_AUTHENTICATION') }}"
-      tenant_id: "{{ env_var('SYNAPSE_TENANT_ID') }}"
-      client_id: "{{ env_var('SYNAPSE_CLIENT_ID') }}"
-      client_secret: "{{ env_var('DBT_ENV_SECRET_SYNAPSE_CLIENT_SECRET') }}"
-
-    athena:
-      type: athena
-      s3_staging_dir: "{{ env_var('ATHENA_S3_STAGING_DIR') }}"
-      s3_data_dir: "{{ env_var('ATHENA_S3_DATA_DIR') }}"
-      s3_data_naming: "{{ env_var('ATHENA_S3_DATA_NAMING') }}"
-      region_name: "{{ env_var('ATHENA_REGION_NAME') }}"
-      schema: "{{ env_var('ATHENA_SCHEMA') }}"
-      database: "{{ env_var('ATHENA_DATABASE') }}"
-      threads: 4
-      aws_access_key_id: "{{ env_var('DBT_ENV_SECRET_ATHENA_AWS_ACCESS_KEY_ID') }}"
-      aws_secret_access_key: "{{ env_var('DBT_ENV_SECRET_ATHENA_AWS_SECRET_ACCESS_KEY') }}"
+# required format for other adapters can be found at https://github.com/dbt-labs/dbt-package-testing/blob/main/integration_tests/profiles.yml
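
Local-run sketch (not part of the patch series above): only the tox env name and the
POSTGRES_* / DBT_ENV_SECRET_POSTGRES_PASS variable names are taken from the tox.ini and
profiles.yml added in PATCH 1/3; every value below is an illustrative placeholder for a
locally reachable Postgres, and the [testenv:dbt_integration_postgres] env is assumed to
inherit passenv from [testenv] per tox's default section inheritance.

    # placeholder connection values; adjust for your local Postgres
    export POSTGRES_HOST=localhost
    export POSTGRES_USER=dbt
    export DBT_ENV_SECRET_POSTGRES_PASS=dbt
    export POSTGRES_PORT=5432
    export POSTGRES_DATABASE=dbt
    export POSTGRES_SCHEMA=integration_tests_postgres
    # run only the postgres integration env defined in tox.ini
    tox -e dbt_integration_postgres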