# Workflow: Test warehouse platform (copied from GitHub run #121, "Workflow file for this run")
name: Test warehouse platform
on:
  workflow_dispatch:
    inputs:
      warehouse-type:
        type: choice
        required: true
        description: Type of warehouse platform
        options:
          - postgres
          - snowflake
          - bigquery
          - redshift
          - databricks_catalog
          - spark
          - athena
      elementary-ref:
        type: string
        required: false
        description: Branch or tag to checkout for 'elementary' repository
      dbt-data-reliability-ref:
        type: string
        required: false
        description: Branch or tag to checkout for 'dbt-data-reliability' repository
      dbt-version:
        type: string
        required: false
        description: dbt's version to test with
      generate-data:
        type: boolean
        required: false
        default: false
        description: Whether to generate new data
  workflow_call:
    inputs:
      warehouse-type:
        type: string
        required: true
      elementary-ref:
        type: string
        required: false
      dbt-data-reliability-ref:
        type: string
        required: false
      dbt-version:
        type: string
        required: false
      generate-data:
        type: boolean
        required: false
        default: false

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  ELEMENTARY_DBT_PACKAGE_PATH: ${{ github.workspace }}/dbt-data-reliability
  CLI_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project
  E2E_DBT_PROJECT_DIR: ${{ github.workspace }}/elementary/tests/e2e_dbt_project

jobs:
  # PRs from forks require approval, specifically with the "pull_request_target" event as it contains repo secrets.
  check-if-requires-approval:
    runs-on: ubuntu-latest
    outputs:
      requires_approval: ${{ steps.set-output.outputs.requires_approval }}
    steps:
      - name: Set requires approval output
        id: set-output
        run: |
          if [[ "${{ github.event_name }}" =~ ^pull_request && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then
            echo "requires_approval=true" >> "$GITHUB_OUTPUT"
          else
            echo "requires_approval=false" >> "$GITHUB_OUTPUT"
          fi

  test:
    runs-on: ubuntu-latest
    needs: [check-if-requires-approval]
    # Fork PRs run against a protected environment so a maintainer must approve them.
    environment: ${{ (needs.check-if-requires-approval.outputs.requires_approval == 'true' && 'elementary_test_env') || '' }}
    defaults:
      run:
        working-directory: elementary
    concurrency:
      # This is what eventually defines the schema name in the data platform.
      group: tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${{ github.head_ref || github.ref_name }}
      cancel-in-progress: true
    steps:
      - name: Checkout Elementary
        uses: actions/checkout@v4
        with:
          path: elementary
          ref: ${{ inputs.elementary-ref }}
      - name: Checkout dbt package
        uses: actions/checkout@v4
        with:
          repository: elementary-data/dbt-data-reliability
          path: dbt-data-reliability
          ref: ${{ inputs.dbt-data-reliability-ref }}
      - name: Start Postgres
        if: inputs.warehouse-type == 'postgres'
        working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
        run: docker compose up -d postgres
      # - name: Start Clickhouse
      #   if: inputs.warehouse-type == 'clickhouse'
      #   working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
      #   run: docker compose up -d clickhouse
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      - name: Install Spark requirements
        if: inputs.warehouse-type == 'spark'
        # -y is required in a non-interactive shell, and `python-dev` does not
        # exist on ubuntu-latest (22.04+) — python3-dev is the current package.
        run: sudo apt-get install -y python3-dev libsasl2-dev gcc
      - name: Install dbt
        # TODO: remove the <1.10.2 pin once there is a fix for
        # https://github.com/elementary-data/elementary/issues/1931
        # (this comment must stay OUTSIDE the folded scalar below: a `#` line
        # inside `run: >` folds into the command and shell-comments out the
        # rest of it, silently skipping the adapter install).
        run: >
          pip install
          "dbt-core${{ inputs.dbt-version && format('=={0}', inputs.dbt-version) }}"
          "dbt-${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks<1.10.2,') || inputs.warehouse-type }}${{ inputs.dbt-version && format('~={0}', inputs.dbt-version) }}"
      - name: Install Elementary
        run: |
          pip install -r dev-requirements.txt
          pip install ".[${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || inputs.warehouse-type }}]"
      - name: Write dbt profiles
        env:
          PROFILES_YML: ${{ secrets.CI_PROFILES_YML }}
        run: |
          mkdir -p ~/.dbt
          DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g')
          UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g")
          echo "$PROFILES_YML" | base64 -d | sed "s/<SCHEMA_NAME>/py_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml
      - name: Run Python package unit tests
        run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }}
      - name: Run Python package integration tests
        run: pytest -vv tests/integration --warehouse-type ${{ inputs.warehouse-type }}
      - name: Install dbt package
        # Replace the registry-resolved elementary package with a symlink to
        # the locally checked-out dbt-data-reliability repo.
        run: |
          ELEMENTARY_PKG_LOCATION=$(pip show elementary-data | grep -i location | awk '{print $2}')
          DBT_PROJECT_PATH="$ELEMENTARY_PKG_LOCATION/elementary/monitor/dbt_project"
          DBT_PKGS_PATH="$DBT_PROJECT_PATH/dbt_packages"
          dbt deps --project-dir "$DBT_PROJECT_PATH"
          rm -rf "$DBT_PKGS_PATH/elementary"
          ln -vs "$GITHUB_WORKSPACE/dbt-data-reliability" "$DBT_PKGS_PATH/elementary"
      - name: Run deps for E2E dbt project
        working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
        env:
          ELEMENTARY_DBT_PACKAGE_PATH: ${{ env.ELEMENTARY_DBT_PACKAGE_PATH }}
        run: |
          dbt deps
      - name: Seed e2e dbt project
        working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
        if: inputs.warehouse-type == 'postgres' || inputs.generate-data
        run: |
          python generate_data.py
          dbt seed -f --target "${{ inputs.warehouse-type }}"
      - name: Run e2e dbt project
        working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
        run: |
          dbt run --target "${{ inputs.warehouse-type }}" || true
          # Validate run_results.json: only error_model should be non-success.
          # NOTE: the step runs under `bash -e`, so the jq exit status must be
          # consumed in the `if` condition itself — a bare `jq ...; rc=$?`
          # would abort the script before the capture line executes.
          if jq -e '
            [.results[] | select(.status != "success") | .unique_id]
            | length == 1 and .[0] == "model.elementary_integration_tests.error_model"
          ' target/run_results.json > /dev/null; then
            echo "✅ Validation passed: only error_model failed."
          else
            echo "❌ Validation failed. Unexpected failures:"
            jq '[.results[] | select(.status != "success") | .unique_id] | join(", ")' target/run_results.json
            exit 1
          fi
      - name: Test e2e dbt project
        working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
        continue-on-error: true
        run: |
          dbt test --target "${{ inputs.warehouse-type }}"
      - name: Run help
        run: edr --help
      - name: Run monitor
        env:
          SLACK_WEBHOOK: ${{ secrets.CI_SLACK_WEBHOOK }}
        run: >
          edr monitor
          -t "${{ inputs.warehouse-type }}"
          --group-by table
          --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}"
          --project-profile-target "${{ inputs.warehouse-type }}"
          --slack-webhook "$SLACK_WEBHOOK"
      - name: Validate alerts statuses were updated
        working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }}
        run: |
          dbt deps
          dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}"
      - name: Run report
        run: >
          edr monitor report
          -t "${{ inputs.warehouse-type }}"
          --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}"
          --project-profile-target "${{ inputs.warehouse-type }}"
      - name: Set report artifact name
        id: set_report_artifact_name
        run: |
          ARTIFACT_NAME=$(echo "report_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.html" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g')
          echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT"
      - name: Upload report artifact
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.set_report_artifact_name.outputs.artifact_name }}
          path: elementary/edr_target/elementary_report.html
      - name: Write GCS keyfile
        env:
          GCS_KEYFILE: ${{ secrets.GCS_KEYFILE }}
        run: echo "$GCS_KEYFILE" | base64 -d > /tmp/gcs_keyfile.json
      - name: Run send report
        env:
          SLACK_TOKEN: ${{ secrets.CI_SLACK_TOKEN }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AZURE_CONNECTION_STRING: ${{ secrets.AZURE_CONNECTION_STRING }}
        # GCS upload is currently disabled; re-enable by adding these flags
        # back to the command below (keep them OUT of the folded scalar while
        # disabled — a `#` inside `run: >` folds into the single command line
        # and shell-comments out every flag after it, silently dropping the
        # Azure upload):
        #   --google-service-account-path /tmp/gcs_keyfile.json
        #   --gcs-bucket-name elementary_ci_artifacts
        run: >
          edr monitor send-report
          -t "${{ inputs.warehouse-type }}"
          --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}"
          --project-profile-target "${{ inputs.warehouse-type }}"
          --slack-file-name "report_${{ inputs.warehouse-type }}_${{ env.BRANCH_NAME }}.html"
          --slack-token "$SLACK_TOKEN"
          --slack-channel-name data-ops
          --bucket-file-path "ci_reports/report_${{ inputs.warehouse-type }}_${{ env.BRANCH_NAME }}.html"
          --aws-access-key-id "$AWS_ACCESS_KEY_ID"
          --aws-secret-access-key "$AWS_SECRET_ACCESS_KEY"
          --s3-bucket-name elementary-ci-artifacts
          --azure-connection-string "$AZURE_CONNECTION_STRING"
          --azure-container-name reports
          --update-bucket-website true
      - name: Set artifact name
        id: set_artifact_name
        run: |
          ARTIFACT_NAME=$(echo "edr_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.log" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g')
          echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT"
      - name: Upload edr log
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.set_artifact_name.outputs.artifact_name }}
          path: elementary/edr_target/edr.log
      - name: Run Python package e2e tests
        run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }}