Test warehouse platform #124
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Test warehouse platform | |
| on: | |
| workflow_dispatch: | |
| inputs: | |
| warehouse-type: | |
| type: choice | |
| required: true | |
| description: Type of warehouse platform | |
| options: | |
| - postgres | |
| - snowflake | |
| - bigquery | |
| - redshift | |
| - databricks_catalog | |
| - spark | |
| - athena | |
| elementary-ref: | |
| type: string | |
| required: false | |
| description: Branch or tag to checkout for 'elementary' repository | |
| dbt-data-reliability-ref: | |
| type: string | |
| required: false | |
| description: Branch or tag to checkout for 'dbt-data-reliability' repository | |
| dbt-version: | |
| type: string | |
| required: false | |
| description: dbt's version to test with | |
| generate-data: | |
| type: boolean | |
| required: false | |
| default: false | |
| description: Whether to generate new data | |
| workflow_call: | |
| inputs: | |
| warehouse-type: | |
| type: string | |
| required: true | |
| elementary-ref: | |
| type: string | |
| required: false | |
| dbt-data-reliability-ref: | |
| type: string | |
| required: false | |
| dbt-version: | |
| type: string | |
| required: false | |
| generate-data: | |
| type: boolean | |
| required: false | |
| default: false | |
| env: | |
| BRANCH_NAME: ${{ github.head_ref || github.ref_name }} | |
| ELEMENTARY_DBT_PACKAGE_PATH: ${{ github.workspace }}/dbt-data-reliability | |
| CLI_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project | |
| E2E_DBT_PROJECT_DIR: ${{ github.workspace }}/elementary/tests/e2e_dbt_project | |
| jobs: | |
| # PRs from forks require approval, specifically with the "pull_request_target" event as it contains repo secrets. | |
| check-if-requires-approval: | |
| runs-on: ubuntu-latest | |
| outputs: | |
| requires_approval: ${{ steps.set-output.outputs.requires_approval }} | |
| steps: | |
| - name: Set requires approval output | |
| id: set-output | |
| run: | | |
| if [[ "${{ github.event_name }}" =~ ^pull_request && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then | |
| echo "requires_approval=true" >> $GITHUB_OUTPUT | |
| else | |
| echo "requires_approval=false" >> $GITHUB_OUTPUT | |
| fi | |
| test: | |
| runs-on: ubuntu-latest | |
| needs: [check-if-requires-approval] | |
| environment: ${{ (needs.check-if-requires-approval.outputs.requires_approval == 'true' && 'elementary_test_env') || '' }} | |
| defaults: | |
| run: | |
| working-directory: elementary | |
| concurrency: | |
| # This is what eventually defines the schema name in the data platform. | |
| group: tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${{ github.head_ref || github.ref_name }} | |
| cancel-in-progress: true | |
| steps: | |
| - name: Checkout Elementary | |
| uses: actions/checkout@v4 | |
| with: | |
| path: elementary | |
| ref: ${{ inputs.elementary-ref }} | |
| - name: Checkout dbt package | |
| uses: actions/checkout@v4 | |
| with: | |
| repository: elementary-data/dbt-data-reliability | |
| path: dbt-data-reliability | |
| ref: ${{ inputs.dbt-data-reliability-ref }} | |
| - name: Start Postgres | |
| if: inputs.warehouse-type == 'postgres' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: docker compose up -d postgres | |
| # - name: Start Clickhouse | |
| # if: inputs.warehouse-type == 'clickhouse' | |
| # working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| # run: docker compose up -d clickhouse | |
| - name: Setup Python | |
| uses: actions/setup-python@v4 | |
| with: | |
| python-version: "3.9" | |
| - name: Install Spark requirements | |
| if: inputs.warehouse-type == 'spark' | |
| run: sudo apt-get install python-dev libsasl2-dev gcc | |
| - name: Install dbt | |
| run: > | |
| pip install | |
| "dbt-core${{ inputs.dbt-version && format('=={0}', inputs.dbt-version) }}" | |
| # TODO: remove the <1.10.2 once we have a fix for https://github.com/elementary-data/elementary/issues/1931 | |
| "dbt-${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks<1.10.2,') || inputs.warehouse-type }}${{ inputs.dbt-version && format('~={0}', inputs.dbt-version) }}" | |
| - name: Install Elementary | |
| run: | | |
| pip install -r dev-requirements.txt | |
| pip install ".[${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || inputs.warehouse-type }}]" | |
| - name: Write dbt profiles | |
| env: | |
| PROFILES_YML: ${{ secrets.CI_PROFILES_YML }} | |
| run: | | |
| mkdir -p ~/.dbt | |
| DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g') | |
| UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g") | |
| echo "$PROFILES_YML" | base64 -d | sed "s/<SCHEMA_NAME>/py_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml | |
| - name: Run Python package unit tests | |
| run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }} | |
| - name: Run Python package integration tests | |
| run: pytest -vv tests/integration --warehouse-type ${{ inputs.warehouse-type }} | |
| - name: Install dbt package | |
| run: | | |
| ELEMENTARY_PKG_LOCATION=$(pip show elementary-data | grep -i location | awk '{print $2}') | |
| DBT_PROJECT_PATH="$ELEMENTARY_PKG_LOCATION/elementary/monitor/dbt_project" | |
| DBT_PKGS_PATH="$DBT_PROJECT_PATH/dbt_packages" | |
| dbt deps --project-dir "$DBT_PROJECT_PATH" | |
| rm -rf "$DBT_PKGS_PATH/elementary" | |
| ln -vs "$GITHUB_WORKSPACE/dbt-data-reliability" "$DBT_PKGS_PATH/elementary" | |
| - name: Run deps for E2E dbt project | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| env: | |
| ELEMENTARY_DBT_PACKAGE_PATH: ${{ env.ELEMENTARY_DBT_PACKAGE_PATH }} | |
| run: | | |
| dbt deps | |
| - name: Seed e2e dbt project | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| if: inputs.warehouse-type == 'postgres' || inputs.generate-data | |
| run: | | |
| python generate_data.py | |
| dbt seed -f --target "${{ inputs.warehouse-type }}" | |
| - name: Run e2e dbt project | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: | | |
| dbt run --target "${{ inputs.warehouse-type }}" || true | |
| # Validate run_results.json: only error_model should be non-success | |
| jq -e ' | |
| [.results[] | select(.status != "success") | .unique_id] | |
| | length == 1 and .[0] == "model.elementary_integration_tests.error_model" | |
| ' target/run_results.json > /dev/null | |
| jq_exit=$? | |
| if [ $jq_exit -eq 0 ]; then | |
| echo "✅ Validation passed: only error_model failed." | |
| else | |
| echo "❌ Validation failed. Unexpected failures:" | |
| jq '[.results[] | select(.status != "success") | .unique_id] | join(", ")' target/run_results.json | |
| fi | |
| exit $jq_exit | |
| - name: Test e2e dbt project | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| continue-on-error: true | |
| run: | | |
| dbt test --target "${{ inputs.warehouse-type }}" | |
| - name: Run help | |
| run: edr --help | |
| - name: Run monitor | |
| env: | |
| SLACK_WEBHOOK: ${{ secrets.CI_SLACK_WEBHOOK }} | |
| run: > | |
| edr monitor | |
| -t "${{ inputs.warehouse-type }}" | |
| --group-by table | |
| --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" | |
| --project-profile-target "${{ inputs.warehouse-type }}" | |
| --slack-webhook "$SLACK_WEBHOOK" | |
| - name: Validate alerts statuses were updated | |
| working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }} | |
| run: | | |
| dbt deps | |
| dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}" | |
| - name: Run report | |
| run: > | |
| edr monitor report | |
| -t "${{ inputs.warehouse-type }}" | |
| --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" | |
| --project-profile-target "${{ inputs.warehouse-type }}" | |
| - name: Set report artifact name | |
| id: set_report_artifact_name | |
| run: | | |
| ARTIFACT_NAME=$(echo "report_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.html" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g') | |
| echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT" | |
| - name: Upload report artifact | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ steps.set_report_artifact_name.outputs.artifact_name }} | |
| path: elementary/edr_target/elementary_report.html | |
| - name: Write GCS keyfile | |
| env: | |
| GCS_KEYFILE: ${{ secrets.GCS_KEYFILE }} | |
| run: echo "$GCS_KEYFILE" | base64 -d > /tmp/gcs_keyfile.json | |
| - name: Run send report | |
| env: | |
| SLACK_TOKEN: ${{ secrets.CI_SLACK_TOKEN }} | |
| AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} | |
| AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | |
| AZURE_CONNECTION_STRING: ${{ secrets.AZURE_CONNECTION_STRING }} | |
| run: > | |
| edr monitor send-report | |
| -t "${{ inputs.warehouse-type }}" | |
| --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" | |
| --project-profile-target "${{ inputs.warehouse-type }}" | |
| --slack-file-name "report_${{ inputs.warehouse-type }}_${{ env.BRANCH_NAME }}.html" | |
| --slack-token "$SLACK_TOKEN" | |
| --slack-channel-name data-ops | |
| --bucket-file-path "ci_reports/report_${{ inputs.warehouse-type }}_${{ env.BRANCH_NAME }}.html" | |
| --aws-access-key-id "$AWS_ACCESS_KEY_ID" | |
| --aws-secret-access-key "$AWS_SECRET_ACCESS_KEY" | |
| --s3-bucket-name elementary-ci-artifacts | |
| # --google-service-account-path /tmp/gcs_keyfile.json | |
| # --gcs-bucket-name elementary_ci_artifacts | |
| --azure-connection-string "$AZURE_CONNECTION_STRING" | |
| --azure-container-name reports | |
| --update-bucket-website true | |
| - name: Set artifact name | |
| id: set_artifact_name | |
| run: | | |
| ARTIFACT_NAME=$(echo "edr_${{ inputs.warehouse-type }}_${BRANCH_NAME}_dbt_${{ inputs.dbt-version || '' }}.log" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g') | |
| echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT" | |
| - name: Upload edr log | |
| if: ${{ always() }} | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ steps.set_artifact_name.outputs.artifact_name }} | |
| path: elementary/edr_target/edr.log | |
| - name: Run Python package e2e tests | |
| run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }} |