Refactor CI scripts and test fixes (#304)

99Lys · web-flow · commit e20e92c2296a · 2025-08-19T15:13:16.000+01:00
### Summary

Refactoring CI scripts and fixing some tests.

### Description

[CI Refactoring]
- Deleted `create_dbt_test_users.sh` → users are only needed for grants
tests, which are not run in the current CI
- Created `extract_auth_token.sh` and updated
`create_and_format_sources.sh` → split authentication token extraction
from sources creation
- Updated `create_env_file.sh` → removed users and roles, as they are
only needed for grants tests
- Display the exit status of each test/job → this gives us more
visibility into exactly which group of tests failed. Unfortunately, the
pipeline as a whole will be reported as "failed", but only the result of
the **Verify Expected Test Failures** job needs to be considered
- Fixed erroring tests not being considered as failing tests → look for
`ERROR tests` in artifacts as well, not only `FAILED tests`

[Tests Fixing]
- Grants → override the `apply_grants` macro to add the `user:` prefix to
any grantee defined without one; otherwise every rerun of the same model
leads to revoking and regranting. Override tests to include the prefix in
the expected results
- Hooks → add test prefix in dataset folder creation to avoid existing
folder error

### Test Results

- All tests are passing/failing as expected

### Changelog

-   [x] Added a summary of what this PR accomplishes to CHANGELOG.md
diff --git a/.github/scripts/compare_test_failures.sh b/.github/scripts/compare_test_failures.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# Verify that the tests which failed in CI are exactly the expected ones.
+#
+# Collects the test IDs found on "FAILED tests..." / "ERROR tests..." lines of
+# every *.txt report under reports/ (searched recursively) and compares them
+# with the IDs listed in .github/expected_failures.txt.
+#
+# Exit status: 0 when both sets match; 1 when a test failed unexpectedly or an
+# expected failure did not occur.
+set -e
+
+echo "Comparing actual test failures with expected failures..."
+
+# Enable globstar for recursive globbing
+shopt -s globstar
+
+# Extract actual failures from test reports.
+# sort -u: the same test ID can appear more than once (for instance on both a
+# FAILED and an ERROR line); comm compares line by line, so a repeated ID would
+# be left over and reported as an unexpected failure.
+actual_failures=$(grep -E "(FAILED tests|ERROR tests)" reports/**/*.txt | awk '{print $2}' | sort -u)
+
+# Read expected failures (deduplicated for the same reason)
+expected_failures=$(sort -u .github/expected_failures.txt)
+
+echo "Expected Failures:"
+echo "$expected_failures"
+echo ""
+echo "Actual Failures:"
+echo "$actual_failures"
+echo ""
+
+# Identify unexpected failures (in actual but not in expected)
+unexpected_failures=$(comm -13 <(echo "$expected_failures") <(echo "$actual_failures"))
+
+# Identify missing expected failures (in expected but not in actual)
+missing_failures=$(comm -23 <(echo "$expected_failures") <(echo "$actual_failures"))
+
+# Initialize exit code
+exit_code=0
+
+if [ -n "$unexpected_failures" ]; then
+  echo "Unexpected test failures detected:"
+  echo "$unexpected_failures"
+  exit_code=1
+fi
+
+if [ -n "$missing_failures" ]; then
+  echo "::warning::Expected test failures that did not occur (they passed):"
+  echo "$missing_failures"
+  exit_code=1
+fi
+
+if [ "$exit_code" -eq 0 ]; then
+  echo "All failed tests are expected, and all expected failures have occurred."
+else
+  echo "Verification failed: There are unexpected or missing test failures."
+fi
+
+exit "$exit_code"
diff --git a/.github/scripts/create_and_format_sources.sh b/.github/scripts/create_and_format_sources.sh
@@ -1,50 +1,20 @@
 #!/bin/bash
 set -e
 
-: "${RETRY_COUNT:?Need to set RETRY_COUNT}"
 : "${DREMIO_HEALTH_URL:?Need to set DREMIO_HEALTH_URL}"
-: "${SLEEP_INTERVAL:?Need to set SLEEP_INTERVAL}"
-: "${DREMIO_SOFTWARE_USERNAME:?Need to set DREMIO_SOFTWARE_USERNAME}"
-: "${DREMIO_SOFTWARE_PASSWORD:?Need to set DREMIO_SOFTWARE_PASSWORD}"
 : "${MINIO_ROOT_USER:?Need to set MINIO_ROOT_USER}"
 : "${MINIO_ROOT_PASSWORD:?Need to set MINIO_ROOT_PASSWORD}"
 
-for i in $(seq 1 $RETRY_COUNT); do
-  if curl -s $DREMIO_HEALTH_URL; then
-    echo "Dremio is up."
-    break
-  fi
-  echo "Waiting for Dremio to become ready... ($i/$RETRY_COUNT)"
-  sleep $SLEEP_INTERVAL
-done
-
-if ! curl -s $DREMIO_HEALTH_URL; then
-  echo "Dremio did not become ready in time."
-  exit 1
-fi
-
-# Obtain Dremio auth token
-echo "Logging into Dremio to obtain auth token..."
-AUTH_RESPONSE=$(curl -s -X POST "$DREMIO_HEALTH_URL/apiv2/login" \
-  -H "Content-Type: application/json" \
-  --data "{\"userName\":\"${DREMIO_SOFTWARE_USERNAME}\", \"password\":\"${DREMIO_SOFTWARE_PASSWORD}\"}")
-
-AUTH_TOKEN=$(echo "$AUTH_RESPONSE" | jq -r .token)
-
-# Check if AUTH_TOKEN is not empty
-if [ -z "$AUTH_TOKEN" ] || [ "$AUTH_TOKEN" == "null" ]; then
-  echo "Failed to obtain Dremio auth token."
-  exit 1
-fi
-
-echo "Obtained Dremio auth token."
-echo "::add-mask::$AUTH_TOKEN"
+# Get AUTH_TOKEN from environment or file
 if [ "$GITHUB_ACTIONS" = "true" ]; then
-  echo "Running in GitHub Actions"
-  echo "AUTH_TOKEN=${AUTH_TOKEN}" >> $GITHUB_ENV
+  : "${AUTH_TOKEN:?Need to set AUTH_TOKEN}"
   HOST="minio"
 else # Jenkins
-  echo $AUTH_TOKEN > /tmp/auth_token.txt
+  if [ ! -f /tmp/auth_token.txt ]; then
+    echo "Auth token file not found. Please run extract_auth_token.sh first."
+    exit 1
+  fi
+  AUTH_TOKEN=$(cat /tmp/auth_token.txt)
   HOST="localhost"
 fi
 
diff --git a/.github/scripts/create_dbt_test_users.sh b/.github/scripts/create_dbt_test_users.sh
diff --git a/.github/scripts/create_env_file.sh b/.github/scripts/create_env_file.sh
@@ -13,12 +13,7 @@ DREMIO_SOFTWARE_USERNAME=${DREMIO_SOFTWARE_USERNAME}
 DREMIO_SOFTWARE_PASSWORD=${DREMIO_SOFTWARE_PASSWORD}
 DREMIO_DATALAKE=dbt_test_source
 DREMIO_DATABASE=dbt_test
-DBT_TEST_USER_1=dbt_test_user_1
-DBT_TEST_USER_2=dbt_test_user_2
-DBT_TEST_USER_3=dbt_test_user_3
-DBT_TEST_ROLE_1=dbt_test_role_1
-DBT_TEST_ROLE_2=dbt_test_role_2
 DREMIO_EDITION=community
 EOF
 
-echo ".env file created successfully."
+echo ".env file created successfully."
diff --git a/.github/scripts/extract_auth_token.sh b/.github/scripts/extract_auth_token.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+#
+# Wait for Dremio to respond, log in, and publish the resulting auth token.
+#
+# Required environment variables:
+#   RETRY_COUNT               number of readiness probes before giving up
+#   DREMIO_HEALTH_URL         base URL, probed for readiness and used for login
+#   SLEEP_INTERVAL            pause between probes, passed to sleep
+#   DREMIO_SOFTWARE_USERNAME  user name sent to the login endpoint
+#   DREMIO_SOFTWARE_PASSWORD  password sent to the login endpoint
+#
+# Output: on GitHub Actions the token is masked and appended to $GITHUB_ENV as
+# AUTH_TOKEN; otherwise (Jenkins) it is written to /tmp/auth_token.txt.
+# Exits 1 when Dremio never responds or no token is returned.
+set -e
+
+: "${RETRY_COUNT:?Need to set RETRY_COUNT}"
+: "${DREMIO_HEALTH_URL:?Need to set DREMIO_HEALTH_URL}"
+: "${SLEEP_INTERVAL:?Need to set SLEEP_INTERVAL}"
+: "${DREMIO_SOFTWARE_USERNAME:?Need to set DREMIO_SOFTWARE_USERNAME}"
+: "${DREMIO_SOFTWARE_PASSWORD:?Need to set DREMIO_SOFTWARE_PASSWORD}"
+
+for i in $(seq 1 "$RETRY_COUNT"); do
+  if curl -s "$DREMIO_HEALTH_URL"; then
+    echo "Dremio is up."
+    break
+  fi
+  echo "Waiting for Dremio to become ready... ($i/$RETRY_COUNT)"
+  sleep "$SLEEP_INTERVAL"
+done
+
+# The loop also ends when every probe failed, so check once more before login.
+if ! curl -s "$DREMIO_HEALTH_URL"; then
+  echo "Dremio did not become ready in time."
+  exit 1
+fi
+
+# Obtain Dremio auth token
+echo "Logging into Dremio to obtain auth token..."
+# Build the body with jq so quotes or backslashes in the credentials are
+# JSON-escaped instead of corrupting the request.
+LOGIN_PAYLOAD=$(jq -n \
+  --arg userName "$DREMIO_SOFTWARE_USERNAME" \
+  --arg password "$DREMIO_SOFTWARE_PASSWORD" \
+  '{userName: $userName, password: $password}')
+AUTH_RESPONSE=$(curl -s -X POST "$DREMIO_HEALTH_URL/apiv2/login" \
+  -H "Content-Type: application/json" \
+  --data "$LOGIN_PAYLOAD")
+
+AUTH_TOKEN=$(echo "$AUTH_RESPONSE" | jq -r .token)
+
+# jq -r prints the string "null" when the response has no token field
+if [ -z "$AUTH_TOKEN" ] || [ "$AUTH_TOKEN" == "null" ]; then
+  echo "Failed to obtain Dremio auth token."
+  exit 1
+fi
+
+echo "Obtained Dremio auth token."
+if [ "$GITHUB_ACTIONS" = "true" ]; then
+  # ::add-mask:: is a GitHub Actions workflow command; outside Actions the echo
+  # would just print the token into the build log, so it stays in this branch.
+  echo "::add-mask::$AUTH_TOKEN"
+  echo "AUTH_TOKEN=${AUTH_TOKEN}" >> "$GITHUB_ENV"
+else # Jenkins
+  # The file holds a credential: make it readable by the current user only.
+  (umask 077; printf '%s\n' "$AUTH_TOKEN" > /tmp/auth_token.txt)
+  chmod 600 /tmp/auth_token.txt
+fi
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -27,7 +27,8 @@ jobs:
   run-parallel-tests:
     name: Run Parallel Tests
     runs-on: ubuntu-latest
-    needs: discover-tests 
+    continue-on-error: true
+    needs: discover-tests
     env:
       RETRY_COUNT: 12 # number of retries for health checks
       SLEEP_INTERVAL: 5 # Sleep duration in seconds between retries
@@ -75,6 +76,9 @@ jobs:
       - name: Create MinIO bucket
         run: bash .github/scripts/create_minio_bucket.sh
 
+      - name: Extract Auth Token
+        run: bash .github/scripts/extract_auth_token.sh
+
       - name: Create and Format Sources
         run: bash .github/scripts/create_and_format_sources.sh
 
@@ -89,9 +93,6 @@ jobs:
           pip install -r dev_requirements.txt
           pip install .
 
-      - name: Create dbt test users
-        run: bash .github/scripts/create_dbt_test_users.sh
-
       - name: Create dbt projects
         run: bash .github/scripts/create_dbt_projects.sh
 
@@ -107,13 +108,15 @@ jobs:
         run: echo "sanitized_test_dir=$(echo ${{ matrix.test_dir }} | tr '/' '_')" >> $GITHUB_ENV
 
       - name: Run tests
-        continue-on-error: true
+        id: run_tests
         run: |
           mkdir reports/
           report_file="reports/${{ env.sanitized_test_dir }}.txt"
           pytest ${{ matrix.test_dir }} | tee $report_file
+          exit ${PIPESTATUS[0]}
 
       - name: Upload test report as artifact
+        if: always()
         uses: actions/upload-artifact@v4
         with:
           name: ${{ env.sanitized_test_dir }}
@@ -133,43 +136,4 @@ jobs:
           path: reports/
 
       - name: Compare Actual Failures with Expected Failures
-        run: |
-          shopt -s globstar
-          actual_failures=$(grep "FAILED tests" reports/**/*.txt | awk '{print $2}' | sort)
-          expected_failures=$(sort .github/expected_failures.txt)
-
-          echo "Expected Failures:"
-          echo "$expected_failures"
-          echo ""
-          echo "Actual Failures:"
-          echo "$actual_failures"
-          echo ""
-
-          # Identify unexpected failures
-          unexpected_failures=$(comm -13 <(echo "$expected_failures") <(echo "$actual_failures"))
-
-          # Identify missing expected failures
-          missing_failures=$(comm -23 <(echo "$expected_failures") <(echo "$actual_failures"))
-
-          # Initialize exit code
-          exit_code=0
-
-          if [ -n "$unexpected_failures" ]; then
-            echo "Unexpected test failures detected:"
-            echo "$unexpected_failures"
-            exit_code=1
-          fi
-
-          if [ -n "$missing_failures" ]; then
-            echo "Expected test failures that did not occur (they passed):"
-            echo "$missing_failures"
-            exit_code=1
-          fi
-
-          if [ $exit_code -eq 0 ]; then
-            echo "All failed tests are expected, and all expected failures have occurred."
-          else
-            echo "Verification failed: There are unexpected or missing test failures."
-          fi
-
-          exit $exit_code
+        run: bash .github/scripts/compare_test_failures.sh
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,8 @@
 ## Changes
 
 - [#299](https://github.com/dremio/dbt-dremio/pull/299) Enhance persist_docs macro to wrap model and column metadata (including descriptions, tags and tests) into a Markdown wiki for Dremio.
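+- [#304](https://github.com/dremio/dbt-dremio/pull/304) Refactored CI
+- [#304](https://github.com/dremio/dbt-dremio/pull/304) Fixed tests for hooks and grants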
 
 # dbt-dremio v1.9.0
 
diff --git a/dbt/adapters/dremio/impl.py b/dbt/adapters/dremio/impl.py
@@ -161,7 +161,7 @@ def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
         for row in grants_table:
             # Just needed to change these two values to match Dremio cols
             grantee = row["grantee_id"]
-            privilege = row["privilege"]
+            privilege = row["privilege"].lower()
             grantee_type = row["grantee_type"]
 
             if privilege in grants_dict.keys():
diff --git a/dbt/include/dremio/macros/adapters/apply_grants.sql b/dbt/include/dremio/macros/adapters/apply_grants.sql
@@ -12,6 +12,48 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.*/
 
+{% macro dremio__apply_grants(relation, grant_config, should_revoke=True) %}
+    {#-- Apply grant_config to relation. If grant_config is {} or None, this is a no-op --#}
+    {% if grant_config %}
+        {#-- Make sure every grantee carries a prefix: one without ':' gets 'user:' prepended (grant_config is updated in place) --#}
+        {% for privilege, grantees in grant_config.items() %}
+        {% set updated_grantees = [] %}
+        {% for grantee in grantees %}
+            {% if ':' not in grantee %}
+            {% do updated_grantees.append('user:' ~ grantee) %}
+            {% else %}
+            {% do updated_grantees.append(grantee) %}
+            {% endif %}
+        {% endfor %}
+        {% do grant_config.update({privilege: updated_grantees}) %}
+        {% endfor %}
+        {% if should_revoke %}
+            {#-- Previous grants may have carried over --#}
+            {#-- Show current grants and diff them against grant_config in both directions --#}
+            {% set current_grants_table = run_query(get_show_grant_sql(relation)) %}
+            {% set current_grants_dict = adapter.standardize_grants_dict(current_grants_table) %}
+            {% set needs_granting = diff_of_two_dicts(grant_config, current_grants_dict) %}
+            {% set needs_revoking = diff_of_two_dicts(current_grants_dict, grant_config) %}
+            {% if not (needs_granting or needs_revoking) %}
+                {{ log('On ' ~ relation.render() ~': All grants are in place, no revocation or granting needed.')}}
+            {% endif %}
+        {% else %}
+            {#-- The caller does not expect previous grants to have carried over. --#}
+            {#-- Nothing to revoke: grant everything that is configured. --#}
+            {% set needs_revoking = {} %}
+            {% set needs_granting = grant_config %}
+        {% endif %}
+        {% if needs_granting or needs_revoking %}
+            {% set revoke_statement_list = get_dcl_statement_list(relation, needs_revoking, get_revoke_sql) %}
+            {% set grant_statement_list = get_dcl_statement_list(relation, needs_granting, get_grant_sql) %}
+            {% set dcl_statement_list = revoke_statement_list + grant_statement_list %}
+            {% if dcl_statement_list %}
+                {{ call_dcl_statements(dcl_statement_list) }}
+            {% endif %}
+        {% endif %}
+    {% endif %}
+{% endmacro %}
+
 {%- macro dremio__support_multiple_grantees_per_dcl_statement() -%}
     {{ return(False) }}
 {%- endmacro -%}
diff --git a/tests/README.md b/tests/README.md
@@ -17,6 +17,8 @@ To run our tests, a test environment must be set up.
     DBT_TEST_USER_1=dbt_test_user_1
     DBT_TEST_USER_2=dbt_test_user_2
     DBT_TEST_USER_3=dbt_test_user_3
+    DBT_TEST_ROLE_1=dbt_test_role_1
+    DBT_TEST_ROLE_2=dbt_test_role_2
     ```
    For cloud version of Dremio:
     ```
@@ -29,6 +31,8 @@ To run our tests, a test environment must be set up.
     DBT_TEST_USER_1=dbt_test_user_1
     DBT_TEST_USER_2=dbt_test_user_2
     DBT_TEST_USER_3=dbt_test_user_3
+    DBT_TEST_ROLE_1=dbt_test_role_1
+    DBT_TEST_ROLE_2=dbt_test_role_2
     ```
 1. Create the three users listed above (dbt_test_user_1, dbt_test_user_2, dbt_test_user_3) in the Dremio instance.
 1. Create a bucket in Object storage with a name `dbtdremios3`
diff --git a/tests/functional/adapter/grants/base_grants.py b/tests/functional/adapter/grants/base_grants.py
@@ -14,6 +14,7 @@
 
 import pytest
 
+from dbt.context.base import BaseContext  # diff_of_two_dicts only
 from dbt.tests.adapter.grants.base_grants import BaseGrants
 from tests.utils.util import BUCKET
 
@@ -59,3 +60,12 @@ def dbt_profile_data(
         if profiles_config_update:
             profile.update(profiles_config_update)
         return profile
+
+    # Override that adds an explanatory message to the grants assertion
+    def assert_expected_grants_match_actual(self, project, relation_name, expected_grants):
+        actual_grants = self.get_grants_on_relation(project, relation_name)
+        # A plain "assert expected_grants == actual_grants" is not enough here: the
+        # comparison must be case-insensitive, so diff the two dicts in both directions
+        diff_a = BaseContext.diff_of_two_dicts(actual_grants, expected_grants)
+        diff_b = BaseContext.diff_of_two_dicts(expected_grants, actual_grants)
+        assert diff_a == diff_b == {}, f"Expected {str(expected_grants)} but got {str(actual_grants)}"
diff --git a/tests/functional/adapter/grants/test_incremental_grants.py b/tests/functional/adapter/grants/test_incremental_grants.py
diff --git a/tests/functional/adapter/grants/test_seed_grants.py b/tests/functional/adapter/grants/test_seed_grants.py
diff --git a/tests/functional/adapter/grants/test_snapshot_grants.py b/tests/functional/adapter/grants/test_snapshot_grants.py
diff --git a/tests/hooks/test_model_hooks.py b/tests/hooks/test_model_hooks.py