Skip to content

Commit 89f944e

Browse files
Add query comments to system queries (#1091)
Co-authored-by: Colin Rogers <[email protected]>
1 parent 30cb79b commit 89f944e

File tree

4 files changed

+112

-8
lines changed
.changes changelog entry (filename not captured in this page extract)

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
kind: Fixes
2+
body: Use execute wrapper instead of raw execute to add the query comment as query header
3+
time: 2025-05-13T01:02:33.891023-07:00
4+
custom:
5+
Author: versusfacit Kayrnt
6+
Issue: "1090"

dbt-bigquery/src/dbt/adapters/bigquery/connections.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,28 @@ def raw_execute(
274274
limit=limit,
275275
)
276276

277+
def raw_execute_with_comment(
278+
self,
279+
sql: str,
280+
use_legacy_sql: bool = False,
281+
limit: Optional[int] = None,
282+
dry_run: bool = False,
283+
):
284+
"""
285+
A lightweight wrapper over raw_execute that prepends the dbt query comment.
286+
287+
This exists as a "third way" between raw_execute (fully manual, no preprocessing)
288+
and execute (postprocessing and formatting). This is useful when you need query
289+
auditing but no Adapter Response.
290+
"""
291+
sql = self._add_query_comment(sql)
292+
return self.raw_execute(
293+
sql,
294+
use_legacy_sql=use_legacy_sql,
295+
limit=limit,
296+
dry_run=dry_run,
297+
)
298+
277299
def execute(
278300
self, sql, auto_begin=False, fetch=None, limit: Optional[int] = None
279301
) -> Tuple[BigQueryAdapterResponse, "agate.Table"]:

dbt-bigquery/src/dbt/adapters/bigquery/impl.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ def get_column_schema_from_query(self, sql: str) -> List[BigQueryColumn]:
446446
:param str sql: The sql to execute.
447447
:return: List[BigQueryColumn]
448448
"""
449-
_, iterator = self.connections.raw_execute(sql)
449+
_, iterator = self.connections.raw_execute_with_comment(sql)
450450
columns = [self.Column.create_from_field(field) for field in iterator.schema]
451451
flattened_columns = []
452452
for column in columns:
@@ -458,7 +458,7 @@ def get_columns_in_select_sql(self, select_sql: str) -> List[BigQueryColumn]:
458458
try:
459459
conn = self.connections.get_thread_connection()
460460
client = conn.handle
461-
query_job, iterator = self.connections.raw_execute(select_sql)
461+
query_job, iterator = self.connections.raw_execute_with_comment(select_sql)
462462
query_table = client.get_table(query_job.destination)
463463
return self._get_dbt_columns_from_bq_table(query_table)
464464

dbt-bigquery/tests/functional/test_incremental_materialization.py

Lines changed: 82 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
import json
12
import pytest
2-
from dbt.tests.util import run_dbt
3+
import re
4+
from dbt.tests.util import run_dbt, run_dbt_and_capture
35

46
# This is a short term hack, we need to go back
57
# and make adapter implementations of:
68
# https://github.com/dbt-labs/dbt-core/pull/6330
79

10+
11+
_COMMENT_RE = re.compile(r'/\*\s*{[^}]*"dbt_version"\s*:\s*"[^"]+"[^}]*}\s*\*/')
12+
813
_INCREMENTAL_MODEL = """
914
{{
1015
config(
@@ -14,16 +19,41 @@
1419
1520
{% if not is_incremental() %}
1621
17-
select 10 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all
18-
select 30 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour
22+
select
23+
10 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour
24+
union all select
25+
30 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour
1926
2027
{% else %}
2128
22-
select 20 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all
23-
select 40 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour
29+
select
30+
20 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour
31+
union all select
32+
40 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour
2433
2534
{% endif %}
26-
-- Test Comment To Prevent Reccurence of https://github.com/dbt-labs/dbt-core/issues/6485
35+
-- Test Comment To Prevent Recurrence of
36+
-- https://github.com/dbt-labs/dbt-core/issues/6485
37+
"""
38+
39+
INCREMENTAL_MODEL_COPY_PARTITIONS = """
40+
{{
41+
config(
42+
materialized='incremental',
43+
incremental_strategy='insert_overwrite',
44+
partition_by={
45+
'field': '_partition',
46+
'granularity': 'day',
47+
'data_type': 'timestamp',
48+
'time_ingestion_partitioning': True,
49+
'copy_partitions': True,
50+
},
51+
on_schema_change='append_new_columns'
52+
)
53+
}}
54+
SELECT
55+
timestamp_trunc(current_timestamp(), day) AS _partition,
56+
'some value' AS col1
2757
"""
2858

2959

@@ -39,3 +69,49 @@ def test_incremental_model_succeeds(self, project):
3969
assert len(results) == 1
4070
results = run_dbt(["run"])
4171
assert len(results) == 1
72+
73+
74+
class TestAllQueriesHaveDbtComment:
75+
@pytest.fixture(scope="class")
76+
def models(self):
77+
return {"my_incremental_model.sql": INCREMENTAL_MODEL_COPY_PARTITIONS}
78+
79+
def _extract_executed_sql(self, raw_logs: str) -> list[str]:
80+
"""
81+
Return every SQL script that dbt 1.4+ actually sent to BigQuery.
82+
83+
In JSON logs each statement is logged by an event whose `data`
84+
payload is a dict containing a key `"sql"`.
85+
"""
86+
scripts: list[str] = []
87+
for line in raw_logs.splitlines():
88+
try:
89+
parsed = json.loads(line)
90+
except json.JSONDecodeError:
91+
continue
92+
93+
data = parsed.get("data")
94+
if isinstance(data, dict) and "sql" in data:
95+
sql = str(data["sql"]).strip()
96+
if sql:
97+
scripts.append(sql)
98+
return scripts
99+
100+
def _has_structured_comment(self, sql: str) -> bool:
101+
"""True iff the first non-blank line is the structured dbt comment."""
102+
first_line = sql.lstrip().splitlines()[0]
103+
return bool(_COMMENT_RE.fullmatch(first_line))
104+
105+
def test_every_query_has_comment(self, project):
106+
run_dbt(["run"])
107+
_, raw_logs = run_dbt_and_capture(["--debug", "--log-format=json", "run"])
108+
109+
executed_sqls = self._extract_executed_sql(raw_logs)
110+
assert executed_sqls, "No SQL was captured from the dbt logs"
111+
112+
missing = [sql for sql in executed_sqls if not self._has_structured_comment(sql)]
113+
114+
assert not missing, (
115+
f"{len(missing)} queries are missing structured dbt comments.\n\n"
116+
+ "\n\n---\n\n".join(missing)
117+
)

0 commit comments

Comments (0)