This repository was archived by the owner on Sep 2, 2025. It is now read-only.

Commit 35c32f1

Break out credentials as a separate module (#1391)
* break out credentials into its own module
* fix imports
* update unit test mocks for new location of get_bigquery_defaults
Parent: 23e8020
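The last bullet matters for anyone maintaining similar tests: once a function moves, `mock.patch` targets that name it by its old module path no longer point at the defining module. A minimal sketch, assuming a unittest-style test and hypothetical return values:

from unittest import mock

# Old target, pre-commit: "dbt.adapters.bigquery.connections.get_bigquery_defaults"
# New target, post-commit: the function's new defining module.
with mock.patch(
    "dbt.adapters.bigquery.credentials.get_bigquery_defaults",
    return_value=(object(), "my-gcp-project"),  # hypothetical (credentials, project_id)
):
    ...  # exercise code that resolves default credentials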

13 files changed: +275 −259 lines
Changelog entry (new file)

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Separate credentials functionality into its own module for reuse in retry and
+  python submissions
+time: 2024-11-04T17:38:15.940962-05:00
+custom:
+  Author: mikealfare
+  Issue: "1391"

dbt/adapters/bigquery/__init__.py

Lines changed: 5 additions & 5 deletions
@@ -1,8 +1,8 @@
-from dbt.adapters.bigquery.connections import BigQueryConnectionManager  # noqa
-from dbt.adapters.bigquery.connections import BigQueryCredentials
-from dbt.adapters.bigquery.relation import BigQueryRelation  # noqa
-from dbt.adapters.bigquery.column import BigQueryColumn  # noqa
-from dbt.adapters.bigquery.impl import BigQueryAdapter, GrantTarget, PartitionConfig  # noqa
+from dbt.adapters.bigquery.column import BigQueryColumn
+from dbt.adapters.bigquery.connections import BigQueryConnectionManager
+from dbt.adapters.bigquery.credentials import BigQueryCredentials
+from dbt.adapters.bigquery.impl import BigQueryAdapter, GrantTarget, PartitionConfig
+from dbt.adapters.bigquery.relation import BigQueryRelation

 from dbt.adapters.base import AdapterPlugin
 from dbt.include import bigquery
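Since the package `__init__` still re-exports `BigQueryCredentials`, the public import path keeps working; only the defining module changed. A quick sketch to illustrate:

from dbt.adapters.bigquery import BigQueryCredentials as public_path
from dbt.adapters.bigquery.credentials import BigQueryCredentials as new_home

assert public_path is new_home  # one class object, re-exported by the package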

dbt/adapters/bigquery/column.py

Lines changed: 3 additions & 2 deletions
@@ -1,9 +1,10 @@
 from dataclasses import dataclass
-from typing import Optional, List, TypeVar, Iterable, Type, Any, Dict, Union
+from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar, Union
+
+from google.cloud.bigquery import SchemaField

 from dbt.adapters.base.column import Column

-from google.cloud.bigquery import SchemaField

 _PARENT_DATA_TYPE_KEY = "__parent_data_type"


dbt/adapters/bigquery/connections.py

Lines changed: 22 additions & 175 deletions
@@ -1,58 +1,54 @@
 from collections import defaultdict
 from concurrent.futures import TimeoutError
+from contextlib import contextmanager
+from dataclasses import dataclass
 import json
+from multiprocessing.context import SpawnContext
 import re
-from contextlib import contextmanager
-from dataclasses import dataclass, field
+from typing import Dict, Hashable, List, Optional, Tuple, TYPE_CHECKING
 import uuid
-from mashumaro.helper import pass_through
-
-from functools import lru_cache
-from requests.exceptions import ConnectionError
-
-from multiprocessing.context import SpawnContext
-from typing import Optional, Any, Dict, Tuple, Hashable, List, TYPE_CHECKING

+from google.api_core import client_info, client_options, retry
 import google.auth
+from google.auth import impersonated_credentials
 import google.auth.exceptions
 import google.cloud.bigquery
 import google.cloud.exceptions
-from google.api_core import retry, client_info, client_options
-from google.auth import impersonated_credentials
 from google.oauth2 import (
     credentials as GoogleCredentials,
     service_account as GoogleServiceAccountCredentials,
 )
+from requests.exceptions import ConnectionError

 from dbt_common.events.contextvars import get_node_info
 from dbt_common.events.functions import fire_event
-from dbt_common.exceptions import (
-    DbtRuntimeError,
-    DbtConfigError,
-    DbtDatabaseError,
-)
+from dbt_common.exceptions import DbtDatabaseError, DbtRuntimeError
 from dbt_common.invocation import get_invocation_id
-from dbt.adapters.bigquery import gcloud
+from dbt.adapters.base import BaseConnectionManager
 from dbt.adapters.contracts.connection import (
-    ConnectionState,
-    AdapterResponse,
-    Credentials,
     AdapterRequiredConfig,
+    AdapterResponse,
+    ConnectionState,
 )
-from dbt.adapters.exceptions.connection import FailedToConnectError
-from dbt.adapters.base import BaseConnectionManager
 from dbt.adapters.events.logging import AdapterLogger
 from dbt.adapters.events.types import SQLQuery
-from dbt.adapters.bigquery import __version__ as dbt_version
-from dbt.adapters.bigquery.utility import is_base64, base64_to_string
+from dbt.adapters.exceptions.connection import FailedToConnectError

-from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, StrEnum
+import dbt.adapters.bigquery.__version__ as dbt_version
+from dbt.adapters.bigquery.credentials import (
+    BigQueryConnectionMethod,
+    Priority,
+    get_bigquery_defaults,
+    setup_default_credentials,
+)
+from dbt.adapters.bigquery.utility import is_base64, base64_to_string

 if TYPE_CHECKING:
     # Indirectly imported via agate_helper, which is lazy loaded further downfile.
     # Used by mypy for earlier type hints.
     import agate

+
 logger = AdapterLogger("BigQuery")

 BQ_QUERY_JOB_SPLIT = "-----Query Job SQL Follows-----"
@@ -73,33 +69,6 @@
 )


-@lru_cache()
-def get_bigquery_defaults(scopes=None) -> Tuple[Any, Optional[str]]:
-    """
-    Returns (credentials, project_id)
-
-    project_id is returned available from the environment; otherwise None
-    """
-    # Cached, because the underlying implementation shells out, taking ~1s
-    try:
-        credentials, _ = google.auth.default(scopes=scopes)
-        return credentials, _
-    except google.auth.exceptions.DefaultCredentialsError as e:
-        raise DbtConfigError(f"Failed to authenticate with supplied credentials\nerror:\n{e}")
-
-
-class Priority(StrEnum):
-    Interactive = "interactive"
-    Batch = "batch"
-
-
-class BigQueryConnectionMethod(StrEnum):
-    OAUTH = "oauth"
-    SERVICE_ACCOUNT = "service-account"
-    SERVICE_ACCOUNT_JSON = "service-account-json"
-    OAUTH_SECRETS = "oauth-secrets"
-
-
 @dataclass
 class BigQueryAdapterResponse(AdapterResponse):
     bytes_processed: Optional[int] = None
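The helper and enums removed above are moved, not deleted: per the commit message they now live in the new dbt/adapters/bigquery/credentials.py, whose contents are not shown in this excerpt. A presumed sketch of the relevant part of that module, reusing the removed code:

from functools import lru_cache
from typing import Any, Optional, Tuple

import google.auth
import google.auth.exceptions

from dbt_common.dataclass_schema import StrEnum
from dbt_common.exceptions import DbtConfigError


class Priority(StrEnum):
    Interactive = "interactive"
    Batch = "batch"


class BigQueryConnectionMethod(StrEnum):
    OAUTH = "oauth"
    SERVICE_ACCOUNT = "service-account"
    SERVICE_ACCOUNT_JSON = "service-account-json"
    OAUTH_SECRETS = "oauth-secrets"


@lru_cache()
def get_bigquery_defaults(scopes=None) -> Tuple[Any, Optional[str]]:
    """Returns (credentials, project_id); cached because the underlying
    implementation shells out, taking ~1s per call."""
    try:
        return google.auth.default(scopes=scopes)
    except google.auth.exceptions.DefaultCredentialsError as e:
        raise DbtConfigError(f"Failed to authenticate with supplied credentials\nerror:\n{e}")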
@@ -110,128 +79,6 @@ class BigQueryAdapterResponse(AdapterResponse):
     slot_ms: Optional[int] = None


-@dataclass
-class DataprocBatchConfig(ExtensibleDbtClassMixin):
-    def __init__(self, batch_config):
-        self.batch_config = batch_config
-
-
-@dataclass
-class BigQueryCredentials(Credentials):
-    method: BigQueryConnectionMethod = None  # type: ignore
-
-    # BigQuery allows an empty database / project, where it defers to the
-    # environment for the project
-    database: Optional[str] = None
-    schema: Optional[str] = None
-    execution_project: Optional[str] = None
-    quota_project: Optional[str] = None
-    location: Optional[str] = None
-    priority: Optional[Priority] = None
-    maximum_bytes_billed: Optional[int] = None
-    impersonate_service_account: Optional[str] = None
-
-    job_retry_deadline_seconds: Optional[int] = None
-    job_retries: Optional[int] = 1
-    job_creation_timeout_seconds: Optional[int] = None
-    job_execution_timeout_seconds: Optional[int] = None
-
-    # Keyfile json creds (unicode or base 64 encoded)
-    keyfile: Optional[str] = None
-    keyfile_json: Optional[Dict[str, Any]] = None
-
-    # oauth-secrets
-    token: Optional[str] = None
-    refresh_token: Optional[str] = None
-    client_id: Optional[str] = None
-    client_secret: Optional[str] = None
-    token_uri: Optional[str] = None
-
-    dataproc_region: Optional[str] = None
-    dataproc_cluster_name: Optional[str] = None
-    gcs_bucket: Optional[str] = None
-
-    dataproc_batch: Optional[DataprocBatchConfig] = field(
-        metadata={
-            "serialization_strategy": pass_through,
-        },
-        default=None,
-    )
-
-    scopes: Optional[Tuple[str, ...]] = (
-        "https://www.googleapis.com/auth/bigquery",
-        "https://www.googleapis.com/auth/cloud-platform",
-        "https://www.googleapis.com/auth/drive",
-    )
-
-    _ALIASES = {
-        # 'legacy_name': 'current_name'
-        "project": "database",
-        "dataset": "schema",
-        "target_project": "target_database",
-        "target_dataset": "target_schema",
-        "retries": "job_retries",
-        "timeout_seconds": "job_execution_timeout_seconds",
-    }
-
-    def __post_init__(self):
-        if self.keyfile_json and "private_key" in self.keyfile_json:
-            self.keyfile_json["private_key"] = self.keyfile_json["private_key"].replace(
-                "\\n", "\n"
-            )
-        if not self.method:
-            raise DbtRuntimeError("Must specify authentication method")
-
-        if not self.schema:
-            raise DbtRuntimeError("Must specify schema")
-
-    @property
-    def type(self):
-        return "bigquery"
-
-    @property
-    def unique_field(self):
-        return self.database
-
-    def _connection_keys(self):
-        return (
-            "method",
-            "database",
-            "execution_project",
-            "schema",
-            "location",
-            "priority",
-            "maximum_bytes_billed",
-            "impersonate_service_account",
-            "job_retry_deadline_seconds",
-            "job_retries",
-            "job_creation_timeout_seconds",
-            "job_execution_timeout_seconds",
-            "timeout_seconds",
-            "client_id",
-            "token_uri",
-            "dataproc_region",
-            "dataproc_cluster_name",
-            "gcs_bucket",
-            "dataproc_batch",
-        )
-
-    @classmethod
-    def __pre_deserialize__(cls, d: Dict[Any, Any]) -> Dict[Any, Any]:
-        # We need to inject the correct value of the database (aka project) at
-        # this stage, ref
-        # https://github.com/dbt-labs/dbt/pull/2908#discussion_r532927436.
-
-        # `database` is an alias of `project` in BigQuery
-        if "database" not in d:
-            _, database = get_bigquery_defaults()
-            d["database"] = database
-        # `execution_project` default to dataset/project
-        if "execution_project" not in d:
-            d["execution_project"] = d["database"]
-        return d
-
-
 class BigQueryConnectionManager(BaseConnectionManager):
     TYPE = "bigquery"

@@ -433,7 +280,7 @@ def open(cls, connection):

         except google.auth.exceptions.DefaultCredentialsError:
             logger.info("Please log into GCP to continue")
-            gcloud.setup_default_credentials()
+            setup_default_credentials()

         handle = cls.get_bigquery_client(connection.credentials)

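One behavior travels with the relocated `__pre_deserialize__` hook shown in the removal above: a profile that omits `database` (aka `project`) has it filled from application-default credentials, and `execution_project` then defaults to the same project. A hypothetical illustration (assumes ADC is configured in the environment):

raw = {"method": "oauth", "schema": "analytics"}  # hypothetical profile dict
hydrated = BigQueryCredentials.__pre_deserialize__(dict(raw))
assert hydrated["execution_project"] == hydrated["database"]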