diff --git a/dbt-snowflake/.changes/unreleased/Features-20250909-221007.yaml b/dbt-snowflake/.changes/unreleased/Features-20250909-221007.yaml
new file mode 100644
index 000000000..c0b67949a
--- /dev/null
+++ b/dbt-snowflake/.changes/unreleased/Features-20250909-221007.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Introduction of Workload Identity Federation as a supported method of authentication to Snowflake
+time: 2025-09-09T22:10:07.504016+01:00
+custom:
+  Author: roryjbd,sfc-gh-pmansour
+  Issue: "1234"
diff --git a/dbt-snowflake/src/dbt/adapters/snowflake/connections.py b/dbt-snowflake/src/dbt/adapters/snowflake/connections.py
index b6139e578..42301fe7b 100644
--- a/dbt-snowflake/src/dbt/adapters/snowflake/connections.py
+++ b/dbt-snowflake/src/dbt/adapters/snowflake/connections.py
@@ -38,6 +38,8 @@
     BindUploadError,
 )
 
+from snowflake.connector.network import WORKLOAD_IDENTITY_AUTHENTICATOR
+
 from dbt_common.exceptions import (
     DbtInternalError,
     DbtRuntimeError,
@@ -81,6 +83,8 @@ def setup_snowflake_logging(level: str):
 
 _TOKEN_REQUEST_URL = "https://{}.snowflakecomputing.com/oauth/token-request"
 
+ACCEPTED_WORKLOAD_IDENTITY_PROVIDERS = ["OIDC", "AZURE", "GCP", "AWS"]
+
 ERROR_REDACTION_PATTERNS = {
     re.compile(r"Row Values: \[(.|\n)*\]"): "Row Values: [redacted]",
     re.compile(r"Duplicate field key '(.|\n)*'"): "Duplicate field key '[redacted]'",
@@ -125,9 +129,20 @@ class SnowflakeCredentials(Credentials):
     # this needs to default to `None` so that we can tell if the user set it; see `__post_init__()`
     reuse_connections: Optional[bool] = None
     s3_stage_vpce_dns_name: Optional[str] = None
+    workload_identity_provider: Optional[str] = None
+    workload_identity_entra_resource: Optional[str] = None
     # Setting this to 0.0 will disable platform detection which adds query latency
     # this should only be set to a non-zero value if you are using WIF authentication
-    platform_detection_timeout_seconds: float = 0.0
+    # defaults to `None` so that `__post_init__()` can tell if the user set it
+    platform_detection_timeout_seconds: Optional[float] = None
 
     def __post_init__(self):
+        # A field default is evaluated once, when the class body runs, so it cannot depend
+        # on another field's per-instance value; resolve the default here instead.
+        # Platform detection stays disabled (0.0) unless a WIF provider is configured.
+        if (
+            self.platform_detection_timeout_seconds is None
+            and not self.workload_identity_provider
+        ):
+            self.platform_detection_timeout_seconds = 0.0
         if self.authenticator != "oauth" and (self.oauth_client_secret or self.oauth_client_id):
@@ -196,6 +211,8 @@ def _connection_keys(self):
             "insecure_mode",
             "reuse_connections",
             "s3_stage_vpce_dns_name",
+            "workload_identity_provider",
+            "workload_identity_entra_resource",
             "platform_detection_timeout_seconds",
         )
 
@@ -247,6 +264,36 @@ def auth_args(self):
                 result["token"] = self.token
                 result["authenticator"] = "oauth"
 
+            elif self.authenticator.lower() == "workload_identity":
+                result["authenticator"] = WORKLOAD_IDENTITY_AUTHENTICATOR
+
+                if (
+                    not self.workload_identity_provider
+                    or self.workload_identity_provider.upper()
+                    not in ACCEPTED_WORKLOAD_IDENTITY_PROVIDERS
+                ):
+
+                    raise DbtConfigError(
+                        "workload_identity_provider must be set to one of the following values if authenticator='workload_identity'!:\n"
+                        f"{', '.join(ACCEPTED_WORKLOAD_IDENTITY_PROVIDERS)}\n\n"
+                        f"Provided workload_identity_provider was '{self.workload_identity_provider}'"
+                    )
+
+                result["workload_identity_provider"] = self.workload_identity_provider
+
+                if self.token:
+                    result["token"] = self.token
+
+                if self.workload_identity_entra_resource:
+                    if self.workload_identity_provider.upper() != "AZURE":
+                        raise DbtConfigError(
+                            "workload_identity_entra_resource can only be set if workload_identity_provider is Azure"
+                        )
+
+                    result["workload_identity_entra_resource"] = (
+                        self.workload_identity_entra_resource
+                    )
+
             # enable id token cache for linux
             result["client_store_temporary_credential"] = True
             # enable mfa token cache for linux
diff --git a/dbt-snowflake/src/dbt/include/snowflake/profile_template.yml b/dbt-snowflake/src/dbt/include/snowflake/profile_template.yml
index b437853e7..6596bf42c 100644
--- a/dbt-snowflake/src/dbt/include/snowflake/profile_template.yml
+++ b/dbt-snowflake/src/dbt/include/snowflake/profile_template.yml
@@ -20,6 +20,10 @@ prompts:
       authenticator:
         hint: "'externalbrowser' or a valid Okta URL"
         default: 'externalbrowser'
+    workload_identity:
+      _fixed_authenticator: workload_identity
+      workload_identity_provider:
+        hint: Must be one of the following - [OIDC, AWS, AZURE, GCP]
   role:
     hint: 'dev role'
   warehouse:
diff --git a/dbt-snowflake/tests/functional/auth_tests/test_workload_identity_federation_aws.py b/dbt-snowflake/tests/functional/auth_tests/test_workload_identity_federation_aws.py
new file mode 100644
index 000000000..f972ac494
--- /dev/null
+++ b/dbt-snowflake/tests/functional/auth_tests/test_workload_identity_federation_aws.py
@@ -0,0 +1,85 @@
+"""
+Functional tests for Snowflake Workload Identity Federation (WIF) with AWS authentication.
+Prerequisites for testing WIF with AWS:
+1. **AWS IAM Configuration:**
+   Create an IAM role that can be assumed by the EC2 service. An example trust policy below:
+   ```
+   {
+     "Version": "2012-10-17",
+     "Statement": [
+       {
+         "Effect": "Allow",
+         "Principal": {
+           "Service": "ec2.amazonaws.com" // or your specific service
+         },
+         "Action": "sts:AssumeRole"
+       }
+     ]
+   }
+   ```
+2. **EC2 Instance:**
+   Launch an EC2 instance with the IAM role attached as an instance profile.
+   Connect to the EC2 instance and run this test from there (see step 4).
+3. **Snowflake User Configuration:**
+   Create a service user in Snowflake with WIF enabled:
+   ```sql
+   CREATE USER <username>
+   WORKLOAD_IDENTITY = (
+     TYPE = AWS
+     ARN = '<iam_role_arn>'
+   )
+   TYPE = SERVICE
+   DEFAULT_ROLE = <role>;
+   ```
+   Replace `<username>` with your desired username and `<iam_role_arn>`
+   with the ARN of your AWS IAM role.
+4. **AWS Environment:**
+   This test must run from within the configured EC2 environment.
+   Connect to the EC2 instance using SSH or similar.
+   Clone this repository, run the setup, and execute this test e.g.
+   `hatch run pytest tests/functional/auth_tests/test_workload_identity_federation_aws.py::test_snowflake_wif_basic_functionality`
+5. **Environment Variables:**
+   Set the following environment variables for testing:
+   - SNOWFLAKE_TEST_ACCOUNT: Your Snowflake account identifier
+   - SNOWFLAKE_TEST_USER: The Snowflake service user created for WIF
+   - SNOWFLAKE_TEST_DATABASE: Test database name
+   - SNOWFLAKE_TEST_WAREHOUSE: Test warehouse name
+   - SNOWFLAKE_TEST_ROLE: Snowflake Role for the user (optional)
+   - SNOWFLAKE_TEST_SCHEMA: Schema for testing (optional, defaults to schema in profile)
+Note: WIF authentication relies on being in the AWS environment, so these tests can't be run locally or in the CI/CD pipeline.
+"""
+
+import os
+from dbt.tests.util import run_dbt
+import pytest
+
+
+_MODELS__MODEL_1_SQL = """
+select 1 as id, 'wif_test' as source
+"""
+
+
+class TestSnowflakeWorkloadIdentityFederation:
+    @pytest.fixture(scope="class", autouse=True)
+    def dbt_profile_target(self):
+        return {
+            "type": "snowflake",
+            "threads": 4,
+            "account": os.getenv("SNOWFLAKE_TEST_ACCOUNT"),
+            "user": os.getenv("SNOWFLAKE_TEST_USER"),
+            "database": os.getenv("SNOWFLAKE_TEST_DATABASE"),
+            "warehouse": os.getenv("SNOWFLAKE_TEST_WAREHOUSE"),
+            "role": os.getenv("SNOWFLAKE_TEST_ROLE"),
+            "authenticator": "workload_identity",
+            "workload_identity_provider": "aws",
+        }
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "model_1.sql": _MODELS__MODEL_1_SQL,
+        }
+
+    def test_snowflake_wif_basic_functionality(self, project):
+        """Test basic dbt functionality with WIF authentication"""
+        run_dbt()
diff --git a/dbt-snowflake/tests/functional/auth_tests/test_workload_identity_federation_oidc.py b/dbt-snowflake/tests/functional/auth_tests/test_workload_identity_federation_oidc.py
new file mode 100644
index 000000000..85da7c1d9
--- /dev/null
+++ b/dbt-snowflake/tests/functional/auth_tests/test_workload_identity_federation_oidc.py
@@ -0,0 +1,106 @@
+"""
+Functional tests for Snowflake Workload Identity Federation (WIF) with OIDC authentication.
+Prerequisites for testing WIF with OIDC:
+
+1. **Create a Snowflake User with OIDC Auth**
+
+   Create a service user in Snowflake with WIF enabled:
+   ```sql
+   CREATE USER <username>
+   TYPE = SERVICE
+   WORKLOAD_IDENTITY = (
+     TYPE = OIDC,
+     ISSUER = 'https://token.actions.githubusercontent.com',
+     SUBJECT = 'repo:<github_org>/dbt-adapters:ref:refs/heads/main',
+     OIDC_AUDIENCE_LIST = ('snowflakecomputing.com')
+   );
+   ```
+
+2. **Create a GitHub Actions workflow that generates the OIDC token and runs the test**
+
+   ```yaml
+
+name: Run Snowflake Workload Identity Federation (WIF) Test
+on:
+  workflow_dispatch:
+  push:
+    branches: [ main ]
+
+permissions:
+  contents: read
+  id-token: write
+
+jobs:
+  run-snowflake:
+    runs-on: ubuntu-latest
+    env:
+      SNOWFLAKE_TEST_ACCOUNT: <account>
+      SNOWFLAKE_TEST_DATABASE: <database>
+      SNOWFLAKE_TEST_WAREHOUSE: <warehouse>
+      SNOWFLAKE_TEST_ROLE: <role>
+      SNOWFLAKE_TEST_USER: <username>
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Get OIDC token for Snowflake
+        id: oidc
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const token = await core.getIDToken('snowflakecomputing.com');
+            core.setOutput('id_token', token);
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - uses: pypa/hatch@install
+
+      - run: hatch run setup
+        working-directory: ./dbt-snowflake
+
+      - run: hatch run python -m pytest tests/functional/auth_tests/test_workload_identity_federation_oidc.py
+        working-directory: ./dbt-snowflake
+        env:
+          OIDC_TOKEN: ${{ steps.oidc.outputs.id_token }}
+   ```
+
+"""
+
+import os
+from dbt.tests.util import run_dbt
+import pytest
+
+
+_MODELS__MODEL_1_SQL = """
+select 1 as id, 'wif_test' as source
+"""
+
+
+class TestSnowflakeWorkloadIdentityFederation:
+    @pytest.fixture(scope="class", autouse=True)
+    def dbt_profile_target(self):
+        return {
+            "type": "snowflake",
+            "threads": 4,
+            "account": os.getenv("SNOWFLAKE_TEST_ACCOUNT"),
+            "user": os.getenv("SNOWFLAKE_TEST_USER"),
+            "database": os.getenv("SNOWFLAKE_TEST_DATABASE"),
+            "warehouse": os.getenv("SNOWFLAKE_TEST_WAREHOUSE"),
+            "role": os.getenv("SNOWFLAKE_TEST_ROLE"),
+            "authenticator": "workload_identity",
+            "workload_identity_provider": "oidc",
+            "token": os.getenv("OIDC_TOKEN"),
+        }
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "model_1.sql": _MODELS__MODEL_1_SQL,
+        }
+
+    def test_snowflake_wif_basic_functionality(self, project):
+        """Test basic dbt functionality with WIF authentication"""
+        run_dbt()
diff --git a/dbt-snowflake/tests/unit/test_connections.py b/dbt-snowflake/tests/unit/test_connections.py
index 3e1cebb3d..93328d5f4 100644
--- a/dbt-snowflake/tests/unit/test_connections.py
+++ b/dbt-snowflake/tests/unit/test_connections.py
@@ -5,6 +5,7 @@ from unittest.mock import Mock, patch, call
 import multiprocessing
 
 from dbt.adapters.exceptions.connection import FailedToConnectError
+from dbt_common.exceptions import DbtConfigError
 import dbt.adapters.snowflake.connections as connections
 import dbt.adapters.events.logging
 
@@ -230,3 +231,102 @@ def test_snowflake_oauth_expired_token_raises_error():
 
     with pytest.raises(FailedToConnectError):
         adapter.open()
+
+
+def test_connnections_credentials_passes_through_wif_params():
+    credentials = {
+        "account": "account_id_with_underscores",
+        "database": "database",
+        "warehouse": "warehouse",
+        "schema": "schema",
+        "authenticator": "workload_identity",
+        "workload_identity_provider": "azure",
+        "workload_identity_entra_resource": "app://123",
+        "token": "test_token",
+    }
+    auth_args = connections.SnowflakeCredentials(**credentials).auth_args()
+    assert auth_args["authenticator"] == "WORKLOAD_IDENTITY"
+    assert auth_args["workload_identity_provider"] == "azure"
+    assert auth_args["workload_identity_entra_resource"] == "app://123"
+    assert auth_args["token"] == "test_token"
+
+
+def test_connnections_credentials_wif_authenticator_fails_without_provider():
+    credentials = {
+        "account": "account_id_with_underscores",
+        "database": "database",
+        "warehouse": "warehouse",
+        "schema": "schema",
+        "authenticator": "workload_identity",
+        # Missing workload_identity_provider
+    }
+    with pytest.raises(DbtConfigError) as excinfo:
+        connections.SnowflakeCredentials(**credentials).auth_args()
+    assert (
+        "workload_identity_provider must be set to one of the following values if authenticator='workload_identity'!"
+        in str(excinfo.value)
+    )
+
+
+def test_connnections_credentials_wif_authenticator_fails_with_invalid_provider():
+    credentials = {
+        "account": "account_id_with_underscores",
+        "database": "database",
+        "warehouse": "warehouse",
+        "schema": "schema",
+        "authenticator": "workload_identity",
+        "workload_identity_provider": "some_non_existent_cloud_provider",
+    }
+    with pytest.raises(DbtConfigError) as excinfo:
+        connections.SnowflakeCredentials(**credentials).auth_args()
+    assert (
+        "workload_identity_provider must be set to one of the following values if authenticator='workload_identity'!"
+        in str(excinfo.value)
+    )
+
+
+def test_connnections_credentials_wif_authenticator_fails_with_entra_resource_and_non_azure_provider():
+    credentials = {
+        "account": "account_id_with_underscores",
+        "database": "database",
+        "warehouse": "warehouse",
+        "schema": "schema",
+        "authenticator": "workload_identity",
+        "workload_identity_provider": "aws",
+        "workload_identity_entra_resource": "app://123",
+    }
+    with pytest.raises(DbtConfigError) as excinfo:
+        connections.SnowflakeCredentials(**credentials).auth_args()
+    assert (
+        "workload_identity_entra_resource can only be set if workload_identity_provider is Azure"
+        in str(excinfo.value)
+    )
+
+
+def test_connnections_credentials_platform_detection_timeout_defaults_by_wif_provider():
+    base_credentials = {
+        "account": "account_id_with_underscores",
+        "database": "database",
+        "warehouse": "warehouse",
+        "schema": "schema",
+    }
+    # Without a WIF provider, platform detection stays disabled by default.
+    default_creds = connections.SnowflakeCredentials(**base_credentials)
+    assert default_creds.platform_detection_timeout_seconds == 0.0
+
+    # With a WIF provider, the unset value is left as None instead of being forced to 0.0.
+    wif_creds = connections.SnowflakeCredentials(
+        **base_credentials,
+        authenticator="workload_identity",
+        workload_identity_provider="aws",
+    )
+    assert wif_creds.platform_detection_timeout_seconds is None
+
+    # An explicit user value is kept as-is.
+    explicit_creds = connections.SnowflakeCredentials(
+        **base_credentials,
+        authenticator="workload_identity",
+        workload_identity_provider="aws",
+        platform_detection_timeout_seconds=0.0,
+    )
+    assert explicit_creds.platform_detection_timeout_seconds == 0.0