diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index b4585a98d..cffb802d6 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -4,6 +4,8 @@ ### New Features and Improvements +* Add native support for authentication through Azure DevOps OIDC + ### Bug Fixes ### Documentation diff --git a/README.md b/README.md index 58a885307..af8a960cb 100644 --- a/README.md +++ b/README.md @@ -126,10 +126,11 @@ Depending on the Databricks authentication method, the SDK uses the following in ### Databricks native authentication -By default, the Databricks SDK for Python initially tries [Databricks token authentication](https://docs.databricks.com/dev-tools/api/latest/authentication.html) (`auth_type='pat'` argument). If the SDK is unsuccessful, it then tries Databricks Workload Identity Federation (WIF) authentication using OIDC (`auth_type="github-oidc"` argument). +By default, the Databricks SDK for Python initially tries [Databricks token authentication](https://docs.databricks.com/dev-tools/api/latest/authentication.html) (`auth_type='pat'` argument). If the SDK is unsuccessful, it then tries Workload Identity Federation (WIF). See [Supported WIF](https://docs.databricks.com/aws/en/dev-tools/auth/oauth-federation-provider) for the supported JWT token providers. - For Databricks token authentication, you must provide `host` and `token`; or their environment variable or `.databrickscfg` file field equivalents. - For Databricks OIDC authentication, you must provide the `host`, `client_id` and `token_audience` _(optional)_ either directly, through the corresponding environment variables, or in your `.databrickscfg` configuration file. +- For Azure DevOps OIDC authentication, the `token_audience` is irrelevant as the audience is always set to `api://AzureADTokenExchange`. 
Also, the `System.AccessToken` pipeline variable required for OIDC request must be exposed as the `SYSTEM_ACCESSTOKEN` environment variable, following [Pipeline variables](https://learn.microsoft.com/en-us/azure/devops/pipelines/build/variables?view=azure-devops&tabs=yaml#systemaccesstoken) | Argument | Description | Environment variable | |------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------| diff --git a/databricks/sdk/credentials_provider.py b/databricks/sdk/credentials_provider.py index 613172cf1..022482370 100644 --- a/databricks/sdk/credentials_provider.py +++ b/databricks/sdk/credentials_provider.py @@ -12,7 +12,7 @@ import threading import time from datetime import datetime -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import google.auth # type: ignore import requests @@ -89,7 +89,6 @@ def inner( @functools.wraps(func) def wrapper(cfg: "Config") -> Optional[CredentialsProvider]: for attr in require: - getattr(cfg, attr) if not getattr(cfg, attr): return None return func(cfg) @@ -103,7 +102,12 @@ def wrapper(cfg: "Config") -> Optional[CredentialsProvider]: def oauth_credentials_strategy(name: str, require: List[str]): """Given the function that receives a Config and returns an OauthHeaderFactory, create an OauthCredentialsProvider with a given name and required configuration - attribute names to be present for this function to be called.""" + attribute names to be present for this function to be called. 
+ + Args: + name: The name of the authentication strategy + require: List of config attributes that must be present + """ def inner( func: Callable[["Config"], OAuthCredentialsProvider], @@ -356,33 +360,47 @@ def token() -> oauth.Token: return OAuthCredentialsProvider(refreshed_headers, token) -@oauth_credentials_strategy("github-oidc", ["host", "client_id"]) -def github_oidc(cfg: "Config") -> Optional[CredentialsProvider]: +def _oidc_credentials_provider( + cfg: "Config", supplier_factory: Callable[[], Any], provider_name: str +) -> Optional[CredentialsProvider]: """ - DatabricksWIFCredentials uses a Token Supplier to get a JWT Token and exchanges - it for a Databricks Token. + Generic OIDC credentials provider that works with any OIDC token supplier. + + Args: + cfg: Databricks configuration + supplier_factory: Callable that returns an OIDC token supplier instance + provider_name: Human-readable name (e.g., "GitHub OIDC", "Azure DevOps OIDC") - Supported suppliers: - - GitHub OIDC + Returns: + OAuthCredentialsProvider if successful, None if supplier unavailable or token retrieval fails """ - supplier = oidc_token_supplier.GitHubOIDCTokenSupplier() + # Try to create the supplier + try: + supplier = supplier_factory() + except Exception as e: + logger.debug(f"{provider_name}: {str(e)}") + return None + # Determine the audience for token exchange audience = cfg.token_audience if audience is None and cfg.is_account_client: audience = cfg.account_id if audience is None and not cfg.is_account_client: audience = cfg.oidc_endpoints.token_endpoint - # Try to get an idToken. If no supplier returns a token, we cannot use this authentication mode. + # Try to get an OIDC token. If no supplier returns a token, we cannot use this authentication mode. 
@oauth_credentials_strategy("github-oidc", ["host", "client_id"])
def github_oidc(cfg: "Config") -> Optional[CredentialsProvider]:
    """
    Authenticate with a JWT obtained from the GitHub OIDC token supplier.

    The JWT is exchanged for a Databricks token by the shared OIDC credentials
    provider; this function only selects the supplier and the name used in logs.

    Args:
        cfg: Databricks configuration

    Returns:
        Whatever the shared OIDC credentials provider returns for this supplier.
    """
    # The supplier class is itself a zero-argument callable, so it can serve as the factory.
    github_supplier_factory = oidc_token_supplier.GitHubOIDCTokenSupplier
    return _oidc_credentials_provider(cfg, github_supplier_factory, "GitHub OIDC")
class AzureDevOpsOIDCTokenSupplier:
    """
    Supplies OIDC tokens from Azure DevOps pipelines.

    The token request URL is assembled from Azure DevOps predefined pipeline variables,
    all of which are read from the environment when the supplier is created.
    See: https://learn.microsoft.com/en-us/azure/devops/pipelines/build/variables

    Raises:
        ValueError: from the constructor, when any required environment variable is unset or empty.
    """

    def __init__(self):
        """Read the Azure DevOps environment variables and fail fast if any is missing."""
        self.access_token = os.environ.get("SYSTEM_ACCESSTOKEN")
        self.collection_uri = os.environ.get("SYSTEM_TEAMFOUNDATIONCOLLECTIONURI")
        self.project_id = os.environ.get("SYSTEM_TEAMPROJECTID")
        self.plan_id = os.environ.get("SYSTEM_PLANID")
        self.job_id = os.environ.get("SYSTEM_JOBID")
        self.hub_name = os.environ.get("SYSTEM_HOSTTYPE")

        # (variable name, value read) pairs; the order is the order used in the error message.
        required = (
            ("SYSTEM_ACCESSTOKEN", self.access_token),
            ("SYSTEM_TEAMFOUNDATIONCOLLECTIONURI", self.collection_uri),
            ("SYSTEM_TEAMPROJECTID", self.project_id),
            ("SYSTEM_PLANID", self.plan_id),
            ("SYSTEM_JOBID", self.job_id),
            ("SYSTEM_HOSTTYPE", self.hub_name),
        )
        # Empty strings count as missing, same as unset variables.
        missing_vars = [name for name, value in required if not value]
        if not missing_vars:
            return

        # The access token is not exposed to scripts by default, so it gets a dedicated hint.
        if "SYSTEM_ACCESSTOKEN" in missing_vars:
            error_msg = "Azure DevOps OIDC: SYSTEM_ACCESSTOKEN env var not found. If calling from Azure DevOps Pipeline, please set this env var following https://learn.microsoft.com/en-us/azure/devops/pipelines/build/variables?view=azure-devops&tabs=yaml#systemaccesstoken"
        else:
            error_msg = f"Azure DevOps OIDC: missing required environment variables: {', '.join(missing_vars)}"
        raise ValueError(error_msg)

    def _token_endpoint(self) -> str:
        """Build the OIDC token request URL for the current pipeline job."""
        # Normalise the separator so a collection URI without a trailing slash
        # does not get glued directly onto the project id.
        base = self.collection_uri.rstrip("/")
        # Format: {collection_uri}/{project_id}/_apis/distributedtask/hubs/{hubName}/plans/{planId}/jobs/{jobId}/oidctoken
        request_url = (
            f"{base}/{self.project_id}/_apis/distributedtask/hubs/{self.hub_name}"
            f"/plans/{self.plan_id}/jobs/{self.job_id}/oidctoken"
        )
        return f"{request_url}?api-version=7.2-preview.1"

    def get_oidc_token(self, audience: str) -> Optional[str]:
        """
        Request an OIDC token for the current pipeline job.

        Args:
            audience: Ignored. Azure DevOps OIDC tokens have a fixed audience of
                "api://AzureADTokenExchange"; the parameter is kept for interface
                compatibility with other OIDC suppliers.

        Returns:
            The token string, or None when the request is not successful, the response
            has no 'oidcToken' field, or any exception occurs. Failures are logged at
            debug level and never raised.
        """
        try:
            endpoint = self._token_endpoint()
            headers = {
                "Authorization": f"Bearer {self.access_token}",
                "Content-Type": "application/json",
                "Content-Length": "0",
            }

            # Azure DevOps OIDC endpoint requires POST request with empty body.
            # NOTE(review): no timeout is passed here; consider adding one.
            response = requests.post(endpoint, headers=headers)
            if not response.ok:
                logger.debug(f"Azure DevOps OIDC: token request failed with status {response.status_code}")
                return None

            # Azure DevOps returns the token in 'oidcToken' field.
            response_json = response.json()
            if "oidcToken" not in response_json:
                logger.debug("Azure DevOps OIDC: response missing 'oidcToken' field")
                return None

            logger.debug("Azure DevOps OIDC: successfully obtained token")
            return response_json["oidcToken"]
        except Exception as e:
            # Deliberately broad: this supplier is best-effort, so network and
            # decoding errors both mean "no token available" rather than a crash.
            logger.debug(f"Azure DevOps OIDC: failed to get token: {e}")
            return None
# Every environment variable the supplier's constructor requires, with the values used across the cases.
_AZURE_DEVOPS_FULL_ENV = {
    "SYSTEM_ACCESSTOKEN": "azure-devops-access-token",
    "SYSTEM_TEAMFOUNDATIONCOLLECTIONURI": "https://dev.azure.com/myorg/",
    "SYSTEM_TEAMPROJECTID": "project-123",
    "SYSTEM_PLANID": "plan-456",
    "SYSTEM_JOBID": "job-789",
    "SYSTEM_HOSTTYPE": "build",
}


def _azure_devops_env_without(*omitted: str) -> Dict[str, str]:
    """Return a fresh copy of the full environment with the named variables left out."""
    return {key: value for key, value in _AZURE_DEVOPS_FULL_ENV.items() if key not in omitted}


_azure_devops_oidc_constructor_test_cases = [
    # Constructor success cases.
    AzureDevOpsOIDCConstructorTestCase(
        name="constructor_success_all_env_vars",
        env_vars=_azure_devops_env_without(),
        should_raise_exception=False,
    ),
    # Constructor failure cases: each one drops exactly one required variable.
    AzureDevOpsOIDCConstructorTestCase(
        name="missing_access_token",
        env_vars=_azure_devops_env_without("SYSTEM_ACCESSTOKEN"),
        should_raise_exception=True,
        expected_exception_message="Azure DevOps OIDC: SYSTEM_ACCESSTOKEN env var not found",
    ),
    AzureDevOpsOIDCConstructorTestCase(
        name="missing_hosttype",
        env_vars=_azure_devops_env_without("SYSTEM_HOSTTYPE"),
        should_raise_exception=True,
        expected_exception_message="Azure DevOps OIDC: missing required environment variables: SYSTEM_HOSTTYPE",
    ),
    AzureDevOpsOIDCConstructorTestCase(
        name="missing_plan_id",
        env_vars=_azure_devops_env_without("SYSTEM_PLANID"),
        should_raise_exception=True,
        expected_exception_message="Azure DevOps OIDC: missing required environment variables: SYSTEM_PLANID",
    ),
    AzureDevOpsOIDCConstructorTestCase(
        name="missing_job_id",
        env_vars=_azure_devops_env_without("SYSTEM_JOBID"),
        should_raise_exception=True,
        expected_exception_message="Azure DevOps OIDC: missing required environment variables: SYSTEM_JOBID",
    ),
    AzureDevOpsOIDCConstructorTestCase(
        name="missing_team_foundation_collection_uri",
        env_vars=_azure_devops_env_without("SYSTEM_TEAMFOUNDATIONCOLLECTIONURI"),
        should_raise_exception=True,
        expected_exception_message="Azure DevOps OIDC: missing required environment variables: SYSTEM_TEAMFOUNDATIONCOLLECTIONURI",
    ),
    AzureDevOpsOIDCConstructorTestCase(
        name="missing_project_id",
        env_vars=_azure_devops_env_without("SYSTEM_TEAMPROJECTID"),
        should_raise_exception=True,
        expected_exception_message="Azure DevOps OIDC: missing required environment variables: SYSTEM_TEAMPROJECTID",
    ),
    # Only the access token is present; the message is matched by prefix because several names follow it.
    AzureDevOpsOIDCConstructorTestCase(
        name="missing_multiple_vars",
        env_vars={
            "SYSTEM_ACCESSTOKEN": "azure-devops-access-token",
        },
        should_raise_exception=True,
        expected_exception_message="Azure DevOps OIDC: missing required environment variables:",
    ),
]
@pytest.mark.parametrize(
    "test_case",
    _azure_devops_oidc_constructor_test_cases,
    ids=lambda case: case.name,  # Show the case name in test ids instead of an opaque index.
)
def test_azure_devops_oidc_constructor_validation(test_case: AzureDevOpsOIDCConstructorTestCase, monkeypatch):
    """Check that the AzureDevOpsOIDCTokenSupplier constructor accepts a complete environment
    and raises ValueError, with the expected message, when required variables are missing."""
    # Start from a clean slate: when the suite itself runs inside an Azure DevOps
    # pipeline these variables are already set, which would mask the "missing" cases.
    for env_name in (
        "SYSTEM_ACCESSTOKEN",
        "SYSTEM_TEAMFOUNDATIONCOLLECTIONURI",
        "SYSTEM_TEAMPROJECTID",
        "SYSTEM_PLANID",
        "SYSTEM_JOBID",
        "SYSTEM_HOSTTYPE",
    ):
        monkeypatch.delenv(env_name, raising=False)

    # Set up only the environment variables this case asks for.
    for key, value in (test_case.env_vars or {}).items():
        monkeypatch.setenv(key, value)

    if not test_case.should_raise_exception:
        # Test that constructor succeeds and that all required attributes are set.
        supplier = AzureDevOpsOIDCTokenSupplier()
        assert supplier is not None
        assert supplier.access_token is not None
        assert supplier.collection_uri is not None
        assert supplier.project_id is not None
        assert supplier.plan_id is not None
        assert supplier.job_id is not None
        assert supplier.hub_name is not None
        return

    # Test that constructor raises ValueError with expected message.
    with pytest.raises(ValueError) as exc_info:
        AzureDevOpsOIDCTokenSupplier()

    # Verify the exception message contains the expected text.
    if test_case.expected_exception_message:
        assert test_case.expected_exception_message in str(
            exc_info.value
        ), f"Exception message should contain '{test_case.expected_exception_message}', but got: {str(exc_info.value)}"
+ expected_url = ( + "https://dev.azure.com/myorg/project-123/_apis/distributedtask/" + "hubs/build/plans/plan-456/jobs/job-789/oidctoken?api-version=7.2-preview.1" + ) + mock_post.assert_called_once_with( + expected_url, + headers={ + "Authorization": "Bearer azure-devops-access-token", + "Content-Type": "application/json", + "Content-Length": "0", + }, + ) + + # For failure cases, verify HTTP request was still made but returned failure. + if test_case.want_none and test_case.response_ok is False: + mock_post.assert_called_once() # Request was made but failed.