Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

### New Features and Improvements

* Add native support for authentication through Azure DevOps OIDC

### Bug Fixes

### Documentation
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,11 @@ Depending on the Databricks authentication method, the SDK uses the following in

### Databricks native authentication

By default, the Databricks SDK for Python initially tries [Databricks token authentication](https://docs.databricks.com/dev-tools/api/latest/authentication.html) (`auth_type='pat'` argument). If the SDK is unsuccessful, it then tries Databricks Workload Identity Federation (WIF) authentication using OIDC (`auth_type="github-oidc"` argument).
By default, the Databricks SDK for Python initially tries [Databricks token authentication](https://docs.databricks.com/dev-tools/api/latest/authentication.html) (`auth_type='pat'` argument). If the SDK is unsuccessful, it then tries Workload Identity Federation (WIF). See [Supported WIF](https://docs.databricks.com/aws/en/dev-tools/auth/oauth-federation-provider) for the supported JWT token providers.

- For Databricks token authentication, you must provide `host` and `token`; or their environment variable or `.databrickscfg` file field equivalents.
- For Databricks OIDC authentication, you must provide the `host`, `client_id` and `token_audience` _(optional)_ either directly, through the corresponding environment variables, or in your `.databrickscfg` configuration file.
- For Azure DevOps OIDC authentication, `token_audience` is ignored because the audience is always set to `api://AzureADTokenExchange`. Also, the `System.AccessToken` pipeline variable required for the OIDC request must be exposed as the `SYSTEM_ACCESSTOKEN` environment variable; see [Pipeline variables](https://learn.microsoft.com/en-us/azure/devops/pipelines/build/variables?view=azure-devops&tabs=yaml#systemaccesstoken).

| Argument | Description | Environment variable |
|------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|
Expand Down
92 changes: 89 additions & 3 deletions databricks/sdk/credentials_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ def inner(
@functools.wraps(func)
def wrapper(cfg: "Config") -> Optional[CredentialsProvider]:
for attr in require:
getattr(cfg, attr)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seemed like unnecessary Double check

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure? what does this function do?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function checks if the attribute exists in the config or not. This check is redundant, as it is done in the next line as well

if not getattr(cfg, attr):
return None
return func(cfg)
Expand All @@ -100,16 +99,37 @@ def wrapper(cfg: "Config") -> Optional[CredentialsProvider]:
return inner


def oauth_credentials_strategy(name: str, require: List[str]):
def oauth_credentials_strategy(name: str, require: List[str], env_vars: Optional[List[str]] = None):
"""Given the function that receives a Config and returns an OauthHeaderFactory,
create an OauthCredentialsProvider with a given name and required configuration
attribute names to be present for this function to be called."""
attribute names to be present for this function to be called.

Args:
name: The name of the authentication strategy
require: List of config attributes that must be present
env_vars: Optional list of environment variables that must all be present for this strategy
"""

def inner(
func: Callable[["Config"], OAuthCredentialsProvider],
) -> OauthCredentialsStrategy:
@functools.wraps(func)
def wrapper(cfg: "Config") -> Optional[OAuthCredentialsProvider]:
# Early environment detection - check before config validation
if env_vars and not all(os.environ.get(var) for var in env_vars):
# Provide specific error message for Azure DevOps OIDC SYSTEM_ACCESSTOKEN
if (
name == "azdo-oidc"
and "SYSTEM_ACCESSTOKEN" in env_vars
and not os.environ.get("SYSTEM_ACCESSTOKEN")
):
logger.debug(
"Azure DevOps OIDC: SYSTEM_ACCESSTOKEN env var not found. If calling from Azure DevOps Pipeline, please set this env var following https://learn.microsoft.com/en-us/azure/devops/pipelines/build/variables?view=azure-devops&tabs=yaml#systemaccesstoken"
)
else:
logger.debug(f"{name}: required environment variables not present, skipping")
return None

for attr in require:
if not getattr(cfg, attr):
return None
Expand Down Expand Up @@ -408,6 +428,71 @@ def token() -> oauth.Token:
return OAuthCredentialsProvider(refreshed_headers, token)


@oauth_credentials_strategy(
"azdo-oidc",
["host", "client_id"],
env_vars=[
"SYSTEM_ACCESSTOKEN",
"SYSTEM_TEAMFOUNDATIONCOLLECTIONURI",
"SYSTEM_TEAMPROJECTID",
"SYSTEM_PLANID",
"SYSTEM_JOBID",
"SYSTEM_HOSTTYPE",
],
)
def azure_devops_oidc(cfg: "Config") -> Optional[CredentialsProvider]:
"""
Azure DevOps OIDC authentication uses a Token Supplier to get a JWT Token
and exchanges it for a Databricks Token.

Supported in Azure DevOps pipelines with OIDC service connections.
"""
supplier = oidc_token_supplier.AzureDevOpsOIDCTokenSupplier()

audience = cfg.token_audience
if audience is None and cfg.is_account_client:
audience = cfg.account_id
if audience is None and not cfg.is_account_client:
audience = cfg.oidc_endpoints.token_endpoint

# Try to get an idToken. If no supplier returns a token, we cannot use this authentication mode.
id_token = supplier.get_oidc_token(audience)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is almost the same as github_oidc. Can and should we reuse the code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The old code should not be changed, right? The common part is within the GithubOIDC function. I can refactor the code to put the common parts outside it and use them in AzureDevOps. What is the better option?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On a quick look, I think that the only difference is the supplier being used.
We may be able to have

#TODO: better name
def _internal_oidc(cfg: "Config", supplier: ) -> Optional[CredentialsProvider]:
...

@oauth_credentials_strategy("azure-devops-oidc", ["host", "client_id"])
def azure_devops_oidc(cfg: "Config") -> Optional[CredentialsProvider]:
    return _internal_oidc(cfg, () -> oidc_token_supplier.AzureDevOpsOIDCTokenSupplier())

if not id_token:
logger.debug("Azure DevOps OIDC: no token available, skipping authentication method")
return None

logger.info("Configured Azure DevOps OIDC authentication")

def token_source_for(audience: str) -> oauth.TokenSource:
id_token = supplier.get_oidc_token(audience)
if not id_token:
# Should not happen, since we checked it above.
raise Exception("Cannot get Azure DevOps OIDC token")

return oauth.ClientCredentials(
client_id=cfg.client_id,
client_secret="", # we have no (rotatable) secrets in OIDC flow
token_url=cfg.oidc_endpoints.token_endpoint,
endpoint_params={
"subject_token_type": "urn:ietf:params:oauth:token-type:jwt",
"subject_token": id_token,
"grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
},
scopes=["all-apis"],
use_params=True,
disable_async=cfg.disable_async_token_refresh,
)

def refreshed_headers() -> Dict[str, str]:
token = token_source_for(audience).token()
return {"Authorization": f"{token.token_type} {token.access_token}"}

def token() -> oauth.Token:
return token_source_for(audience).token()

return OAuthCredentialsProvider(refreshed_headers, token)


@oauth_credentials_strategy("github-oidc-azure", ["host", "azure_client_id"])
def github_oidc_azure(cfg: "Config") -> Optional[CredentialsProvider]:
if "ACTIONS_ID_TOKEN_REQUEST_TOKEN" not in os.environ:
Expand Down Expand Up @@ -1016,6 +1101,7 @@ def __init__(self) -> None:
env_oidc,
file_oidc,
github_oidc,
azure_devops_oidc,
azure_service_principal,
github_oidc_azure,
azure_cli,
Expand Down
60 changes: 60 additions & 0 deletions databricks/sdk/oidc_token_supplier.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import logging
import os
from typing import Optional

import requests

logger = logging.getLogger("databricks.sdk")


class GitHubOIDCTokenSupplier:
"""
Expand All @@ -26,3 +29,60 @@ def get_oidc_token(self, audience: str) -> Optional[str]:
return None

return response_json["value"]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same: GitHubOIDC does not validate on create. Is there a reason to change this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is done to ensure Early exit. This is similar to what is done in Go SDK. If the environment variables are not set then we are sure that we are not in Azure DevOps Environment so we should exit at the earliest and try other providers.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add a TODO on the GitHub OIDC implementation to make it clear that it is the one with tech debt?


class AzureDevOpsOIDCTokenSupplier:
    """
    Supplies OIDC tokens from Azure DevOps pipelines.

    Constructs the OIDC token request URL using official Azure DevOps predefined variables.
    See: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/variables
    """

    # Seconds to wait for the token endpoint before giving up. Without a timeout,
    # `requests` waits indefinitely and a stalled endpoint would block every
    # authentication method that is tried after this one.
    _REQUEST_TIMEOUT_SECONDS = 30

    def get_oidc_token(self, audience: str) -> Optional[str]:
        """Request an OIDC token for the currently running pipeline job.

        Args:
            audience: Ignored. Kept only so this supplier has the same call
                signature as the other OIDC token suppliers.

        Returns:
            The value of the `oidcToken` field of the response, or None when any
            required environment variable is missing, the request fails, or the
            response does not carry a usable token. Failures are logged at debug
            level and never raised.
        """
        # Note: Azure DevOps OIDC tokens have a fixed audience of "api://AzureADTokenExchange"
        # The audience parameter is ignored but kept for interface compatibility with other OIDC suppliers

        access_token = os.environ.get("SYSTEM_ACCESSTOKEN")
        collection_uri = os.environ.get("SYSTEM_TEAMFOUNDATIONCOLLECTIONURI")
        project_id = os.environ.get("SYSTEM_TEAMPROJECTID")
        plan_id = os.environ.get("SYSTEM_PLANID")
        job_id = os.environ.get("SYSTEM_JOBID")
        hub_name = os.environ.get("SYSTEM_HOSTTYPE")

        # Check for required variables (an empty value counts as missing)
        if not all([access_token, collection_uri, project_id, plan_id, job_id, hub_name]):
            # not in Azure DevOps pipeline
            logger.debug("Azure DevOps OIDC: not in Azure DevOps pipeline environment")
            return None

        # Construct the OIDC token request URL
        # Format: {collection_uri}/{project_id}/_apis/distributedtask/hubs/{hubName}/plans/{planId}/jobs/{jobId}/oidctoken
        # The collection URI is normalised first so the URL is well-formed whether
        # or not the variable ends with a slash.
        base_uri = collection_uri.rstrip("/")
        request_url = (
            f"{base_uri}/{project_id}/_apis/distributedtask/hubs/{hub_name}"
            f"/plans/{plan_id}/jobs/{job_id}/oidctoken"
        )

        # Add API version (audience is fixed to "api://AzureADTokenExchange" by Azure DevOps)
        endpoint = f"{request_url}?api-version=7.2-preview.1"
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
            "Content-Length": "0",
        }

        try:
            # Azure DevOps OIDC endpoint requires POST request with empty body
            response = requests.post(endpoint, headers=headers, timeout=self._REQUEST_TIMEOUT_SECONDS)
            if not response.ok:
                logger.debug("Azure DevOps OIDC: token request failed with status %s", response.status_code)
                return None

            # Azure DevOps returns the token in 'oidcToken' field
            response_json = response.json()
        except Exception as e:
            # Deliberately broad: this supplier is best-effort, so any transport or
            # decoding failure means "no token here" rather than an error.
            logger.debug("Azure DevOps OIDC: failed to get token: %s", e)
            return None

        oidc_token = response_json.get("oidcToken") if isinstance(response_json, dict) else None
        # Only hand back a non-empty string so the declared Optional[str] contract
        # holds even when the field is present but null, empty, or of another type.
        if not isinstance(oidc_token, str) or not oidc_token:
            logger.debug("Azure DevOps OIDC: response missing 'oidcToken' field")
            return None

        logger.debug("Azure DevOps OIDC: successfully obtained token")
        return oidc_token
Loading
Loading