Add support for agent user and embedded credential strategies

aravind-segu · aravind-segu · commit 47972958c6e6 · 2025-01-30T16:54:53.000-08:00
Signed-off-by: aravind-segu <aravind.segu@databricks.com>
diff --git a/databricks/sdk/credentials_provider.py b/databricks/sdk/credentials_provider.py
@@ -9,6 +9,7 @@
 import platform
 import subprocess
 import sys
+import threading
 import time
 from datetime import datetime
 from typing import Callable, Dict, List, Optional, Tuple, Union
@@ -313,12 +314,11 @@ def github_oidc_azure(cfg: 'Config') -> Optional[CredentialsProvider]:
         # detect Azure AD Tenant ID if it's not specified directly
         token_endpoint = cfg.oidc_endpoints.token_endpoint
         cfg.azure_tenant_id = token_endpoint.replace(aad_endpoint, '').split('/')[0]
-    inner = ClientCredentials(
-        client_id=cfg.azure_client_id,
-        client_secret="", # we have no (rotatable) secrets in OIDC flow
-        token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
-        endpoint_params=params,
-        use_params=True)
+    inner = ClientCredentials(client_id=cfg.azure_client_id,
+                              client_secret="", # we have no (rotatable) secrets in OIDC flow
+                              token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
+                              endpoint_params=params,
+                              use_params=True)
 
     def refreshed_headers() -> Dict[str, str]:
         token = inner.token()
@@ -717,14 +717,18 @@ def inner() -> Dict[str, str]:
 # This Code is derived from Mlflow DatabricksModelServingConfigProvider
 # https://github.com/mlflow/mlflow/blob/1219e3ef1aac7d337a618a352cd859b336cf5c81/mlflow/legacy_databricks_cli/configure/provider.py#L332
 class ModelServingAuthProvider():
+    USER_CREDENTIALS = "user_credentials"
+    EMBEDDED_CREDENTIALS = "embedded_credentials"
+
     _MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH = "/var/credentials-secret/model-dependencies-oauth-token"
 
-    def __init__(self):
+    def __init__(self, credential_type):
         self.expiry_time = -1
         self.current_token = None
         self.refresh_duration = 300 # 300 Seconds
+        self.credential_type = credential_type
 
-    def should_fetch_model_serving_environment_oauth(self) -> bool:
+    def should_fetch_model_serving_environment_oauth() -> bool:
         """
         Check whether this is the model serving environment
         Additionally check if the oauth token file path exists
@@ -733,15 +737,15 @@ def should_fetch_model_serving_environment_oauth(self) -> bool:
         is_in_model_serving_env = (os.environ.get("IS_IN_DB_MODEL_SERVING_ENV")
                                    or os.environ.get("IS_IN_DATABRICKS_MODEL_SERVING_ENV") or "false")
         return (is_in_model_serving_env == "true"
-                and os.path.isfile(self._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH))
+                and os.path.isfile(ModelServingAuthProvider._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH))
 
     def get_model_dependency_oauth_token(self, should_retry=True) -> str:
         # Use Cached value if it is valid
         if self.current_token is not None and self.expiry_time > time.time():
             return self.current_token
 
         try:
-            with open(self._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH) as f:
+            with open(ModelServingAuthProvider._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH) as f:
                 oauth_dict = json.load(f)
                 self.current_token = oauth_dict["OAUTH_TOKEN"][0]["oauthTokenValue"]
                 self.expiry_time = time.time() + self.refresh_duration
@@ -758,25 +762,38 @@ def get_model_dependency_oauth_token(self, should_retry=True) -> str:
                 ) from e
         return self.current_token
 
+    def get_invokers_token(self):
+        """Return the ``invokers_token`` value stored on the current thread object.
+
+        Raises:
+            RuntimeError: if the attribute is absent or set to None.
+        """
+        # Read from the Thread object's __dict__ on every call; nothing is cached here.
+        invokers_token = threading.current_thread().__dict__.get("invokers_token")
+        if invokers_token is None:
+            raise RuntimeError("Unable to read Invokers Token in Databricks Model Serving")
+        return invokers_token
+
     def get_databricks_host_token(self) -> Optional[Tuple[str, str]]:
-        if not self.should_fetch_model_serving_environment_oauth():
+        if not ModelServingAuthProvider.should_fetch_model_serving_environment_oauth():
             return None
 
         # read from DB_MODEL_SERVING_HOST_ENV_VAR if available otherwise MODEL_SERVING_HOST_ENV_VAR
         host = os.environ.get("DATABRICKS_MODEL_SERVING_HOST_URL") or os.environ.get(
             "DB_MODEL_SERVING_HOST_URL")
-        token = self.get_model_dependency_oauth_token()
+        token = self.get_model_dependency_oauth_token(
+        ) if self.credential_type == ModelServingAuthProvider.EMBEDDED_CREDENTIALS else self.get_invokers_token(
+        )
 
         return (host, token)
 
 
-@credentials_strategy('model-serving', [])
-def model_serving_auth(cfg: 'Config') -> Optional[CredentialsProvider]:
+def model_serving_auth_func(cfg: 'Config', credential_type) -> Optional[CredentialsProvider]:
     try:
-        model_serving_auth_provider = ModelServingAuthProvider()
-        if not model_serving_auth_provider.should_fetch_model_serving_environment_oauth():
+        if not ModelServingAuthProvider.should_fetch_model_serving_environment_oauth():
             logger.debug("model-serving: Not in Databricks Model Serving, skipping")
             return None
+        model_serving_auth_provider = ModelServingAuthProvider(credential_type)
         host, token = model_serving_auth_provider.get_databricks_host_token()
         if token is None:
             raise ValueError(
@@ -787,7 +804,6 @@ def model_serving_auth(cfg: 'Config') -> Optional[CredentialsProvider]:
     except Exception as e:
         logger.warning("Unable to get auth from Databricks Model Serving Environment", exc_info=e)
         return None
-
     logger.info("Using Databricks Model Serving Authentication")
 
     def inner() -> Dict[str, str]:
@@ -798,6 +814,11 @@ def inner() -> Dict[str, str]:
     return inner
 
 
+@credentials_strategy('model-serving', [])
+def model_serving_auth(cfg: 'Config') -> Optional[CredentialsProvider]:
+    return model_serving_auth_func(cfg, ModelServingAuthProvider.EMBEDDED_CREDENTIALS)
+
+
 class DefaultCredentials:
     """ Select the first applicable credential provider from the chain """
 
@@ -840,3 +861,39 @@ def __call__(self, cfg: 'Config') -> CredentialsProvider:
         raise ValueError(
             f'cannot configure default credentials, please check {auth_flow_url} to configure credentials for your preferred authentication method.'
         )
+
+
+class AgentCredentials(CredentialsStrategy):
+    """Model-serving credentials of one type; falls back to DefaultCredentials elsewhere."""
+
+    def __init__(self, credential_type):
+        self.credential_type = credential_type
+        self.default_credentials = DefaultCredentials()
+
+    def auth_type(self):
+        if not ModelServingAuthProvider.should_fetch_model_serving_environment_oauth():
+            return self.default_credentials.auth_type()
+        return "agent_" + self.credential_type
+
+    def __call__(self, cfg: 'Config') -> CredentialsProvider:
+        # Outside model serving, defer entirely to the default chain.
+        if not ModelServingAuthProvider.should_fetch_model_serving_environment_oauth():
+            return self.default_credentials(cfg)
+        header_factory = model_serving_auth_func(cfg, self.credential_type)
+        if not header_factory:
+            raise ValueError(
+                f"Unable to authenticate using {self.credential_type} in Databricks Model Serving Environment"
+            )
+        return header_factory
+
+
+class AgentUserCredentials(AgentCredentials):
+    # AgentCredentials fixed to the USER_CREDENTIALS type.
+    def __init__(self):
+        super().__init__(ModelServingAuthProvider.USER_CREDENTIALS)
+
+
+class AgentEmbeddedCredentials(AgentCredentials):
+    # AgentCredentials fixed to the EMBEDDED_CREDENTIALS type.
+    def __init__(self):
+        super().__init__(ModelServingAuthProvider.EMBEDDED_CREDENTIALS)
diff --git a/tests/test_model_serving_auth.py b/tests/test_model_serving_auth.py
@@ -1,8 +1,11 @@
+import threading
 import time
 
 import pytest
 
 from databricks.sdk.core import Config
+from databricks.sdk.credentials_provider import (AgentEmbeddedCredentials,
+                                                 AgentUserCredentials)
 
 from .conftest import raises
 
@@ -24,7 +27,9 @@
                           ([('IS_IN_DATABRICKS_MODEL_SERVING_ENV', 'true'),
                             ('DATABRICKS_MODEL_SERVING_HOST_URL', 'x')
                             ], ['DB_MODEL_SERVING_HOST_URL'], "tests/testdata/model-serving-test-token"), ])
-def test_model_serving_auth(env_values, del_env_values, oauth_file_name, monkeypatch, mocker):
+@pytest.mark.parametrize("use_credential_strategy", [True, False])
+def test_model_serving_auth(env_values, del_env_values, oauth_file_name, use_credential_strategy, monkeypatch,
+                            mocker):
     ## In mlflow we check for these two environment variables to return the correct config
     for (env_name, env_value) in env_values:
         monkeypatch.setenv(env_name, env_value)
@@ -37,26 +42,25 @@ def test_model_serving_auth(env_values, del_env_values, oauth_file_name, monkeyp
         "databricks.sdk.credentials_provider.ModelServingAuthProvider._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH",
         oauth_file_name)
     mocker.patch('databricks.sdk.config.Config._known_file_config_loader')
-
-    cfg = Config()
-
-    assert cfg.auth_type == 'model-serving'
+    if use_credential_strategy:
+        cfg = Config(credentials_strategy=AgentEmbeddedCredentials())
+        assert cfg.auth_type == 'agent_embedded_credentials'
+    else:
+        cfg = Config()
+        assert cfg.auth_type == 'model-serving'
     headers = cfg.authenticate()
     assert (cfg.host == 'x')
     # Token defined in the test file
     assert headers.get("Authorization") == 'Bearer databricks_sdk_unit_test_token'
 
 
-@pytest.mark.parametrize(
-    "env_values, oauth_file_name",
-    [
-        ([], "invalid_file_name"), # Not in Model Serving and Invalid File Name
-        ([('IS_IN_DB_MODEL_SERVING_ENV', 'true')
-          ], "invalid_file_name"), # In Model Serving and Invalid File Name
-        ([('IS_IN_DATABRICKS_MODEL_SERVING_ENV', 'true')
-          ], "invalid_file_name"), # In Model Serving and Invalid File Name
-        ([], "tests/testdata/model-serving-test-token") # Not in Model Serving and Valid File Name
-    ])
+@pytest.mark.parametrize("env_values, oauth_file_name", [
+    ([], "invalid_file_name"), # Not in Model Serving and Invalid File Name
+    ([('IS_IN_DB_MODEL_SERVING_ENV', 'true')], "invalid_file_name"), # In Model Serving and Invalid File Name
+    ([('IS_IN_DATABRICKS_MODEL_SERVING_ENV', 'true')
+      ], "invalid_file_name"), # In Model Serving and Invalid File Name
+    ([], "tests/testdata/model-serving-test-token") # Not in Model Serving and Valid File Name
+])
 @raises(default_auth_base_error_message)
 def test_model_serving_auth_errors(env_values, oauth_file_name, monkeypatch):
     # Guarantee that the tests defaults to env variables rather than config file.
@@ -74,7 +78,8 @@ def test_model_serving_auth_errors(env_values, oauth_file_name, monkeypatch):
     Config()
 
 
-def test_model_serving_auth_refresh(monkeypatch, mocker):
+@pytest.mark.parametrize("use_credential_strategy", [True, False])
+def test_model_serving_auth_refresh(use_credential_strategy, monkeypatch, mocker):
     ## In mlflow we check for these two environment variables to return the correct config
     monkeypatch.setenv('IS_IN_DB_MODEL_SERVING_ENV', 'true')
     monkeypatch.setenv('DB_MODEL_SERVING_HOST_URL', 'x')
@@ -85,15 +90,18 @@ def test_model_serving_auth_refresh(monkeypatch, mocker):
         "tests/testdata/model-serving-test-token")
     mocker.patch('databricks.sdk.config.Config._known_file_config_loader')
 
-    cfg = Config()
-    assert cfg.auth_type == 'model-serving'
+    if use_credential_strategy:
+        cfg = Config(credentials_strategy=AgentEmbeddedCredentials())
+        assert cfg.auth_type == 'agent_embedded_credentials'
+    else:
+        cfg = Config()
+        assert cfg.auth_type == 'model-serving'
 
     current_time = time.time()
     headers = cfg.authenticate()
     assert (cfg.host == 'x')
     assert headers.get(
         "Authorization") == 'Bearer databricks_sdk_unit_test_token' # Token defined in the test file
-
     # Simulate refreshing the token by patching to to a new file
     monkeypatch.setattr(
         "databricks.sdk.credentials_provider.ModelServingAuthProvider._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH",
@@ -113,3 +121,64 @@ def test_model_serving_auth_refresh(monkeypatch, mocker):
     assert (cfg.host == 'x')
     # Read V2 now
     assert headers.get("Authorization") == 'Bearer databricks_sdk_unit_test_token_v2'
+
+
+def test_agent_user_credentials(monkeypatch, mocker):
+    """AgentUserCredentials authenticates with the invokers_token found on the current thread."""
+    monkeypatch.setenv('IS_IN_DB_MODEL_SERVING_ENV', 'true')
+    monkeypatch.setenv('DB_MODEL_SERVING_HOST_URL', 'x')
+    monkeypatch.setattr(
+        "databricks.sdk.credentials_provider.ModelServingAuthProvider._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH",
+        "tests/testdata/model-serving-test-token")
+
+    invokers_token_val = "databricks_invokers_token"
+    thread_data = threading.current_thread().__dict__
+    # setitem (unlike a plain assignment) is undone at teardown, so the token
+    # does not leak into other tests that run on this same thread.
+    monkeypatch.setitem(thread_data, "invokers_token", invokers_token_val)
+
+    cfg = Config(credentials_strategy=AgentUserCredentials())
+    assert cfg.auth_type == 'agent_user_credentials'
+
+    headers = cfg.authenticate()
+
+    assert (cfg.host == 'x')
+    assert headers.get("Authorization") == f'Bearer {invokers_token_val}'
+
+    # Test updates of invokers token
+    invokers_token_val = "databricks_invokers_token_v2"
+    monkeypatch.setitem(thread_data, "invokers_token", invokers_token_val)
+
+    headers = cfg.authenticate()
+    assert (cfg.host == 'x')
+    assert headers.get("Authorization") == f'Bearer {invokers_token_val}'
+
+
+# Outside a model serving environment, this credential strategy falls back to the default credential strategy
+def test_agent_user_credentials_in_non_model_serving_environments(monkeypatch):
+    """AgentUserCredentials resolves to PAT auth when only DATABRICKS_HOST/TOKEN are set."""
+    monkeypatch.setenv('DATABRICKS_HOST', 'x')
+    monkeypatch.setenv('DATABRICKS_TOKEN', 'token')
+
+    cfg = Config(credentials_strategy=AgentUserCredentials())
+    assert cfg.auth_type == 'pat' # Auth type is PAT as it is no longer in a model serving environment
+
+    headers = cfg.authenticate()
+
+    assert (cfg.host == 'https://x')
+    assert headers.get("Authorization") == 'Bearer token'
+
+
+# Outside a model serving environment, this credential strategy falls back to the default credential strategy
+def test_agent_embedded_credentials_in_non_model_serving_environments(monkeypatch):
+    """AgentEmbeddedCredentials resolves to PAT auth when only DATABRICKS_HOST/TOKEN are set."""
+    monkeypatch.setenv('DATABRICKS_HOST', 'x')
+    monkeypatch.setenv('DATABRICKS_TOKEN', 'token')
+
+    cfg = Config(credentials_strategy=AgentEmbeddedCredentials())
+    assert cfg.auth_type == 'pat' # Auth type is PAT as it is no longer in a model serving environment
+
+    headers = cfg.authenticate()
+
+    assert (cfg.host == 'https://x')
+    assert headers.get("Authorization") == 'Bearer token'