Skip to content

Commit 374b5c1

Browse files
authored
Merge branch 'databricks:main' into main
2 parents 8493eca + 998a117 commit 374b5c1

File tree

154 files changed

+13620
-12461
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

154 files changed

+13620
-12461
lines changed

.codegen.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"mode": "py_v0",
3-
"changelog_config": ".codegen/changelog_config.yml",
3+
"api_changelog": true,
44
"version": {
55
"databricks/sdk/version.py": "__version__ = '$VERSION'"
66
},

.codegen/changelog_config.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,5 @@ change_types:
77
tag: "[Doc]"
88
- message: Internal Changes
99
tag: "[Internal]"
10-
# Does not appear in the Changelog. Only for PR validation.
11-
- message: Release
12-
tag: "[Release]"
1310
# Default for messages without a tag
1411
- message: Other Changes
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Generated file. DO NOT EDIT.
2+
name: Check for NEXT_CHANGELOG.md Changes
3+
4+
on:
5+
# Use pull_request_target to have access to GitHub API
6+
pull_request_target:
7+
8+
jobs:
9+
check-next-changelog:
10+
runs-on:
11+
group: databricks-deco-testing-runner-group
12+
labels: ubuntu-latest-deco
13+
14+
steps:
15+
- name: Checkout code
16+
uses: actions/checkout@v3
17+
18+
- name: Fetch list of changed files
19+
id: changed-files
20+
env:
21+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22+
run: |
23+
# Use the GitHub API to fetch changed files
24+
files=$(gh pr view ${{ github.event.pull_request.number }} --json files -q '.files[].path')
25+
26+
# Sanitize to avoid code injection
27+
sanitized_files=$(echo "$files" | sed 's/[^a-zA-Z0-9._/-]/_/g')
28+
29+
# Store the sanitized list of files in a temporary file to avoid env variable issues
30+
echo "$sanitized_files" > modified_files.txt
31+
32+
- name: Fetch PR message
33+
id: pr-message
34+
env:
35+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
36+
run: |
37+
# Use the GitHub API to fetch the PR message
38+
pr_message=$(gh pr view ${{ github.event.pull_request.number }} --json body -q '.body')
39+
40+
# Sanitize the PR message to avoid code injection, keeping the equal sign
41+
sanitized_pr_message=$(echo "$pr_message" | sed 's/[^a-zA-Z0-9._/-=]/_/g')
42+
43+
# Store the sanitized PR message
44+
echo "$sanitized_pr_message" > pr_message.txt
45+
46+
- name: Verify NEXT_CHANGELOG.md was modified or PR message contains NO_CHANGELOG=true
47+
run: |
48+
# Read the sanitized files and PR message from the temporary files
49+
modified_files=$(cat modified_files.txt)
50+
pr_message=$(cat pr_message.txt)
51+
52+
# Check if NEXT_CHANGELOG.md exists in the list of changed files
53+
echo "Changed files: $modified_files"
54+
if ! echo "$modified_files" | grep -q "NEXT_CHANGELOG.md"; then
55+
echo "NEXT_CHANGELOG.md not modified."
56+
57+
# Check if PR message contains NO_CHANGELOG=true
58+
if echo "$pr_message" | grep -q "NO_CHANGELOG=true"; then
59+
echo "NO_CHANGELOG=true found in PR message. Skipping changelog check."
60+
exit 0
61+
else
62+
echo "WARNING: file NEXT_CHANGELOG.md not changed. If this is expected, add NO_CHANGELOG=true to the PR message."
63+
exit 1
64+
fi
65+
fi
66+
67+
- name: Comment on PR with instructions if needed
68+
if: failure() # Runs when any earlier step in this job has failed (in practice: NEXT_CHANGELOG.md was not modified and NO_CHANGELOG=true was not in the PR message)
69+
env:
70+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
71+
run: |
72+
# Check if a comment exists with the instructions
73+
previous_comment_ids=$(gh api "repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \
74+
--jq '.[] | select(.body | startswith("<!-- NEXT_CHANGELOG_INSTRUCTIONS -->")) | .id')
75+
echo "Previous comment IDs: $previous_comment_ids"
76+
77+
# If no previous comment exists, add one with instructions
78+
if [ -z "$previous_comment_ids" ]; then
79+
echo "Adding instructions comment."
80+
gh pr comment ${{ github.event.pull_request.number }} --body \
81+
"<!-- NEXT_CHANGELOG_INSTRUCTIONS -->
82+
Please ensure that the NEXT_CHANGELOG.md file is updated with any relevant changes.
83+
If this is not necessary for your PR, please include the following in your PR description:
84+
NO_CHANGELOG=true
85+
and rerun the job."
86+
fi
87+
88+
- name: Delete instructions comment on success
89+
if: success() # Runs only when no earlier step in this job has failed (i.e., NEXT_CHANGELOG.md was modified or NO_CHANGELOG=true is in the PR message)
90+
env:
91+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
92+
run: |
93+
# Check if there is a previous instructions comment
94+
previous_comment_ids=$(gh api "repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \
95+
--jq '.[] | select(.body | startswith("<!-- NEXT_CHANGELOG_INSTRUCTIONS -->")) | .id')
96+
97+
# If a comment exists, delete it
98+
if [ -n "$previous_comment_ids" ]; then
99+
echo "Deleting previous instructions comment."
100+
for comment_id in $previous_comment_ids; do
101+
gh api "repos/${{ github.repository }}/issues/comments/$comment_id" --method DELETE
102+
done
103+
else
104+
echo "No instructions comment found to delete."
105+
fi

.github/workflows/tagging.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Generated file. DO NOT EDIT.
2+
name: tagging
3+
4+
on:
5+
workflow_dispatch:
6+
# Enable for automatic tagging
7+
#schedule:
8+
# - cron: '0 0 * * TUE'
9+
10+
# Ensure that only a single instance of the workflow is running at a time.
11+
concurrency:
12+
group: "tagging"
13+
14+
15+
jobs:
16+
tag:
17+
environment: "release-is"
18+
runs-on:
19+
group: databricks-deco-testing-runner-group
20+
labels: ubuntu-latest-deco
21+
steps:
22+
- name: Generate GitHub App Token
23+
id: generate-token
24+
uses: actions/create-github-app-token@v1
25+
with:
26+
app-id: ${{ secrets.DECO_SDK_TAGGING_APP_ID }}
27+
private-key: ${{ secrets.DECO_SDK_TAGGING_PRIVATE_KEY }}
28+
29+
- name: Checkout repository
30+
uses: actions/checkout@v4
31+
with:
32+
fetch-depth: 0
33+
token: ${{ steps.generate-token.outputs.token }}
34+
35+
#NOTE: email must be the GitHub App email or the commit will not be verified.
36+
- name: Set up Git configuration
37+
run: |
38+
git config user.name "Databricks SDK Release Bot"
39+
git config user.email "DECO-SDK-Tagging[bot]@users.noreply.github.com"
40+
41+
- name: Install dependencies
42+
run: |
43+
python -m pip install --upgrade pip
44+
pip install PyGithub
45+
46+
- name: Run script
47+
env:
48+
GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }}
49+
GITHUB_REPOSITORY: ${{ github.repository }}
50+
run: |
51+
python tagging.py
52+

.package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
# Version changelog
22

3+
## [Release] Release v0.44.1
4+
5+
### New Features and Improvements
6+
7+
* Introduce new Credential Strategies for Agents ([#882](https://github.com/databricks/databricks-sdk-py/pull/882)).
8+
9+
10+
### Internal Changes
11+
12+
* GetRun logic paginates more arrays ([#867](https://github.com/databricks/databricks-sdk-py/pull/867)).
13+
14+
15+
316
## [Release] Release v0.44.0
417

518
### Internal Changes

NEXT_CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# NEXT CHANGELOG
2+
3+
## Release v0.45.0
4+
5+
### New Features and Improvements
6+
7+
### Bug Fixes
8+
9+
### Documentation
10+
11+
### Internal Changes
12+
* Introduce automated tagging ([#888](https://github.com/databricks/databricks-sdk-py/pull/888)).
13+
* Update Jobs GetJob API to support paginated responses ([#869](https://github.com/databricks/databricks-sdk-py/pull/869)).
14+
15+
### API Changes

databricks/sdk/credentials_provider.py

Lines changed: 71 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import platform
1010
import subprocess
1111
import sys
12+
import threading
1213
import time
1314
from datetime import datetime
1415
from typing import Callable, Dict, List, Optional, Tuple, Union
@@ -723,14 +724,17 @@ def inner() -> Dict[str, str]:
723724
# This Code is derived from Mlflow DatabricksModelServingConfigProvider
724725
# https://github.com/mlflow/mlflow/blob/1219e3ef1aac7d337a618a352cd859b336cf5c81/mlflow/legacy_databricks_cli/configure/provider.py#L332
725726
class ModelServingAuthProvider():
727+
USER_CREDENTIALS = "user_credentials"
728+
726729
_MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH = "/var/credentials-secret/model-dependencies-oauth-token"
727730

728-
def __init__(self):
731+
def __init__(self, credential_type: Optional[str]):
729732
self.expiry_time = -1
730733
self.current_token = None
731734
self.refresh_duration = 300 # 300 Seconds
735+
self.credential_type = credential_type
732736

733-
def should_fetch_model_serving_environment_oauth(self) -> bool:
737+
def should_fetch_model_serving_environment_oauth() -> bool:
734738
"""
735739
Check whether this is the model serving environment
736740
Additionally check if the oauth token file path exists
@@ -739,15 +743,15 @@ def should_fetch_model_serving_environment_oauth(self) -> bool:
739743
is_in_model_serving_env = (os.environ.get("IS_IN_DB_MODEL_SERVING_ENV")
740744
or os.environ.get("IS_IN_DATABRICKS_MODEL_SERVING_ENV") or "false")
741745
return (is_in_model_serving_env == "true"
742-
and os.path.isfile(self._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH))
746+
and os.path.isfile(ModelServingAuthProvider._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH))
743747

744-
def get_model_dependency_oauth_token(self, should_retry=True) -> str:
748+
def _get_model_dependency_oauth_token(self, should_retry=True) -> str:
745749
# Use Cached value if it is valid
746750
if self.current_token is not None and self.expiry_time > time.time():
747751
return self.current_token
748752

749753
try:
750-
with open(self._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH) as f:
754+
with open(ModelServingAuthProvider._MODEL_DEPENDENCY_OAUTH_TOKEN_FILE_PATH) as f:
751755
oauth_dict = json.load(f)
752756
self.current_token = oauth_dict["OAUTH_TOKEN"][0]["oauthTokenValue"]
753757
self.expiry_time = time.time() + self.refresh_duration
@@ -757,32 +761,43 @@ def get_model_dependency_oauth_token(self, should_retry=True) -> str:
757761
logger.warning("Unable to read oauth token on first attmept in Model Serving Environment",
758762
exc_info=e)
759763
time.sleep(0.5)
760-
return self.get_model_dependency_oauth_token(should_retry=False)
764+
return self._get_model_dependency_oauth_token(should_retry=False)
761765
else:
762766
raise RuntimeError(
763767
"Unable to read OAuth credentials from the file mounted in Databricks Model Serving"
764768
) from e
765769
return self.current_token
766770

771+
def _get_invokers_token(self):
772+
current_thread = threading.current_thread()
773+
thread_data = current_thread.__dict__
774+
invokers_token = None
775+
if "invokers_token" in thread_data:
776+
invokers_token = thread_data["invokers_token"]
777+
778+
if invokers_token is None:
779+
raise RuntimeError("Unable to read Invokers Token in Databricks Model Serving")
780+
781+
return invokers_token
782+
767783
def get_databricks_host_token(self) -> Optional[Tuple[str, str]]:
768-
if not self.should_fetch_model_serving_environment_oauth():
784+
if not ModelServingAuthProvider.should_fetch_model_serving_environment_oauth():
769785
return None
770786

771787
# read from DB_MODEL_SERVING_HOST_ENV_VAR if available otherwise MODEL_SERVING_HOST_ENV_VAR
772788
host = os.environ.get("DATABRICKS_MODEL_SERVING_HOST_URL") or os.environ.get(
773789
"DB_MODEL_SERVING_HOST_URL")
774-
token = self.get_model_dependency_oauth_token()
775790

776-
return (host, token)
791+
if self.credential_type == ModelServingAuthProvider.USER_CREDENTIALS:
792+
return (host, self._get_invokers_token())
793+
else:
794+
return (host, self._get_model_dependency_oauth_token())
777795

778796

779-
@credentials_strategy('model-serving', [])
780-
def model_serving_auth(cfg: 'Config') -> Optional[CredentialsProvider]:
797+
def model_serving_auth_visitor(cfg: 'Config',
798+
credential_type: Optional[str] = None) -> Optional[CredentialsProvider]:
781799
try:
782-
model_serving_auth_provider = ModelServingAuthProvider()
783-
if not model_serving_auth_provider.should_fetch_model_serving_environment_oauth():
784-
logger.debug("model-serving: Not in Databricks Model Serving, skipping")
785-
return None
800+
model_serving_auth_provider = ModelServingAuthProvider(credential_type)
786801
host, token = model_serving_auth_provider.get_databricks_host_token()
787802
if token is None:
788803
raise ValueError(
@@ -793,7 +808,6 @@ def model_serving_auth(cfg: 'Config') -> Optional[CredentialsProvider]:
793808
except Exception as e:
794809
logger.warning("Unable to get auth from Databricks Model Serving Environment", exc_info=e)
795810
return None
796-
797811
logger.info("Using Databricks Model Serving Authentication")
798812

799813
def inner() -> Dict[str, str]:
@@ -804,6 +818,15 @@ def inner() -> Dict[str, str]:
804818
return inner
805819

806820

821+
@credentials_strategy('model-serving', [])
822+
def model_serving_auth(cfg: 'Config') -> Optional[CredentialsProvider]:
823+
if not ModelServingAuthProvider.should_fetch_model_serving_environment_oauth():
824+
logger.debug("model-serving: Not in Databricks Model Serving, skipping")
825+
return None
826+
827+
return model_serving_auth_visitor(cfg)
828+
829+
807830
class DefaultCredentials:
808831
""" Select the first applicable credential provider from the chain """
809832

@@ -846,3 +869,35 @@ def __call__(self, cfg: 'Config') -> CredentialsProvider:
846869
raise ValueError(
847870
f'cannot configure default credentials, please check {auth_flow_url} to configure credentials for your preferred authentication method.'
848871
)
872+
873+
874+
class ModelServingUserCredentials(CredentialsStrategy):
875+
"""
876+
This credential strategy is designed for authenticating the Databricks SDK in the model serving environment using user-specific rights.
877+
In the model serving environment, the strategy retrieves a downscoped user token from the thread-local variable.
878+
In any other environment, the class falls back to the DefaultCredentials strategy.
879+
To use this credential strategy, instantiate the WorkspaceClient with the ModelServingUserCredentials strategy as follows:
880+
881+
invokers_client = WorkspaceClient(credentials_strategy=ModelServingUserCredentials())
882+
"""
883+
884+
def __init__(self):
885+
self.credential_type = ModelServingAuthProvider.USER_CREDENTIALS
886+
self.default_credentials = DefaultCredentials()
887+
888+
def auth_type(self):
889+
if ModelServingAuthProvider.should_fetch_model_serving_environment_oauth():
890+
return "model_serving_" + self.credential_type
891+
else:
892+
return self.default_credentials.auth_type()
893+
894+
def __call__(self, cfg: 'Config') -> CredentialsProvider:
895+
if ModelServingAuthProvider.should_fetch_model_serving_environment_oauth():
896+
header_factory = model_serving_auth_visitor(cfg, self.credential_type)
897+
if not header_factory:
898+
raise ValueError(
899+
f"Unable to authenticate using {self.credential_type} in Databricks Model Serving Environment"
900+
)
901+
return header_factory
902+
else:
903+
return self.default_credentials(cfg)

0 commit comments

Comments
 (0)