Skip to content

Commit 58574b8

Browse files
authored
Added with_product(...) and with_user_agent_extra(...) public functions to improve telemetry for mid-stream libraries (#679)
## Changes

This PR solves three tracking challenges:

1. Adds a mechanism consistent with the Go SDK public API. See https://github.com/databricks/databricks-sdk-go/blob/00b1d09b24aa9fb971bcf23f3db3e80bf2bec6fe/useragent/user_agent.go#L20-L31 and https://github.com/databricks/databricks-sdk-go/blob/00b1d09b24aa9fb971bcf23f3db3e80bf2bec6fe/useragent/user_agent.go#L49-L54
2. Some of our products, like UCX and Remorph, are used not only as standalone CLIs, but also as mid-stream libraries in other products, where developers don't specify the product name and product version within the WorkspaceClient. This results in missing tracking information for those integrations.
3. Mid-stream libraries, like blueprint, pytester, and lsql, do have their own versions, but they don't create or manage sdk.WorkspaceClient and/or core.Config themselves, so we currently lack traffic attribution for those libraries. Technically, Databricks Connect falls into the same use case.

## Tests

Moved the unit tests that are relevant to User-Agent verification from test_core.py to test_config.py to restore consistency.
1 parent 4e751f8 commit 58574b8

File tree

3 files changed

+96
-43
lines changed

3 files changed

+96
-43
lines changed

databricks/sdk/config.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import platform
77
import sys
88
import urllib.parse
9-
from typing import Dict, Iterable, Optional
9+
from typing import Dict, Iterable, List, Optional, Tuple
1010

1111
import requests
1212

@@ -44,6 +44,32 @@ def __repr__(self) -> str:
4444
return f"<ConfigAttribute '{self.name}' {self.transform.__name__}>"
4545

4646

47+
_DEFAULT_PRODUCT_NAME = 'unknown'
48+
_DEFAULT_PRODUCT_VERSION = '0.0.0'
49+
_STATIC_USER_AGENT: Tuple[str, str, List[str]] = (_DEFAULT_PRODUCT_NAME, _DEFAULT_PRODUCT_VERSION, [])
50+
51+
52+
def with_product(product: str, product_version: str):
    """[INTERNAL API] Replace the product name and version used in the User-Agent header.

    The new pair is stored in the module-level static user agent state; the
    extra metadata entries already recorded there are carried over untouched.

    :param product: product name to store in the static user agent state.
    :param product_version: product version to store alongside the name.
    """
    global _STATIC_USER_AGENT
    prev_product, prev_version, extras = _STATIC_USER_AGENT
    logger.debug(f'Changing product from {prev_product}/{prev_version} to {product}/{product_version}')
    # only the first two slots change; the extras list object is reused as-is
    _STATIC_USER_AGENT = (product, product_version, extras)
58+
59+
60+
def with_user_agent_extra(key: str, value: str):
    """[INTERNAL API] Record a ``key/value`` entry in the static User-Agent metadata.

    Intended for library developers. If an entry starting with ``key/`` is
    already present, the first such entry is removed before the new one is
    appended at the end of the list, so a key is not reported twice.

    :param key: name of the metadata entry.
    :param value: value rendered after the slash.
    """
    global _STATIC_USER_AGENT
    name, version, extras = _STATIC_USER_AGENT
    prefix = f"{key}/"
    # locate the first existing entry for this key, if there is one
    stale_index = next((i for i, entry in enumerate(extras) if entry.startswith(prefix)), None)
    if stale_index is not None:
        # ensure that we don't have duplicates
        del extras[stale_index]
    # the list is mutated in place and then stored back into the static state
    extras.append(f"{key}/{value}")
    _STATIC_USER_AGENT = (name, version, extras)
71+
72+
4773
class Config:
4874
host: str = ConfigAttribute(env='DATABRICKS_HOST')
4975
account_id: str = ConfigAttribute(env='DATABRICKS_ACCOUNT_ID')
@@ -85,12 +111,21 @@ def __init__(self,
85111
# Deprecated. Use credentials_strategy instead.
86112
credentials_provider: CredentialsStrategy = None,
87113
credentials_strategy: CredentialsStrategy = None,
88-
product="unknown",
89-
product_version="0.0.0",
114+
product=_DEFAULT_PRODUCT_NAME,
115+
product_version=_DEFAULT_PRODUCT_VERSION,
90116
clock: Clock = None,
91117
**kwargs):
92118
self._header_factory = None
93119
self._inner = {}
120+
# as in SDK for Go, pull information from global static user agent context,
121+
# so that we can track additional metadata for mid-stream libraries, as well
122+
# as for cases, when the downstream product is used as a library and is not
123+
# configured with a proper product name and version.
124+
static_product, static_version, _ = _STATIC_USER_AGENT
125+
if product == _DEFAULT_PRODUCT_NAME:
126+
product = static_product
127+
if product_version == _DEFAULT_PRODUCT_VERSION:
128+
product_version = static_version
94129
self._user_agent_other_info = []
95130
if credentials_strategy and credentials_provider:
96131
raise ValueError(
@@ -234,6 +269,12 @@ def user_agent(self):
234269
]
235270
if len(self._user_agent_other_info) > 0:
236271
ua.append(' '.join(self._user_agent_other_info))
272+
# as in SDK for Go, pull information from global static user agent context,
273+
# so that we can track additional metadata for mid-stream libraries. this value
274+
# is shared across all instances of Config objects intentionally.
275+
_, _, static_info = _STATIC_USER_AGENT
276+
if len(static_info) > 0:
277+
ua.append(' '.join(static_info))
237278
if len(self._upstream_user_agent) > 0:
238279
ua.append(self._upstream_user_agent)
239280
if 'DATABRICKS_RUNTIME_VERSION' in os.environ:

tests/test_config.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from databricks.sdk.config import Config
1+
import platform
2+
3+
from databricks.sdk.config import Config, with_product, with_user_agent_extra
4+
from databricks.sdk.version import __version__
25

36
from .conftest import noop_credentials
47

@@ -15,3 +18,51 @@ def test_config_supports_legacy_credentials_provider():
1518
c2 = c.copy()
1619
assert c2._product == 'foo'
1720
assert c2._product_version == '1.2.3'
21+
22+
23+
def test_extra_and_upstream_user_agent(monkeypatch):
    """Verify User-Agent composition from per-Config extras, upstream env vars and static product.

    The static user agent state in ``databricks.sdk.config`` is process-wide, so
    it is pinned to its defaults through ``monkeypatch`` for the duration of the
    test. That keeps the exact-match assertion below independent of extras
    registered by other tests, and the ``with_product`` call made here is
    rolled back at teardown instead of leaking into later tests.
    """

    class MockUname:

        @property
        def system(self):
            return 'TestOS'

    # isolate the module-level static state; monkeypatch restores the original value
    monkeypatch.setattr('databricks.sdk.config._STATIC_USER_AGENT', ('unknown', '0.0.0', []))
    monkeypatch.setattr(platform, 'python_version', lambda: '3.0.0')
    monkeypatch.setattr(platform, 'uname', MockUname)
    monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM', "upstream-product")
    monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM_VERSION', "0.0.1")
    monkeypatch.setenv('DATABRICKS_RUNTIME_VERSION', "13.1 anything/else")

    config = Config(host='http://localhost', username="something", password="something", product='test',
                    product_version='0.0.0') \
        .with_user_agent_extra('test-extra-1', '1') \
        .with_user_agent_extra('test-extra-2', '2')

    assert config.user_agent == (
        f"test/0.0.0 databricks-sdk-py/{__version__} python/3.0.0 os/testos auth/basic"
        f" test-extra-1/1 test-extra-2/2 upstream/upstream-product upstream-version/0.0.1"
        " runtime/13.1-anything-else")

    # a Config created without an explicit product picks up the static one
    with_product('some-product', '0.32.1')
    config2 = Config(host='http://localhost', token='...')
    assert config2.user_agent.startswith('some-product/0.32.1')

    # an explicitly configured product is not overridden by the static one
    config3 = Config(host='http://localhost', token='...', product='abc', product_version='1.2.3')
    assert not config3.user_agent.startswith('some-product/0.32.1')
53+
54+
55+
def test_config_copy_deep_copies_user_agent_other_info(config, monkeypatch):
    """Verify per-Config extras are not shared with copies, while static extras are.

    The static user agent state in ``databricks.sdk.config`` is process-wide, so
    it is replaced with a fresh default value through ``monkeypatch``; the
    ``blueprint`` entry registered below is therefore discarded at teardown
    instead of appearing in the User-Agent of every later test.
    """
    # isolate the module-level static state; monkeypatch restores the original value
    monkeypatch.setattr('databricks.sdk.config._STATIC_USER_AGENT', ('unknown', '0.0.0', []))

    config_copy = config.copy()

    config.with_user_agent_extra("test", "test1")
    assert "test/test1" not in config_copy.user_agent
    assert "test/test1" in config.user_agent

    config_copy.with_user_agent_extra("test", "test2")
    assert "test/test2" in config_copy.user_agent
    assert "test/test2" not in config.user_agent

    # the module-level extra is visible through every Config instance
    with_user_agent_extra("blueprint", "0.4.6")
    assert "blueprint/0.4.6" in config.user_agent
    assert "blueprint/0.4.6" in config_copy.user_agent

tests/test_core.py

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import functools
33
import os
44
import pathlib
5-
import platform
65
import random
76
import string
87
import typing
@@ -25,7 +24,6 @@
2524
DatabricksEnvironment)
2625
from databricks.sdk.service.catalog import PermissionsChange
2726
from databricks.sdk.service.iam import AccessControlRequest
28-
from databricks.sdk.version import __version__
2927

3028
from .clock import FakeClock
3129
from .conftest import noop_credentials
@@ -181,31 +179,6 @@ def test_databricks_cli_credential_provider_installed_new(config, monkeypatch, t
181179
assert databricks_cli(config) is not None
182180

183181

184-
def test_extra_and_upstream_user_agent(monkeypatch):
185-
186-
class MockUname:
187-
188-
@property
189-
def system(self):
190-
return 'TestOS'
191-
192-
monkeypatch.setattr(platform, 'python_version', lambda: '3.0.0')
193-
monkeypatch.setattr(platform, 'uname', MockUname)
194-
monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM', "upstream-product")
195-
monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM_VERSION', "0.0.1")
196-
monkeypatch.setenv('DATABRICKS_RUNTIME_VERSION', "13.1 anything/else")
197-
198-
config = Config(host='http://localhost', username="something", password="something", product='test',
199-
product_version='0.0.0') \
200-
.with_user_agent_extra('test-extra-1', '1') \
201-
.with_user_agent_extra('test-extra-2', '2')
202-
203-
assert config.user_agent == (
204-
f"test/0.0.0 databricks-sdk-py/{__version__} python/3.0.0 os/testos auth/basic"
205-
f" test-extra-1/1 test-extra-2/2 upstream/upstream-product upstream-version/0.0.1"
206-
" runtime/13.1-anything-else")
207-
208-
209182
def test_config_copy_shallow_copies_credential_provider():
210183

211184
class TestCredentialsStrategy(CredentialsStrategy):
@@ -237,18 +210,6 @@ def refresh(self):
237210
assert config._credentials_strategy == config_copy._credentials_strategy
238211

239212

240-
def test_config_copy_deep_copies_user_agent_other_info(config):
241-
config_copy = config.copy()
242-
243-
config.with_user_agent_extra("test", "test1")
244-
assert "test/test1" not in config_copy.user_agent
245-
assert "test/test1" in config.user_agent
246-
247-
config_copy.with_user_agent_extra("test", "test2")
248-
assert "test/test2" in config_copy.user_agent
249-
assert "test/test2" not in config.user_agent
250-
251-
252213
def test_config_accounts_aws_is_accounts_host(config):
253214
config.host = "https://accounts.cloud.databricks.com"
254215
assert config.is_account_client

0 commit comments

Comments
 (0)