Skip to content

Commit 69a0cf8

Browse files
authored
create-uber-principal fixes and improvements (#2941)
## Changes `create-uber-principal` fixes and improvements: - Add a `set_workspace_warehouse_config_wrapper` to work around the missing serverless-compute support in the Databricks warehouses API. - Fix updating the configuration on the warehouse. - Ask for the uber principal name only if one is going to be created. - Avoid an unnecessary storage account crawl. ### Linked issues Resolves #2764 Resolves #2771 Progresses #2949 Reference databricks/databricks-sdk-py#305 ### Functionality - [x] modified existing command: `databricks labs ucx create-uber-principal` ### Tests - [x] manually tested - [x] modified unit tests - [x] added integration tests
1 parent 22a2cd3 commit 69a0cf8

File tree

6 files changed

+386
-211
lines changed

6 files changed

+386
-211
lines changed

src/databricks/labs/ucx/azure/access.py

Lines changed: 162 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
from databricks.labs.blueprint.parallel import ManyError, Threads
1313
from databricks.labs.blueprint.tui import Prompts
1414
from databricks.sdk import WorkspaceClient
15+
from databricks.sdk.core import ApiClient
1516
from databricks.sdk.errors import (
1617
BadRequest,
18+
DatabricksError,
1719
InvalidParameterValue,
1820
NotFound,
1921
PermissionDenied,
@@ -24,9 +26,12 @@
2426
from databricks.sdk.service.catalog import Privilege
2527
from databricks.sdk.service.compute import Policy
2628
from databricks.sdk.service.sql import (
29+
Channel,
2730
EndpointConfPair,
2831
GetWorkspaceWarehouseConfigResponse,
32+
RepeatedEndpointConfPairs,
2933
SetWorkspaceWarehouseConfigRequestSecurityPolicy,
34+
WarehouseTypePair,
3035
)
3136
from databricks.sdk.service.workspace import GetSecretResponse
3237

@@ -45,6 +50,59 @@
4550
R = TypeVar('R')
4651

4752

53+
def set_workspace_warehouse_config_wrapper(  # pylint: disable=too-many-arguments,missing-param-doc
    api: ApiClient,
    *,
    channel: Channel | None = None,
    config_param: RepeatedEndpointConfPairs | None = None,
    data_access_config: list[EndpointConfPair] | None = None,
    enabled_warehouse_types: list[WarehouseTypePair] | None = None,
    global_param: RepeatedEndpointConfPairs | None = None,
    google_service_account: str | None = None,
    instance_profile_arn: str | None = None,
    security_policy: SetWorkspaceWarehouseConfigRequestSecurityPolicy | None = None,
    sql_configuration_parameters: RepeatedEndpointConfPairs | None = None,
    enable_serverless_compute: bool = False,
):
    """Set the workspace-level configuration shared by all SQL warehouses in a workspace.

    See :meth:`WorkspaceClient.warehouses.set_workspace_warehouse_config`.

    :param enable_serverless_compute: bool (optional)
        Allow serverless compute. Note that this value does not enforce serverless compute, it
        only permits it when `True`. Otherwise, serverless compute is not allowed.

    TODO: Once https://github.com/databricks/databricks-sdk-py/issues/305 is fixed this wrapper should be discarded.
    """
    payload: dict[Any, Any] = {}
    # These arguments serialize to a nested object via `as_dict`; include each only when provided.
    for field, argument in (
        ('channel', channel),
        ('config_param', config_param),
        ('global_param', global_param),
        ('sql_configuration_parameters', sql_configuration_parameters),
    ):
        if argument is not None:
            payload[field] = argument.as_dict()
    # These arguments are lists of objects; serialize each element.
    for field, arguments in (
        ('data_access_config', data_access_config),
        ('enabled_warehouse_types', enabled_warehouse_types),
    ):
        if arguments is not None:
            payload[field] = [item.as_dict() for item in arguments]
    if google_service_account is not None:
        payload['google_service_account'] = google_service_account
    if instance_profile_arn is not None:
        payload['instance_profile_arn'] = instance_profile_arn
    if security_policy is not None:
        payload['security_policy'] = security_policy.value
    # Always sent, serialized as the lowercase string "true"/"false" (not a JSON boolean) —
    # this is the whole point of the wrapper, see the linked SDK issue above.
    payload['enable_serverless_compute'] = str(enable_serverless_compute).lower()

    api.do(
        'PUT',
        '/api/2.0/sql/config/warehouses',
        body=payload,
        headers={'Accept': 'application/json', 'Content-Type': 'application/json'},
    )
104+
105+
48106
@dataclass
49107
class StoragePermissionMapping:
50108
prefix: str
@@ -170,30 +228,35 @@ def _policy_config(value: str) -> dict[str, str]:
170228
return {"type": "fixed", "value": value}
171229

172230
def _create_service_principal_cluster_policy_configuration_pairs(
173-
self, principal_client_id: str, principal_secret_identifier: str, storage: StorageAccount
231+
self,
232+
principal_client_id: str,
233+
principal_secret_identifier: str,
234+
storage: StorageAccount,
235+
*,
236+
configuration_prefix: str,
174237
) -> list[tuple[str, dict[str, str]]]:
175238
"""Create the cluster policy configuration pairs to access the storage"""
176239
tenant_id = self._azurerm.tenant_id()
177240
endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
178241
configuration_pairs = [
179242
(
180-
f"spark_conf.fs.azure.account.oauth2.client.id.{storage.name}.dfs.core.windows.net",
243+
f"{configuration_prefix}.fs.azure.account.oauth2.client.id.{storage.name}.dfs.core.windows.net",
181244
self._policy_config(principal_client_id),
182245
),
183246
(
184-
f"spark_conf.fs.azure.account.oauth.provider.type.{storage.name}.dfs.core.windows.net",
247+
f"{configuration_prefix}.fs.azure.account.oauth.provider.type.{storage.name}.dfs.core.windows.net",
185248
self._policy_config("org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"),
186249
),
187250
(
188-
f"spark_conf.fs.azure.account.oauth2.client.endpoint.{storage.name}.dfs.core.windows.net",
251+
f"{configuration_prefix}.fs.azure.account.oauth2.client.endpoint.{storage.name}.dfs.core.windows.net",
189252
self._policy_config(endpoint),
190253
),
191254
(
192-
f"spark_conf.fs.azure.account.auth.type.{storage.name}.dfs.core.windows.net",
255+
f"{configuration_prefix}.fs.azure.account.auth.type.{storage.name}.dfs.core.windows.net",
193256
self._policy_config("OAuth"),
194257
),
195258
(
196-
f"spark_conf.fs.azure.account.oauth2.client.secret.{storage.name}.dfs.core.windows.net",
259+
f"{configuration_prefix}.fs.azure.account.oauth2.client.secret.{storage.name}.dfs.core.windows.net",
197260
self._policy_config("{{" + principal_secret_identifier + "}}"),
198261
),
199262
]
@@ -209,7 +272,10 @@ def _add_service_principal_configuration_to_cluster_policy_definition(
209272
policy_dict = json.loads(policy_definition)
210273
for storage in storage_accounts:
211274
for key, value in self._create_service_principal_cluster_policy_configuration_pairs(
212-
principal_client_id, principal_secret_identifier, storage
275+
principal_client_id,
276+
principal_secret_identifier,
277+
storage,
278+
configuration_prefix="spark_conf",
213279
):
214280
policy_dict[key] = value
215281
return json.dumps(policy_dict)
@@ -251,7 +317,10 @@ def _remove_service_principal_configuration_from_cluster_policy_definition(
251317
policy_dict = json.loads(policy_definition)
252318
for storage in storage_accounts:
253319
for key, _ in self._create_service_principal_cluster_policy_configuration_pairs(
254-
principal_client_id, principal_secret_identifier, storage
320+
principal_client_id,
321+
principal_secret_identifier,
322+
storage,
323+
configuration_prefix="spark_conf",
255324
):
256325
if key in policy_dict:
257326
del policy_dict[key]
@@ -284,12 +353,18 @@ def _remove_service_principal_configuration_from_cluster_policy(
284353
self._ws.cluster_policies.edit(policy_id, name=policy.name, definition=policy_definition)
285354

286355
def _create_storage_account_data_access_configuration_pairs(
287-
self, principal_client_id: str, principal_secret_identifier: str, storage: StorageAccount
356+
self,
357+
principal_client_id: str,
358+
principal_secret_identifier: str,
359+
storage: StorageAccount,
288360
) -> list[EndpointConfPair]:
289361
"""Create the data access configuration pairs to access the storage"""
290362
configuration_pairs = []
291363
for key, value in self._create_service_principal_cluster_policy_configuration_pairs(
292-
principal_client_id, principal_secret_identifier, storage
364+
principal_client_id,
365+
principal_secret_identifier,
366+
storage,
367+
configuration_prefix="spark.hadoop",
293368
):
294369
configuration_pairs.append(EndpointConfPair(key, value["value"]))
295370
return configuration_pairs
@@ -302,36 +377,53 @@ def _add_service_principal_configuration_to_workspace_warehouse_config(
302377
):
303378
warehouse_config = self._ws.warehouses.get_workspace_warehouse_config()
304379
self._installation.save(warehouse_config, filename="warehouse-config-backup.json")
380+
305381
sql_dac = warehouse_config.data_access_config or []
306382
for storage in storage_accounts:
307383
configuration_pairs = self._create_storage_account_data_access_configuration_pairs(
308384
principal_client_id, principal_secret_identifier, storage
309385
)
310386
sql_dac.extend(configuration_pairs)
387+
311388
security_policy = (
312389
SetWorkspaceWarehouseConfigRequestSecurityPolicy(warehouse_config.security_policy.value)
313390
if warehouse_config.security_policy
314391
else SetWorkspaceWarehouseConfigRequestSecurityPolicy.NONE
315392
)
393+
394+
succeeded_message = "Updated workspace warehouse config with service principal connection details for accessing storage accounts"
395+
sql_dac_log_msg = "\n".join(f"{config_pair.key} {config_pair.value}" for config_pair in sql_dac)
396+
error_message = (
397+
f"Adding uber principal to SQL warehouse Data Access Properties is failed using Python SDK with error. "
398+
f"Please try applying the following configs manually in the workspace admin UI:\n{sql_dac_log_msg}",
399+
)
400+
# TODO: Once https://github.com/databricks/databricks-sdk-py/issues/305 is fixed:
401+
# - Remove second try-except completely
402+
# - Remove first -except, only keeping the contents in the try-statement
316403
try:
317404
self._ws.warehouses.set_workspace_warehouse_config(
318405
data_access_config=sql_dac,
319406
sql_configuration_parameters=warehouse_config.sql_configuration_parameters,
320407
security_policy=security_policy,
321408
)
322-
logger.info(
323-
"Updated workspace warehouse config with service principal connection details for accessing storage accounts"
324-
)
325-
# TODO: Remove following try except once https://github.com/databricks/databricks-sdk-py/issues/305 is fixed
326-
except InvalidParameterValue as error:
327-
sql_dac_log_msg = "\n".join(f"{config_pair.key} {config_pair.value}" for config_pair in sql_dac)
328-
logger.error(
329-
f'Adding uber principal to SQL warehouse Data Access Properties is failed using Python SDK with error "{error}". '
330-
f'Please try applying the following configs manually in the workspace admin UI:\n{sql_dac_log_msg}'
409+
logger.info(succeeded_message)
410+
except InvalidParameterValue as e:
411+
if "enable_serverless_compute" not in str(e):
412+
logger.error(error_message, exc_info=e)
413+
raise
414+
try:
415+
set_workspace_warehouse_config_wrapper(
416+
self._ws.api_client,
417+
data_access_config=sql_dac,
418+
sql_configuration_parameters=warehouse_config.sql_configuration_parameters,
419+
security_policy=security_policy,
420+
enable_serverless_compute=True,
331421
)
332-
raise error
422+
except InvalidParameterValue as e:
423+
logger.error(error_message, exc_info=e)
424+
raise
333425

334-
def _remove_service_principal_configuration_from_workspace_warehouse_config(
426+
def _remove_service_principal_configuration_from_workspace_warehouse_config( # pylint: disable=too-complex
335427
self,
336428
principal_client_id: str,
337429
principal_secret_identifier: str,
@@ -358,28 +450,43 @@ def _remove_service_principal_configuration_from_workspace_warehouse_config(
358450
if warehouse_config.security_policy
359451
else SetWorkspaceWarehouseConfigRequestSecurityPolicy.NONE
360452
)
453+
454+
succeeded_message = "Updated workspace warehouse config with service principal connection details for accessing storage accounts"
455+
sql_dac_log_msg = "\n".join(f"{config_pair.key} {config_pair.value}" for config_pair in sql_dac)
456+
error_message = (
457+
f"Adding uber principal to SQL warehouse Data Access Properties is failed using Python SDK with error. "
458+
f"Please try applying the following configs manually in the workspace admin UI:\n{sql_dac_log_msg}",
459+
)
460+
# TODO: Once https://github.com/databricks/databricks-sdk-py/issues/305 is fixed:
461+
# - Remove second try-except completely
462+
# - Remove first -except, only keeping the contents in the try-statement
361463
try:
362464
self._ws.warehouses.set_workspace_warehouse_config(
363465
data_access_config=sql_dac,
364466
sql_configuration_parameters=warehouse_config.sql_configuration_parameters,
365467
security_policy=security_policy,
366468
)
367-
# TODO: Remove following try except once https://github.com/databricks/databricks-sdk-py/issues/305 is fixed
368-
except InvalidParameterValue as error:
369-
sql_dac_log_msg = "\n".join(f"{config_pair.key} {config_pair.value}" for config_pair in sql_dac)
370-
logger.error(
371-
f'Adding uber principal to SQL warehouse Data Access Properties is failed using Python SDK with error "{error}". '
372-
f'Please try applying the following configs manually in the workspace admin UI:\n{sql_dac_log_msg}'
469+
logger.info(succeeded_message)
470+
except InvalidParameterValue as e:
471+
if "enable_serverless_compute" not in str(e):
472+
logger.error(error_message, exc_info=e)
473+
raise
474+
try:
475+
set_workspace_warehouse_config_wrapper(
476+
self._ws.api_client,
477+
data_access_config=sql_dac,
478+
sql_configuration_parameters=warehouse_config.sql_configuration_parameters,
479+
security_policy=security_policy,
480+
enable_serverless_compute=True,
373481
)
374-
raise error
482+
except InvalidParameterValue as e:
483+
logger.error(error_message, exc_info=e)
484+
raise
375485

376486
def create_uber_principal(self, prompts: Prompts) -> None:
377487
config = self._installation.load(WorkspaceConfig)
378488
inventory_database = config.inventory_database
379489
display_name = f"unity-catalog-migration-{inventory_database}-{self._ws.get_workspace_id()}"
380-
uber_principal_name = prompts.question(
381-
"Enter a name for the uber service principal to be created", default=display_name
382-
)
383490
policy_id = config.policy_id
384491
if policy_id is None:
385492
msg = "UCX cluster policy not found in config. Please run latest UCX installation to set cluster policy"
@@ -395,6 +502,9 @@ def create_uber_principal(self, prompts: Prompts) -> None:
395502
"Please check if assessment job is run"
396503
)
397504
return
505+
uber_principal_name = prompts.question(
506+
"Enter a name for the uber service principal to be created", default=display_name
507+
)
398508
logger.info("Creating service principal")
399509

400510
secret_identifier = f"secrets/{inventory_database}/{self._UBER_PRINCIPAL_SECRET_KEY}"
@@ -421,19 +531,26 @@ def create_uber_principal(self, prompts: Prompts) -> None:
421531

422532
def _delete_uber_principal(self) -> None:
423533

424-
def log_permission_denied(function: Callable[P, R], *, message: str) -> Callable[P, R | None]:
534+
def safe_call(function: Callable[P, R], *, error_message: str) -> Callable[P, R | None]:
535+
"""Make a safe call of the function
536+
537+
Args:
538+
function (Callable[P, R]) : Function to safely call.
539+
error_message (str) : Message to log if error raised by the function
540+
"""
541+
425542
@wraps(function)
426543
def wrapper(*args: Any, **kwargs: Any) -> R | None:
427544
try:
428545
return function(*args, **kwargs)
429-
except PermissionDenied:
430-
logger.error(message, exc_info=True)
546+
except DatabricksError as e:
547+
logger.error(error_message, exc_info=e)
431548
return None
432549

433550
return wrapper
434551

435552
message = "Missing permissions to load the configuration"
436-
config = log_permission_denied(self._installation.load, message=message)(WorkspaceConfig)
553+
config = safe_call(self._installation.load, error_message=message)(WorkspaceConfig)
437554
if config is None or config.uber_spn_id is None:
438555
return
439556

@@ -442,25 +559,25 @@ def wrapper(*args: Any, **kwargs: Any) -> R | None:
442559

443560
storage_account_ids = ' '.join(str(st.id) for st in storage_accounts)
444561
message = f"Missing permissions to delete storage permissions for: {storage_account_ids}"
445-
log_permission_denied(self._azurerm.delete_storage_permission, message=message)(
562+
safe_call(self._azurerm.delete_storage_permission, error_message=message)(
446563
config.uber_spn_id, *storage_accounts, safe=True
447564
)
448565
message = f"Missing permissions to delete service principal: {config.uber_spn_id}"
449-
log_permission_denied(self._azurerm.delete_service_principal, message=message)(config.uber_spn_id, safe=True)
566+
safe_call(self._azurerm.delete_service_principal, error_message=message)(config.uber_spn_id, safe=True)
450567
if config.policy_id is not None:
451568
message = "Missing permissions to revert cluster policy"
452-
log_permission_denied(self._remove_service_principal_configuration_from_cluster_policy, message=message)(
569+
safe_call(self._remove_service_principal_configuration_from_cluster_policy, error_message=message)(
453570
config.policy_id, config.uber_spn_id, secret_identifier, storage_accounts
454571
)
455572
message = "Missing permissions to revert SQL warehouse config"
456-
log_permission_denied(
457-
self._remove_service_principal_configuration_from_workspace_warehouse_config, message=message
458-
)(config.uber_spn_id, secret_identifier, storage_accounts)
573+
safe_call(self._remove_service_principal_configuration_from_workspace_warehouse_config, error_message=message)(
574+
config.uber_spn_id, secret_identifier, storage_accounts
575+
)
459576
message = "Missing permissions to delete secret scope"
460-
log_permission_denied(self._safe_delete_scope, message=message)(config.inventory_database)
577+
safe_call(self._safe_delete_scope, error_message=message)(config.inventory_database)
461578
message = "Missing permissions to save the configuration"
462579
config.uber_spn_id = None
463-
log_permission_denied(self._installation.save, message=message)(config)
580+
safe_call(self._installation.save, error_message=message)(config)
464581

465582
def _create_access_connector_for_storage_account(
466583
self, storage_account: StorageAccount, role_name: str = "STORAGE_BLOB_DATA_READER"
@@ -582,6 +699,8 @@ def _get_storage_accounts(self) -> list[StorageAccount]:
582699
storage_acct = location.location[start + 1 : end]
583700
if storage_acct not in used_storage_accounts:
584701
used_storage_accounts.append(storage_acct)
702+
if not used_storage_accounts: # Avoid unnecessary crawling storage accounts below
703+
return []
585704
storage_accounts = []
586705
for storage_account in self._azurerm.storage_accounts():
587706
if storage_account.name in used_storage_accounts:

0 commit comments

Comments
 (0)