|
4 | 4 | from collections.abc import Iterable |
5 | 5 | from dataclasses import dataclass |
6 | 6 |
|
7 | | -from databricks.labs.blueprint.installation import Installation |
8 | 7 | from databricks.sdk import WorkspaceClient |
9 | | -from databricks.sdk.core import ( |
10 | | - ApiClient, |
11 | | - AzureCliTokenSource, |
12 | | - Config, |
13 | | - credentials_provider, |
14 | | -) |
15 | 8 | from databricks.sdk.errors import NotFound |
16 | | -from databricks.sdk.service.catalog import Privilege |
17 | 9 | from databricks.sdk.service.compute import ClusterSource, Policy |
18 | 10 |
|
19 | 11 | from databricks.labs.ucx.assessment.crawlers import ( |
|
25 | 17 | logger, |
26 | 18 | ) |
27 | 19 | from databricks.labs.ucx.assessment.jobs import JobsMixin |
28 | | -from databricks.labs.ucx.config import WorkspaceConfig |
29 | | -from databricks.labs.ucx.framework.crawlers import ( |
30 | | - CrawlerBase, |
31 | | - SqlBackend, |
32 | | - StatementExecutionBackend, |
33 | | -) |
34 | | -from databricks.labs.ucx.hive_metastore.locations import ExternalLocations |
| 20 | +from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend |
35 | 21 |
|
36 | 22 |
|
37 | 23 | @dataclass |
@@ -244,293 +230,3 @@ def snapshot(self) -> Iterable[AzureServicePrincipalInfo]: |
def _try_fetch(self) -> Iterable[AzureServicePrincipalInfo]:
    """Yield one ``AzureServicePrincipalInfo`` per row of the backing table.

    Runs ``SELECT *`` against ``{self._schema}.{self._table}`` through
    ``self._fetch`` and expands each returned row positionally into the
    dataclass constructor.
    """
    query = f"SELECT * FROM {self._schema}.{self._table}"
    yield from (AzureServicePrincipalInfo(*record) for record in self._fetch(query))
247 | | - |
248 | | - |
@dataclass
class AzureSubscription:
    """The subset of an Azure subscription record that this module keeps."""

    # Display name of the subscription (the API's ``displayName`` field).
    name: str
    # Subscription identifier (the API's ``subscriptionId`` field).
    subscription_id: str
    # Identifier of the tenant owning the subscription (``tenantId`` field).
    tenant_id: str
254 | | - |
255 | | - |
class AzureResource:
    """Parsed view over an Azure resource identifier.

    The identifier is treated as a ``/``-separated sequence of ``key/value``
    pairs, for example
    ``/subscriptions/<id>/resourceGroups/<name>/.../storageAccounts/<name>``.
    Leading slashes are ignored. The well-known keys are exposed as
    properties; a property is ``None`` when its key is absent.

    Instances compare equal when their raw identifiers are equal, and they
    hash consistently with that equality, so they can be used in sets and as
    dictionary keys.

    Raises:
        ValueError: if the identifier does not split into complete pairs.
    """

    def __init__(self, resource_id: str):
        self._resource_id = resource_id
        segments = resource_id.lstrip("/").split("/")
        if len(segments) % 2 != 0:
            msg = f"not a list of pairs: {resource_id}"
            raise ValueError(msg)
        # Even positions hold keys, odd positions hold the matching values.
        # When a key repeats, the last occurrence wins.
        self._pairs = dict(zip(segments[::2], segments[1::2]))

    @property
    def subscription_id(self):
        """Value following ``subscriptions`` in the identifier, or ``None``."""
        return self._pairs.get("subscriptions")

    @property
    def resource_group(self):
        """Value following ``resourceGroups`` in the identifier, or ``None``."""
        return self._pairs.get("resourceGroups")

    @property
    def storage_account(self):
        """Value following ``storageAccounts`` in the identifier, or ``None``."""
        return self._pairs.get("storageAccounts")

    @property
    def container(self):
        """Value following ``containers`` in the identifier, or ``None``."""
        return self._pairs.get("containers")

    def __eq__(self, other):
        if not isinstance(other, AzureResource):
            return NotImplemented
        return self._resource_id == other._resource_id

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None; hash on the same field
        # that equality compares so equal resources share a hash.
        return hash(self._resource_id)

    def __repr__(self):
        properties = ["subscription_id", "resource_group", "storage_account", "container"]
        pairs = [f"{name}={getattr(self, name)}" for name in properties]
        return f'AzureResource<{", ".join(pairs)}>'

    def __str__(self):
        # The raw identifier, so the object can be interpolated into API paths.
        return self._resource_id
299 | | - |
300 | | - |
@dataclass
class Principal:
    """Identity of a directory principal as used by this module."""

    # Application (client) id, taken from the directory object's ``appId``.
    client_id: str
    # Human-readable name, taken from the directory object's ``displayName``.
    display_name: str
    # Directory object id, taken from the directory object's ``id``.
    object_id: str
306 | | - |
307 | | - |
@dataclass
class AzureRoleAssignment:
    """A single role granted to a principal, as seen from one resource."""

    # The resource whose role assignments were listed.
    resource: AzureResource
    # The scope the assignment is attached to.
    scope: AzureResource
    # The principal that holds the role.
    principal: Principal
    # The ``roleName`` of the assigned role definition.
    role_name: str
314 | | - |
315 | | - |
class AzureResources:
    """Thin client over Azure Resource Manager and Microsoft Graph.

    Authentication for both APIs goes through ``AzureCliTokenSource``.

    Args:
        ws: workspace client; its ``config.arm_environment`` supplies the
            resource manager and service management endpoints.
        include_subscriptions: optional collection of subscription ids to
            restrict :meth:`subscriptions` to. When omitted or empty, every
            subscription of the current tenant is included.
    """

    def __init__(self, ws: WorkspaceClient, *, include_subscriptions=None):
        if not include_subscriptions:
            include_subscriptions = []
        rm_host = ws.config.arm_environment.resource_manager_endpoint
        self._resource_manager = ApiClient(
            Config(
                host=rm_host,
                credentials_provider=self._provider_for(ws.config.arm_environment.service_management_endpoint),
            )
        )
        self._graph = ApiClient(
            Config(
                host="https://graph.microsoft.com",
                credentials_provider=self._provider_for("https://graph.microsoft.com"),
            )
        )
        self._token_source = AzureCliTokenSource(rm_host)
        self._include_subscriptions = include_subscriptions
        # role definition id -> role name, filled lazily by _get_role_name()
        self._role_definitions: dict[str, str] = {}
        # principal id -> Principal, or None when the lookup returned NotFound
        self._principals: dict[str, Principal | None] = {}

    def _provider_for(self, endpoint: str):
        """Build an ``azure-cli`` credentials provider issuing tokens for ``endpoint``."""

        @credentials_provider("azure-cli", ["host"])
        def _credentials(_: Config):
            token_source = AzureCliTokenSource(endpoint)

            def inner() -> dict[str, str]:
                # A token is requested from the token source on every call.
                token = token_source.token()
                return {"Authorization": f"{token.token_type} {token.access_token}"}

            return inner

        return _credentials

    def _get_subscriptions(self) -> Iterable[AzureSubscription]:
        """Yield every subscription returned by the ``/subscriptions`` endpoint."""
        for subscription in self._get_resource("/subscriptions", api_version="2022-12-01").get("value", []):
            yield AzureSubscription(
                name=subscription["displayName"],
                subscription_id=subscription["subscriptionId"],
                tenant_id=subscription["tenantId"],
            )

    def _tenant_id(self):
        """Return the ``tid`` claim of the current resource manager token."""
        token = self._token_source.token()
        return token.jwt_claims().get("tid")

    def subscriptions(self):
        """Yield subscriptions of the current tenant, honouring the include filter.

        Subscriptions from other tenants are always skipped. When
        ``include_subscriptions`` was given, only those ids are yielded; an
        empty filter means "no filter" rather than "nothing".
        """
        tenant_id = self._tenant_id()
        for subscription in self._get_subscriptions():
            if subscription.tenant_id != tenant_id:
                continue
            # Previously an empty filter rejected every subscription, so a
            # default-constructed instance could never find any resources.
            if self._include_subscriptions and subscription.subscription_id not in self._include_subscriptions:
                continue
            yield subscription

    def _get_resource(self, path: str, api_version: str):
        """Issue a ``GET`` against the resource manager for ``path``."""
        headers = {"Accept": "application/json"}
        query = {"api-version": api_version}
        return self._resource_manager.do("GET", path, query=query, headers=headers)

    def storage_accounts(self) -> Iterable[AzureResource]:
        """Yield the storage accounts of every subscription from :meth:`subscriptions`."""
        for subscription in self.subscriptions():
            logger.info(f"Checking in subscription {subscription.name} for storage accounts")
            path = f"/subscriptions/{subscription.subscription_id}/providers/Microsoft.Storage/storageAccounts"
            for storage in self._get_resource(path, "2023-01-01").get("value", []):
                resource_id = storage.get("id")
                if not resource_id:
                    continue
                yield AzureResource(resource_id)

    def containers(self, storage: AzureResource):
        """Yield the blob containers of the given storage account."""
        for raw in self._get_resource(f"{storage}/blobServices/default/containers", "2023-01-01").get("value", []):
            resource_id = raw.get("id")
            if not resource_id:
                continue
            yield AzureResource(resource_id)

    def _get_principal(self, principal_id: str) -> Principal | None:
        """Resolve a directory object id to a ``Principal``, with caching.

        Returns ``None`` (and remembers it) when the directory lookup raises
        ``NotFound``.
        """
        if principal_id in self._principals:
            return self._principals[principal_id]
        try:
            path = f"/v1.0/directoryObjects/{principal_id}"
            raw: dict[str, str] = self._graph.do("GET", path)  # type: ignore[assignment]
        except NotFound:
            # don't load principals from external directories twice
            self._principals[principal_id] = None
            return self._principals[principal_id]
        client_id = raw.get("appId")
        display_name = raw.get("displayName")
        object_id = raw.get("id")
        # NOTE(review): these asserts are skipped under ``python -O`` and fail
        # for directory objects that lack ``appId``; kept so the exception
        # type seen by callers is unchanged.
        assert client_id is not None
        assert display_name is not None
        assert object_id is not None
        self._principals[principal_id] = Principal(client_id, display_name, object_id)
        return self._principals[principal_id]

    def _get_role_name(self, role_definition_id: str) -> str | None:
        """Return the role name for a role definition id, caching hits.

        Definitions without a ``roleName`` are not cached and yield ``None``.
        """
        if role_definition_id not in self._role_definitions:
            role_definition = self._get_resource(role_definition_id, "2022-04-01")
            role_name = role_definition.get("properties", {}).get("roleName")
            if not role_name:
                return None
            self._role_definitions[role_definition_id] = role_name
        return self._role_definitions[role_definition_id]

    def role_assignments(
        self, resource_id: str, *, principal_types: list[str] | None = None
    ) -> Iterable[AzureRoleAssignment]:
        """Yield the role assignments that apply to ``resource_id``.

        See https://learn.microsoft.com/en-us/rest/api/authorization/role-assignments/list-for-resource

        Args:
            resource_id: full identifier of the resource to inspect.
            principal_types: principal types to keep; defaults to
                ``["ServicePrincipal"]`` when omitted or empty.

        Assignments that miss a principal type, principal id, role definition
        id or scope are skipped, as are assignments whose role name or
        principal cannot be resolved.
        """
        if not principal_types:
            principal_types = ["ServicePrincipal"]
        result = self._get_resource(f"{resource_id}/providers/Microsoft.Authorization/roleAssignments", "2022-04-01")
        for role_assignment in result.get("value", []):
            assignment_properties = role_assignment.get("properties", {})
            principal_type = assignment_properties.get("principalType")
            principal_id = assignment_properties.get("principalId")
            role_definition_id = assignment_properties.get("roleDefinitionId")
            scope = assignment_properties.get("scope")
            if not (principal_type and principal_id and role_definition_id and scope):
                continue
            if principal_type not in principal_types:
                continue
            # Resolve the role before the principal, matching the original order.
            role_name = self._get_role_name(role_definition_id)
            if not role_name:
                continue
            principal = self._get_principal(principal_id)
            if not principal:
                continue
            if scope == "/":
                # "/" cannot be parsed as key/value pairs, so use the resource itself.
                scope = resource_id
            yield AzureRoleAssignment(
                resource=AzureResource(resource_id),
                scope=AzureResource(scope),
                principal=principal,
                role_name=role_name,
            )
455 | | - |
456 | | - |
@dataclass
class StoragePermissionMapping:
    """One row of the saved storage-permission mapping."""

    # Storage location prefix, e.g. ``abfss://<container>@<account>.dfs.core.windows.net/``.
    prefix: str
    # Client (application) id of the principal holding the role.
    client_id: str
    # Display name of that principal.
    principal: str
    # Privilege value derived from the assigned role name.
    privilege: str
463 | | - |
464 | | - |
class AzureResourcePermissions:
    """Maps storage-account role assignments to file privileges and saves them.

    Args:
        installation: installation used to load configuration and save the result.
        ws: workspace client.
        azurerm: client used to list storage accounts, containers and role assignments.
        lc: external locations crawler whose snapshot tells which storage
            accounts are in use.
    """

    def __init__(self, installation: Installation, ws: WorkspaceClient, azurerm: AzureResources, lc: ExternalLocations):
        self._filename = 'azure_storage_account_info.csv'
        self._installation = installation
        self._locations = lc
        self._azurerm = azurerm
        self._ws = ws
        # Role names that translate into a privilege; any other role is ignored.
        self._levels = {
            "Storage Blob Data Contributor": Privilege.WRITE_FILES,
            "Storage Blob Data Owner": Privilege.WRITE_FILES,
            "Storage Blob Data Reader": Privilege.READ_FILES,
        }

    @classmethod
    def for_cli(cls, ws: WorkspaceClient, product='ucx'):
        """Alternate constructor wiring all collaborators from the current installation."""
        installation = Installation.current(ws, product)
        config = installation.load(WorkspaceConfig)
        sql_backend = StatementExecutionBackend(ws, config.warehouse_id)
        azurerm = AzureResources(ws)
        locations = ExternalLocations(ws, sql_backend, config.inventory_database)
        return cls(installation, ws, azurerm, locations)

    def _map_storage(self, storage: AzureResource) -> list[StoragePermissionMapping]:
        """Return one mapping per recognised role assignment on each container of ``storage``."""
        logger.info(f"Fetching role assignment for {storage.storage_account}")
        out = []
        for container in self._azurerm.containers(storage):
            # The prefix depends only on the container, so build it once.
            prefix = f"abfss://{container.container}@{container.storage_account}.dfs.core.windows.net/"
            for role_assignment in self._azurerm.role_assignments(str(container)):
                # one principal may be assigned multiple roles with overlapping dataActions, hence appearing
                # here in duplicates. hence, role name -> permission level is not enough for the perfect scenario.
                if role_assignment.role_name not in self._levels:
                    continue
                privilege = self._levels[role_assignment.role_name].value
                out.append(
                    StoragePermissionMapping(
                        prefix=prefix,
                        client_id=role_assignment.principal.client_id,
                        principal=role_assignment.principal.display_name,
                        privilege=privilege,
                    )
                )
        return out

    def save_spn_permissions(self) -> str | None:
        """Collect mappings for all used storage accounts and save them.

        Returns:
            Whatever ``installation.save`` returns, or ``None`` when no
            external location uses an Azure storage account or no mapping
            could be produced.
        """
        used_storage_accounts = self._get_storage_accounts()
        if not used_storage_accounts:
            logger.warning(
                "There are no external table present with azure storage account. "
                "Please check if assessment job is run"
            )
            return None
        storage_account_infos = []
        for storage in self._azurerm.storage_accounts():
            if storage.storage_account not in used_storage_accounts:
                continue
            storage_account_infos.extend(self._map_storage(storage))
        if not storage_account_infos:
            logger.error("No storage account found in current tenant with spn permission")
            return None
        return self._installation.save(storage_account_infos, filename=self._filename)

    def _get_storage_accounts(self) -> list[str]:
        """Return the distinct storage account names used by ``abfss://`` locations.

        Names keep first-seen order. Locations that start with ``abfss://``
        but lack the ``@<account>.dfs.core.windows.net`` part are logged and
        skipped instead of aborting the whole run with ``ValueError``.
        """
        # A dict serves as an insertion-ordered set.
        storage_accounts: dict[str, None] = {}
        for location in self._locations.snapshot():
            url = location.location
            if not url.startswith("abfss://"):
                continue
            _, at_sign, host = url.partition("@")
            storage_acct, suffix, _ = host.partition(".dfs.core.windows.net")
            if not (at_sign and suffix and storage_acct):
                logger.warning(f"Skipping abfss location with unexpected format: {url}")
                continue
            storage_accounts.setdefault(storage_acct)
        return list(storage_accounts)
0 commit comments