Skip to content

Commit 1e9c991

Browse files
authored
Fix issue with missing users and temp groups after workspace-local groups migration and skip table when crawling table size if it does not exist anymore (#770)
1 parent 54c07dc commit 1e9c991

File tree

14 files changed

+877
-41
lines changed

14 files changed

+877
-41
lines changed

src/databricks/labs/ucx/framework/dashboards.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def viz_args(self) -> dict:
4747
class VizColumn:
4848
name: str
4949
title: str
50-
type: str = "string" # noqa: A003
50+
type: str = "string"
5151
imageUrlTemplate: str = "{{ @ }}" # noqa: N815
5252
imageTitleTemplate: str = "{{ @ }}" # noqa: N815
5353
linkUrlTemplate: str = "{{ @ }}" # noqa: N815

src/databricks/labs/ucx/hive_metastore/table_size.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,10 @@ def _crawl(self) -> Iterable[TableSize]:
4242
continue
4343
if not table.is_dbfs_root:
4444
continue
45-
size_in_bytes = self.get_table_size(table.key)
45+
size_in_bytes = self._safe_get_table_size(table.key)
46+
if size_in_bytes is None:
47+
continue # table does not exist anymore
48+
4649
yield TableSize(
4750
catalog=table.catalog, database=table.database, name=table.name, size_in_bytes=size_in_bytes
4851
)
@@ -55,12 +58,19 @@ def _try_load(self) -> Iterable[TableSize]:
5558
def snapshot(self) -> list[TableSize]:
5659
"""
5760
Takes a snapshot of tables in the specified catalog and database.
61+
Return None if the table cannot be found anymore.
5862
5963
Returns:
6064
list[Table]: A list of Table objects representing the snapshot of tables.
6165
"""
6266
return self._snapshot(partial(self._try_load), partial(self._crawl))
6367

64-
def get_table_size(self, table_full_name: str) -> int:
68+
def _safe_get_table_size(self, table_full_name: str) -> int | None:
6569
logger.debug(f"Evaluating {table_full_name} table size.")
66-
return self._spark._jsparkSession.table(table_full_name).queryExecution().analyzed().stats().sizeInBytes()
70+
try:
71+
return self._spark._jsparkSession.table(table_full_name).queryExecution().analyzed().stats().sizeInBytes()
72+
except Exception as e:
73+
if "[TABLE_OR_VIEW_NOT_FOUND]" in str(e):
74+
logger.warning(f"Failed to evaluate {table_full_name} table size. Table not found.")
75+
return None
76+
raise RuntimeError(str(e)) from e

src/databricks/labs/ucx/mixins/fixtures.py

Lines changed: 160 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from databricks.sdk.core import DatabricksError
1818
from databricks.sdk.errors import ResourceConflict
1919
from databricks.sdk.retries import retried
20-
from databricks.sdk.service import compute, iam, jobs, pipelines, workspace
20+
from databricks.sdk.service import compute, iam, jobs, pipelines, sql, workspace
2121
from databricks.sdk.service.catalog import (
2222
CatalogInfo,
2323
DataSourceFormat,
@@ -27,6 +27,8 @@
2727
)
2828
from databricks.sdk.service.sql import (
2929
CreateWarehouseRequestWarehouseType,
30+
GetResponse,
31+
ObjectTypePlural,
3032
Query,
3133
QueryInfo,
3234
)
@@ -271,6 +273,47 @@ def _path(ws, path):
271273
]
272274

273275

276+
def _redash_permissions_mapping():
277+
def _simple(_, object_id):
278+
return object_id
279+
280+
return [
281+
(
282+
"query",
283+
ObjectTypePlural.QUERIES,
284+
[
285+
sql.PermissionLevel.CAN_VIEW,
286+
sql.PermissionLevel.CAN_RUN,
287+
sql.PermissionLevel.CAN_MANAGE,
288+
sql.PermissionLevel.CAN_EDIT,
289+
],
290+
_simple,
291+
),
292+
(
293+
"alert",
294+
ObjectTypePlural.ALERTS,
295+
[
296+
sql.PermissionLevel.CAN_VIEW,
297+
sql.PermissionLevel.CAN_RUN,
298+
sql.PermissionLevel.CAN_MANAGE,
299+
sql.PermissionLevel.CAN_EDIT,
300+
],
301+
_simple,
302+
),
303+
(
304+
"dashboard",
305+
ObjectTypePlural.DASHBOARDS,
306+
[
307+
sql.PermissionLevel.CAN_VIEW,
308+
sql.PermissionLevel.CAN_RUN,
309+
sql.PermissionLevel.CAN_MANAGE,
310+
sql.PermissionLevel.CAN_EDIT,
311+
],
312+
_simple,
313+
),
314+
]
315+
316+
274317
class _PermissionsChange:
275318
def __init__(self, object_id: str, before: list[iam.AccessControlRequest], after: list[iam.AccessControlRequest]):
276319
self._object_id = object_id
@@ -293,6 +336,26 @@ def __repr__(self):
293336
return f"{self._object_id} [{self._list(self._before)}] -> [{self._list(self._after)}]"
294337

295338

339+
class _RedashPermissionsChange:
340+
def __init__(self, object_id: str, before: list[sql.AccessControl], after: list[sql.AccessControl]):
341+
self._object_id = object_id
342+
self._before = before
343+
self._after = after
344+
345+
@staticmethod
346+
def _principal(acr: sql.AccessControl) -> str:
347+
if acr.user_name is not None:
348+
return f"user_name {acr.user_name}"
349+
else:
350+
return f"group_name {acr.group_name}"
351+
352+
def _list(self, acl: list[sql.AccessControl]):
353+
return ", ".join(f"{self._principal(_)} {_.permission_level.value}" for _ in acl)
354+
355+
def __repr__(self):
356+
return f"{self._object_id} [{self._list(self._before)}] -> [{self._list(self._after)}]"
357+
358+
296359
def _make_permissions_factory(name, resource_type, levels, id_retriever):
297360
def _non_inherited(x: iam.ObjectPermissions):
298361
out: list[iam.AccessControlRequest] = []
@@ -337,14 +400,29 @@ def create(
337400
names = ", ".join(_.value for _ in levels)
338401
msg = f"invalid permission level: {permission_level.value}. Valid levels: {names}"
339402
raise ValueError(msg)
340-
access_control_list = [
341-
iam.AccessControlRequest(
342-
group_name=group_name,
343-
user_name=user_name,
344-
service_principal_name=service_principal_name,
345-
permission_level=permission_level,
403+
404+
access_control_list = []
405+
if group_name is not None:
406+
access_control_list.append(
407+
iam.AccessControlRequest(
408+
group_name=group_name,
409+
permission_level=permission_level,
410+
)
411+
)
412+
if user_name is not None:
413+
access_control_list.append(
414+
iam.AccessControlRequest(
415+
user_name=user_name,
416+
permission_level=permission_level,
417+
)
418+
)
419+
if service_principal_name is not None:
420+
access_control_list.append(
421+
iam.AccessControlRequest(
422+
service_principal_name=service_principal_name,
423+
permission_level=permission_level,
424+
)
346425
)
347-
]
348426
ws.permissions.update(resource_type, object_id, access_control_list=access_control_list)
349427
return _PermissionsChange(object_id, initial, access_control_list)
350428

@@ -356,12 +434,86 @@ def remove(change: _PermissionsChange):
356434
return _make_permissions
357435

358436

437+
def _make_redash_permissions_factory(name, resource_type, levels, id_retriever):
438+
def _non_inherited(x: GetResponse):
439+
out: list[sql.AccessControl] = []
440+
assert x.access_control_list is not None
441+
for access_control in x.access_control_list:
442+
out.append(
443+
sql.AccessControl(
444+
permission_level=access_control.permission_level,
445+
group_name=access_control.group_name,
446+
user_name=access_control.user_name,
447+
)
448+
)
449+
return out
450+
451+
def _make_permissions(ws):
452+
def create(
453+
*,
454+
object_id: str,
455+
permission_level: sql.PermissionLevel | None = None,
456+
group_name: str | None = None,
457+
user_name: str | None = None,
458+
access_control_list: Optional["list[sql.AccessControl]"] = None,
459+
):
460+
nothing_specified = permission_level is None and access_control_list is None
461+
both_specified = permission_level is not None and access_control_list is not None
462+
if nothing_specified or both_specified:
463+
msg = "either permission_level or access_control_list has to be specified"
464+
raise ValueError(msg)
465+
466+
object_id = id_retriever(ws, object_id)
467+
initial = _non_inherited(ws.dbsql_permissions.get(resource_type, object_id))
468+
469+
if access_control_list is None:
470+
if permission_level not in levels:
471+
assert permission_level is not None
472+
names = ", ".join(_.value for _ in levels)
473+
msg = f"invalid permission level: {permission_level.value}. Valid levels: {names}"
474+
raise ValueError(msg)
475+
476+
access_control_list = []
477+
if group_name is not None:
478+
access_control_list.append(
479+
sql.AccessControl(
480+
group_name=group_name,
481+
permission_level=permission_level,
482+
)
483+
)
484+
if user_name is not None:
485+
access_control_list.append(
486+
sql.AccessControl(
487+
user_name=user_name,
488+
permission_level=permission_level,
489+
)
490+
)
491+
492+
ws.dbsql_permissions.set(resource_type, object_id, access_control_list=access_control_list)
493+
return _RedashPermissionsChange(object_id, initial, access_control_list)
494+
495+
def remove(change: _RedashPermissionsChange):
496+
ws.dbsql_permissions.set(
497+
sql.ObjectTypePlural(resource_type), change._object_id, access_control_list=change._before
498+
)
499+
500+
yield from factory(f"{name} permissions", create, remove)
501+
502+
return _make_permissions
503+
504+
359505
for name, resource_type, levels, id_retriever in _permissions_mapping():
360506
# wrap function factory, otherwise loop scope sticks the wrong way
361507
locals()[f"make_{name}_permissions"] = pytest.fixture(
362508
_make_permissions_factory(name, resource_type, levels, id_retriever)
363509
)
364510

511+
for name, resource_type, levels, id_retriever in _redash_permissions_mapping():
512+
# wrap function factory, otherwise loop scope sticks the wrong way
513+
locals()[f"make_{name}_permissions"] = pytest.fixture(
514+
_make_redash_permissions_factory(name, resource_type, levels, id_retriever)
515+
)
516+
365517

366518
@pytest.fixture
367519
def make_secret_scope(ws, make_random):

src/databricks/labs/ucx/workspace_access/groups.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ def get_target_principal(self, name: str) -> str | None:
6868
return None
6969
return mg.name_in_account
7070

71+
def get_temp_principal(self, name: str) -> str | None:
72+
mg = self._name_to_group.get(name)
73+
if mg is None:
74+
return None
75+
return mg.temporary_name
76+
7177
def is_in_scope(self, name: str) -> bool:
7278
if name is None:
7379
return False

src/databricks/labs/ucx/workspace_access/redash.py

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,20 @@ def __iter__(self):
4141

4242
class RedashPermissionsSupport(AclSupport):
4343
def __init__(
44-
self, ws: WorkspaceClient, listings: list[Listing], verify_timeout: timedelta | None = timedelta(minutes=1)
44+
self,
45+
ws: WorkspaceClient,
46+
listings: list[Listing],
47+
set_permissions_timeout: timedelta | None = timedelta(minutes=1),
48+
# Group information in Redash are cached for up to 10 minutes causing inconsistencies.
49+
# If a group is renamed, the old name may still be returned by the dbsql get permissions api.
50+
# Note that the update/set API is strongly consistent and is not affected by this behaviour.
51+
# The validation step should keep retrying for at least 10 mins until the get api returns the new group name.
52+
# More details here: https://databricks.atlassian.net/browse/ES-992619
53+
verify_timeout: timedelta | None = timedelta(minutes=11),
4554
):
4655
self._ws = ws
4756
self._listings = listings
57+
self._set_permissions_timeout = set_permissions_timeout
4858
self._verify_timeout = verify_timeout
4959

5060
@staticmethod
@@ -85,6 +95,41 @@ def _safe_get_dbsql_permissions(self, object_type: sql.ObjectTypePlural, object_
8595
logger.warning(f"removed on backend: {object_type} {object_id}")
8696
return None
8797

98+
def _load_as_request(self, object_type: sql.ObjectTypePlural, object_id: str) -> list[sql.AccessControl]:
99+
loaded = self._safe_get_dbsql_permissions(object_type, object_id)
100+
if loaded is None:
101+
return []
102+
acl: list[sql.AccessControl] = []
103+
if not loaded.access_control_list:
104+
return acl
105+
106+
for v in loaded.access_control_list:
107+
acl.append(
108+
sql.AccessControl(
109+
permission_level=v.permission_level,
110+
group_name=v.group_name,
111+
user_name=v.user_name,
112+
)
113+
)
114+
# sort to return deterministic results
115+
return sorted(acl, key=lambda v: f"{v.group_name}:{v.user_name}")
116+
117+
def load_as_dict(self, object_type: sql.ObjectTypePlural, object_id: str) -> dict[str, sql.PermissionLevel]:
118+
result = {}
119+
for acl in self._load_as_request(object_type, object_id):
120+
if not acl.permission_level:
121+
continue
122+
result[self._key_for_acl_dict(acl)] = acl.permission_level
123+
return result
124+
125+
@staticmethod
126+
def _key_for_acl_dict(acl: sql.AccessControl) -> str:
127+
if acl.group_name is not None:
128+
return acl.group_name
129+
if acl.user_name is not None:
130+
return acl.user_name
131+
return "UNKNOWN"
132+
88133
@rate_limited(max_requests=100)
89134
def _crawler_task(self, object_id: str, object_type: sql.ObjectTypePlural) -> Permissions | None:
90135
permissions = self._safe_get_dbsql_permissions(object_type=object_type, object_id=object_id)
@@ -120,7 +165,7 @@ def _applier_task(self, object_type: sql.ObjectTypePlural, object_id: str, acl:
120165
This affects the way how we prepare the new ACL request.
121166
"""
122167

123-
set_retry_on_value_error = retried(on=[InternalError, ValueError], timeout=self._verify_timeout)
168+
set_retry_on_value_error = retried(on=[InternalError, ValueError], timeout=self._set_permissions_timeout)
124169
set_retried_check = set_retry_on_value_error(self._safe_set_permissions)
125170
set_retried_check(object_type, object_id, acl)
126171

@@ -133,22 +178,45 @@ def _prepare_new_acl(
133178
) -> list[sql.AccessControl]:
134179
"""
135180
Please note the comment above on how we apply these permissions.
181+
Permissions are set/replaced and not updated/patched, therefore all existing ACLs need to be collected
182+
including users and temp/backup groups.
136183
"""
137184
acl_requests: list[sql.AccessControl] = []
138185
for access_control in acl:
186+
if access_control.user_name:
187+
logger.debug(f"Including redash permissions acl for user: `{access_control.user_name}`")
188+
acl_requests.append(access_control)
189+
continue
190+
139191
if not access_control.group_name:
140192
continue
193+
141194
if not migration_state.is_in_scope(access_control.group_name):
142-
logger.debug(f"Skipping redash item for `{access_control.group_name}`: not in scope")
195+
logger.debug(
196+
f"Including redash permissions acl for group not in the scope: `{access_control.group_name}`"
197+
)
143198
acl_requests.append(access_control)
144199
continue
200+
145201
target_principal = migration_state.get_target_principal(access_control.group_name)
146-
if target_principal is None:
147-
logger.debug(f"Skipping redash item for `{access_control.group_name}`: no target principal")
202+
if not target_principal:
203+
logger.debug(
204+
f"Including redash permissions acl for group without target principal: "
205+
f"`{access_control.group_name}`"
206+
)
148207
acl_requests.append(access_control)
149208
continue
209+
210+
logger.debug(f"Including redash permissions acl for target group `{target_principal}`")
150211
new_acl_request = dataclasses.replace(access_control, group_name=target_principal)
151212
acl_requests.append(new_acl_request)
213+
214+
temp_principal = migration_state.get_temp_principal(access_control.group_name)
215+
if temp_principal is not None:
216+
logger.debug(f"Including redash permissions acl for temp group `{temp_principal}`")
217+
temp_group_new_acl_request = dataclasses.replace(access_control, group_name=temp_principal)
218+
acl_requests.append(temp_group_new_acl_request)
219+
152220
return acl_requests
153221

154222
@rate_limited(burst_period_seconds=30)

0 commit comments

Comments
 (0)