Skip to content

Commit bca711d

Browse files
authored
Adds retries and uses listing to fetch groups (#4370)
## Changes The original function being used to fetch groups is taking a long time to be eventually consistent. Using the same retry logic and fetching the entire groups list is a much faster approach. ### Linked issues Resolves #4368 Resolves #4337 Resolved #4334 ### Tests - [x] modified integration tests
1 parent dfd0469 commit bca711d

File tree

3 files changed

+24
-27
lines changed

3 files changed

+24
-27
lines changed

tests/integration/conftest.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from dataclasses import replace
1010
from datetime import timedelta
1111
from functools import cached_property
12-
from typing import Literal
12+
from typing import Literal, NoReturn
1313

1414
import pytest # pylint: disable=wrong-import-order
1515
from databricks.labs.blueprint.commands import CommandExecutor
@@ -1465,3 +1465,17 @@ def _run(command: str) -> str:
14651465
except ValueError as err:
14661466
logger.debug(f"pytest_ignore_collect: error: {err}")
14671467
return False
1468+
1469+
1470+
@retried(on=[AssertionError], timeout=timedelta(minutes=1))
1471+
def get_group(group_manager: GroupManager, group_name: str) -> NoReturn:
1472+
"""
1473+
# Group deletion is eventually consistent. Although the group manager tries to wait for convergence, parts of the
1474+
# API internals have a 60s timeout. As such we should wait at least that long before concluding deletion has not
1475+
# happened.
1476+
"""
1477+
deleted_group = group_manager.has_workspace_group(group_name)
1478+
if deleted_group:
1479+
logger.info(f"Group {group_name} was not deleted. Retrying...")
1480+
raise AssertionError(f"Group is not deleted: {group_name}")
1481+
raise NotFound(f"Group not found: {group_name}")

tests/integration/install/test_installation.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import subprocess
55
import sys
66
from datetime import timedelta
7-
from typing import NoReturn
87

98
import pytest
109

@@ -28,7 +27,7 @@
2827
from databricks.labs.ucx.config import WorkspaceConfig
2928
from databricks.labs.ucx.install import WorkspaceInstaller
3029

31-
from ..conftest import MockInstallationContext
30+
from ..conftest import MockInstallationContext, get_group
3231

3332
logger = logging.getLogger(__name__)
3433

@@ -155,6 +154,8 @@ def test_job_cluster_on_uc_enabled_workpace(ws, installation_ctx) -> None:
155154
@retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=5))
156155
def test_running_real_remove_backup_groups_job(ws: WorkspaceClient, installation_ctx: MockInstallationContext) -> None:
157156
ws_group_a, _ = installation_ctx.make_ucx_group(wait_for_provisioning=True)
157+
assert installation_ctx.group_manager.has_workspace_group(ws_group_a.display_name)
158+
158159
installation_ctx.workspace_installation.run()
159160
installation_ctx.group_manager.snapshot()
160161
installation_ctx.group_manager.rename_groups()
@@ -165,17 +166,8 @@ def test_running_real_remove_backup_groups_job(ws: WorkspaceClient, installation
165166

166167
assert installation_ctx.deployed_workflows.validate_step(workflow), f"Workflow failed: {workflow}"
167168

168-
# Group deletion is eventually consistent. Although the group manager tries to wait for convergence, parts of the
169-
# API internals have a 60s timeout. As such we should wait at least that long before concluding deletion has not
170-
# happened.
171-
# Note: If you are adjusting this, also look at: test_delete_ws_groups_should_delete_renamed_and_reflected_groups_only
172-
@retried(on=[KeyError], timeout=timedelta(minutes=5))
173-
def get_group(group_id: str) -> NoReturn:
174-
ws.groups.get(group_id)
175-
raise KeyError(f"Group is not deleted: {group_id}")
176-
177169
with pytest.raises(NotFound):
178-
get_group(ws_group_a.id)
170+
get_group(installation_ctx.group_manager, ws_group_a.display_name)
179171

180172

181173
@retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=5))

tests/integration/workspace_access/test_groups.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,14 @@
22
import logging
33
import re
44
from datetime import timedelta
5-
from typing import NoReturn
65

76
import pytest
87
from databricks.sdk.errors import NotFound, ResourceConflict
98
from databricks.sdk.retries import retried
109
from databricks.sdk.service.iam import Group, ResourceMeta
1110

1211
from databricks.labs.ucx.workspace_access.groups import GroupManager, MigratedGroup
13-
12+
from tests.integration.conftest import get_group
1413

1514
logger = logging.getLogger(__name__)
1615

@@ -129,7 +128,7 @@ def test_reflect_account_groups_on_workspace(ws, make_ucx_group, sql_backend, in
129128
# At this time previous ws level groups aren't deleted
130129

131130

132-
@retried(on=[NotFound], timeout=timedelta(minutes=5))
131+
@retried(on=[NotFound], timeout=timedelta(minutes=2))
133132
def test_delete_ws_groups_should_delete_renamed_and_reflected_groups_only(
134133
ws, make_ucx_group, sql_backend, inventory_schema
135134
):
@@ -142,24 +141,16 @@ def test_delete_ws_groups_should_delete_renamed_and_reflected_groups_only(
142141
[ws_group.display_name],
143142
"ucx-temp-",
144143
)
144+
assert group_manager.has_workspace_group(ws_group.display_name)
145145
group_manager.rename_groups()
146146
group_manager.reflect_account_groups_on_workspace()
147147
group_manager.delete_original_workspace_groups()
148148

149-
# Group deletion is eventually consistent. Although the group manager tries to wait for convergence, parts of the
150-
# API internals have a 60s timeout. As such we should wait at least that long before concluding deletion has not
151-
# happened.
152-
# Note: If you are adjusting this, also look at: test_running_real_remove_backup_groups_job
153-
@retried(on=[KeyError], timeout=timedelta(minutes=5))
154-
def get_group(group_id: str) -> NoReturn:
155-
ws.groups.get(group_id)
156-
raise KeyError(f"Group is not deleted: {group_id}")
157-
158149
with pytest.raises(NotFound):
159-
get_group(ws_group.id)
150+
get_group(group_manager, ws_group.display_name)
160151

161152

162-
@retried(on=[NotFound], timeout=timedelta(minutes=3))
153+
@retried(on=[NotFound], timeout=timedelta(minutes=2))
163154
def test_delete_ws_groups_should_not_delete_current_ws_groups(ws, make_ucx_group, sql_backend, inventory_schema):
164155
ws_group, _ = make_ucx_group()
165156

0 commit comments

Comments
 (0)