Commit 5c92886

Improve integration test stability (#597)
This PR fixes the bugs that prevented a successful integration testing pass.
1 parent 104e1b7 commit 5c92886

27 files changed: +158 -226 lines changed

.github/workflows/acceptance.yml

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ jobs:
         subscription-id: ${{ secrets.ARM_SUBSCRIPTION_ID }}

     - name: Run integration tests
-      run: hatch run integration:test
+      run: hatch run integration
       env:
         CLOUD_ENV: "${{ vars.CLOUD_ENV }}"
         DATABRICKS_HOST: "${{ secrets.DATABRICKS_HOST }}"
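The `integration:test` → `integration` rename follows from the pyproject.toml change later in this commit, which folds the separate hatch environments into the default one.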

.github/workflows/push.yml

Lines changed: 2 additions & 2 deletions
@@ -38,7 +38,7 @@ jobs:
       run: pip install hatch==$HATCH_VERSION

     - name: Run unit tests
-      run: hatch run unit:test
+      run: hatch run test

     - name: Publish test coverage
       uses: codecov/codecov-action@v1
@@ -60,4 +60,4 @@ jobs:
       run: pip install hatch==$HATCH_VERSION

     - name: Verify linting
-      run: hatch run lint:verify
+      run: hatch run verify

CONTRIBUTING.md

Lines changed: 16 additions & 0 deletions
@@ -118,6 +118,22 @@ in turn, contributes to the overall reliability and quality of our software.
 Currently, VSCode IDE is not supported, as it does not offer interactive debugging of single integration tests.
 However, it's possible that this limitation may be addressed in the future.

+### Flaky tests
+
+You can add the `@retried` decorator to deal with [flaky tests](https://docs.pytest.org/en/latest/explanation/flaky.html):
+
+```python
+from datetime import timedelta
+
+from databricks.sdk.errors import NotFound
+from databricks.sdk.retries import retried
+
+@retried(on=[NotFound], timeout=timedelta(minutes=5))
+def test_something(ws):
+    ...
+
+```
+
 ## Local Setup

 This section provides a step-by-step guide to set up and start working on the project. These steps will help you set up your project environment and dependencies for efficient development.
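For reference, `@retried` from the Databricks SDK re-runs the wrapped function whenever it raises one of the exception types listed in `on`, retrying until the `timeout` window elapses; exceptions of any other type still fail the test immediately.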

Makefile

Lines changed: 12 additions & 12 deletions
@@ -1,27 +1,27 @@
-all: clean lint fmt test
+all: clean lint fmt test coverage

 clean:
-	rm -fr htmlcov .mypy_cache .pytest_cache .ruff_cache .coverage coverage.xml
-	hatch env remove unit
+	rm -fr .venv clean htmlcov .mypy_cache .pytest_cache .ruff_cache .coverage coverage.xml

-dev:
+.venv/bin/python:
 	pip install hatch
 	hatch env create
-	hatch run pip install -e '.[test]'
-	hatch run which python
+
+dev: .venv/bin/python
+	@hatch run which python

 lint:
-	hatch run lint:verify
+	hatch run verify

 fmt:
-	hatch run lint:fmt
+	hatch run fmt

 test:
-	hatch run unit:test
+	hatch run test

 integration:
-	hatch run integration:test
+	hatch run integration

-test-cov:
-	hatch run unit:test-cov-report && open htmlcov/index.html
+coverage:
+	hatch run coverage && open htmlcov/index.html
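Making `dev` depend on the `.venv/bin/python` file target is a standard Make idiom: the recipe that creates the environment runs only when that file is missing, so repeated `make dev` invocations become cheap no-ops that just print the interpreter path.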

pyproject.toml

Lines changed: 27 additions & 47 deletions
@@ -26,19 +26,8 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: Implementation :: CPython",
 ]
-dependencies = [
-    "databricks-sdk~=0.13.0",
-    "PyYAML>=6.0.0,<7.0.0",
-]
-
-[project.optional-dependencies]
-test = [
-    "coverage[toml]>=6.5",
-    "pytest",
-    "pytest-xdist",
-    "pytest-cov>=4.0.0,<5.0.0",
-    "pytest-mock>=3.0.0,<4.0.0",
-]
+dependencies = ["databricks-sdk~=0.13.0",
+                "PyYAML>=6.0.0,<7.0.0"]

 [project.entry-points.databricks]
 runtime = "databricks.labs.ucx.runtime:main"
@@ -50,58 +39,49 @@ Source = "https://github.com/databricks/ucx"
 [tool.hatch.version]
 path = "src/databricks/labs/ucx/__about__.py"

-[tool.hatch.envs.unit]
+[tool.hatch.envs.default]
 dependencies = [
-    "databricks-labs-ucx[test]"
+    "coverage[toml]>=6.5",
+    "pytest",
+    "pytest-xdist",
+    "pytest-cov>=4.0.0,<5.0.0",
+    "pytest-mock>=3.0.0,<4.0.0",
+    "black>=23.1.0",
+    "ruff>=0.0.243",
+    "isort>=2.5.0",
 ]

-[tool.hatch.envs.unit.scripts]
-test = "pytest --cov src --cov-report=xml tests/unit"
-test-cov-report = "pytest --cov src tests/unit --cov-report=html"
+python="3.10"

-[tool.hatch.envs.integration]
-dependencies = [
-    "databricks-labs-ucx[test]",
-]
+# store virtual env as the child of this folder. Helps VSCode (and PyCharm) to run better
+path = ".venv"

-[tool.hatch.envs.integration.scripts]
-test = "pytest --cov src tests/integration"
-parallel = "pytest -n auto --cov src tests/integration"
-
-[tool.hatch.envs.lint]
-detached = true
-dependencies = [
-    "black>=23.1.0",
-    "ruff>=0.0.243",
-    "isort>=2.5.0"
-]
-[tool.hatch.envs.lint.scripts]
-fmt = [
-    "isort .",
-    "black .",
-    "ruff . --fix",
-]
-verify = [
-    "black --check .",
-    "isort . --check-only",
-    "ruff .",
-]
+[tool.hatch.envs.default.scripts]
+test = "pytest -n auto --cov src --cov-report=xml tests/unit"
+coverage = "pytest -n auto --cov src tests/unit --cov-report=html"
+integration = "pytest -n 10 --cov src tests/integration"
+fmt = ["isort .",
+       "black .",
+       "ruff . --fix"]
+verify = ["black --check .",
+          "isort . --check-only",
+          "ruff ."]

 [tool.isort]
-skip_glob = [
-    "notebooks/*.py"
-]
+skip_glob = ["notebooks/*.py"]
 profile = "black"

 [tool.pytest.ini_options]
 addopts = "-s -p no:warnings -vv --cache-clear"
+cache_dir = ".venv/pytest-cache"

 [tool.black]
 target-version = ["py310"]
 line-length = 120
 skip-string-normalization = true

 [tool.ruff]
+cache-dir = ".venv/ruff-cache"
 target-version = "py310"
 line-length = 120
 select = [
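This is the core of the commit's cleanup: the separate `unit`, `integration`, and `lint` hatch environments are merged into the single default environment, which is why commands across the Makefile and workflows shorten from `hatch run unit:test` to `hatch run test` (and likewise for `integration` and `verify`). Pinning `path = ".venv"` keeps the interpreter where IDEs expect it, and moving the pytest and ruff caches under `.venv` lets the Makefile's `clean` target wipe everything in one `rm`.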

src/databricks/labs/ucx/assessment/crawlers.py

Lines changed: 10 additions & 28 deletions
@@ -5,7 +5,7 @@
 from dataclasses import dataclass

 from databricks.sdk import WorkspaceClient
-from databricks.sdk.core import DatabricksError
+from databricks.sdk.errors import DatabricksError, NotFound
 from databricks.sdk.service.compute import ClusterSource, Policy
 from databricks.sdk.service.jobs import BaseJob

@@ -345,15 +345,9 @@ def _get_azure_spn_with_data_access(self, cluster_config):
     def _safe_get_cluster_policy(self, policy_id: str) -> Policy | None:
         try:
             return self._ws.cluster_policies.get(policy_id)
-        except DatabricksError as err:
-            if err.error_code == "RESOURCE_DOES_NOT_EXIST":
-                logger.warning(
-                    f"Error retrieving cluster policy {policy_id}. The cluster policy was deleted. Error: {err}"
-                )
-            else:
-                raise err
-
-        return None
+        except NotFound:
+            logger.warning(f"The cluster policy was deleted: {policy_id}")
+            return None

     def _list_all_spn_in_sql_warehouses_spark_conf(self) -> list:
         warehouse_config_list = self._ws.warehouses.get_workspace_warehouse_config().data_access_config
@@ -510,15 +504,9 @@ def _assess_clusters(self, all_clusters):
     def _safe_get_cluster_policy(self, policy_id: str) -> Policy | None:
         try:
             return self._ws.cluster_policies.get(policy_id)
-        except DatabricksError as err:
-            if err.error_code == "RESOURCE_DOES_NOT_EXIST":
-                logger.warning(
-                    f"Error retrieving cluster policy {policy_id}. The cluster policy was deleted. Error: {err}"
-                )
-            else:
-                raise err
-
-        return None
+        except NotFound:
+            logger.warning(f"The cluster policy was deleted: {policy_id}")
+            return None

     def snapshot(self) -> list[ClusterInfo]:
         return self._snapshot(self._try_fetch, self._crawl)
@@ -624,15 +612,9 @@ def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> list[JobI
     def _safe_get_cluster_policy(self, policy_id: str) -> Policy | None:
         try:
             return self._ws.cluster_policies.get(policy_id)
-        except DatabricksError as err:
-            if err.error_code == "RESOURCE_DOES_NOT_EXIST":
-                logger.warning(
-                    f"Error retrieving cluster policy {policy_id}. The cluster policy was deleted. Error: {err}"
-                )
-            else:
-                raise err
-
-        return None
+        except NotFound:
+            logger.warning(f"The cluster policy was deleted: {policy_id}")
+            return None

     def snapshot(self) -> list[JobInfo]:
         return self._snapshot(self._try_fetch, self._crawl)
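The same fix lands in three crawlers: instead of catching the broad `DatabricksError` and string-matching its `error_code`, the handlers now catch the SDK's typed `NotFound` error and let everything else propagate. A minimal standalone sketch of the pattern, assuming a `WorkspaceClient` named `ws` authenticated via the usual environment variables:

```python
import logging

from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound
from databricks.sdk.service.compute import Policy

logger = logging.getLogger(__name__)
ws = WorkspaceClient()  # assumption: credentials come from the environment/config

def safe_get_cluster_policy(policy_id: str) -> Policy | None:
    """Fetch a cluster policy, tolerating policies deleted after cluster creation."""
    try:
        return ws.cluster_policies.get(policy_id)
    except NotFound:
        # clusters can reference a policy that no longer exists; log and move on
        logger.warning(f"The cluster policy was deleted: {policy_id}")
        return None
```

The behavior for missing policies is unchanged, but the typed exception removes the fragile `error_code` string comparison and shortens the log message.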

src/databricks/labs/ucx/framework/crawlers.py

Lines changed: 3 additions & 3 deletions
@@ -7,6 +7,7 @@
 from typing import ClassVar

 from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import NotFound

 from databricks.labs.ucx.mixins.sql import StatementExecutionExt

@@ -234,9 +235,8 @@ def _snapshot(self, fetcher, loader) -> list[any]:
             cached_results = list(fetcher())
             if len(cached_results) > 0:
                 return cached_results
-        except Exception as err:
-            if "TABLE_OR_VIEW_NOT_FOUND" not in str(err):
-                raise err
+        except NotFound:
+            pass
         logger.debug(f"[{self._full_name}] crawling new batch for {self._table}")
         loaded_records = list(loader())
         self._append_records(loaded_records)
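Here the snapshot logic stops catching bare `Exception` and grepping the message for `TABLE_OR_VIEW_NOT_FOUND`; a typed `NotFound` now signals that the results table simply doesn't exist yet, in which case the crawler falls through to a fresh crawl. A simplified sketch of that control flow, with hypothetical `fetcher` and `loader` callables standing in for the real crawler internals:

```python
from collections.abc import Callable, Iterable

from databricks.sdk.errors import NotFound

def snapshot(fetcher: Callable[[], Iterable], loader: Callable[[], Iterable]) -> list:
    try:
        cached_results = list(fetcher())  # read rows persisted by a previous run
        if len(cached_results) > 0:
            return cached_results
    except NotFound:
        pass  # results table doesn't exist yet; fall through and crawl
    loaded_records = list(loader())  # crawl the workspace from scratch
    # the real implementation also appends loaded_records to the results table
    return loaded_records
```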

src/databricks/labs/ucx/framework/dashboards.py

Lines changed: 4 additions & 8 deletions
@@ -7,7 +7,7 @@
 from pathlib import Path

 from databricks.sdk import WorkspaceClient
-from databricks.sdk.core import DatabricksError
+from databricks.sdk.errors import DatabricksError, NotFound
 from databricks.sdk.service import workspace
 from databricks.sdk.service.sql import (
     AccessControl,
@@ -171,14 +171,12 @@ def _state_pre_v06(self):
                     continue
                 try:
                     self._ws.queries.get(v)
-                except DatabricksError:
+                except NotFound:
                     to_remove.append(k)
             for key in to_remove:
                 del state[key]
             return state
-        except DatabricksError as err:
-            if err.error_code != "RESOURCE_DOES_NOT_EXIST":
-                raise err
+        except NotFound:
             self._ws.workspace.mkdirs(self._remote_folder)
             return {}
         except JSONDecodeError:
@@ -187,9 +185,7 @@ def _state_pre_v06(self):
     def _remote_folder_object(self) -> workspace.ObjectInfo:
         try:
             return self._ws.workspace.get_status(self._remote_folder)
-        except DatabricksError as err:
-            if err.error_code != "RESOURCE_DOES_NOT_EXIST":
-                raise err
+        except NotFound:
             self._ws.workspace.mkdirs(self._remote_folder)
             return self._remote_folder_object()

src/databricks/labs/ucx/framework/install_state.py

Lines changed: 3 additions & 5 deletions
@@ -3,7 +3,7 @@
 from json import JSONDecodeError

 from databricks.sdk import WorkspaceClient
-from databricks.sdk.core import DatabricksError
+from databricks.sdk.errors import NotFound
 from databricks.sdk.service.workspace import ImportFormat

 logger = logging.getLogger(__name__)
@@ -32,10 +32,8 @@ def _load(self):
                 msg = f"expected state $version={self._version}, got={version}"
                 raise ValueError(msg)
             return raw
-        except DatabricksError as err:
-            if err.error_code == "RESOURCE_DOES_NOT_EXIST":
-                return default_state
-            raise err
+        except NotFound:
+            return default_state
         except JSONDecodeError:
             logger.warning(f"JSON state file corrupt: {self._state_file}")
             return default_state

src/databricks/labs/ucx/hive_metastore/grants.py

Lines changed: 11 additions & 12 deletions
@@ -248,19 +248,18 @@ def _grants(
                 continue
             # we have to return concrete list, as with yield we're executing
             # everything on the main thread.
-            grants.append(
-                Grant(
-                    principal=principal,
-                    action_type=action_type,
-                    table=table,
-                    view=view,
-                    database=database,
-                    catalog=catalog,
-                    any_file=any_file,
-                    anonymous_function=anonymous_function,
-                )
+            grant = Grant(
+                principal=principal,
+                action_type=action_type,
+                table=table,
+                view=view,
+                database=database,
+                catalog=catalog,
+                any_file=any_file,
+                anonymous_function=anonymous_function,
             )
-            return grants
+            grants.append(grant)
+        return grants
     except Exception as e:
         # TODO: https://github.com/databrickslabs/ucx/issues/406
         logger.error(f"Couldn't fetch grants for object {on_type} {key}: {e}")
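Beyond the cosmetic refactor, note the indentation of `return grants`: in the old code it appears to have sat inside the `for` loop, so the method returned after building the first grant; the fix moves it out so the full list is returned. A tiny illustration of the difference, with hypothetical data:

```python
def first_only(items: list) -> list:
    results = []
    for item in items:
        results.append(item)
        return results  # bug: returns after the first iteration

def all_of_them(items: list) -> list:
    results = []
    for item in items:
        results.append(item)
    return results  # fixed: returns once the loop completes

assert first_only([1, 2, 3]) == [1]
assert all_of_them([1, 2, 3]) == [1, 2, 3]
```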
