Commit 5c92886

Improve integration test stability (#597)
This PR fixes the bugs that prevented a successful integration testing pass.
1 parent 104e1b7 commit 5c92886

27 files changed: +158 -226 lines changed

.github/workflows/acceptance.yml

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ jobs:
         subscription-id: ${{ secrets.ARM_SUBSCRIPTION_ID }}

     - name: Run integration tests
-      run: hatch run integration:test
+      run: hatch run integration
       env:
         CLOUD_ENV: "${{ vars.CLOUD_ENV }}"
         DATABRICKS_HOST: "${{ secrets.DATABRICKS_HOST }}"
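The `integration:test` → `integration` rename follows from the pyproject.toml change later in this commit, which folds the separate hatch environments into the default one.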

.github/workflows/push.yml

Lines changed: 2 additions & 2 deletions
@@ -38,7 +38,7 @@ jobs:
       run: pip install hatch==$HATCH_VERSION

     - name: Run unit tests
-      run: hatch run unit:test
+      run: hatch run test

     - name: Publish test coverage
       uses: codecov/codecov-action@v1
@@ -60,4 +60,4 @@ jobs:
       run: pip install hatch==$HATCH_VERSION

     - name: Verify linting
-      run: hatch run lint:verify
+      run: hatch run verify

CONTRIBUTING.md

Lines changed: 16 additions & 0 deletions
@@ -118,6 +118,22 @@ in turn, contributes to the overall reliability and quality of our software.
 Currently, VSCode IDE is not supported, as it does not offer interactive debugging of single integration tests.
 However, it's possible that this limitation may be addressed in the future.

+### Flaky tests
+
+You can add the `@retried` decorator to deal with [flaky tests](https://docs.pytest.org/en/latest/explanation/flaky.html):
+
+```python
+from datetime import timedelta
+
+from databricks.sdk.errors import NotFound
+from databricks.sdk.retries import retried
+
+@retried(on=[NotFound], timeout=timedelta(minutes=5))
+def test_something(ws):
+    ...
+
+```
+
 ## Local Setup

 This section provides a step-by-step guide to set up and start working on the project. These steps will help you set up your project environment and dependencies for efficient development.
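For reference, `@retried` from the Databricks SDK re-runs the wrapped function whenever it raises one of the exception types listed in `on`, retrying until the `timeout` window elapses; exceptions of any other type still fail the test immediately.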

Makefile

Lines changed: 12 additions & 12 deletions
@@ -1,27 +1,27 @@
-all: clean lint fmt test
+all: clean lint fmt test coverage

 clean:
-	rm -fr htmlcov .mypy_cache .pytest_cache .ruff_cache .coverage coverage.xml
-	hatch env remove unit
+	rm -fr .venv clean htmlcov .mypy_cache .pytest_cache .ruff_cache .coverage coverage.xml

-dev:
+.venv/bin/python:
 	pip install hatch
 	hatch env create
-	hatch run pip install -e '.[test]'
-	hatch run which python
+
+dev: .venv/bin/python
+	@hatch run which python

 lint:
-	hatch run lint:verify
+	hatch run verify

 fmt:
-	hatch run lint:fmt
+	hatch run fmt

 test:
-	hatch run unit:test
+	hatch run test

 integration:
-	hatch run integration:test
+	hatch run integration

-test-cov:
-	hatch run unit:test-cov-report && open htmlcov/index.html
+coverage:
+	hatch run coverage && open htmlcov/index.html
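Making `dev` depend on the `.venv/bin/python` file target is a standard Make idiom: the recipe that creates the environment runs only when that file is missing, so repeated `make dev` invocations become cheap no-ops that just print the interpreter path.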

pyproject.toml

Lines changed: 27 additions & 47 deletions
@@ -26,19 +26,8 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: Implementation :: CPython",
 ]
-dependencies = [
-    "databricks-sdk~=0.13.0",
-    "PyYAML>=6.0.0,<7.0.0",
-]
-
-[project.optional-dependencies]
-test = [
-    "coverage[toml]>=6.5",
-    "pytest",
-    "pytest-xdist",
-    "pytest-cov>=4.0.0,<5.0.0",
-    "pytest-mock>=3.0.0,<4.0.0",
-]
+dependencies = ["databricks-sdk~=0.13.0",
+                "PyYAML>=6.0.0,<7.0.0"]

 [project.entry-points.databricks]
 runtime = "databricks.labs.ucx.runtime:main"
@@ -50,58 +39,49 @@ Source = "https://github.com/databricks/ucx"
 [tool.hatch.version]
 path = "src/databricks/labs/ucx/__about__.py"

-[tool.hatch.envs.unit]
+[tool.hatch.envs.default]
 dependencies = [
-    "databricks-labs-ucx[test]"
+    "coverage[toml]>=6.5",
+    "pytest",
+    "pytest-xdist",
+    "pytest-cov>=4.0.0,<5.0.0",
+    "pytest-mock>=3.0.0,<4.0.0",
+    "black>=23.1.0",
+    "ruff>=0.0.243",
+    "isort>=2.5.0",
 ]

-[tool.hatch.envs.unit.scripts]
-test = "pytest --cov src --cov-report=xml tests/unit"
-test-cov-report = "pytest --cov src tests/unit --cov-report=html"
+python="3.10"

-[tool.hatch.envs.integration]
-dependencies = [
-    "databricks-labs-ucx[test]",
-]
+# store virtual env as the child of this folder. Helps VSCode (and PyCharm) to run better
+path = ".venv"

-[tool.hatch.envs.integration.scripts]
-test = "pytest --cov src tests/integration"
-parallel = "pytest -n auto --cov src tests/integration"
-
-[tool.hatch.envs.lint]
-detached = true
-dependencies = [
-    "black>=23.1.0",
-    "ruff>=0.0.243",
-    "isort>=2.5.0"
-]
-[tool.hatch.envs.lint.scripts]
-fmt = [
-    "isort .",
-    "black .",
-    "ruff . --fix",
-]
-verify = [
-    "black --check .",
-    "isort . --check-only",
-    "ruff .",
-]
+[tool.hatch.envs.default.scripts]
+test = "pytest -n auto --cov src --cov-report=xml tests/unit"
+coverage = "pytest -n auto --cov src tests/unit --cov-report=html"
+integration = "pytest -n 10 --cov src tests/integration"
+fmt = ["isort .",
+       "black .",
+       "ruff . --fix"]
+verify = ["black --check .",
+          "isort . --check-only",
+          "ruff ."]

 [tool.isort]
-skip_glob = [
-    "notebooks/*.py"
-]
+skip_glob = ["notebooks/*.py"]
 profile = "black"

 [tool.pytest.ini_options]
 addopts = "-s -p no:warnings -vv --cache-clear"
+cache_dir = ".venv/pytest-cache"

 [tool.black]
 target-version = ["py310"]
 line-length = 120
 skip-string-normalization = true

 [tool.ruff]
+cache-dir = ".venv/ruff-cache"
 target-version = "py310"
 line-length = 120
 select = [
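This is the core of the commit's cleanup: the separate `unit`, `integration`, and `lint` hatch environments are merged into the single default environment, which is why commands across the Makefile and workflows shorten from `hatch run unit:test` to `hatch run test` (and likewise for `integration` and `verify`). Pinning `path = ".venv"` keeps the interpreter where IDEs expect it, and moving the pytest and ruff caches under `.venv` lets the Makefile's `clean` target wipe everything in one `rm`.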

src/databricks/labs/ucx/assessment/crawlers.py

Lines changed: 10 additions & 28 deletions
@@ -5,7 +5,7 @@
 from dataclasses import dataclass

 from databricks.sdk import WorkspaceClient
-from databricks.sdk.core import DatabricksError
+from databricks.sdk.errors import DatabricksError, NotFound
 from databricks.sdk.service.compute import ClusterSource, Policy
 from databricks.sdk.service.jobs import BaseJob

@@ -345,15 +345,9 @@ def _get_azure_spn_with_data_access(self, cluster_config):
     def _safe_get_cluster_policy(self, policy_id: str) -> Policy | None:
         try:
             return self._ws.cluster_policies.get(policy_id)
-        except DatabricksError as err:
-            if err.error_code == "RESOURCE_DOES_NOT_EXIST":
-                logger.warning(
-                    f"Error retrieving cluster policy {policy_id}. The cluster policy was deleted. Error: {err}"
-                )
-            else:
-                raise err
-
-        return None
+        except NotFound:
+            logger.warning(f"The cluster policy was deleted: {policy_id}")
+            return None

     def _list_all_spn_in_sql_warehouses_spark_conf(self) -> list:
         warehouse_config_list = self._ws.warehouses.get_workspace_warehouse_config().data_access_config
@@ -510,15 +504,9 @@ def _assess_clusters(self, all_clusters):
     def _safe_get_cluster_policy(self, policy_id: str) -> Policy | None:
         try:
             return self._ws.cluster_policies.get(policy_id)
-        except DatabricksError as err:
-            if err.error_code == "RESOURCE_DOES_NOT_EXIST":
-                logger.warning(
-                    f"Error retrieving cluster policy {policy_id}. The cluster policy was deleted. Error: {err}"
-                )
-            else:
-                raise err
-
-        return None
+        except NotFound:
+            logger.warning(f"The cluster policy was deleted: {policy_id}")
+            return None

     def snapshot(self) -> list[ClusterInfo]:
         return self._snapshot(self._try_fetch, self._crawl)
@@ -624,15 +612,9 @@ def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> list[JobI
     def _safe_get_cluster_policy(self, policy_id: str) -> Policy | None:
         try:
             return self._ws.cluster_policies.get(policy_id)
-        except DatabricksError as err:
-            if err.error_code == "RESOURCE_DOES_NOT_EXIST":
-                logger.warning(
-                    f"Error retrieving cluster policy {policy_id}. The cluster policy was deleted. Error: {err}"
-                )
-            else:
-                raise err
-
-        return None
+        except NotFound:
+            logger.warning(f"The cluster policy was deleted: {policy_id}")
+            return None

     def snapshot(self) -> list[JobInfo]:
         return self._snapshot(self._try_fetch, self._crawl)
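The same fix lands in three crawlers: instead of catching the broad `DatabricksError` and string-matching its `error_code`, the handlers now catch the SDK's typed `NotFound` error and let everything else propagate. A minimal standalone sketch of the pattern, assuming a `WorkspaceClient` named `ws` authenticated via the usual environment variables:

```python
import logging

from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound
from databricks.sdk.service.compute import Policy

logger = logging.getLogger(__name__)
ws = WorkspaceClient()  # assumption: credentials come from the environment/config

def safe_get_cluster_policy(policy_id: str) -> Policy | None:
    """Fetch a cluster policy, tolerating policies deleted after cluster creation."""
    try:
        return ws.cluster_policies.get(policy_id)
    except NotFound:
        # clusters can reference a policy that no longer exists; log and move on
        logger.warning(f"The cluster policy was deleted: {policy_id}")
        return None
```

The behavior for missing policies is unchanged, but the typed exception removes the fragile `error_code` string comparison and shortens the log message.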

src/databricks/labs/ucx/framework/crawlers.py

Lines changed: 3 additions & 3 deletions
@@ -7,6 +7,7 @@
 from typing import ClassVar

 from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import NotFound

 from databricks.labs.ucx.mixins.sql import StatementExecutionExt

@@ -234,9 +235,8 @@ def _snapshot(self, fetcher, loader) -> list[any]:
             cached_results = list(fetcher())
             if len(cached_results) > 0:
                 return cached_results
-        except Exception as err:
-            if "TABLE_OR_VIEW_NOT_FOUND" not in str(err):
-                raise err
+        except NotFound:
+            pass
         logger.debug(f"[{self._full_name}] crawling new batch for {self._table}")
         loaded_records = list(loader())
         self._append_records(loaded_records)
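Here the snapshot logic stops catching bare `Exception` and grepping the message for `TABLE_OR_VIEW_NOT_FOUND`; a typed `NotFound` now signals that the results table simply doesn't exist yet, in which case the crawler falls through to a fresh crawl. A simplified sketch of that control flow, with hypothetical `fetcher` and `loader` callables standing in for the real crawler internals:

```python
from collections.abc import Callable, Iterable

from databricks.sdk.errors import NotFound

def snapshot(fetcher: Callable[[], Iterable], loader: Callable[[], Iterable]) -> list:
    try:
        cached_results = list(fetcher())  # read rows persisted by a previous run
        if len(cached_results) > 0:
            return cached_results
    except NotFound:
        pass  # results table doesn't exist yet; fall through and crawl
    loaded_records = list(loader())  # crawl the workspace from scratch
    # the real implementation also appends loaded_records to the results table
    return loaded_records
```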

src/databricks/labs/ucx/framework/dashboards.py

Lines changed: 4 additions & 8 deletions
@@ -7,7 +7,7 @@
 from pathlib import Path

 from databricks.sdk import WorkspaceClient
-from databricks.sdk.core import DatabricksError
+from databricks.sdk.errors import DatabricksError, NotFound
 from databricks.sdk.service import workspace
 from databricks.sdk.service.sql import (
     AccessControl,
@@ -171,14 +171,12 @@ def _state_pre_v06(self):
                     continue
                 try:
                     self._ws.queries.get(v)
-                except DatabricksError:
+                except NotFound:
                     to_remove.append(k)
             for key in to_remove:
                 del state[key]
             return state
-        except DatabricksError as err:
-            if err.error_code != "RESOURCE_DOES_NOT_EXIST":
-                raise err
+        except NotFound:
             self._ws.workspace.mkdirs(self._remote_folder)
             return {}
         except JSONDecodeError:
@@ -187,9 +185,7 @@ def _state_pre_v06(self):
     def _remote_folder_object(self) -> workspace.ObjectInfo:
         try:
             return self._ws.workspace.get_status(self._remote_folder)
-        except DatabricksError as err:
-            if err.error_code != "RESOURCE_DOES_NOT_EXIST":
-                raise err
+        except NotFound:
             self._ws.workspace.mkdirs(self._remote_folder)
             return self._remote_folder_object()

src/databricks/labs/ucx/framework/install_state.py

Lines changed: 3 additions & 5 deletions
@@ -3,7 +3,7 @@
 from json import JSONDecodeError

 from databricks.sdk import WorkspaceClient
-from databricks.sdk.core import DatabricksError
+from databricks.sdk.errors import NotFound
 from databricks.sdk.service.workspace import ImportFormat

 logger = logging.getLogger(__name__)
@@ -32,10 +32,8 @@ def _load(self):
                 msg = f"expected state $version={self._version}, got={version}"
                 raise ValueError(msg)
             return raw
-        except DatabricksError as err:
-            if err.error_code == "RESOURCE_DOES_NOT_EXIST":
-                return default_state
-            raise err
+        except NotFound:
+            return default_state
         except JSONDecodeError:
             logger.warning(f"JSON state file corrupt: {self._state_file}")
             return default_state

src/databricks/labs/ucx/hive_metastore/grants.py

Lines changed: 11 additions & 12 deletions
@@ -248,19 +248,18 @@ def _grants(
                 continue
             # we have to return concrete list, as with yield we're executing
             # everything on the main thread.
-            grants.append(
-                Grant(
-                    principal=principal,
-                    action_type=action_type,
-                    table=table,
-                    view=view,
-                    database=database,
-                    catalog=catalog,
-                    any_file=any_file,
-                    anonymous_function=anonymous_function,
-                )
+            grant = Grant(
+                principal=principal,
+                action_type=action_type,
+                table=table,
+                view=view,
+                database=database,
+                catalog=catalog,
+                any_file=any_file,
+                anonymous_function=anonymous_function,
             )
-            return grants
+            grants.append(grant)
+        return grants
     except Exception as e:
         # TODO: https://github.com/databrickslabs/ucx/issues/406
         logger.error(f"Couldn't fetch grants for object {on_type} {key}: {e}")
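Beyond the cosmetic refactor, note the indentation of `return grants`: in the old code it appears to have sat inside the `for` loop, so the method returned after building the first grant; the fix moves it out so the full list is returned. A tiny illustration of the difference, with hypothetical data:

```python
def first_only(items: list) -> list:
    results = []
    for item in items:
        results.append(item)
        return results  # bug: returns after the first iteration

def all_of_them(items: list) -> list:
    results = []
    for item in items:
        results.append(item)
    return results  # fixed: returns once the loop completes

assert first_only([1, 2, 3]) == [1]
assert all_of_them([1, 2, 3]) == [1, 2, 3]
```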
