Added `databricks labs ucx ensure-assessment-run` to CLI commands. (#708)

FastLee · web-flow · commit 8b2c72d7e302 · 2023-12-21T23:04:47.000+01:00
Closes #674. Description: This change adds a CLI command that performs the following steps: 1. Checks whether UCX is installed for the current user on the workspace. 2. Checks whether the Assessment workflow has run successfully or is still running. 3. Waits for an in-progress run to complete, or starts a new run and waits for it to complete.
diff --git a/labs.yml b/labs.yml
@@ -44,5 +44,8 @@ commands:
   - name: create-table-mapping
     description: create initial table mapping for review
 
+  - name: ensure-assessment-run
+    description: ensure the assessment job was run on a workspace
+
   - name: validate-external-locations
     description: validates and provides mapping to external table to external location and shared generation tf scripts
diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py
@@ -4,7 +4,6 @@
 import webbrowser
 
 from databricks.sdk import WorkspaceClient
-from databricks.sdk.errors import NotFound
 
 from databricks.labs.ucx.account import AccountWorkspaces, WorkspaceInfo
 from databricks.labs.ucx.config import AccountConfig, ConnectConfig
@@ -17,6 +16,11 @@
 
 logger = logging.getLogger("databricks.labs.ucx")
 
+CANT_FIND_UCX_MSG = (
+    "Couldn't find UCX configuration in the user's home folder. "
+    "Make sure the current user has configured and installed UCX."
+)
+
 
 def workflows():
     ws = WorkspaceClient()
@@ -47,18 +51,13 @@ def skip(schema: str, table: str | None = None):
         logger.error("--Schema is a required parameter.")
         return None
     ws = WorkspaceClient()
-    installation_manager = WorkspaceInstaller(ws)
-    logger.info("Fetching installation config.")
-    try:
-        warehouse_id = installation_manager._current_config.warehouse_id
-        sql_backend = StatementExecutionBackend(ws, warehouse_id)
-    except NotFound:
-        logger.error(
-            "Couldn't find UCX configuration in the user's home folder. "
-            "Make sure the current user has configured and installed UCX."
-        )
+    installation_manager = InstallationManager(ws)
+    installation = installation_manager.for_user(ws.current_user.me())
+    if not installation:
+        logger.error(CANT_FIND_UCX_MSG)
         return None
-
+    warehouse_id = installation.config.warehouse_id
+    sql_backend = StatementExecutionBackend(ws, warehouse_id)
     mapping = TableMapping(ws)
     if table:
         mapping.skip_table(sql_backend, schema, table)
@@ -102,6 +101,18 @@ def validate_external_locations():
         webbrowser.open(f"{ws.config.host}/#workspace{path}")
 
 
+def ensure_assessment_run():
+    ws = WorkspaceClient()
+    installation_manager = InstallationManager(ws)
+    installation = installation_manager.for_user(ws.current_user.me())
+    if not installation:
+        logger.error(CANT_FIND_UCX_MSG)
+        return None
+    else:
+        workspace_installer = WorkspaceInstaller(ws)
+        workspace_installer.validate_and_run("assessment")
+
+
 MAPPING = {
     "open-remote-config": open_remote_config,
     "installations": list_installations,
@@ -110,6 +121,7 @@ def validate_external_locations():
     "manual-workspace-info": manual_workspace_info,
     "create-table-mapping": create_table_mapping,
     "validate-external-locations": validate_external_locations,
+    "ensure-assessment-run": ensure_assessment_run,
     "skip": skip,
 }
 
diff --git a/src/databricks/labs/ucx/install.py b/src/databricks/labs/ucx/install.py
@@ -17,6 +17,7 @@
     PermissionDenied,
 )
 from databricks.sdk.service import compute, jobs
+from databricks.sdk.service.jobs import RunLifeCycleState, RunResultState
 from databricks.sdk.service.sql import EndpointInfoWarehouseType, SpotInstancePolicy
 from databricks.sdk.service.workspace import ImportFormat
 
@@ -860,6 +861,29 @@ def _remove_install_folder(self):
         except InvalidParameterValue:
             logger.error("Error deleting install folder")
 
+    def validate_step(self, step: str) -> bool:
+        job_id = self._state.jobs[step]
+        logger.debug(f"Validating {step} workflow: {self._ws.config.host}#job/{job_id}")
+        current_runs = list(self._ws.jobs.list_runs(completed_only=False, job_id=job_id))
+        for run in current_runs:
+            if run.state and run.state.result_state == RunResultState.SUCCESS:
+                return True
+        for run in current_runs:
+            if (
+                run.run_id
+                and run.state
+                and run.state.life_cycle_state in (RunLifeCycleState.RUNNING, RunLifeCycleState.PENDING)
+            ):
+                logger.info("Identified a run in progress waiting for run completion")
+                self._ws.jobs.wait_get_run_job_terminated_or_skipped(run_id=run.run_id)
+                run_new_state = self._ws.jobs.get_run(run_id=run.run_id).state
+                return run_new_state is not None and run_new_state.result_state == RunResultState.SUCCESS
+        return False
+
+    def validate_and_run(self, step: str):
+        if not self.validate_step(step):
+            self.run_workflow(step)
+
 
 if __name__ == "__main__":
     ws = WorkspaceClient(product="ucx", product_version=__version__)
diff --git a/tests/unit/installer/test_installation_manager.py b/tests/unit/installer/test_installation_manager.py
@@ -1,10 +1,18 @@
 import io
+from unittest.mock import MagicMock
 
 import pytest
 from databricks.sdk.errors import NotFound
-from databricks.sdk.service.iam import User
+from databricks.sdk.service.iam import ComplexValue, User
+from databricks.sdk.service.jobs import (
+    BaseRun,
+    RunLifeCycleState,
+    RunResultState,
+    RunState,
+)
 
 from databricks.labs.ucx.framework.parallel import ManyError
+from databricks.labs.ucx.install import WorkspaceInstaller
 from databricks.labs.ucx.installer import InstallationManager
 
 
@@ -53,3 +61,62 @@ def test_corrupt_config(mocker):
     installation_manager = InstallationManager(ws)
     user_installations = installation_manager.user_installations()
     assert len(user_installations) == 0
+
+
+def test_validate_assessment(mocker):
+    ws = mocker.patch("databricks.sdk.WorkspaceClient.__init__")
+    current_user = MagicMock()
+    current_user.me.return_value = User(user_name="foo", groups=[ComplexValue(display="admins")])
+
+    state = MagicMock()
+    state.jobs = {"assessment": 123}
+
+    ws.current_user = current_user
+    ws.jobs.list_runs.return_value = [
+        BaseRun(run_id=123, state=RunState(result_state=RunResultState.SUCCESS)),
+        BaseRun(run_id=111, state=RunState(result_state=RunResultState.FAILED)),
+    ]
+    ws.jobs.wait_get_run_job_terminated_or_skipped = MagicMock(return_value=None)
+    installation_manager = WorkspaceInstaller(ws)
+    installation_manager._state = state
+
+    assert installation_manager.validate_step("assessment")
+
+    ws.jobs.list_runs.return_value = [
+        BaseRun(run_id=123, state=RunState(result_state=RunResultState.FAILED)),
+        BaseRun(run_id=111, state=RunState(result_state=RunResultState.FAILED)),
+    ]
+
+    assert not installation_manager.validate_step("assessment")
+
+    ws.jobs.list_runs.return_value = [
+        BaseRun(run_id=123, state=RunState(result_state=RunResultState.FAILED)),
+        BaseRun(run_id=111, state=RunState(life_cycle_state=RunLifeCycleState.RUNNING)),
+    ]
+
+    installation_manager.validate_step("assessment")
+    ws.jobs.wait_get_run_job_terminated_or_skipped.assert_called()
+
+
+def test_validate_run_assessment(mocker):
+    ws = mocker.patch("databricks.sdk.WorkspaceClient.__init__")
+    current_user = MagicMock()
+    current_user.me.return_value = User(user_name="foo", groups=[ComplexValue(display="admins")])
+
+    state = MagicMock()
+    state.jobs = {"assessment": 123}
+
+    ws.current_user = current_user
+    installation_manager = WorkspaceInstaller(ws)
+    installation_manager._state = state
+    installation_manager.validate_step = MagicMock(return_value=True)
+    # Test a use case where assessment ran successfully
+    installation_manager.validate_and_run("assessment")
+    installation_manager.validate_step.assert_called_with("assessment")
+
+    # Test a use case where assessment didn't run successfully
+    installation_manager.run_workflow = MagicMock()
+    installation_manager.validate_step = MagicMock(return_value=False)
+    installation_manager.validate_and_run("assessment")
+    installation_manager.validate_step.assert_called_with("assessment")
+    installation_manager.run_workflow.assert_called_with("assessment")
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
@@ -1,7 +1,9 @@
+from unittest.mock import MagicMock
+
 import pytest
 from databricks.sdk.errors import NotFound
 from databricks.sdk.service import iam
-from databricks.sdk.service.iam import User
+from databricks.sdk.service.iam import ComplexValue, User
 
 from databricks.labs.ucx.cli import skip
 
@@ -23,12 +25,12 @@ def test_skip_no_schema(mocker, caplog):
 
 def test_skip_no_ucx(caplog, mocker):
     mocker.patch("databricks.sdk.WorkspaceClient.__init__", return_value=None)
-    mocker.patch("databricks.labs.ucx.install.WorkspaceInstaller.__init__", return_value=None)
-    mocker.patch("databricks.labs.ucx.install.WorkspaceInstaller._current_config", return_value="foo")
-    mocker.patch(
-        "databricks.labs.ucx.framework.crawlers.StatementExecutionBackend.__init__",
-        return_value=None,
-        side_effect=NotFound("..."),
-    )
+    current_user = MagicMock()
+    current_user.me.return_value = User(user_name="foo", groups=[ComplexValue(display="admins")])
+    current_user.return_value = None
+    mocker.patch("databricks.sdk.WorkspaceClient.current_user", return_value=current_user)
+    # ws.current_user = current_user
+    mocker.patch("databricks.labs.ucx.installer.InstallationManager.__init__", return_value=None)
+    mocker.patch("databricks.labs.ucx.installer.InstallationManager.for_user", return_value=None)
     skip(schema="schema", table="table")
     assert [rec.message for rec in caplog.records if "UCX configuration" in rec.message]