Skip to content

Commit 9977732

Browse files
authored
Fixed "Unsupported schema: XXX" error on assess_workflows (#3104)
1 parent 6283f55 commit 9977732

File tree

3 files changed

+26
-19
lines changed

3 files changed

+26
-19
lines changed

src/databricks/labs/ucx/mixins/cached_workspace_path.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -111,10 +111,11 @@ def unlink(self, missing_ok: bool = False) -> None:
111111
_CachedPathT = TypeVar("_CachedPathT", bound=_CachedPath)
112112

113113

114-
class WorkspaceCache:
114+
class InvalidPath(ValueError):
115+
pass
116+
115117

116-
class InvalidWorkspacePath(ValueError):
117-
pass
118+
class WorkspaceCache:
118119

119120
def __init__(self, ws: WorkspaceClient, max_entries: int = 2048) -> None:
120121
self._ws = ws
@@ -129,10 +130,10 @@ def get_workspace_path(self, path: str) -> WorkspacePath:
129130
Args:
130131
path: a valid workspace path (must be absolute)
131132
Raises:
132-
WorkspaceCache.InvalidWorkspacePath: this is raised immediately if the supplied path is not a syntactically
133+
InvalidPath: this is raised immediately if the supplied path is not a syntactically
133134
valid workspace path. (This is not raised if the path is syntactically valid but does not exist.)
134135
"""
135136
if not path.startswith("/"):
136137
msg = f"Invalid workspace path; must be absolute and start with a slash ('/'): {path}"
137-
raise WorkspaceCache.InvalidWorkspacePath(msg)
138+
raise InvalidPath(msg)
138139
return _CachedPath(self._cache, self._ws, path)

src/databricks/labs/ucx/source_code/jobs.py

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525

2626
from databricks.labs.ucx.assessment.crawlers import runtime_version_tuple
2727
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
28-
from databricks.labs.ucx.mixins.cached_workspace_path import WorkspaceCache
28+
from databricks.labs.ucx.mixins.cached_workspace_path import WorkspaceCache, InvalidPath
2929
from databricks.labs.ucx.source_code.base import (
3030
CurrentSessionState,
3131
LocatedAdvice,
@@ -160,7 +160,7 @@ def _as_path(self, path: str) -> Path:
160160
return DBFSPath(self._ws, parsed_path.path)
161161
case other:
162162
msg = f"Unsupported schema: {other} (only DBFS or Workspace paths are allowed)"
163-
raise ValueError(msg)
163+
raise InvalidPath(msg)
164164

165165
@classmethod
166166
@contextmanager
@@ -183,7 +183,7 @@ def _register_library(self, graph: DependencyGraph, library: compute.Library) ->
183183
yield from self._register_whl(graph, library)
184184
if library.requirements:
185185
yield from self._register_requirements_txt(graph, library)
186-
except WorkspaceCache.InvalidWorkspacePath as e:
186+
except InvalidPath as e:
187187
yield DependencyProblem('cannot-load-file', str(e))
188188
except BadRequest as e:
189189
# see https://github.com/databrickslabs/ucx/issues/2916
@@ -209,9 +209,12 @@ def _register_requirements_txt(self, graph, library) -> Iterable[DependencyProbl
209209
yield from graph.register_library(clean_requirement)
210210

211211
def _register_whl(self, graph, library) -> Iterable[DependencyProblem]:
212-
wheel_path = self._as_path(library.whl)
213-
with self._temporary_copy(wheel_path) as local_file:
214-
yield from graph.register_library(local_file.as_posix())
212+
try:
213+
wheel_path = self._as_path(library.whl)
214+
with self._temporary_copy(wheel_path) as local_file:
215+
yield from graph.register_library(local_file.as_posix())
216+
except InvalidPath as e:
217+
yield DependencyProblem('cannot-load-file', str(e))
215218

216219
def _register_egg(self, graph, library) -> Iterable[DependencyProblem]:
217220
if self.runtime_version > (14, 0):
@@ -220,9 +223,12 @@ def _register_egg(self, graph, library) -> Iterable[DependencyProblem]:
220223
message='Installing eggs is no longer supported on Databricks 14.0 or higher',
221224
)
222225
logger.info(f"Registering library from {library.egg}")
223-
egg_path = self._as_path(library.egg)
224-
with self._temporary_copy(egg_path) as local_file:
225-
yield from graph.register_library(local_file.as_posix())
226+
try:
227+
egg_path = self._as_path(library.egg)
228+
with self._temporary_copy(egg_path) as local_file:
229+
yield from graph.register_library(local_file.as_posix())
230+
except InvalidPath as e:
231+
yield DependencyProblem('cannot-load-file', str(e))
226232

227233
def _register_notebook(self, graph: DependencyGraph) -> Iterable[DependencyProblem]:
228234
if not self._task.notebook_task:
@@ -237,7 +243,7 @@ def _register_notebook(self, graph: DependencyGraph) -> Iterable[DependencyProbl
237243
try:
238244
# Notebooks can't be on DBFS.
239245
path = self._cache.get_workspace_path(notebook_path)
240-
except WorkspaceCache.InvalidWorkspacePath as e:
246+
except InvalidPath as e:
241247
return [DependencyProblem('cannot-load-notebook', str(e))]
242248
return graph.register_notebook(path, False)
243249

@@ -249,7 +255,7 @@ def _register_spark_python_task(self, graph: DependencyGraph) -> Iterable[Depend
249255
logger.info(f'Discovering {self._task.task_key} entrypoint: {python_file}')
250256
try:
251257
path = self._as_path(python_file)
252-
except WorkspaceCache.InvalidWorkspacePath as e:
258+
except InvalidPath as e:
253259
return [DependencyProblem('cannot-load-file', str(e))]
254260
return graph.register_file(path)
255261

@@ -326,7 +332,7 @@ def _register_notebook_path(self, graph: DependencyGraph, notebook_path: str) ->
326332
try:
327333
# Notebooks can't be on DBFS.
328334
path = self._cache.get_workspace_path(notebook_path)
329-
except WorkspaceCache.InvalidWorkspacePath as e:
335+
except InvalidPath as e:
330336
yield DependencyProblem('cannot-load-notebook', str(e))
331337
return
332338
# the notebook is the root of the graph, so there's no context to inherit

tests/unit/mixins/test_cached_workspace_path.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from databricks.sdk import WorkspaceClient
99
from databricks.sdk.service.workspace import ObjectInfo, ObjectType
1010

11-
from databricks.labs.ucx.mixins.cached_workspace_path import WorkspaceCache
11+
from databricks.labs.ucx.mixins.cached_workspace_path import WorkspaceCache, InvalidPath
1212
from databricks.labs.ucx.source_code.base import decode_with_bom
1313

1414

@@ -29,7 +29,7 @@ def test_path_like_returns_cached_instance() -> None:
2929

3030
def test_non_absolute_path_error() -> None:
3131
cache = _WorkspaceCacheFriend(mock_workspace_client())
32-
with pytest.raises(WorkspaceCache.InvalidWorkspacePath, match="Invalid workspace path; must be absolute"):
32+
with pytest.raises(InvalidPath, match="Invalid workspace path; must be absolute"):
3333
_ = cache.get_workspace_path("not/an/absolute/path")
3434

3535

0 commit comments

Comments
 (0)