Skip to content

Commit 0217486

Browse files
authored
Added .suffix override for notebooks in WorkspacePath (#1557)
This PR allows for more unified work with workspace notebooks and files. Fix #1455
1 parent deaf620 commit 0217486

File tree

3 files changed

+50
-6
lines changed

3 files changed

+50
-6
lines changed

src/databricks/labs/ucx/mixins/fixtures.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
Query,
4646
QueryInfo,
4747
)
48-
from databricks.sdk.service.workspace import ImportFormat
48+
from databricks.sdk.service.workspace import ImportFormat, Language
4949

5050
from databricks.labs.ucx.workspace_access.groups import MigratedGroup
5151

@@ -556,12 +556,20 @@ def create(*, scope: str, principal: str, permission: workspace.AclPermission):
556556

557557
@pytest.fixture
558558
def make_notebook(ws, make_random):
559-
def create(*, path: str | None = None, content: BinaryIO | None = None, **kwargs):
559+
def create(
560+
*,
561+
path: str | None = None,
562+
content: BinaryIO | None = None,
563+
language: Language = Language.PYTHON,
564+
format: ImportFormat = ImportFormat.SOURCE, # pylint: disable=redefined-builtin
565+
overwrite: bool = False,
566+
) -> str:
560567
if path is None:
561-
path = f"/Users/{ws.current_user.me().user_name}/sdk-{make_random(4)}.py"
568+
path = f"/Users/{ws.current_user.me().user_name}/sdk-{make_random(4)}"
562569
if content is None:
563570
content = io.BytesIO(b"print(1)")
564-
ws.workspace.upload(path, content, **kwargs)
571+
path = str(path)
572+
ws.workspace.upload(path, content, language=language, format=format, overwrite=overwrite)
565573
return path
566574

567575
yield from factory("notebook", create, lambda x: ws.workspace.delete(x))

src/databricks/labs/ucx/mixins/wspath.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212

1313
from databricks.sdk import WorkspaceClient
1414
from databricks.sdk.errors import NotFound
15-
from databricks.sdk.service.workspace import ObjectInfo, ObjectType, ExportFormat, ImportFormat
16-
15+
from databricks.sdk.service.workspace import ObjectInfo, ObjectType, ExportFormat, ImportFormat, Language
1716

1817
logger = logging.getLogger(__name__)
1918

@@ -137,6 +136,8 @@ def __init__(self, ws: WorkspaceClient, path: str):
137136
class WorkspacePath(Path):
138137
"""Experimental implementation of pathlib.Path for Databricks Workspace."""
139138

139+
_SUFFIXES = {'.py': Language.PYTHON, '.sql': Language.SQL, '.scala': Language.SCALA, '.R': Language.R}
140+
140141
_ws: WorkspaceClient
141142
_flavour: _DatabricksFlavour
142143
_accessor: _DatabricksAccessor
@@ -270,6 +271,24 @@ def open(self, mode="r", buffering=-1, encoding=None, errors=None, newline=None)
270271
return _TextUploadIO(self._ws, self.as_posix())
271272
raise ValueError(f"invalid mode: {mode}")
272273

274+
@property
275+
def suffix(self):
276+
"""Return the file extension. If the file is a notebook, return the suffix based on the language."""
277+
suffix = super().suffix
278+
if suffix:
279+
return suffix
280+
if not self.is_notebook():
281+
return ""
282+
for sfx, lang in self._SUFFIXES.items():
283+
if self._object_info.language == lang:
284+
return sfx
285+
return ""
286+
287+
def __lt__(self, other: pathlib.PurePath):
288+
if not isinstance(other, pathlib.PurePath):
289+
return NotImplemented
290+
return self.as_posix() < other.as_posix()
291+
273292
@cached_property
274293
def _object_info(self) -> ObjectInfo:
275294
# this method is cached because it is used in multiple is_* methods.

tests/integration/mixins/test_wspath.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,20 @@ def test_replace(ws, make_random):
9393

9494
assert not hello_txt.exists()
9595
assert (with_user / "hello2.txt").read_text() == 'Hello, World!'
96+
97+
98+
def test_file_and_notebook_in_same_folder_with_different_suffixes(ws, make_notebook, make_directory):
99+
folder = WorkspacePath(ws, make_directory())
100+
101+
txt_file = folder / "a.txt"
102+
py_notebook = folder / 'b' # notebooks have no file extension
103+
104+
make_notebook(path=py_notebook, content="display(spark.range(10))")
105+
txt_file.write_text("Hello, World!")
106+
107+
files = {_.name: _ for _ in folder.glob("**/*")}
108+
assert len(files) == 2
109+
110+
assert files['a.txt'].suffix == '.txt'
111+
assert files['b'].suffix == '.py' # suffix is determined from ObjectInfo
112+
assert files['b'].read_text() == "# Databricks notebook source\ndisplay(spark.range(10))"

0 commit comments

Comments
 (0)