Skip to content

Commit 7018156

Browse files
authored
Removed dependency on internal pathlib implementations (#1672)
This PR removes the dependency on the internal `pathlib._Flavour` implementations (such as `_PosixFlavour`), which may not be available on Windows.
1 parent a432131 commit 7018156

File tree

2 files changed

+82
-7
lines changed

2 files changed

+82
-7
lines changed

src/databricks/labs/ucx/mixins/wspath.py

Lines changed: 80 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import abc
2+
import fnmatch
23
import locale
34
import logging
45
import os
56
import pathlib
7+
import posixpath
8+
import re
9+
import sys
610
from functools import cached_property
711

8-
# pylint: disable-next=import-private-name
9-
from pathlib import Path, _PosixFlavour # type: ignore
12+
from pathlib import Path
1013
from urllib.parse import quote_from_bytes as urlquote_from_bytes
1114
from io import BytesIO, StringIO
1215

@@ -17,12 +20,84 @@
1720
logger = logging.getLogger(__name__)
1821

1922

20-
class _DatabricksFlavour(_PosixFlavour):
23+
class _DatabricksFlavour:
24+
# adapted from pathlib._Flavour, where we ignore support for drives, as we
25+
# don't have that concept in Databricks. We also ignore support for Windows
26+
# paths, as we only support POSIX paths in Databricks.
27+
28+
sep = '/'
29+
altsep = ''
30+
has_drv = False
31+
pathmod = posixpath
32+
is_supported = True
33+
2134
def __init__(self, ws: WorkspaceClient):
22-
super().__init__()
35+
self.join = self.sep.join
2336
self._ws = ws
2437

25-
def make_uri(self, path):
38+
def parse_parts(self, parts: list[str]) -> tuple[str, str, list[str]]:
39+
# adapted from pathlib._Flavour.parse_parts,
40+
# where we ignore support for drives, as we
41+
# don't have that concept in Databricks
42+
parsed = []
43+
drv = root = ''
44+
for part in reversed(parts):
45+
if not part:
46+
continue
47+
drv, root, rel = self.splitroot(part)
48+
if self.sep not in rel:
49+
if rel and rel != '.':
50+
parsed.append(sys.intern(rel))
51+
continue
52+
for part_ in reversed(rel.split(self.sep)):
53+
if part_ and part_ != '.':
54+
parsed.append(sys.intern(part_))
55+
if drv or root:
56+
parsed.append(drv + root)
57+
parsed.reverse()
58+
return drv, root, parsed
59+
60+
@staticmethod
61+
def join_parsed_parts(
62+
drv: str,
63+
root: str,
64+
parts: list[str],
65+
_,
66+
root2: str,
67+
parts2: list[str],
68+
) -> tuple[str, str, list[str]]:
69+
# adapted from pathlib.PurePosixPath, where we ignore support for drives,
70+
# as we don't have that concept in Databricks
71+
if root2:
72+
return drv, root2, [drv + root2] + parts2[1:]
73+
return drv, root, parts + parts2
74+
75+
@staticmethod
76+
def splitroot(part, sep=sep) -> tuple[str, str, str]:
77+
if part and part[0] == sep:
78+
stripped_part = part.lstrip(sep)
79+
if len(part) - len(stripped_part) == 2:
80+
return '', sep * 2, stripped_part
81+
return '', sep, stripped_part
82+
return '', '', part
83+
84+
@staticmethod
85+
def casefold(value: str) -> str:
86+
return value
87+
88+
@staticmethod
89+
def casefold_parts(parts: list[str]) -> list[str]:
90+
return parts
91+
92+
@staticmethod
93+
def compile_pattern(pattern: str):
94+
return re.compile(fnmatch.translate(pattern)).fullmatch
95+
96+
@staticmethod
97+
def is_reserved(_) -> bool:
98+
return False
99+
100+
def make_uri(self, path) -> str:
26101
return self._ws.config.host + '#workspace' + urlquote_from_bytes(bytes(path))
27102

28103
def __repr__(self):

tests/integration/mixins/test_wspath.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88

99
def test_exists(ws):
10-
wsp = WorkspacePath(ws, "/Users")
11-
assert wsp.exists()
10+
wsp = WorkspacePath(ws, "/Users/foo/bar/baz")
11+
assert not wsp.exists()
1212

1313

1414
def test_mkdirs(ws, make_random):

0 commit comments

Comments
 (0)