Skip to content

Commit 3646cd2

Browse files
authored
upath: implement poc for flavour base vendoring (#200)
* upath: implement poc for flavour base vendoring * update sources generator * upath: update flavour implementation * upath.implementations: adjust for codechanges * upath: fix resolving issue * upath: provide default flavour * upath.implementations: cleanup * upath.core: fix prefix issue with glob on windows * upath._flavour: for file/local get drive on windows * upath._flavour: move _deprecated to upath._compat * upath._flavour: use local_file attribute in splitdrive * upath._flavour: use os.path for local_file in isabs * readme: fix toml entrypoint spelling * upath: fallback classmethod for UPath._parse_path and UPath._format_parsed_parts * flavours: fix reproducibility in flavour generate script * upath._flavour: refactor flavour settings * tests: test stat * upath: move flavour specializations to subclasses * tests: adjust resolve test for http paths * upath: ensure support for __fspath__ args * upath.implementations.local: suppress warning * test: adjust fspath test for windows * upath.local: WindowsUPath.path should return the posix version * upath.local: fix WindowsUPath.path
1 parent 032f437 commit 3646cd2

File tree

15 files changed

+1856
-318
lines changed

15 files changed

+1856
-318
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
default_language_version:
22
python: python3
3-
exclude: ^upath/tests/pathlib/test_pathlib.*\.py|^upath/tests/pathlib/_test_support\.py
3+
exclude: ^upath/tests/pathlib/test_pathlib.*\.py|^upath/tests/pathlib/_test_support\.py|^upath/_flavour_sources\.py
44
repos:
55
- repo: https://github.com/psf/black
66
rev: 24.1.1

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ If you distribute your implementation in your own Python package, you can inform
346346

347347
```
348348
# pyproject.toml
349-
[project.entry-points."unversal_pathlib.implementations"]
349+
[project.entry-points."universal_pathlib.implementations"]
350350
myproto = "my_module.submodule:MyPath"
351351
```
352352

dev/generate_flavours.py

Lines changed: 350 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,350 @@
1+
"""Generates the _flavour_sources.py file"""
2+
3+
from __future__ import annotations
4+
5+
import inspect
6+
import re
7+
import sys
8+
import warnings
9+
from io import StringIO
10+
from typing import Any
11+
from unittest.mock import Mock
12+
13+
from fsspec.registry import available_protocols
14+
from fsspec.registry import get_filesystem_class
15+
from fsspec.spec import AbstractFileSystem
16+
from fsspec.utils import get_package_version_without_import
17+
18+
# Module docstring of the generated file; create_source() writes it first.
HEADER = '''\
19+
""" upath._flavour_sources
20+
21+
<experimental!>
22+
23+
Warning
24+
-------
25+
Do not modify this file manually!
26+
It is generated by `dev/generate_flavours.py`
27+
28+
To be able to parse the different filesystem uri schemes, we need
29+
the string parsing functionality each of the filesystem implementations.
30+
In an attempt to support parsing uris without having to import the
31+
specific filesystems, we extract the necessary subset of the
32+
AbstractFileSystem classes and generate a new "flavour" class for
33+
each of the known filesystems. This will allow us to provide a
34+
`PurePath` equivalent `PureUPath` for each protocol in the future
35+
without a direct dependency on the underlying filesystem package.
36+
37+
"""
38+
'''
39+
40+
# Import section of the generated file; create_source() writes it after the
# header comments and before INIT_CODE.
IMPORTS = """\
41+
from __future__ import annotations
42+
43+
import logging
44+
import re
45+
from typing import Any
46+
from typing import cast
47+
from urllib.parse import parse_qs
48+
from urllib.parse import urlsplit
49+
50+
from fsspec.implementations.local import make_path_posix
51+
from fsspec.utils import infer_storage_options
52+
from fsspec.utils import stringify_path
53+
54+
"""
55+
56+
# Preamble of the generated file, written right after IMPORTS. It defines
# `__all__`, a module logger, the `flavour_registry` dict, and
# `FileSystemFlavourBase`, whose `__init_subclass__` registers every subclass
# under each of its protocols and raises ValueError on a duplicate protocol.
INIT_CODE = '''\
57+
__all__ = [
58+
"AbstractFileSystemFlavour",
59+
"FileSystemFlavourBase",
60+
"flavour_registry",
61+
]
62+
63+
logger = logging.getLogger(__name__)
64+
flavour_registry: dict[str, type[FileSystemFlavourBase]] = {}
65+
66+
67+
class FileSystemFlavourBase:
68+
"""base class for the fsspec flavours"""
69+
70+
def __init_subclass__(cls: Any, **kwargs):
71+
if isinstance(cls.protocol, str):
72+
protocols = (cls.protocol,)
73+
else:
74+
protocols = tuple(cls.protocol)
75+
for protocol in protocols:
76+
if protocol in flavour_registry:
77+
raise ValueError(f"protocol {protocol!r} already registered")
78+
flavour_registry[protocol] = cls
79+
'''
80+
81+
# Suffix appended to a filesystem class name to form the generated class name.
BASE_CLASS_NAME_SUFFIX = "Flavour"
82+
# Name of the generated base flavour class; create_source() passes it as the
# base class of every per-filesystem flavour.
BASE_CLASS_NAME = f"{AbstractFileSystem.__name__}{BASE_CLASS_NAME_SUFFIX}"
83+
84+
# Protocols ignored while collecting filesystem classes; create_source() lists
# them in a "skipping protocols" comment of the generated file.
SKIP_PROTOCOLS = [
85+
"dir",
86+
"blockcache",
87+
"cached",
88+
"simplecache",
89+
"filecache",
90+
]
91+
92+
# Protocol overrides keyed by filesystem class name: when a class is listed
# here, get_protos() returns this tuple as-is instead of deriving the
# protocols from `cls.protocol`.
FIX_PROTOCOLS = {
93+
"MemFS": ("memfs",),
94+
"AsyncLocalFileSystem": (),
95+
}
96+
97+
# Method-list overrides keyed by filesystem class name: when a class is listed
# here, get_subclass_methods() returns this list without inspecting the class.
FIX_METHODS = {
98+
"GCSFileSystem": ["_strip_protocol", "_get_kwargs_from_urls", "_split_path"],
99+
}
100+
101+
102+
def _fix_azure_blob_file_system(x: str) -> str:
103+
return re.sub(
104+
r"host = ops.get\(\"host\", None\)",
105+
'host: str | None = ops.get("host", None)',
106+
x,
107+
)
108+
109+
110+
def _fix_memfs_file_system(x: str) -> str:
111+
return re.sub(
112+
"_MemFS",
113+
"MemoryFileSystemFlavour",
114+
x,
115+
)
116+
117+
118+
def _fix_xrootd_file_system(x: str) -> str:
119+
x = re.sub(
120+
r"client.URL",
121+
"urlsplit",
122+
x,
123+
)
124+
return re.sub(
125+
"url.hostid",
126+
"url.netloc",
127+
x,
128+
)
129+
130+
131+
# Source post-processors keyed by filesystem class name: when a class is
# listed here, generate_class_source_code() passes every generated source
# piece of that class through the function.
FIX_SOURCE = {
132+
"AzureBlobFileSystem": _fix_azure_blob_file_system,
133+
"MemFS": _fix_memfs_file_system,
134+
"XRootDFileSystem": _fix_xrootd_file_system,
135+
}
136+
137+
138+
def before_imports() -> None:
    """Install ``Mock`` placeholders in ``sys.modules`` before any import.

    Each listed module name is bound to its own fresh ``Mock`` instance.
    Presumably this lets the fsspec implementations that depend on these
    packages be imported without them being installed (to be confirmed).
    """
    stubbed_modules = (
        # patch libarchive
        "libarchive",
        "libarchive.ffi",
        # patch xrootd
        "XRootD",
        "XRootD.client",
        "XRootD.client.flags",
        "XRootD.client.responses",
    )
    for module_name in stubbed_modules:
        sys.modules[module_name] = Mock()
148+
149+
150+
def get_protos(cls: type, remove: str, add: str) -> tuple[str, ...]:
    """Return the protocol names to register for filesystem class ``cls``.

    If ``FIX_PROTOCOLS`` has an entry for the class name, that entry is
    returned unchanged. Otherwise the result is the class's own protocol(s)
    followed by ``add``, with every element equal to ``remove`` dropped.
    Duplicates are not removed here.
    """
    if cls.__name__ in FIX_PROTOCOLS:
        return FIX_PROTOCOLS[cls.__name__]

    # `cls.protocol` is either a single string or an iterable of strings.
    if isinstance(cls.protocol, str):
        declared = (cls.protocol,)
    else:
        declared = tuple(cls.protocol)
    return tuple(proto for proto in (*declared, add) if proto != remove)
160+
161+
162+
def get_fsspec_filesystems_and_protocol_errors() -> (
    tuple[dict[type[AbstractFileSystem], tuple[str, ...]], dict[str, str]]
):
    """Collect the available fsspec filesystem classes and their protocols.

    Calls ``before_imports()`` first, then resolves every protocol reported
    by ``available_protocols()`` that is not in ``SKIP_PROTOCOLS``.

    Returns a ``(classes, errors)`` pair:

    - ``classes`` maps each filesystem class to the de-duplicated tuple of
      protocols collected for it, in first-seen order.
    - ``errors`` maps each protocol whose class raised ``ImportError`` to
      the error message.
    """
    before_imports()

    classes: dict[type[AbstractFileSystem], tuple[str, ...]] = {}
    errors: dict[str, str] = {}

    for protocol in available_protocols():
        if protocol in SKIP_PROTOCOLS:
            continue
        try:
            cls = get_filesystem_class(protocol)
        except ImportError as err:
            errors[protocol] = str(err)
        else:
            protos = get_protos(cls, remove="abstract", add=protocol)
            # A class can be reached through several protocols; merge with
            # what was collected so far and drop duplicates, keeping order.
            cprotos = classes.get(cls, ())
            classes[cls] = tuple(dict.fromkeys([*cprotos, *protos]))
    return classes, errors
182+
183+
184+
def _get_plain_method(cls, name):
185+
for c in cls.__mro__:
186+
try:
187+
return c.__dict__[name]
188+
except KeyError:
189+
pass
190+
else:
191+
raise AttributeError(f"{cls.__name__}.{name} not found")
192+
193+
194+
def get_subclass_methods(cls: type) -> list[str]:  # noqa: C901
    """Return the names of the methods that must be vendored for ``cls``.

    If ``FIX_METHODS`` has an entry for the class name, a copy of that list
    is returned. Otherwise ``_strip_protocol``, ``_get_kwargs_from_urls`` and
    ``_parent`` are each compared with the ``AbstractFileSystem`` definition,
    and the names that differ from the base definition are returned.

    Warns with ``RuntimeWarning`` if ``_get_kwargs_from_urls`` is not a
    staticmethod, and with ``UserWarning`` if ``_strip_protocol`` is a
    staticmethod (it is then always vendored).

    Raises ``AttributeError`` if one of the three methods cannot be found,
    or if ``_strip_protocol`` is neither a staticmethod nor a classmethod.
    """
    try:
        # Return a copy so callers cannot mutate the shared override list.
        return list(FIX_METHODS[cls.__name__])
    except KeyError:
        pass
    errors = []

    # storage options
    so = None
    base_get_kwargs_from_urls = _get_plain_method(
        AbstractFileSystem, "_get_kwargs_from_urls"
    )
    try:
        cls_get_kwargs_from_urls = _get_plain_method(cls, "_get_kwargs_from_urls")
    except AttributeError:
        errors.append("missing `_get_kwargs_from_urls()`")
    else:
        so = cls_get_kwargs_from_urls is base_get_kwargs_from_urls
        if not isinstance(cls_get_kwargs_from_urls, staticmethod):
            warnings.warn(
                f"{cls.__name__}: {cls_get_kwargs_from_urls!r} not a staticmethod",
                RuntimeWarning,
                stacklevel=2,
            )

    # strip protocol
    sp = None
    base_strip_protocol = _get_plain_method(AbstractFileSystem, "_strip_protocol")
    try:
        cls_strip_protocol = _get_plain_method(cls, "_strip_protocol")
    except AttributeError:
        errors.append("missing `_strip_protocol()`")
    else:
        if isinstance(cls_strip_protocol, staticmethod):
            # Read the name from the wrapped function: staticmethod objects
            # only expose `__name__` themselves from Python 3.10 on, and an
            # AttributeError raised here would be mistaken by the caller for
            # a missing-method error.
            strip_name = cls_strip_protocol.__func__.__name__
            warnings.warn(
                f"{cls.__name__}: {strip_name!r} is not a classmethod",
                UserWarning,
                stacklevel=2,
            )
            sp = False
        elif isinstance(cls_strip_protocol, classmethod):
            sp = cls_strip_protocol.__func__ is base_strip_protocol.__func__
        else:
            errors.append(
                f"{cls.__name__}: {cls_strip_protocol.__name__!r} not a classmethod"
            )

    # _parent
    pt = None
    base_parent = _get_plain_method(AbstractFileSystem, "_parent")
    try:
        cls_parent = _get_plain_method(cls, "_parent")
    except AttributeError:
        errors.append("missing `_parent()`")
    else:
        pt = cls_parent is base_parent

    if errors or sp is None or so is None:
        raise AttributeError(" AND ".join(errors))

    # A flag is True when the class reuses the base implementation; only
    # overridden methods need to be vendored.
    methods = []
    if not sp:
        methods.append("_strip_protocol")
    if not so:
        methods.append("_get_kwargs_from_urls")
    if not pt:
        methods.append("_parent")
    return methods
262+
263+
264+
# Build the source text of one generated flavour class.
#
# Parameters:
#   cls        -- class whose name, module, attributes and methods are read
#   methods    -- names of methods whose source is copied via inspect.getsource
#   overrides  -- attribute name -> value, emitted as `name = repr(value)`
#   attributes -- attribute names read from `cls`, emitted as `name = repr(...)`
#   cls_suffix -- appended to `cls.__name__` to form the generated class name
#   base_cls   -- base class name for the generated class; falsy for no base
#
# Returns the pieces joined with newlines. If FIX_SOURCE has an entry for
# `cls.__name__`, every piece is passed through that function before joining.
#
# NOTE(review): the leading whitespace inside the f-strings below sets the
# indentation of the generated class body -- confirm it matches the
# indentation of the method sources returned by inspect.getsource.
def generate_class_source_code(
265+
cls: type,
266+
methods: list[str],
267+
overrides: dict[str, Any],
268+
attributes: list[str],
269+
cls_suffix: str,
270+
base_cls: str | None,
271+
) -> str:
272+
s = ["\n"]
273+
if base_cls:
274+
s += [f"class {cls.__name__}{cls_suffix}({base_cls}):"]
275+
else:
276+
s += [f"class {cls.__name__}{cls_suffix}:"]
277+
# version of the top-level package that provides `cls`
mod_ver = get_package_version_without_import(cls.__module__.partition(".")[0])
278+
s.append(f" __orig_class__ = '{cls.__module__}.{cls.__name__}'")
279+
s.append(f" __orig_version__ = {mod_ver!r}")
280+
for attr, value in overrides.items():
281+
s.append(f" {attr} = {value!r}")
282+
for attr in attributes:
283+
s.append(f" {attr} = {getattr(cls, attr)!r}")
284+
s.append("")
285+
for method in methods:
286+
s.append(inspect.getsource(getattr(cls, method)))
287+
try:
288+
fix_func = FIX_SOURCE[cls.__name__]
289+
except KeyError:
290+
return "\n".join(s)
291+
else:
292+
return "\n".join(fix_func(line) for line in s)
293+
294+
295+
# Assemble and return the full text of the generated flavour module.
#
# Output order: HEADER, a comment listing SKIP_PROTOCOLS (if any), a comment
# listing per-protocol errors (if any), IMPORTS, INIT_CODE, then the class
# sources: the AbstractFileSystem base flavour first, followed by one class
# per collected filesystem sorted by class name. One trailing newline is
# stripped from the result.
def create_source() -> str:
296+
buf = StringIO()
297+
buf.write(HEADER)
298+
299+
classes, errors = get_fsspec_filesystems_and_protocol_errors()
300+
301+
srcs = [
302+
generate_class_source_code(
303+
AbstractFileSystem,
304+
["_strip_protocol", "_get_kwargs_from_urls", "_parent"],
305+
{},
306+
["protocol", "root_marker"],
307+
cls_suffix=BASE_CLASS_NAME_SUFFIX,
308+
base_cls="FileSystemFlavourBase",
309+
)
310+
]
311+
312+
for cls in sorted(classes, key=lambda cls: cls.__name__):
313+
try:
314+
sub_cls_methods = get_subclass_methods(cls)
315+
except AttributeError as err:
316+
# no class is generated; the message is recorded under each protocol of cls
protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
317+
for proto in protos:
318+
errors[proto] = str(err)
319+
continue
320+
sub_cls = generate_class_source_code(
321+
cls,
322+
sub_cls_methods,
323+
{"protocol": classes[cls]},
324+
["root_marker", "sep"],
325+
cls_suffix=BASE_CLASS_NAME_SUFFIX,
326+
base_cls=BASE_CLASS_NAME,
327+
)
328+
srcs.append(sub_cls)
329+
330+
if SKIP_PROTOCOLS:
331+
buf.write("#\n# skipping protocols:\n")
332+
for protocol in sorted(SKIP_PROTOCOLS):
333+
buf.write(f"# - {protocol}\n")
334+
335+
# `errors` holds both import errors and the method-inspection errors recorded above
if errors:
336+
buf.write("# protocol import errors:\n")
337+
for protocol, error_msg in sorted(errors.items()):
338+
buf.write(f"# - {protocol} ({error_msg})\n")
339+
buf.write("#\n")
340+
341+
buf.write(IMPORTS)
342+
buf.write(INIT_CODE)
343+
for cls_src in srcs:
344+
buf.write(cls_src)
345+
346+
return buf.getvalue().removesuffix("\n")
347+
348+
349+
# Script entry point: print the generated module source to stdout.
if __name__ == "__main__":
350+
print(create_source())

0 commit comments

Comments
 (0)