|
| 1 | +"""Generates the _flavour_sources.py file""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import inspect |
| 6 | +import re |
| 7 | +import sys |
| 8 | +import warnings |
| 9 | +from io import StringIO |
| 10 | +from typing import Any |
| 11 | +from unittest.mock import Mock |
| 12 | + |
| 13 | +from fsspec.registry import available_protocols |
| 14 | +from fsspec.registry import get_filesystem_class |
| 15 | +from fsspec.spec import AbstractFileSystem |
| 16 | +from fsspec.utils import get_package_version_without_import |
| 17 | + |
| 18 | +HEADER = '''\ |
| 19 | +""" upath._flavour_sources |
| 20 | +
|
| 21 | +<experimental!> |
| 22 | +
|
| 23 | +Warning |
| 24 | +------- |
| 25 | + Do not modify this file manually! |
| 26 | + It is generated by `dev/generate_flavours.py` |
| 27 | +
|
| 28 | +To be able to parse the different filesystem uri schemes, we need |
| 29 | +the string parsing functionality each of the filesystem implementations. |
| 30 | +In an attempt to support parsing uris without having to import the |
| 31 | +specific filesystems, we extract the necessary subset of the |
| 32 | +AbstractFileSystem classes and generate a new "flavour" class for |
| 33 | +each of the known filesystems. This will allow us to provide a |
| 34 | +`PurePath` equivalent `PureUPath` for each protocol in the future |
| 35 | +without a direct dependency on the underlying filesystem package. |
| 36 | +
|
| 37 | +""" |
| 38 | +''' |
| 39 | + |
| 40 | +IMPORTS = """\ |
| 41 | +from __future__ import annotations |
| 42 | +
|
| 43 | +import logging |
| 44 | +import re |
| 45 | +from typing import Any |
| 46 | +from typing import cast |
| 47 | +from urllib.parse import parse_qs |
| 48 | +from urllib.parse import urlsplit |
| 49 | +
|
| 50 | +from fsspec.implementations.local import make_path_posix |
| 51 | +from fsspec.utils import infer_storage_options |
| 52 | +from fsspec.utils import stringify_path |
| 53 | +
|
| 54 | +""" |
| 55 | + |
| 56 | +INIT_CODE = '''\ |
| 57 | +__all__ = [ |
| 58 | + "AbstractFileSystemFlavour", |
| 59 | + "FileSystemFlavourBase", |
| 60 | + "flavour_registry", |
| 61 | +] |
| 62 | +
|
| 63 | +logger = logging.getLogger(__name__) |
| 64 | +flavour_registry: dict[str, type[FileSystemFlavourBase]] = {} |
| 65 | +
|
| 66 | +
|
| 67 | +class FileSystemFlavourBase: |
| 68 | + """base class for the fsspec flavours""" |
| 69 | +
|
| 70 | + def __init_subclass__(cls: Any, **kwargs): |
| 71 | + if isinstance(cls.protocol, str): |
| 72 | + protocols = (cls.protocol,) |
| 73 | + else: |
| 74 | + protocols = tuple(cls.protocol) |
| 75 | + for protocol in protocols: |
| 76 | + if protocol in flavour_registry: |
| 77 | + raise ValueError(f"protocol {protocol!r} already registered") |
| 78 | + flavour_registry[protocol] = cls |
| 79 | +''' |
| 80 | + |
# Suffix appended to a filesystem class name to form its flavour class name.
BASE_CLASS_NAME_SUFFIX = "Flavour"
# Name of the generated base flavour class that the per-filesystem flavours
# subclass (derived from ``AbstractFileSystem.__name__``).
BASE_CLASS_NAME = f"{AbstractFileSystem.__name__}{BASE_CLASS_NAME_SUFFIX}"

# Protocols for which no flavour is generated; they are skipped while
# collecting filesystem classes and listed in the generated file's header.
SKIP_PROTOCOLS = [
    "dir",
    "blockcache",
    "cached",
    "simplecache",
    "filecache",
]

# Class name -> protocol tuple that `get_protos` returns verbatim instead of
# deriving the protocols from the class.
FIX_PROTOCOLS = {
    "MemFS": ("memfs",),
    "AsyncLocalFileSystem": (),
}

# Class name -> method names that `get_subclass_methods` returns verbatim
# instead of detecting which methods the class overrides.
FIX_METHODS = {
    "GCSFileSystem": ["_strip_protocol", "_get_kwargs_from_urls", "_split_path"],
}
| 100 | + |
| 101 | + |
def _fix_azure_blob_file_system(x: str) -> str:
    """Add a type annotation to the ``host`` assignment in extracted source.

    Every occurrence of ``host = ops.get("host", None)`` in *x* is rewritten
    to ``host: str | None = ops.get("host", None)``.

    The substitution is literal: a regular expression would treat the ``.``
    in ``ops.get`` as "any character" and could match unrelated text.
    """
    return x.replace(
        'host = ops.get("host", None)',
        'host: str | None = ops.get("host", None)',
    )
| 108 | + |
| 109 | + |
def _fix_memfs_file_system(x: str) -> str:
    """Point references to ``_MemFS`` at ``MemoryFileSystemFlavour``.

    Every occurrence of the text ``_MemFS`` in *x* is replaced.
    """
    old_name = "_MemFS"
    new_name = "MemoryFileSystemFlavour"
    return x.replace(old_name, new_name)
| 116 | + |
| 117 | + |
def _fix_xrootd_file_system(x: str) -> str:
    """Replace XRootD client URL handling in extracted source with stdlib names.

    ``client.URL`` becomes ``urlsplit`` and ``url.hostid`` becomes
    ``url.netloc``, in that order.

    Both substitutions are literal. Used as regular expressions, the dots
    would match any character, so text such as ``clientXURL`` would be
    rewritten as well.
    """
    without_client = x.replace("client.URL", "urlsplit")
    return without_client.replace("url.hostid", "url.netloc")
| 129 | + |
| 130 | + |
# Class name -> function applied to the generated source text of that class
# (see `generate_class_source_code`) to patch it after extraction.
FIX_SOURCE = {
    "AzureBlobFileSystem": _fix_azure_blob_file_system,
    "MemFS": _fix_memfs_file_system,
    "XRootDFileSystem": _fix_xrootd_file_system,
}
| 136 | + |
| 137 | + |
def before_imports() -> None:
    """Register stand-in modules before any filesystem class is imported.

    Each module name below is bound to its own fresh ``Mock`` instance in
    ``sys.modules``.
    """
    stubbed_modules = (
        # patch libarchive
        "libarchive",
        "libarchive.ffi",
        # patch xrootd
        "XRootD",
        "XRootD.client",
        "XRootD.client.flags",
        "XRootD.client.responses",
    )
    for module_name in stubbed_modules:
        sys.modules[module_name] = Mock()
| 148 | + |
| 149 | + |
def get_protos(cls: type, remove: str, add: str) -> tuple[str, ...]:
    """Return the protocol names to register for *cls*.

    A class listed in ``FIX_PROTOCOLS`` gets exactly the configured tuple.
    Otherwise the result is the class's own ``protocol`` value(s) followed by
    *add*, with every entry equal to *remove* dropped.
    """
    cls_name = cls.__name__
    # membership test rather than ``.get``: a configured value may be empty
    if cls_name in FIX_PROTOCOLS:
        return FIX_PROTOCOLS[cls_name]

    declared = cls.protocol
    candidates = [declared] if isinstance(declared, str) else list(declared)
    candidates.append(add)
    return tuple(proto for proto in candidates if proto != remove)
| 160 | + |
| 161 | + |
def get_fsspec_filesystems_and_protocol_errors() -> (
    tuple[dict[type[AbstractFileSystem], tuple[str, ...]], dict[str, str]]
):
    """Collect the available filesystem classes and any import failures.

    Calls ``before_imports()`` first, then resolves every protocol reported
    by ``available_protocols()`` that is not in ``SKIP_PROTOCOLS``.

    Returns
    -------
    classes
        Maps each resolved filesystem class to the tuple of protocol names
        gathered for it, without duplicates and in first-seen order.
    errors
        Maps each protocol whose class raised ``ImportError`` to the error
        message.
    """
    before_imports()

    # one class can be reached through several protocols, hence `tuple[str, ...]`
    classes: dict[type[AbstractFileSystem], tuple[str, ...]] = {}
    errors: dict[str, str] = {}

    for protocol in available_protocols():
        if protocol in SKIP_PROTOCOLS:
            continue
        try:
            cls = get_filesystem_class(protocol)
        except ImportError as err:
            errors[protocol] = str(err)
            continue
        protos = get_protos(cls, remove="abstract", add=protocol)
        known = classes.get(cls, ())
        # dict.fromkeys drops duplicates while keeping insertion order
        classes[cls] = tuple(dict.fromkeys([*known, *protos]))
    return classes, errors
| 182 | + |
| 183 | + |
def _get_plain_method(cls, name):
    """Return the raw class-namespace entry for *name* along the MRO of *cls*.

    The value is taken straight from the ``__dict__`` of the first class in
    ``cls.__mro__`` that defines *name*, so ``staticmethod`` and
    ``classmethod`` wrappers come back unbound and unwrapped.

    Raises ``AttributeError`` when no class in the MRO defines *name*.
    """
    for klass in cls.__mro__:
        namespace = klass.__dict__
        if name in namespace:
            return namespace[name]
    raise AttributeError(f"{cls.__name__}.{name} not found")
| 192 | + |
| 193 | + |
def get_subclass_methods(cls: type) -> list[str]:  # noqa: C901
    """Return the names of the path-parsing methods that *cls* overrides.

    ``_strip_protocol``, ``_get_kwargs_from_urls`` and ``_parent`` are looked
    up on *cls* and on ``AbstractFileSystem``; a name is reported when the
    two lookups do not yield the same implementation. Classes listed in
    ``FIX_METHODS`` skip the detection and get the configured names.

    Warnings are emitted when ``_get_kwargs_from_urls`` is not a
    staticmethod, or when ``_strip_protocol`` is a staticmethod.

    Raises
    ------
    AttributeError
        If one of the three methods cannot be found, or ``_strip_protocol``
        is neither a classmethod nor a staticmethod. The message joins all
        collected problems with ``" AND "``.
    """
    try:
        # copy, so callers can never mutate the shared configuration list
        return list(FIX_METHODS[cls.__name__])
    except KeyError:
        pass
    errors = []

    # storage options
    so = None
    base_get_kwargs_from_urls = _get_plain_method(
        AbstractFileSystem, "_get_kwargs_from_urls"
    )
    try:
        cls_get_kwargs_from_urls = _get_plain_method(cls, "_get_kwargs_from_urls")
    except AttributeError:
        errors.append("missing `_get_kwargs_from_urls()`")
    else:
        so = cls_get_kwargs_from_urls is base_get_kwargs_from_urls
        if not isinstance(cls_get_kwargs_from_urls, staticmethod):
            warnings.warn(
                f"{cls.__name__}: {cls_get_kwargs_from_urls!r} not a staticmethod",
                RuntimeWarning,
                stacklevel=2,
            )

    # strip protocol
    sp = None
    # Use the looked-up name in messages instead of reading `__name__` from
    # the raw descriptor: staticmethod/classmethod objects only expose
    # `__name__` from Python 3.10 on, and the resulting AttributeError would
    # be mistaken by callers for a "method missing" failure.
    strip_name = "_strip_protocol"
    base_strip_protocol = _get_plain_method(AbstractFileSystem, strip_name)
    try:
        cls_strip_protocol = _get_plain_method(cls, strip_name)
    except AttributeError:
        errors.append("missing `_strip_protocol()`")
    else:
        if isinstance(cls_strip_protocol, staticmethod):
            warnings.warn(
                f"{cls.__name__}: {strip_name!r} is not a classmethod",
                UserWarning,
                stacklevel=2,
            )
            # a staticmethod is always treated as an override
            sp = False
        elif isinstance(cls_strip_protocol, classmethod):
            sp = cls_strip_protocol.__func__ is base_strip_protocol.__func__
        else:
            errors.append(f"{cls.__name__}: {strip_name!r} not a classmethod")

    # _parent
    pt = None
    base_parent = _get_plain_method(AbstractFileSystem, "_parent")
    try:
        cls_parent = _get_plain_method(cls, "_parent")
    except AttributeError:
        errors.append("missing `_parent()`")
    else:
        pt = cls_parent is base_parent

    if errors or sp is None or so is None:
        raise AttributeError(" AND ".join(errors))

    # sp / so / pt are True when the base implementation is inherited
    # unchanged; only overridden methods are reported.
    methods = []
    if not sp:
        methods.append("_strip_protocol")
    if not so:
        methods.append("_get_kwargs_from_urls")
    if not pt:
        methods.append("_parent")
    return methods
| 262 | + |
| 263 | + |
def generate_class_source_code(
    cls: type,
    methods: list[str],
    overrides: dict[str, Any],
    attributes: list[str],
    cls_suffix: str,
    base_cls: str | None,
) -> str:
    """Return the source text of the flavour class generated for *cls*.

    Parameters
    ----------
    cls
        Class whose attributes and method sources are extracted.
    methods
        Names of the methods whose source is copied via ``inspect.getsource``.
    overrides
        Attribute name -> value pairs written as ``name = repr(value)``.
    attributes
        Names of attributes whose current value on *cls* is written as
        ``name = repr(value)``.
    cls_suffix
        Suffix appended to ``cls.__name__`` to form the generated class name.
    base_cls
        Name of the base class to inherit from; when falsy the generated
        class has no explicit base.

    The generated class also records where it came from in
    ``__orig_class__`` and ``__orig_version__``. If ``FIX_SOURCE`` has an
    entry for ``cls.__name__``, that function is applied to every chunk of
    the generated text before joining.
    """
    # Class-level attribute lines must share one indentation with the method
    # sources returned by `inspect.getsource`, which keep the indentation of
    # the original class body.
    # NOTE(review): assumes the source classes use 4-space indentation — confirm.
    indent = "    "

    header = f"class {cls.__name__}{cls_suffix}"
    if base_cls:
        header += f"({base_cls})"

    # version of the top-level package that provides `cls`
    mod_ver = get_package_version_without_import(cls.__module__.partition(".")[0])

    s = [
        "\n",
        f"{header}:",
        f"{indent}__orig_class__ = '{cls.__module__}.{cls.__name__}'",
        f"{indent}__orig_version__ = {mod_ver!r}",
    ]
    s.extend(f"{indent}{attr} = {value!r}" for attr, value in overrides.items())
    s.extend(f"{indent}{attr} = {getattr(cls, attr)!r}" for attr in attributes)
    s.append("")
    s.extend(inspect.getsource(getattr(cls, method)) for method in methods)

    fix_func = FIX_SOURCE.get(cls.__name__)
    if fix_func is None:
        return "\n".join(s)
    return "\n".join(fix_func(chunk) for chunk in s)
| 293 | + |
| 294 | + |
def create_source() -> str:
    """Build and return the full text of the generated flavour module.

    The result consists of the header, comment lines naming skipped
    protocols and protocols that produced errors, the import block, the
    registry bootstrap code and one generated class per filesystem. A single
    trailing newline, if present, is removed.
    """
    classes, errors = get_fsspec_filesystems_and_protocol_errors()

    # the base flavour comes first so the per-filesystem classes can inherit
    base_src = generate_class_source_code(
        AbstractFileSystem,
        ["_strip_protocol", "_get_kwargs_from_urls", "_parent"],
        {},
        ["protocol", "root_marker"],
        cls_suffix=BASE_CLASS_NAME_SUFFIX,
        base_cls="FileSystemFlavourBase",
    )
    srcs = [base_src]

    for cls in sorted(classes, key=lambda c: c.__name__):
        try:
            sub_cls_methods = get_subclass_methods(cls)
        except AttributeError as err:
            # record the failure under every protocol the class declares
            declared = cls.protocol
            protos = (declared,) if isinstance(declared, str) else declared
            errors.update(dict.fromkeys(protos, str(err)))
            continue
        srcs.append(
            generate_class_source_code(
                cls,
                sub_cls_methods,
                {"protocol": classes[cls]},
                ["root_marker", "sep"],
                cls_suffix=BASE_CLASS_NAME_SUFFIX,
                base_cls=BASE_CLASS_NAME,
            )
        )

    parts = [HEADER]
    if SKIP_PROTOCOLS:
        parts.append("#\n# skipping protocols:\n")
        parts.extend(f"# - {protocol}\n" for protocol in sorted(SKIP_PROTOCOLS))
    if errors:
        parts.append("# protocol import errors:\n")
        parts.extend(
            f"# - {protocol} ({error_msg})\n"
            for protocol, error_msg in sorted(errors.items())
        )
        parts.append("#\n")
    parts.append(IMPORTS)
    parts.append(INIT_CODE)
    parts.extend(srcs)

    return "".join(parts).removesuffix("\n")
| 347 | + |
| 348 | + |
if __name__ == "__main__":
    # Run as a script: print the generated module source to stdout.
    print(create_source())
0 commit comments