Skip to content

Commit 7dbad11

Browse files
authored
Implementation registration (#134)
* upath.registry: add available_implementations and register_implementation * upath.registry: minor cleanup and refactor * upath.registry: module docstring * upath.registry: fix mypy issues * upath.registry: fix entry_point issue on <3.10 * tests: add tests for upath.registry * upath.registry._register: separate entrypoint handling * upath.registry: fix protocol regex * upath.registry: fix typing issues * update readme
1 parent 24e15d3 commit 7dbad11

File tree

3 files changed

+314
-21
lines changed

3 files changed

+314
-21
lines changed

README.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,39 @@ with fs.open(p.path) as f:
147147
data = f.read()
148148
```
149149

150+
### Register custom UPath implementations
151+
152+
In case you develop a custom UPath implementation, feel free to open an issue to discuss integrating it
153+
in `universal_pathlib`. You can dynamically register your implementation too! Here are your options:
154+
155+
#### Dynamic registration from Python
156+
157+
```python
158+
# for example: my_module/submodule.py
159+
from upath import UPath
160+
from upath.registry import register_implementation
161+
162+
my_protocol = "myproto"
163+
class MyPath(UPath):
164+
... # your custom implementation
165+
166+
register_implementation(my_protocol, MyPath)
167+
```
168+
169+
#### Registration via entry points
170+
171+
```toml
172+
# pyproject.toml
173+
[project.entry-points."universal_pathlib.implementations"]
174+
myproto = "my_module.submodule:MyPath"
175+
```
176+
177+
```ini
178+
# setup.cfg
179+
[options.entry_points]
180+
universal_pathlib.implementations =
181+
myproto = my_module.submodule:MyPath
182+
```
150183

151184
## Contributing
152185

upath/registry.py

Lines changed: 155 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,63 @@
1+
"""upath.registry -- registry for file system specific implementations
2+
3+
Retrieve UPath implementations via `get_upath_class`.
4+
Register custom UPath subclasses in one of two ways:
5+
6+
### directly from Python
7+
8+
>>> from upath import UPath
9+
>>> from upath.registry import register_implementation
10+
>>> my_protocol = "myproto"
11+
>>> class MyPath(UPath):
12+
... pass
13+
>>> register_implementation(my_protocol, MyPath)
14+
15+
### via entry points
16+
17+
```toml
18+
# pyproject.toml
19+
[project.entry-points."universal_pathlib.implementations"]
20+
myproto = "my_module.submodule:MyPath"
21+
```
22+
23+
```ini
24+
# setup.cfg
25+
[options.entry_points]
26+
universal_pathlib.implementations =
27+
myproto = my_module.submodule:MyPath
28+
```
29+
"""
130
from __future__ import annotations
231

3-
import importlib
432
import os
33+
import re
34+
import sys
535
import warnings
36+
from collections import ChainMap
637
from functools import lru_cache
7-
from typing import TYPE_CHECKING
38+
from importlib import import_module
39+
from importlib.metadata import entry_points
40+
from typing import Iterator
41+
from typing import MutableMapping
842

943
from fsspec.core import get_filesystem_class
44+
from fsspec.registry import available_protocols
1045

11-
if TYPE_CHECKING:
12-
from upath.core import UPath
46+
import upath.core
1347

1448
__all__ = [
1549
"get_upath_class",
50+
"available_implementations",
51+
"register_implementation",
1652
]
1753

1854

19-
class _Registry:
55+
_ENTRY_POINT_GROUP = "universal_pathlib.implementations"
56+
57+
58+
class _Registry(MutableMapping[str, "type[upath.core.UPath]"]):
59+
"""internal registry for UPath subclasses"""
60+
2061
known_implementations: dict[str, str] = {
2162
"abfs": "upath.implementations.cloud.AzurePath",
2263
"abfss": "upath.implementations.cloud.AzurePath",
@@ -35,26 +76,118 @@ class _Registry:
3576
"webdav+https": "upath.implementations.webdav.WebdavPath",
3677
}
3778

38-
def __getitem__(self, item: str) -> type[UPath] | None:
39-
try:
40-
fqn = self.known_implementations[item]
41-
except KeyError:
42-
return None
43-
module_name, name = fqn.rsplit(".", 1)
44-
mod = importlib.import_module(module_name)
45-
return getattr(mod, name) # type: ignore
79+
def __init__(self) -> None:
80+
if sys.version_info >= (3, 10):
81+
eps = entry_points(group=_ENTRY_POINT_GROUP)
82+
else:
83+
eps = entry_points().get(_ENTRY_POINT_GROUP, [])
84+
self._entries = {ep.name: ep for ep in eps}
85+
self._m = ChainMap({}, self.known_implementations) # type: ignore
86+
87+
def __contains__(self, item: object) -> bool:
88+
return item in set().union(self._m, self._entries)
89+
90+
def __getitem__(self, item: str) -> type[upath.core.UPath]:
91+
fqn = self._m.get(item)
92+
if fqn is None:
93+
if item in self._entries:
94+
fqn = self._m[item] = self._entries[item].load()
95+
if fqn is None:
96+
raise KeyError(f"{item} not in registry")
97+
if isinstance(fqn, str):
98+
module_name, name = fqn.rsplit(".", 1)
99+
mod = import_module(module_name)
100+
cls = getattr(mod, name) # type: ignore
101+
else:
102+
cls = fqn
103+
return cls
104+
105+
def __setitem__(self, item: str, value: type[upath.core.UPath] | str) -> None:
106+
if not (
107+
(isinstance(value, type) and issubclass(value, upath.core.UPath))
108+
or isinstance(value, str)
109+
):
110+
raise ValueError(
111+
f"expected UPath subclass or FQN-string, got: {type(value).__name__!r}"
112+
)
113+
self._m[item] = value
114+
115+
def __delitem__(self, __v: str) -> None:
116+
raise NotImplementedError("removal is unsupported")
117+
118+
def __len__(self) -> int:
119+
return len(set().union(self._m, self._entries))
120+
121+
def __iter__(self) -> Iterator[str]:
122+
return iter(set().union(self._m, self._entries))
46123

47124

48125
_registry = _Registry()
49126

50127

51-
@lru_cache
52-
def get_upath_class(protocol: str) -> type[UPath] | None:
53-
"""Return the upath cls for the given protocol."""
54-
cls: type[UPath] | None = _registry[protocol]
55-
if cls is not None:
56-
return cls
128+
def available_implementations(*, fallback: bool = False) -> list[str]:
129+
"""return a list of protocols for available implementations
130+
131+
Parameters
132+
----------
133+
fallback:
134+
If True, also return protocols for fsspec filesystems without
135+
an implementation in universal_pathlib.
136+
"""
137+
impl = list(_registry)
138+
if not fallback:
139+
return impl
57140
else:
141+
return list({*impl, *available_protocols()})
142+
143+
144+
def register_implementation(
145+
protocol: str,
146+
cls: type[upath.core.UPath] | str,
147+
*,
148+
clobber: bool = False,
149+
) -> None:
150+
"""register a UPath implementation with a protocol
151+
152+
Parameters
153+
----------
154+
protocol:
155+
Protocol name to associate with the class
156+
cls:
157+
The UPath subclass for the protocol or a str representing the
158+
full path to an implementation class like package.module.class.
159+
clobber:
160+
Whether to overwrite a protocol with the same name; if False,
161+
will raise instead.
162+
"""
163+
if not re.match(r"^[a-z][a-z0-9+_.]+$", protocol):
164+
raise ValueError(f"{protocol!r} is not a valid URI scheme")
165+
if not clobber and protocol in _registry:
166+
raise ValueError(f"{protocol!r} is already in registry and clobber is False!")
167+
_registry[protocol] = cls
168+
169+
170+
@lru_cache
171+
def get_upath_class(
172+
protocol: str,
173+
*,
174+
fallback: bool = True,
175+
) -> type[upath.core.UPath] | None:
176+
"""Return the upath cls for the given protocol.
177+
178+
Returns `None` if no matching protocol can be found.
179+
180+
Parameters
181+
----------
182+
protocol:
183+
The protocol string
184+
fallback:
185+
If fallback is False, don't return the generic UPath class for fsspec
186+
filesystems that don't have an implementation registered.
187+
"""
188+
try:
189+
return _registry[protocol]
190+
except KeyError:
58191
if not protocol:
59192
if os.name == "nt":
60193
from upath.implementations.local import WindowsUPath
@@ -64,6 +197,8 @@ def get_upath_class(protocol: str) -> type[UPath] | None:
64197
from upath.implementations.local import PosixUPath
65198

66199
return PosixUPath
200+
if not fallback:
201+
return None
67202
try:
68203
_ = get_filesystem_class(protocol)
69204
except ValueError:
@@ -76,5 +211,4 @@ def get_upath_class(protocol: str) -> type[UPath] | None:
76211
UserWarning,
77212
stacklevel=2,
78213
)
79-
mod = importlib.import_module("upath.core")
80-
return mod.UPath # type: ignore
214+
return upath.core.UPath

upath/tests/test_registry.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import pytest
2+
from fsspec.registry import available_protocols
3+
4+
from upath import UPath
5+
from upath.registry import available_implementations
6+
from upath.registry import get_upath_class
7+
from upath.registry import register_implementation
8+
9+
IMPLEMENTATIONS = {
10+
"abfs",
11+
"abfss",
12+
"adl",
13+
"az",
14+
"file",
15+
"gcs",
16+
"gs",
17+
"hdfs",
18+
"http",
19+
"https",
20+
"memory",
21+
"s3",
22+
"s3a",
23+
"webdav+http",
24+
"webdav+https",
25+
}
26+
27+
28+
@pytest.fixture(autouse=True)
29+
def reset_registry():
30+
from upath.registry import _registry
31+
32+
try:
33+
yield
34+
finally:
35+
_registry._m.maps[0].clear() # type: ignore
36+
37+
38+
@pytest.fixture()
39+
def fake_entrypoint():
40+
from importlib.metadata import EntryPoint
41+
42+
from upath.registry import _registry
43+
44+
ep = EntryPoint(
45+
name="myeps",
46+
value="upath.core:UPath",
47+
group="universal_pathlib.implementations",
48+
)
49+
old_registry = _registry._entries.copy()
50+
51+
try:
52+
_registry._entries["myeps"] = ep
53+
yield
54+
finally:
55+
_registry._entries.clear()
56+
_registry._entries.update(old_registry)
57+
58+
59+
def test_available_implementations():
60+
impl = available_implementations()
61+
assert len(impl) == len(set(impl))
62+
assert set(impl) == IMPLEMENTATIONS
63+
64+
65+
def test_available_implementations_with_fallback():
66+
impl = available_implementations(fallback=True)
67+
assert set(impl) == IMPLEMENTATIONS.union(available_protocols())
68+
69+
70+
def test_available_implementations_with_entrypoint(fake_entrypoint):
71+
impl = available_implementations()
72+
assert set(impl) == IMPLEMENTATIONS.union({"myeps"})
73+
74+
75+
def test_register_implementation():
76+
class MyProtoPath(UPath):
77+
pass
78+
79+
register_implementation("myproto", MyProtoPath)
80+
81+
assert get_upath_class("myproto") is MyProtoPath
82+
83+
84+
def test_register_implementation_wrong_input():
85+
with pytest.raises(TypeError):
86+
register_implementation(None, UPath) # type: ignore
87+
with pytest.raises(ValueError):
88+
register_implementation("incorrect**protocol", UPath)
89+
with pytest.raises(ValueError):
90+
register_implementation("myproto", object, clobber=True) # type: ignore
91+
with pytest.raises(ValueError):
92+
register_implementation("file", UPath, clobber=False)
93+
assert set(available_implementations()) == IMPLEMENTATIONS
94+
95+
96+
@pytest.mark.parametrize("protocol", IMPLEMENTATIONS)
97+
def test_get_upath_class(protocol):
98+
upath_cls = get_upath_class("file")
99+
assert issubclass(upath_cls, UPath)
100+
101+
102+
def test_get_upath_class_without_implementation(clear_registry):
103+
with pytest.warns(
104+
UserWarning, match="UPath 'mock' filesystem not explicitly implemented."
105+
):
106+
upath_cls = get_upath_class("mock")
107+
assert issubclass(upath_cls, UPath)
108+
109+
110+
def test_get_upath_class_without_implementation_no_fallback(clear_registry):
111+
assert get_upath_class("mock", fallback=False) is None
112+
113+
114+
def test_get_upath_class_unknown_protocol(clear_registry):
115+
assert get_upath_class("doesnotexist") is None
116+
117+
118+
def test_get_upath_class_from_entrypoint(fake_entrypoint):
119+
assert issubclass(get_upath_class("myeps"), UPath)
120+
121+
122+
@pytest.mark.parametrize(
123+
"protocol", [pytest.param("", id="empty-str"), pytest.param(None, id="none")]
124+
)
125+
def test_get_upath_class_falsey_protocol(protocol):
126+
assert issubclass(get_upath_class(protocol), UPath)

0 commit comments

Comments
 (0)