Skip to content

Commit af5ce09

Browse files
committed
Consolidate UPath, test fsspec local
1 parent 88b9cad commit af5ce09

File tree

10 files changed

+519
-523
lines changed

10 files changed

+519
-523
lines changed

notebooks/examples.ipynb

Lines changed: 208 additions & 208 deletions
Large diffs are not rendered by default.

upath/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@
22
__version__ = "0.0.11"
33

44
from upath.core import UPath
5+
6+
__all__ = ["UPath"]

upath/core.py

Lines changed: 273 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,90 @@
1-
from abc import ABCMeta
21
import os
32
import pathlib
3+
import re
44
import urllib
5+
from abc import ABCMeta
56

6-
from fsspec.registry import known_implementations, registry
7+
from fsspec.registry import (
8+
get_filesystem_class,
9+
known_implementations,
10+
registry,
11+
)
712
from fsspec.utils import stringify_path
813

9-
from upath.registry import _registry
14+
from upath.errors import NotDirectoryError
15+
16+
17+
class _FSSpecAccessor:
18+
def __init__(self, parsed_url, *args, **kwargs):
19+
self._url = parsed_url
20+
cls = get_filesystem_class(self._url.scheme)
21+
url_kwargs = cls._get_kwargs_from_urls(
22+
urllib.parse.urlunparse(self._url))
23+
url_kwargs.update(kwargs)
24+
self._fs = cls(**url_kwargs)
25+
26+
def transform_args_wrapper(self, func):
27+
"""Modifies the arguments that get passed to the filesystem so that
28+
the UniversalPath instance gets stripped as the first argument. If a
29+
path keyword argument is not given, then `UniversalPath.path` is
30+
formatted for the filesystem and inserted as the first argument.
31+
If it is, then the path keyword argument is formatted properly for
32+
the filesystem.
33+
"""
34+
def wrapper(*args, **kwargs):
35+
args, kwargs = self._transform_arg_paths(args, kwargs)
36+
return func(*args, **kwargs)
37+
38+
return wrapper
39+
40+
def _transform_arg_paths(self, args, kwargs):
41+
"""formats the path properly for the filesystem backend."""
42+
args = list(args)
43+
first_arg = args.pop(0)
44+
if not kwargs.get("path"):
45+
if isinstance(first_arg, UPath):
46+
first_arg = self._format_path(first_arg.path)
47+
args.insert(0, first_arg)
48+
args = tuple(args)
49+
else:
50+
kwargs["path"] = self._format_path(kwargs["path"])
51+
return args, kwargs
52+
53+
def _format_path(self, s):
54+
"""placeholder method for subclassed filesystems"""
55+
return s
56+
57+
def __getattribute__(self, item):
58+
class_attrs = ["_url", "_fs", "__class__"]
59+
if item in class_attrs:
60+
return super().__getattribute__(item)
61+
62+
class_methods = [
63+
"__init__",
64+
"__getattribute__",
65+
"transform_args_wrapper",
66+
"_transform_arg_paths",
67+
"_format_path",
68+
]
69+
if item in class_methods:
70+
return lambda *args, **kwargs: getattr(self.__class__, item)(
71+
self, *args, **kwargs)
72+
73+
d = object.__getattribute__(self, "__dict__")
74+
fs = d.get("_fs", None)
75+
if fs is not None:
76+
method = getattr(fs, item, None)
77+
if method:
78+
return lambda *args, **kwargs: (self.transform_args_wrapper(
79+
method)(*args, **kwargs)) # noqa: E501
80+
else:
81+
raise NotImplementedError(
82+
f"{fs.protocol} filesystem has no attribute {item}")
83+
84+
85+
class PureUPath(pathlib.PurePath):
    """Pure-path base class that uses pathlib's POSIX flavour."""

    __slots__ = ()
    _flavour = pathlib._posix_flavour
1088

1189

1290
class UPathMeta(ABCMeta):
@@ -17,7 +95,28 @@ def __subclasscheck__(cls, subclass):
1795
return issubclass(subclass, pathlib.Path)
1896

1997

20-
class UPath(pathlib.Path, metaclass=UPathMeta):
98+
class UPath(pathlib.Path, PureUPath, metaclass=UPathMeta):
99+
100+
__slots__ = ("_url", "_kwargs", "_closed", "fs")
101+
102+
not_implemented = [
103+
"cwd",
104+
"home",
105+
"expanduser",
106+
"group",
107+
"is_mount",
108+
"is_symlink",
109+
"is_socket",
110+
"is_fifo",
111+
"is_block_device",
112+
"is_char_device",
113+
"lchmod",
114+
"lstat",
115+
"owner",
116+
"readlink",
117+
]
118+
_default_accessor = _FSSpecAccessor
119+
21120
def __new__(cls, *args, **kwargs):
22121
if issubclass(cls, UPath):
23122
args_list = list(args)
@@ -31,19 +130,18 @@ def __new__(cls, *args, **kwargs):
31130
# treat as local filesystem, return PosixPath or WindowsPath
32131
impls = list(registry) + list(known_implementations.keys())
33132
if not parsed_url.scheme or parsed_url.scheme not in impls:
34-
cls = (
35-
pathlib.WindowsPath
36-
if os.name == "nt"
37-
else pathlib.PosixPath
38-
)
133+
cls = (pathlib.WindowsPath
134+
if os.name == "nt" else pathlib.PosixPath)
39135
self = cls._from_parts(args, init=False)
40136
if not self._flavour.is_supported:
41137
raise NotImplementedError(
42-
"cannot instantiate %r on your system" % (cls.__name__,)
43-
)
138+
"cannot instantiate %r on your system" %
139+
(cls.__name__, ))
44140
self._init()
45141
else:
46-
cls = _registry[parsed_url.scheme]
142+
import upath.registry
143+
144+
cls = upath.registry._registry[parsed_url.scheme]
47145
kwargs["_url"] = parsed_url
48146
args_list.insert(0, parsed_url.path)
49147
args = tuple(args_list)
@@ -52,3 +150,166 @@ def __new__(cls, *args, **kwargs):
52150
else:
53151
self = super().__new__(*args, **kwargs)
54152
return self
153+
154+
def _init(self, *args, template=None, **kwargs):
    """Initialise instance state after the path parts have been set.

    With no keyword arguments the stored ``_kwargs`` are reused; otherwise
    the given ones are stored. The accessor is taken from ``template`` when
    one is given and is created from ``_default_accessor`` otherwise.
    """
    self._closed = False
    if kwargs:
        self._kwargs = dict(**kwargs)
    else:
        kwargs = dict(**self._kwargs)

    # ``_url`` is only removed from the kwargs when it is truthy.
    if kwargs.get("_url"):
        self._url = kwargs.pop("_url")
    else:
        self._url = None

    if not self._root:
        if not self._parts:
            self._root = "/"
        elif self._parts[0] == "/":
            self._root = self._parts.pop(0)

    # Drop a truthy cached ``_str`` value.
    if getattr(self, "_str", None):
        delattr(self, "_str")

    if template is None:
        self._accessor = self._default_accessor(self._url, *args, **kwargs)
    else:
        self._accessor = template._accessor
    self.fs = self._accessor._fs
174+
175+
def __getattribute__(self, item):
    """Block the attributes listed in the class's ``not_implemented``.

    Raises NotImplementedError for a listed name; every other lookup is
    delegated to the parent implementation.
    """
    if item != "__class__":
        # ``self.__class__`` re-enters this method with "__class__", which
        # is why that name is excluded from the check.
        if item in getattr(self.__class__, "not_implemented"):
            raise NotImplementedError(f"UniversalPath has no attribute {item}")
    return super().__getattribute__(item)
182+
183+
def _format_parsed_parts(self, drv, root, parts):
184+
if parts:
185+
join_parts = parts[1:] if parts[0] == "/" else parts
186+
else:
187+
join_parts = []
188+
if drv or root:
189+
path = drv + root + self._flavour.join(join_parts)
190+
else:
191+
path = self._flavour.join(join_parts)
192+
scheme, netloc = self._url.scheme, self._url.netloc
193+
scheme = scheme + ":"
194+
netloc = "//" + netloc if netloc else ""
195+
formatted = scheme + netloc + path
196+
return formatted
197+
198+
@property
def path(self):
    """The root followed by the joined parts, or "/" when there are no parts.

    A leading "/" part is left out of the join.
    """
    if not self._parts:
        return "/"
    if self._parts[0] == "/":
        tail = self._parts[1:]
    else:
        tail = self._parts
    joined = self._flavour.join(tail)
    return self._root + joined
207+
208+
def open(self, *args, **kwargs):
    """Call the accessor's ``open`` with this path and the given arguments."""
    opener = self._accessor.open
    return opener(self, *args, **kwargs)
210+
211+
def iterdir(self):
    """Iterate over the files in this directory. Does not yield any
    result for the special paths '.' and '..'.
    """
    if self._closed:
        self._raise_closed()
    for entry in self._accessor.listdir(self):
        # fsspec returns dictionaries; the entry name is under "name".
        listed = entry.get("name") if isinstance(entry, dict) else entry
        if listed in {".", ".."}:
            # Yielding a path object for these makes little sense.
            continue
        # Only the part below this path is wanted for the child.
        yield self._make_child_relpath(self._sub_path(listed))
        if self._closed:
            self._raise_closed()
229+
230+
def glob(self, pattern):
    """Yield a child path for each name the accessor's ``glob`` returns.

    The pattern is joined onto this path and passed to the accessor as the
    ``path`` keyword. Each returned name has this path's prefix removed and
    is split on the flavour separator before the child is built.
    """
    target = self.joinpath(pattern)
    for match in self._accessor.glob(self, path=target.path):
        relative = self._sub_path(match)
        pieces = relative.split(self._flavour.sep)
        yield self._make_child(pieces)
236+
237+
def _sub_path(self, name):
238+
# only want the path name with iterdir
239+
sp = self.path
240+
return re.sub(f"^({sp}|{sp[1:]})/", "", name)
241+
242+
def exists(self):
    """Whether this path exists.

    Uses the accessor's ``exists`` when it is available. Otherwise falls
    back to ``stat`` and treats FileNotFoundError as "does not exist".
    """
    try:
        accessor_exists = self._accessor.exists
    except (AttributeError, NotImplementedError):
        # A plain object raises AttributeError for a missing attribute;
        # _FSSpecAccessor raises NotImplementedError instead.
        accessor_exists = None
    if accessor_exists:
        return accessor_exists(self)

    try:
        self._accessor.stat(self)
    except FileNotFoundError:
        return False
    return True
254+
255+
def is_dir(self):
    """Return True when the accessor's ``info`` reports type "directory"."""
    details = self._accessor.info(self)
    return details["type"] == "directory"
260+
261+
def is_file(self):
    """Return True when the accessor's ``info`` reports type "file"."""
    details = self._accessor.info(self)
    return details["type"] == "file"
266+
267+
def rename(self, target):
    """Not supported: always raises NotImplementedError."""
    # Can be implemented, but may be tricky.
    raise NotImplementedError
270+
271+
def touch(self, trunicate=True, **kwargs):
    """Call the accessor's ``touch`` for this path.

    ``trunicate`` is forwarded under that exact keyword name, together with
    any extra keyword arguments.
    """
    accessor = self._accessor
    accessor.touch(self, trunicate=trunicate, **kwargs)
273+
274+
def unlink(self, missing_ok=False):
    """Remove this path non-recursively through the accessor.

    When the path does not exist, raises FileNotFoundError unless
    ``missing_ok`` is true, in which case nothing happens.
    """
    if self.exists():
        self._accessor.rm(self, recursive=False)
        return
    if not missing_ok:
        raise FileNotFoundError
281+
282+
def rmdir(self, recursive=True):
    """Remove this directory through the accessor's ``rm``.

    ``recursive`` is forwarded to ``rm`` unchanged.

    Raises NotDirectoryError when ``is_dir()`` is false.
    """
    # TODO: add a warning if the directory is not empty.
    # An explicit check is used instead of ``assert`` so the validation
    # still runs when Python is started with -O.
    if not self.is_dir():
        raise NotDirectoryError
    self._accessor.rm(self, recursive=recursive)
291+
292+
@classmethod
def _from_parts_init(cls, args, init=False):
    """Build a path with the parent class's ``_from_parts``.

    ``init`` is forwarded as a keyword and defaults to False here.
    """
    parent_factory = super()._from_parts
    return parent_factory(args, init=init)
295+
296+
def _from_parts(self, args, init=True):
297+
# We need to call _parse_args on the instance, so as to get the
298+
# right flavour.
299+
obj = object.__new__(self.__class__)
300+
drv, root, parts = self._parse_args(args)
301+
obj._drv = drv
302+
obj._root = root
303+
obj._parts = parts
304+
if init:
305+
obj._init(**self._kwargs)
306+
return obj
307+
308+
def _from_parsed_parts(self, drv, root, parts, init=True):
309+
obj = object.__new__(self.__class__)
310+
obj._drv = drv
311+
obj._root = root
312+
obj._parts = parts
313+
if init:
314+
obj._init(**self._kwargs)
315+
return obj

upath/implementations/hdfs.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
from upath.universal_path import _FSSpecAccessor, UniversalPath
1+
import upath.core
22

33

4-
class _HDFSAccessor(_FSSpecAccessor):
4+
class _HDFSAccessor(upath.core._FSSpecAccessor):
55
def __init__(self, parsed_url, *args, **kwargs):
66
super().__init__(parsed_url, *args, **kwargs)
77
self._fs.root_marker = "/"
@@ -11,7 +11,6 @@ def transform_args_wrapper(self, func):
1111
argument is a UniversalPath instance, that argument is replaced with
1212
the UniversalPath's path attribute
1313
"""
14-
1514
def wrapper(*args, **kwargs):
1615
args, kwargs = self._transform_arg_paths(args, kwargs)
1716
if "trunicate" in kwargs:
@@ -23,5 +22,5 @@ def wrapper(*args, **kwargs):
2322
return wrapper
2423

2524

26-
class HDFSPath(UniversalPath):
25+
class HDFSPath(upath.core.UPath):
2726
_default_accessor = _HDFSAccessor

upath/implementations/http.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import urllib
22

3-
from upath.universal_path import UniversalPath, _FSSpecAccessor
3+
import upath.core
44

55

6-
class _HTTPAccessor(_FSSpecAccessor):
6+
class _HTTPAccessor(upath.core._FSSpecAccessor):
77
def __init__(self, parsed_url, *args, **kwargs):
88
super().__init__(parsed_url, *args, **kwargs)
99

@@ -12,13 +12,12 @@ def transform_args_wrapper(self, func):
1212
argument is a UniversalPath instance, that argument is replaced with
1313
the UniversalPath's path attribute
1414
"""
15-
1615
def wrapper(*args, **kwargs):
1716
if args:
1817
args = list(args)
1918
first_arg = args.pop(0)
2019
if not kwargs.get("path"):
21-
if isinstance(first_arg, UniversalPath):
20+
if isinstance(first_arg, upath.core.UPath):
2221
first_arg = str(first_arg)
2322
args.insert(0, first_arg)
2423
args = tuple(args)
@@ -31,5 +30,5 @@ def wrapper(*args, **kwargs):
3130
return wrapper
3231

3332

34-
class HTTPPath(UniversalPath):
33+
class HTTPPath(upath.core.UPath):
3534
_default_accessor = _HTTPAccessor

0 commit comments

Comments
 (0)