Skip to content

Commit 7139cf6

Browse files
committed
restructures and adds httppath
1 parent 162305c commit 7139cf6

File tree

8 files changed

+306
-286
lines changed

8 files changed

+306
-286
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
**/__pycache__
22
**/dist
3-
**/build
3+
**/build
4+
**.pyc

upath/core.py

Lines changed: 15 additions & 283 deletions
Original file line numberDiff line numberDiff line change
@@ -1,94 +1,9 @@
11
import os
22
import pathlib
33
import urllib
4-
import re
54

6-
from fsspec.registry import get_filesystem_class
7-
8-
from upath.errors import NotDirectoryError
9-
10-
11-
class _FSSpecAccessor:
12-
def __init__(self, parsed_url, *args, **kwargs):
13-
self._url = parsed_url
14-
cls = get_filesystem_class(self._url.scheme)
15-
url_kwargs = cls._get_kwargs_from_urls(
16-
urllib.parse.urlunparse(self._url)
17-
)
18-
url_kwargs.update(kwargs)
19-
self._fs = cls(**url_kwargs)
20-
if self._url.scheme in ["hdfs"]:
21-
self._fs.root_marker = "/"
22-
23-
def argument_upath_self_to_filepath(self, func):
24-
"""if arguments are passed to the wrapped function, and if the first
25-
argument is a UniversalPath instance, that argument is replaced with
26-
the UniversalPath's path attribute
27-
"""
28-
29-
def wrapper(*args, **kwargs):
30-
if args:
31-
args = list(args)
32-
first_arg = args.pop(0)
33-
if not kwargs.get("path"):
34-
if isinstance(first_arg, UniversalPath):
35-
first_arg = first_arg.path
36-
if not self._fs.root_marker and first_arg.startswith(
37-
"/"
38-
):
39-
first_arg = first_arg[1:]
40-
args.insert(0, first_arg)
41-
args = tuple(args)
42-
else:
43-
if not self._fs.root_marker and kwargs["path"].startswith(
44-
"/"
45-
):
46-
kwargs["path"] = kwargs["path"][1:]
47-
if self._url.scheme == "hdfs":
48-
if "trunicate" in kwargs:
49-
kwargs.pop("trunicate")
50-
if func.__name__ == "mkdir":
51-
args = args[:1]
52-
53-
return func(*args, **kwargs)
54-
55-
return wrapper
56-
57-
def __getattribute__(self, item):
58-
class_attrs = ["_url", "_fs"]
59-
if item in class_attrs:
60-
x = super().__getattribute__(item)
61-
return x
62-
class_methods = [
63-
"__init__",
64-
"__getattribute__",
65-
"argument_upath_self_to_filepath",
66-
]
67-
if item in class_methods:
68-
return lambda *args, **kwargs: getattr(_FSSpecAccessor, item)(
69-
self, *args, **kwargs
70-
)
71-
if item == "__class__":
72-
return _FSSpecAccessor
73-
d = object.__getattribute__(self, "__dict__")
74-
fs = d.get("_fs", None)
75-
if fs is not None:
76-
method = getattr(fs, item, None)
77-
if method:
78-
return lambda *args, **kwargs: (
79-
self.argument_upath_self_to_filepath(method)(
80-
*args, **kwargs
81-
)
82-
) # noqa: E501
83-
else:
84-
raise NotImplementedError(
85-
f"{fs.protocol} filesystem has not attribute {item}"
86-
)
87-
88-
89-
class PureUniversalPath(pathlib.PurePath):
90-
_flavour = pathlib._posix_flavour
91-
__slots__ = ()
5+
from upath.registry import _registry
6+
from upath.universal_path import UniversalPath
927

938

949
class UPath(pathlib.Path):
@@ -101,210 +16,27 @@ def __new__(cls, *args, **kwargs):
10116
val = kwargs.get(key)
10217
if val:
10318
parsed_url._replace(**{key: val})
19+
# treat as local filesystem, return PosixPath or WindowsPath
10420
if not parsed_url.scheme:
10521
cls = (
10622
pathlib.WindowsPath
10723
if os.name == "nt"
10824
else pathlib.PosixPath
10925
)
26+
self = cls._from_parts(args, init=False)
27+
if not self._flavour.is_supported:
28+
raise NotImplementedError(
29+
"cannot instantiate %r on your system" % (cls.__name__,)
30+
)
31+
self._init()
11032
else:
111-
cls = UniversalPath
112-
# cls._url = parsed_url
33+
if parsed_url.scheme in _registry:
34+
cls = _registry[parsed_url.scheme]
35+
else:
36+
cls = UniversalPath
11337
kwargs["_url"] = parsed_url
11438
new_args.insert(0, parsed_url.path)
11539
args = tuple(new_args)
116-
117-
if cls is UniversalPath:
118-
self = cls._from_parts_init(args, init=False)
119-
else:
120-
self = cls._from_parts(args, init=False)
121-
if not self._flavour.is_supported:
122-
raise NotImplementedError(
123-
"cannot instantiate %r on your system" % (cls.__name__,)
124-
)
125-
if cls is UniversalPath:
126-
self._init(*args, **kwargs)
127-
else:
128-
self._init()
40+
self = cls._from_parts_init(args, init=False)
41+
self._init(*args, **kwargs)
12942
return self
130-
131-
132-
class UniversalPath(UPath, PureUniversalPath):
133-
134-
__slots__ = ("_url", "_kwargs", "_closed", "fs")
135-
136-
not_implemented = [
137-
"cwd",
138-
"home",
139-
"expanduser",
140-
"group",
141-
"is_mount",
142-
"is_symlink",
143-
"is_socket",
144-
"is_fifo",
145-
"is_block_device",
146-
"is_char_device",
147-
"lchmod",
148-
"lstat",
149-
"owner",
150-
"readlink",
151-
]
152-
_default_accessor = _FSSpecAccessor
153-
154-
def _init(self, *args, template=None, **kwargs):
155-
self._closed = False
156-
if not kwargs:
157-
kwargs = dict(**self._kwargs)
158-
else:
159-
self._kwargs = dict(**kwargs)
160-
self._url = kwargs.pop("_url") if kwargs.get("_url") else None
161-
162-
if not self._root:
163-
if not self._parts:
164-
self._root = "/"
165-
elif self._parts[0] == "/":
166-
self._root = self._parts.pop(0)
167-
if getattr(self, "_str", None):
168-
delattr(self, "_str")
169-
if template is not None:
170-
self._accessor = template._accessor
171-
else:
172-
self._accessor = self._default_accessor(self._url, *args, **kwargs)
173-
self.fs = self._accessor._fs
174-
175-
def __getattribute__(self, item):
176-
if item == "__class__":
177-
return UniversalPath
178-
if item in getattr(UniversalPath, "not_implemented"):
179-
raise NotImplementedError(f"UniversalPath has no attribute {item}")
180-
else:
181-
return super().__getattribute__(item)
182-
183-
def _format_parsed_parts(self, drv, root, parts):
184-
join_parts = parts[1:] if parts[0] == "/" else parts
185-
if drv or root:
186-
path = drv + root + self._flavour.join(join_parts)
187-
else:
188-
path = self._flavour.join(join_parts)
189-
scheme, netloc = self._url.scheme, self._url.netloc
190-
scheme = scheme + ":"
191-
netloc = "//" + netloc if netloc else ""
192-
formatted = scheme + netloc + path
193-
return formatted
194-
195-
@property
196-
def path(self):
197-
if self._parts:
198-
join_parts = (
199-
self._parts[1:] if self._parts[0] == "/" else self._parts
200-
)
201-
path = self._flavour.join(join_parts)
202-
return self._root + path
203-
else:
204-
return "/"
205-
206-
def open(self, *args, **kwargs):
207-
return self._accessor.open(self, *args, **kwargs)
208-
209-
def iterdir(self):
210-
"""Iterate over the files in this directory. Does not yield any
211-
result for the special paths '.' and '..'.
212-
"""
213-
if self._closed:
214-
self._raise_closed()
215-
for name in self._accessor.listdir(self):
216-
# fsspec returns dictionaries
217-
if isinstance(name, dict):
218-
name = name.get("name")
219-
if name in {".", ".."}:
220-
# Yielding a path object for these makes little sense
221-
continue
222-
# only want the path name with iterdir
223-
sp = self.path
224-
name = re.sub(f"^({sp}|{sp[1:]})/", "", name)
225-
yield self._make_child_relpath(name)
226-
if self._closed:
227-
self._raise_closed()
228-
229-
def exists(self):
230-
"""
231-
Whether this path exists.
232-
"""
233-
if not getattr(self._accessor, "exists"):
234-
try:
235-
self._accessor.stat(self)
236-
except (FileNotFoundError):
237-
return False
238-
return True
239-
else:
240-
return self._accessor.exists(self)
241-
242-
def is_dir(self):
243-
info = self._accessor.info(self)
244-
if info["type"] == "directory":
245-
return True
246-
return False
247-
248-
def is_file(self):
249-
info = self._accessor.info(self)
250-
if info["type"] == "file":
251-
return True
252-
return False
253-
254-
def glob(self, pattern):
255-
path = self.joinpath(pattern)
256-
for name in self._accessor.glob(self, path=path.path):
257-
sp = self.path
258-
name = re.sub(f"^({sp}|{sp[1:]})/", "", name)
259-
name = name.split(self._flavour.sep)
260-
yield self._make_child(self._parts + name)
261-
262-
def rename(self, target):
263-
# can be implemented, but may be tricky
264-
raise NotImplementedError
265-
266-
def touch(self, trunicate=True, **kwargs):
267-
self._accessor.touch(self, trunicate=trunicate, **kwargs)
268-
269-
def unlink(self, missing_ok=False):
270-
if not self.exists():
271-
if not missing_ok:
272-
raise FileNotFoundError
273-
else:
274-
return
275-
self._accessor.rm(self, recursive=False)
276-
277-
def rmdir(self, recursive=True):
278-
"""Add warning if directory not empty
279-
assert is_dir?
280-
"""
281-
try:
282-
assert self.is_dir()
283-
except AssertionError:
284-
raise NotDirectoryError
285-
self._accessor.rm(self, recursive=recursive)
286-
287-
@classmethod
288-
def _from_parts_init(cls, args, init=False):
289-
return super()._from_parts(args, init=init)
290-
291-
def _from_parts(self, args, init=True):
292-
# We need to call _parse_args on the instance, so as to get the
293-
# right flavour.
294-
obj = object.__new__(UniversalPath)
295-
drv, root, parts = self._parse_args(args)
296-
obj._drv = drv
297-
obj._root = root
298-
obj._parts = parts
299-
if init:
300-
obj._init(**self._kwargs)
301-
return obj
302-
303-
def _from_parsed_parts(self, drv, root, parts, init=True):
304-
obj = object.__new__(UniversalPath)
305-
obj._drv = drv
306-
obj._root = root
307-
obj._parts = parts
308-
if init:
309-
obj._init(**self._kwargs)
310-
return obj

upath/implementations/http.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import urllib
22

3-
from upath.core import UniversalPath, _FSSpecAccessor
3+
from upath.universal_path import UniversalPath, _FSSpecAccessor
44

55

66
class _HTTPAccessor(_FSSpecAccessor):
@@ -12,6 +12,7 @@ def argument_upath_self_to_filepath(self, func):
1212
argument is a UniversalPath instance, that argument is replaced with
1313
the UniversalPath's path attribute
1414
"""
15+
1516
def wrapper(*args, **kwargs):
1617
if args:
1718
args = list(args)
@@ -26,6 +27,7 @@ def wrapper(*args, **kwargs):
2627
unparsed = urllib.urlunparse(new_url)
2728
kwargs["path"] = unparsed
2829
return func(*args, **kwargs)
30+
2931
return wrapper
3032

3133

upath/registry.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from upath.implementations import http
2+
3+
_registry = {"http": http.HTTPPath}

upath/tests/implementations/__init__.py

Whitespace-only changes.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import pytest # noqa: F401
2+
3+
from upath import UPath
4+
from upath.implementations.http import HTTPPath
5+
6+
7+
def test_httppath():
8+
path = UPath("http://example.com")
9+
assert isinstance(path, HTTPPath)
10+
assert path.exists()

upath/tests/test_core.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ def test_home(self):
3333

3434
def test_stat(self):
3535
stat = self.path.stat()
36-
print(stat)
3736
assert stat
3837

3938
def test_chmod(self):

0 commit comments

Comments
 (0)