Skip to content

Commit 9f9c5d0

Browse files
Merge pull request #58 from leowe/http-sub-path
Fix iterdir for http-path and refactorings
2 parents 87aefbb + e7698f9 commit 9f9c5d0

File tree

16 files changed

+447
-314
lines changed

16 files changed

+447
-314
lines changed

noxfile.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ def install(session):
2727

2828
@nox.session(python=False)
2929
def smoke(session):
30-
session.install(*"pytest aiohttp requests gcsfs".split())
30+
session.install(
31+
*"pytest aiohttp requests gcsfs s3fs moto[s3,server]".split()
32+
)
3133
session.run(*"pytest --skiphdfs -vv upath".split())
3234

3335

upath/core.py

Lines changed: 110 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pathlib
22
import re
3+
import sys
34
from typing import Union
45
import urllib
56

@@ -88,21 +89,14 @@ def __getattribute__(self, item):
8889

8990
class UPath(pathlib.Path):
9091

91-
__slots__ = ("_url", "_kwargs", "_closed", "_accessor")
92+
__slots__ = (
93+
"_url",
94+
"_kwargs",
95+
"_accessor", # overwritten because of default in Python 3.10
96+
)
9297
_flavour = pathlib._posix_flavour
9398
_default_accessor = _FSSpecAccessor
9499

95-
not_implemented = [
96-
"cwd",
97-
"home",
98-
"expanduser",
99-
"group",
100-
"lchmod",
101-
"lstat",
102-
"owner",
103-
"readlink",
104-
]
105-
106100
def __new__(cls, *args, **kwargs) -> Union["UPath", pathlib.Path]:
107101
args_list = list(args)
108102
first = args_list.pop(0)
@@ -116,7 +110,6 @@ def __new__(cls, *args, **kwargs) -> Union["UPath", pathlib.Path]:
116110
)
117111

118112
new_kwargs = getattr(other, "_kwargs", {}).copy()
119-
new_kwargs.pop("_url", None)
120113
new_kwargs.update(kwargs)
121114

122115
return other.__class__(
@@ -136,44 +129,38 @@ def __new__(cls, *args, **kwargs) -> Union["UPath", pathlib.Path]:
136129
import upath.registry
137130

138131
cls = upath.registry._registry[parsed_url.scheme]
139-
kwargs["_url"] = parsed_url
140132
args_list.insert(0, parsed_url.path)
141-
return cls._from_parts(tuple(args_list), **kwargs)
133+
return cls._from_parts(tuple(args_list), url=parsed_url, **kwargs)
142134

143135
# treat as local filesystem, return PosixPath or WindowsPath
144136
return pathlib.Path(*args, **kwargs)
145137

146138
def __getattr__(self, item):
147139
if item == "_accessor":
148140
# cache the _accessor attribute on first access
149-
kw = self._kwargs.copy()
150-
kw.pop("_url", None)
151-
self._accessor = _accessor = self._default_accessor(self._url, **kw)
141+
kwargs = self._kwargs.copy()
142+
self._accessor = _accessor = self._default_accessor(
143+
self._url, **kwargs
144+
)
152145
return _accessor
153146
else:
154147
raise AttributeError(item)
155148

156-
def __getattribute__(self, item):
157-
if item == "__class__":
158-
return super().__getattribute__("__class__")
159-
if item in getattr(self.__class__, "not_implemented"):
160-
raise NotImplementedError(f"UPath has no attribute {item}")
161-
else:
162-
return super().__getattribute__(item)
163-
164149
def _make_child(self, args):
165-
drv, root, parts = self._parse_args(args, **self._kwargs)
150+
drv, root, parts = self._parse_args(args)
166151
drv, root, parts = self._flavour.join_parsed_parts(
167152
self._drv, self._root, self._parts, drv, root, parts
168153
)
169-
return self._from_parsed_parts(drv, root, parts, **self._kwargs)
154+
return self._from_parsed_parts(
155+
drv, root, parts, url=self._url, **self._kwargs
156+
)
170157

171158
def _make_child_relpath(self, part):
172159
# This is an optimization used for dir walking. `part` must be
173160
# a single part relative to this path.
174161
parts = self._parts + [part]
175162
return self._from_parsed_parts(
176-
self._drv, self._root, parts, **self._kwargs
163+
self._drv, self._root, parts, url=self._url, **self._kwargs
177164
)
178165

179166
def _format_parsed_parts(self, drv, root, parts):
@@ -213,7 +200,9 @@ def parent(self):
213200
parts = self._parts
214201
if len(parts) == 1 and (drv or root):
215202
return self
216-
return self._from_parsed_parts(drv, root, parts[:-1], **self._kwargs)
203+
return self._from_parsed_parts(
204+
drv, root, parts[:-1], url=self._url, **self._kwargs
205+
)
217206

218207
def stat(self):
219208
return self._accessor.stat(self)
@@ -222,8 +211,6 @@ def iterdir(self):
222211
"""Iterate over the files in this directory. Does not yield any
223212
result for the special paths '.' and '..'.
224213
"""
225-
if self._closed:
226-
self._raise_closed()
227214
for name in self._accessor.listdir(self):
228215
# fsspec returns dictionaries
229216
if isinstance(name, dict):
@@ -234,8 +221,29 @@ def iterdir(self):
234221
# only want the path name with iterdir
235222
name = self._sub_path(name)
236223
yield self._make_child_relpath(name)
237-
if self._closed:
238-
self._raise_closed()
224+
225+
def relative_to(self, *other):
226+
for other_item in other:
227+
if not isinstance(other_item, self.__class__) and not isinstance(
228+
other_item, str
229+
):
230+
raise ValueError(
231+
f"{repr(self)} and {repr(other_item)} are "
232+
"not of compatible classes."
233+
)
234+
if not isinstance(other_item, str) and (
235+
other_item._url.scheme != self._url.scheme
236+
or other_item._url.netloc != self._url.netloc
237+
or other_item._kwargs != self._kwargs
238+
):
239+
raise ValueError(
240+
f"{self} and {other_item} do not share the same "
241+
"base URL and storage options."
242+
)
243+
output = super().relative_to(*other)
244+
output._url = self._url
245+
output._kwargs = self._kwargs
246+
return output
239247

240248
def glob(self, pattern):
241249
path = self.joinpath(pattern)
@@ -263,24 +271,33 @@ def exists(self):
263271
return self._accessor.exists(self)
264272

265273
def is_dir(self):
266-
info = self._accessor.info(self)
267-
if info["type"] == "directory":
268-
return True
274+
try:
275+
info = self._accessor.info(self)
276+
if info["type"] == "directory":
277+
return True
278+
except FileNotFoundError:
279+
return False
269280
return False
270281

271282
def is_file(self):
272-
info = self._accessor.info(self)
273-
if info["type"] == "file":
274-
return True
283+
try:
284+
info = self._accessor.info(self)
285+
if info["type"] == "file":
286+
return True
287+
except FileNotFoundError:
288+
return False
275289
return False
276290

277291
def is_mount(self):
278292
return False
279293

280294
def is_symlink(self):
281-
info = self._accessor.info(self)
282-
if "islink" in info:
283-
return info["islink"]
295+
try:
296+
info = self._accessor.info(self)
297+
if "islink" in info:
298+
return info["islink"]
299+
except FileNotFoundError:
300+
return False
284301
return False
285302

286303
def is_socket(self):
@@ -295,16 +312,6 @@ def is_block_device(self):
295312
def is_char_device(self):
296313
return False
297314

298-
def chmod(self, mod):
299-
raise NotImplementedError
300-
301-
def rename(self, target):
302-
# can be implemented, but may be tricky
303-
raise NotImplementedError
304-
305-
def touch(self, trunicate=True, **kwargs):
306-
self._accessor.touch(self, trunicate=trunicate, **kwargs)
307-
308315
def unlink(self, missing_ok=False):
309316
if not self.exists():
310317
if not missing_ok:
@@ -323,45 +330,80 @@ def rmdir(self, recursive=True):
323330
raise NotDirectoryError
324331
self._accessor.rm(self, recursive=recursive)
325332

326-
@classmethod
327-
def _parse_args(cls, args, **kwargs):
328-
return super(UPath, cls)._parse_args(args)
333+
def chmod(self, mod):
334+
raise NotImplementedError
335+
336+
def rename(self, target):
337+
# can be implemented, but may be tricky
338+
raise NotImplementedError
339+
340+
def cwd(self):
341+
raise NotImplementedError
342+
343+
def home(self):
344+
raise NotImplementedError
345+
346+
def expanduser(self):
347+
raise NotImplementedError
348+
349+
def group(self):
350+
raise NotImplementedError
351+
352+
def lchmod(self, mode):
353+
raise NotImplementedError
354+
355+
def lstat(self):
356+
raise NotImplementedError
357+
358+
def owner(self):
359+
raise NotImplementedError
360+
361+
def readlink(self):
362+
raise NotImplementedError
363+
364+
def touch(self, trunicate=True, **kwargs):
365+
self._accessor.touch(self, trunicate=trunicate, **kwargs)
329366

330367
@classmethod
331-
def _from_parts(cls, args, **kwargs):
368+
def _from_parts(cls, args, url=None, **kwargs):
332369
obj = object.__new__(cls)
333-
drv, root, parts = obj._parse_args(args, **kwargs)
370+
drv, root, parts = obj._parse_args(args)
334371
obj._drv = drv
335-
obj._parts = parts
336-
obj._closed = False
372+
if sys.version_info < (3, 9):
373+
obj._closed = False
374+
obj._url = url
337375
obj._kwargs = kwargs.copy()
338-
obj._url = kwargs.pop("_url", None) or None
339376

340377
if not root:
341378
if not parts:
342379
root = "/"
380+
parts = ["/"]
343381
elif parts[0] == "/":
344-
root = parts.pop(0)
382+
root = parts[1:]
345383
obj._root = root
384+
obj._parts = parts
346385

347386
return obj
348387

349388
@classmethod
350-
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
389+
def _from_parsed_parts(cls, drv, root, parts, url=None, **kwargs):
351390
obj = object.__new__(cls)
352391
obj._drv = drv
353392
obj._parts = parts
354-
obj._closed = False
393+
if sys.version_info < (3, 9):
394+
obj._closed = False
395+
obj._url = url
355396
obj._kwargs = kwargs.copy()
356-
obj._url = kwargs.pop("_url", None) or None
357397

358398
if not root:
359399
if not parts:
360400
root = "/"
361401
elif parts[0] == "/":
362402
root = parts.pop(0)
363-
obj._root = root
403+
if len(obj._parts) == 0 or obj._parts[0] != root:
404+
obj._parts.insert(0, root)
364405

406+
obj._root = root
365407
return obj
366408

367409
@property
@@ -380,7 +422,6 @@ def __truediv__(self, key):
380422
)
381423

382424
kwargs = self._kwargs.copy()
383-
kwargs.pop("_url")
384425

385426
# Create a new object
386427
out = self.__class__(
@@ -390,16 +431,11 @@ def __truediv__(self, key):
390431
return out
391432

392433
def __setstate__(self, state):
393-
kwargs = state["_kwargs"].copy()
394-
kwargs["_url"] = self._url
395-
self._kwargs = kwargs
434+
self._kwargs = state["_kwargs"].copy()
396435

397436
def __reduce__(self):
398-
kwargs = self._kwargs.copy()
399-
kwargs.pop("_url", None)
400-
401437
return (
402438
self.__class__,
403439
(self._format_parsed_parts(self._drv, self._root, self._parts),),
404-
{"_kwargs": kwargs},
440+
{"_kwargs": self._kwargs.copy()},
405441
)

upath/implementations/gcs.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import upath.core
2-
import os
32
import re
43

54

@@ -11,7 +10,7 @@ def _format_path(self, s):
1110
"""
1211
netloc has already been set to project via `GCSPath._from_parts`
1312
"""
14-
s = os.path.join(self._url.netloc, s.lstrip("/"))
13+
s = f"{self._url.netloc}/{s.lstrip('/')}"
1514
return s
1615

1716

@@ -20,25 +19,26 @@ class GCSPath(upath.core.UPath):
2019
_default_accessor = _GCSAccessor
2120

2221
@classmethod
23-
def _from_parts(cls, args, **kwargs):
24-
obj = super()._from_parts(args, **kwargs)
25-
if kwargs.get("bucket") and kwargs.get("_url"):
26-
bucket = obj._kwargs.pop("bucket")
27-
obj._url = obj._url._replace(netloc=bucket)
22+
def _from_parts(cls, args, url=None, **kwargs):
23+
if kwargs.get("bucket") and url is not None:
24+
bucket = kwargs.pop("bucket")
25+
url = url._replace(netloc=bucket)
26+
obj = super()._from_parts(args, url, **kwargs)
2827
return obj
2928

3029
@classmethod
31-
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
32-
obj = super()._from_parsed_parts(drv, root, parts, **kwargs)
33-
if kwargs.get("bucket") and kwargs.get("_url"):
34-
bucket = obj._kwargs.pop("bucket")
35-
obj._url = obj._url._replace(netloc=bucket)
30+
def _from_parsed_parts(cls, drv, root, parts, url=None, **kwargs):
31+
if kwargs.get("bucket") and url is not None:
32+
bucket = kwargs.pop("bucket")
33+
url = url._replace(netloc=bucket)
34+
obj = super()._from_parsed_parts(drv, root, parts, url, **kwargs)
3635
return obj
3736

3837
def _sub_path(self, name):
39-
"""gcs returns path as `{bucket}/<path>` with listdir
40-
and glob, so here we can add the netloc to the sub string
41-
so it gets subbed out as well
38+
"""
39+
`gcsfs` returns the full path as `<bucket>/<path>` with `listdir` and
40+
`glob`. However, in `iterdir` and `glob` we only want the relative path
41+
to `self`.
4242
"""
4343
sp = self.path
4444
subed = re.sub(f"^({self._url.netloc})?/?({sp}|{sp[1:]})/?", "", name)

0 commit comments

Comments
 (0)