Skip to content

Commit 705d810

Browse files
committed
Merge remote-tracking branch 'upstream/main' into copy-upath
2 parents b6c4c64 + 651744b commit 705d810

File tree

6 files changed

+108
-60
lines changed

6 files changed

+108
-60
lines changed

.github/workflows/python.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ jobs:
77
runs-on: ${{ matrix.os }}
88
strategy:
99
matrix:
10-
python-version: [3.7, 3.8, 3.9]
10+
python-version: [3.7, 3.8, 3.9, "3.10"]
1111
os: [ubuntu-latest, windows-latest]
1212

1313
steps:

environment.yml

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,15 +3,14 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6-
- python==3.8
7-
- fsspec==2021.11.1
6+
- python==3.10
7+
- fsspec
88
# optional
99
- requests
1010
- s3fs
11-
- jupyter
11+
- jupyter
1212
- ipython
1313
- pytest
14-
- vcrpy
1514
- pylint
1615
- flake8
1716
- pyarrow

upath/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
"""Pathlib API extended to use fsspec backends"""
2-
__version__ = "0.0.12"
2+
__version__ = "0.0.15"
33

44
from upath.core import UPath
55

upath/core.py

Lines changed: 75 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ def __subclasscheck__(cls, subclass):
102102

103103
class UPath(pathlib.Path, PureUPath, metaclass=UPathMeta):
104104

105-
__slots__ = ("_url", "_kwargs", "_closed", "fs")
105+
__slots__ = ("_url", "_kwargs", "_closed", "_accessor")
106106

107107
not_implemented = [
108108
"cwd",
@@ -158,45 +158,32 @@ def __new__(cls, *args, **kwargs):
158158
if os.name == "nt"
159159
else pathlib.PosixPath
160160
)
161-
self = cls._from_parts(args, init=False)
161+
self = cls._from_parts(args)
162162
if not self._flavour.is_supported:
163163
raise NotImplementedError(
164164
"cannot instantiate %r on your system" % (cls.__name__,)
165165
)
166-
self._init()
167166
else:
168167
import upath.registry
169168

170169
cls = upath.registry._registry[parsed_url.scheme]
171170
kwargs["_url"] = parsed_url
172171
args_list.insert(0, parsed_url.path)
173172
args = tuple(args_list)
174-
self = cls._from_parts_init(args, init=False)
175-
self._init(*args, **kwargs)
173+
self = cls._from_parts(args, **kwargs)
176174
else:
177175
self = super().__new__(*args, **kwargs)
178176
return self
179177

180-
def _init(self, *args, template=None, **kwargs):
181-
self._closed = False
182-
if not kwargs:
183-
kwargs = dict(**self._kwargs)
178+
def __getattr__(self, item):
179+
if item == "_accessor":
180+
# cache the _accessor attribute on first access
181+
kw = self._kwargs.copy()
182+
kw.pop("_url", None)
183+
self._accessor = _accessor = self._default_accessor(self._url, **kw)
184+
return _accessor
184185
else:
185-
self._kwargs = dict(**kwargs)
186-
self._url = kwargs.pop("_url") if kwargs.get("_url") else None
187-
188-
if not self._root:
189-
if not self._parts:
190-
self._root = "/"
191-
elif self._parts[0] == "/":
192-
self._root = self._parts.pop(0)
193-
if getattr(self, "_str", None):
194-
delattr(self, "_str")
195-
if template is not None:
196-
self._accessor = template._accessor
197-
else:
198-
self._accessor = self._default_accessor(self._url, *args, **kwargs)
199-
self.fs = self._accessor._fs
186+
raise AttributeError(item)
200187

201188
def __getattribute__(self, item):
202189
if item == "__class__":
@@ -206,6 +193,21 @@ def __getattribute__(self, item):
206193
else:
207194
return super().__getattribute__(item)
208195

196+
def _make_child(self, args):
197+
drv, root, parts = self._parse_args(args, **self._kwargs)
198+
drv, root, parts = self._flavour.join_parsed_parts(
199+
self._drv, self._root, self._parts, drv, root, parts
200+
)
201+
return self._from_parsed_parts(drv, root, parts, **self._kwargs)
202+
203+
def _make_child_relpath(self, part):
204+
# This is an optimization used for dir walking. `part` must be
205+
# a single part relative to this path.
206+
parts = self._parts + [part]
207+
return self._from_parsed_parts(
208+
self._drv, self._root, parts, **self._kwargs
209+
)
210+
209211
def _format_parsed_parts(self, drv, root, parts):
210212
if parts:
211213
join_parts = parts[1:] if parts[0] == "/" else parts
@@ -235,6 +237,19 @@ def path(self):
235237
def open(self, *args, **kwargs):
236238
return self._accessor.open(self, *args, **kwargs)
237239

240+
@property
241+
def parent(self):
242+
"""The logical parent of the path."""
243+
drv = self._drv
244+
root = self._root
245+
parts = self._parts
246+
if len(parts) == 1 and (drv or root):
247+
return self
248+
return self._from_parsed_parts(drv, root, parts[:-1], **self._kwargs)
249+
250+
def stat(self):
251+
return self._accessor.stat(self)
252+
238253
def iterdir(self):
239254
"""Iterate over the files in this directory. Does not yield any
240255
result for the special paths '.' and '..'.
@@ -320,30 +335,50 @@ def rmdir(self, recursive=True):
320335
self._accessor.rm(self, recursive=recursive)
321336

322337
@classmethod
323-
def _from_parts_init(cls, args, init=False):
324-
return super()._from_parts(args, init=init)
325-
326-
def _from_parts(self, args, init=True):
327-
# We need to call _parse_args on the instance, so as to get the
328-
# right flavour.
329-
obj = object.__new__(self.__class__)
330-
drv, root, parts = self._parse_args(args)
338+
def _parse_args(cls, args, **kwargs):
339+
return super(UPath, cls)._parse_args(args)
340+
341+
@classmethod
342+
def _from_parts(cls, args, **kwargs):
343+
obj = object.__new__(cls)
344+
drv, root, parts = obj._parse_args(args, **kwargs)
331345
obj._drv = drv
332-
obj._root = root
333346
obj._parts = parts
334-
if init:
335-
obj._init(**self._kwargs)
347+
obj._closed = False
348+
obj._kwargs = kwargs.copy()
349+
obj._url = kwargs.pop("_url", None) or None
350+
351+
if not root:
352+
if not parts:
353+
root = "/"
354+
elif parts[0] == "/":
355+
root = parts.pop(0)
356+
obj._root = root
357+
336358
return obj
337359

338-
def _from_parsed_parts(self, drv, root, parts, init=True):
339-
obj = object.__new__(self.__class__)
360+
@classmethod
361+
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
362+
obj = object.__new__(cls)
340363
obj._drv = drv
341-
obj._root = root
342364
obj._parts = parts
343-
if init:
344-
obj._init(**self._kwargs)
365+
obj._closed = False
366+
obj._kwargs = kwargs.copy()
367+
obj._url = kwargs.pop("_url", None) or None
368+
369+
if not root:
370+
if not parts:
371+
root = "/"
372+
elif parts[0] == "/":
373+
root = parts.pop(0)
374+
obj._root = root
375+
345376
return obj
346377

378+
@property
379+
def fs(self):
380+
return self._accessor._fs
381+
347382
def __truediv__(self, key):
348383
# Add `/` root if not present
349384
if len(self._parts) == 0:
@@ -369,9 +404,6 @@ def __setstate__(self, state):
369404
kwargs = state["_kwargs"].copy()
370405
kwargs["_url"] = self._url
371406
self._kwargs = kwargs
372-
# _init needs to be called again, because when __new__ called _init,
373-
# the _kwargs were not yet set
374-
self._init()
375407

376408
def __reduce__(self):
377409
kwargs = self._kwargs.copy()

upath/implementations/gcs.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ def __init__(self, parsed_url, *args, **kwargs):
99

1010
def _format_path(self, s):
1111
"""
12-
netloc has already been set to project via `GCSPath._init`
12+
netloc has already been set to project via `GCSPath._from_parts`
1313
"""
1414
s = os.path.join(self._url.netloc, s.lstrip("/"))
1515
return s
@@ -19,12 +19,21 @@ def _format_path(self, s):
1919
class GCSPath(upath.core.UPath):
2020
_default_accessor = _GCSAccessor
2121

22-
def _init(self, *args, template=None, **kwargs):
23-
# ensure that the bucket is part of the netloc path
22+
@classmethod
23+
def _from_parts(cls, args, **kwargs):
24+
obj = super()._from_parts(args, **kwargs)
2425
if kwargs.get("bucket") and kwargs.get("_url"):
25-
bucket = kwargs.pop("bucket")
26-
kwargs["_url"] = kwargs["_url"]._replace(netloc=bucket)
27-
super()._init(*args, template=template, **kwargs)
26+
bucket = obj._kwargs.pop("bucket")
27+
obj._url = obj._url._replace(netloc=bucket)
28+
return obj
29+
30+
@classmethod
31+
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
32+
obj = super()._from_parsed_parts(drv, root, parts, **kwargs)
33+
if kwargs.get("bucket") and kwargs.get("_url"):
34+
bucket = obj._kwargs.pop("bucket")
35+
obj._url = obj._url._replace(netloc=bucket)
36+
return obj
2837

2938
def _sub_path(self, name):
3039
"""gcs returns path as `{bucket}/<path>` with listdir

upath/implementations/s3.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,21 @@ def _format_path(self, s):
1919
class S3Path(upath.core.UPath):
2020
_default_accessor = _S3Accessor
2121

22-
def _init(self, *args, template=None, **kwargs):
23-
# ensure that the bucket is part of the netloc path
22+
@classmethod
23+
def _from_parts(cls, args, **kwargs):
24+
obj = super()._from_parts(args, **kwargs)
2425
if kwargs.get("bucket") and kwargs.get("_url"):
25-
bucket = kwargs.pop("bucket")
26-
kwargs["_url"] = kwargs["_url"]._replace(netloc=bucket)
26+
bucket = obj._kwargs.pop("bucket")
27+
obj._url = obj._url._replace(netloc=bucket)
28+
return obj
2729

28-
super()._init(*args, template=template, **kwargs)
30+
@classmethod
31+
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
32+
obj = super()._from_parsed_parts(drv, root, parts, **kwargs)
33+
if kwargs.get("bucket") and kwargs.get("_url"):
34+
bucket = obj._kwargs.pop("bucket")
35+
obj._url = obj._url._replace(netloc=bucket)
36+
return obj
2937

3038
def _sub_path(self, name):
3139
"""s3fs returns path as `{bucket}/<path>` with listdir

0 commit comments

Comments
 (0)