Skip to content

Commit 42c2d7a

Browse files
committed
Merge remote-tracking branch 'upstream/main' into is_symlink_false
2 parents 9b39c02 + 651744b commit 42c2d7a

File tree

6 files changed, with 108 additions and 60 deletions.

.github/workflows/python.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ jobs:
77
runs-on: ${{ matrix.os }}
88
strategy:
99
matrix:
10-
python-version: [3.7, 3.8, 3.9]
10+
python-version: [3.7, 3.8, 3.9, "3.10"]
1111
os: [ubuntu-latest, windows-latest]
1212

1313
steps:

environment.yml

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,15 +3,14 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6-
- python==3.8
7-
- fsspec==2021.11.1
6+
- python==3.10
7+
- fsspec
88
# optional
99
- requests
1010
- s3fs
11-
- jupyter
11+
- jupyter
1212
- ipython
1313
- pytest
14-
- vcrpy
1514
- pylint
1615
- flake8
1716
- pyarrow

upath/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
"""Pathlib API extended to use fsspec backends"""
2-
__version__ = "0.0.12"
2+
__version__ = "0.0.15"
33

44
from upath.core import UPath
55

upath/core.py

Lines changed: 75 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ def __subclasscheck__(cls, subclass):
102102

103103
class UPath(pathlib.Path, PureUPath, metaclass=UPathMeta):
104104

105-
__slots__ = ("_url", "_kwargs", "_closed", "fs")
105+
__slots__ = ("_url", "_kwargs", "_closed", "_accessor")
106106

107107
not_implemented = [
108108
"cwd",
@@ -134,45 +134,32 @@ def __new__(cls, *args, **kwargs):
134134
if os.name == "nt"
135135
else pathlib.PosixPath
136136
)
137-
self = cls._from_parts(args, init=False)
137+
self = cls._from_parts(args)
138138
if not self._flavour.is_supported:
139139
raise NotImplementedError(
140140
"cannot instantiate %r on your system" % (cls.__name__,)
141141
)
142-
self._init()
143142
else:
144143
import upath.registry
145144

146145
cls = upath.registry._registry[parsed_url.scheme]
147146
kwargs["_url"] = parsed_url
148147
args_list.insert(0, parsed_url.path)
149148
args = tuple(args_list)
150-
self = cls._from_parts_init(args, init=False)
151-
self._init(*args, **kwargs)
149+
self = cls._from_parts(args, **kwargs)
152150
else:
153151
self = super().__new__(*args, **kwargs)
154152
return self
155153

156-
def _init(self, *args, template=None, **kwargs):
157-
self._closed = False
158-
if not kwargs:
159-
kwargs = dict(**self._kwargs)
154+
def __getattr__(self, item):
155+
if item == "_accessor":
156+
# cache the _accessor attribute on first access
157+
kw = self._kwargs.copy()
158+
kw.pop("_url", None)
159+
self._accessor = _accessor = self._default_accessor(self._url, **kw)
160+
return _accessor
160161
else:
161-
self._kwargs = dict(**kwargs)
162-
self._url = kwargs.pop("_url") if kwargs.get("_url") else None
163-
164-
if not self._root:
165-
if not self._parts:
166-
self._root = "/"
167-
elif self._parts[0] == "/":
168-
self._root = self._parts.pop(0)
169-
if getattr(self, "_str", None):
170-
delattr(self, "_str")
171-
if template is not None:
172-
self._accessor = template._accessor
173-
else:
174-
self._accessor = self._default_accessor(self._url, *args, **kwargs)
175-
self.fs = self._accessor._fs
162+
raise AttributeError(item)
176163

177164
def __getattribute__(self, item):
178165
if item == "__class__":
@@ -182,6 +169,21 @@ def __getattribute__(self, item):
182169
else:
183170
return super().__getattribute__(item)
184171

172+
def _make_child(self, args):
173+
drv, root, parts = self._parse_args(args, **self._kwargs)
174+
drv, root, parts = self._flavour.join_parsed_parts(
175+
self._drv, self._root, self._parts, drv, root, parts
176+
)
177+
return self._from_parsed_parts(drv, root, parts, **self._kwargs)
178+
179+
def _make_child_relpath(self, part):
180+
# This is an optimization used for dir walking. `part` must be
181+
# a single part relative to this path.
182+
parts = self._parts + [part]
183+
return self._from_parsed_parts(
184+
self._drv, self._root, parts, **self._kwargs
185+
)
186+
185187
def _format_parsed_parts(self, drv, root, parts):
186188
if parts:
187189
join_parts = parts[1:] if parts[0] == "/" else parts
@@ -211,6 +213,19 @@ def path(self):
211213
def open(self, *args, **kwargs):
212214
return self._accessor.open(self, *args, **kwargs)
213215

216+
@property
217+
def parent(self):
218+
"""The logical parent of the path."""
219+
drv = self._drv
220+
root = self._root
221+
parts = self._parts
222+
if len(parts) == 1 and (drv or root):
223+
return self
224+
return self._from_parsed_parts(drv, root, parts[:-1], **self._kwargs)
225+
226+
def stat(self):
227+
return self._accessor.stat(self)
228+
214229
def iterdir(self):
215230
"""Iterate over the files in this directory. Does not yield any
216231
result for the special paths '.' and '..'.
@@ -317,30 +332,50 @@ def rmdir(self, recursive=True):
317332
self._accessor.rm(self, recursive=recursive)
318333

319334
@classmethod
320-
def _from_parts_init(cls, args, init=False):
321-
return super()._from_parts(args, init=init)
322-
323-
def _from_parts(self, args, init=True):
324-
# We need to call _parse_args on the instance, so as to get the
325-
# right flavour.
326-
obj = object.__new__(self.__class__)
327-
drv, root, parts = self._parse_args(args)
335+
def _parse_args(cls, args, **kwargs):
336+
return super(UPath, cls)._parse_args(args)
337+
338+
@classmethod
339+
def _from_parts(cls, args, **kwargs):
340+
obj = object.__new__(cls)
341+
drv, root, parts = obj._parse_args(args, **kwargs)
328342
obj._drv = drv
329-
obj._root = root
330343
obj._parts = parts
331-
if init:
332-
obj._init(**self._kwargs)
344+
obj._closed = False
345+
obj._kwargs = kwargs.copy()
346+
obj._url = kwargs.pop("_url", None) or None
347+
348+
if not root:
349+
if not parts:
350+
root = "/"
351+
elif parts[0] == "/":
352+
root = parts.pop(0)
353+
obj._root = root
354+
333355
return obj
334356

335-
def _from_parsed_parts(self, drv, root, parts, init=True):
336-
obj = object.__new__(self.__class__)
357+
@classmethod
358+
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
359+
obj = object.__new__(cls)
337360
obj._drv = drv
338-
obj._root = root
339361
obj._parts = parts
340-
if init:
341-
obj._init(**self._kwargs)
362+
obj._closed = False
363+
obj._kwargs = kwargs.copy()
364+
obj._url = kwargs.pop("_url", None) or None
365+
366+
if not root:
367+
if not parts:
368+
root = "/"
369+
elif parts[0] == "/":
370+
root = parts.pop(0)
371+
obj._root = root
372+
342373
return obj
343374

375+
@property
376+
def fs(self):
377+
return self._accessor._fs
378+
344379
def __truediv__(self, key):
345380
# Add `/` root if not present
346381
if len(self._parts) == 0:
@@ -366,9 +401,6 @@ def __setstate__(self, state):
366401
kwargs = state["_kwargs"].copy()
367402
kwargs["_url"] = self._url
368403
self._kwargs = kwargs
369-
# _init needs to be called again, because when __new__ called _init,
370-
# the _kwargs were not yet set
371-
self._init()
372404

373405
def __reduce__(self):
374406
kwargs = self._kwargs.copy()

upath/implementations/gcs.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ def __init__(self, parsed_url, *args, **kwargs):
99

1010
def _format_path(self, s):
1111
"""
12-
netloc has already been set to project via `GCSPath._init`
12+
netloc has already been set to project via `GCSPath._from_parts`
1313
"""
1414
s = os.path.join(self._url.netloc, s.lstrip("/"))
1515
return s
@@ -19,12 +19,21 @@ def _format_path(self, s):
1919
class GCSPath(upath.core.UPath):
2020
_default_accessor = _GCSAccessor
2121

22-
def _init(self, *args, template=None, **kwargs):
23-
# ensure that the bucket is part of the netloc path
22+
@classmethod
23+
def _from_parts(cls, args, **kwargs):
24+
obj = super()._from_parts(args, **kwargs)
2425
if kwargs.get("bucket") and kwargs.get("_url"):
25-
bucket = kwargs.pop("bucket")
26-
kwargs["_url"] = kwargs["_url"]._replace(netloc=bucket)
27-
super()._init(*args, template=template, **kwargs)
26+
bucket = obj._kwargs.pop("bucket")
27+
obj._url = obj._url._replace(netloc=bucket)
28+
return obj
29+
30+
@classmethod
31+
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
32+
obj = super()._from_parsed_parts(drv, root, parts, **kwargs)
33+
if kwargs.get("bucket") and kwargs.get("_url"):
34+
bucket = obj._kwargs.pop("bucket")
35+
obj._url = obj._url._replace(netloc=bucket)
36+
return obj
2837

2938
def _sub_path(self, name):
3039
"""gcs returns path as `{bucket}/<path>` with listdir

upath/implementations/s3.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,21 @@ def _format_path(self, s):
1919
class S3Path(upath.core.UPath):
2020
_default_accessor = _S3Accessor
2121

22-
def _init(self, *args, template=None, **kwargs):
23-
# ensure that the bucket is part of the netloc path
22+
@classmethod
23+
def _from_parts(cls, args, **kwargs):
24+
obj = super()._from_parts(args, **kwargs)
2425
if kwargs.get("bucket") and kwargs.get("_url"):
25-
bucket = kwargs.pop("bucket")
26-
kwargs["_url"] = kwargs["_url"]._replace(netloc=bucket)
26+
bucket = obj._kwargs.pop("bucket")
27+
obj._url = obj._url._replace(netloc=bucket)
28+
return obj
2729

28-
super()._init(*args, template=template, **kwargs)
30+
@classmethod
31+
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
32+
obj = super()._from_parsed_parts(drv, root, parts, **kwargs)
33+
if kwargs.get("bucket") and kwargs.get("_url"):
34+
bucket = obj._kwargs.pop("bucket")
35+
obj._url = obj._url._replace(netloc=bucket)
36+
return obj
2937

3038
def _sub_path(self, name):
3139
"""s3fs returns path as `{bucket}/<path>` with listdir

0 commit comments

Comments
 (0)