Skip to content

Commit 80904ef

Browse files
ap-- and andrewfulton9 authored
Support Python 3.10 (#43)
* ci: run tests against Python 3.10
* upath.core: fix implementation to support Python 3.10. See: https://bugs.python.org/issue40038. This commit removes the private, internal-only `._init` method and restores the classmethods `_from_parts` and `_from_parsed_parts`. It also allows passing kwargs to those classmethods, which in turn requires providing `_make_child`, `_make_child_relpath` and `parent`. Currently it doesn't cache `_accessor` and `fs`, which could easily be implemented. After this commit, Python versions 3.7 to 3.10 are supported.
* flake8: fix line too long
* Add UPath.stat to exclude the follow_symlinks kwarg now present in pathlib
* update environment for Python 3.10
* update gcs and s3 paths to work with the Python 3.10 changes
* black formatting
* upath.core: cache the accessor instance. We emulate cached_property on the `_accessor` attribute to lazily instantiate the accessor via `__getattr__`. We can't conveniently use cached_property itself because UPath uses `__slots__`, which doesn't support descriptors without significantly complicating the code.
Co-authored-by: Andrew Fulton <[email protected]>
1 parent 87ad370 commit 80904ef

File tree

5 files changed

+107
-59
lines changed

5 files changed

+107
-59
lines changed

.github/workflows/python.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ jobs:
77
runs-on: ${{ matrix.os }}
88
strategy:
99
matrix:
10-
python-version: [3.7, 3.8, 3.9]
10+
python-version: [3.7, 3.8, 3.9, "3.10"]
1111
os: [ubuntu-latest, windows-latest]
1212

1313
steps:

environment.yml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,14 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6-
- python==3.8
7-
- fsspec==2021.11.1
6+
- python==3.10
7+
- fsspec
88
# optional
99
- requests
1010
- s3fs
11-
- jupyter
11+
- jupyter
1212
- ipython
1313
- pytest
14-
- vcrpy
1514
- pylint
1615
- flake8
1716
- pyarrow

upath/core.py

Lines changed: 75 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def __subclasscheck__(cls, subclass):
102102

103103
class UPath(pathlib.Path, PureUPath, metaclass=UPathMeta):
104104

105-
__slots__ = ("_url", "_kwargs", "_closed", "fs")
105+
__slots__ = ("_url", "_kwargs", "_closed", "_accessor")
106106

107107
not_implemented = [
108108
"cwd",
@@ -140,45 +140,32 @@ def __new__(cls, *args, **kwargs):
140140
if os.name == "nt"
141141
else pathlib.PosixPath
142142
)
143-
self = cls._from_parts(args, init=False)
143+
self = cls._from_parts(args)
144144
if not self._flavour.is_supported:
145145
raise NotImplementedError(
146146
"cannot instantiate %r on your system" % (cls.__name__,)
147147
)
148-
self._init()
149148
else:
150149
import upath.registry
151150

152151
cls = upath.registry._registry[parsed_url.scheme]
153152
kwargs["_url"] = parsed_url
154153
args_list.insert(0, parsed_url.path)
155154
args = tuple(args_list)
156-
self = cls._from_parts_init(args, init=False)
157-
self._init(*args, **kwargs)
155+
self = cls._from_parts(args, **kwargs)
158156
else:
159157
self = super().__new__(*args, **kwargs)
160158
return self
161159

162-
def _init(self, *args, template=None, **kwargs):
163-
self._closed = False
164-
if not kwargs:
165-
kwargs = dict(**self._kwargs)
160+
def __getattr__(self, item):
161+
if item == "_accessor":
162+
# cache the _accessor attribute on first access
163+
kw = self._kwargs.copy()
164+
kw.pop("_url", None)
165+
self._accessor = _accessor = self._default_accessor(self._url, **kw)
166+
return _accessor
166167
else:
167-
self._kwargs = dict(**kwargs)
168-
self._url = kwargs.pop("_url") if kwargs.get("_url") else None
169-
170-
if not self._root:
171-
if not self._parts:
172-
self._root = "/"
173-
elif self._parts[0] == "/":
174-
self._root = self._parts.pop(0)
175-
if getattr(self, "_str", None):
176-
delattr(self, "_str")
177-
if template is not None:
178-
self._accessor = template._accessor
179-
else:
180-
self._accessor = self._default_accessor(self._url, *args, **kwargs)
181-
self.fs = self._accessor._fs
168+
raise AttributeError(item)
182169

183170
def __getattribute__(self, item):
184171
if item == "__class__":
@@ -188,6 +175,21 @@ def __getattribute__(self, item):
188175
else:
189176
return super().__getattribute__(item)
190177

178+
def _make_child(self, args):
179+
drv, root, parts = self._parse_args(args, **self._kwargs)
180+
drv, root, parts = self._flavour.join_parsed_parts(
181+
self._drv, self._root, self._parts, drv, root, parts
182+
)
183+
return self._from_parsed_parts(drv, root, parts, **self._kwargs)
184+
185+
def _make_child_relpath(self, part):
186+
# This is an optimization used for dir walking. `part` must be
187+
# a single part relative to this path.
188+
parts = self._parts + [part]
189+
return self._from_parsed_parts(
190+
self._drv, self._root, parts, **self._kwargs
191+
)
192+
191193
def _format_parsed_parts(self, drv, root, parts):
192194
if parts:
193195
join_parts = parts[1:] if parts[0] == "/" else parts
@@ -217,6 +219,19 @@ def path(self):
217219
def open(self, *args, **kwargs):
218220
return self._accessor.open(self, *args, **kwargs)
219221

222+
@property
223+
def parent(self):
224+
"""The logical parent of the path."""
225+
drv = self._drv
226+
root = self._root
227+
parts = self._parts
228+
if len(parts) == 1 and (drv or root):
229+
return self
230+
return self._from_parsed_parts(drv, root, parts[:-1], **self._kwargs)
231+
232+
def stat(self):
233+
return self._accessor.stat(self)
234+
220235
def iterdir(self):
221236
"""Iterate over the files in this directory. Does not yield any
222237
result for the special paths '.' and '..'.
@@ -302,30 +317,50 @@ def rmdir(self, recursive=True):
302317
self._accessor.rm(self, recursive=recursive)
303318

304319
@classmethod
305-
def _from_parts_init(cls, args, init=False):
306-
return super()._from_parts(args, init=init)
307-
308-
def _from_parts(self, args, init=True):
309-
# We need to call _parse_args on the instance, so as to get the
310-
# right flavour.
311-
obj = object.__new__(self.__class__)
312-
drv, root, parts = self._parse_args(args)
320+
def _parse_args(cls, args, **kwargs):
321+
return super(UPath, cls)._parse_args(args)
322+
323+
@classmethod
324+
def _from_parts(cls, args, **kwargs):
325+
obj = object.__new__(cls)
326+
drv, root, parts = obj._parse_args(args, **kwargs)
313327
obj._drv = drv
314-
obj._root = root
315328
obj._parts = parts
316-
if init:
317-
obj._init(**self._kwargs)
329+
obj._closed = False
330+
obj._kwargs = kwargs.copy()
331+
obj._url = kwargs.pop("_url", None) or None
332+
333+
if not root:
334+
if not parts:
335+
root = "/"
336+
elif parts[0] == "/":
337+
root = parts.pop(0)
338+
obj._root = root
339+
318340
return obj
319341

320-
def _from_parsed_parts(self, drv, root, parts, init=True):
321-
obj = object.__new__(self.__class__)
342+
@classmethod
343+
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
344+
obj = object.__new__(cls)
322345
obj._drv = drv
323-
obj._root = root
324346
obj._parts = parts
325-
if init:
326-
obj._init(**self._kwargs)
347+
obj._closed = False
348+
obj._kwargs = kwargs.copy()
349+
obj._url = kwargs.pop("_url", None) or None
350+
351+
if not root:
352+
if not parts:
353+
root = "/"
354+
elif parts[0] == "/":
355+
root = parts.pop(0)
356+
obj._root = root
357+
327358
return obj
328359

360+
@property
361+
def fs(self):
362+
return self._accessor._fs
363+
329364
def __truediv__(self, key):
330365
# Add `/` root if not present
331366
if len(self._parts) == 0:
@@ -351,9 +386,6 @@ def __setstate__(self, state):
351386
kwargs = state["_kwargs"].copy()
352387
kwargs["_url"] = self._url
353388
self._kwargs = kwargs
354-
# _init needs to be called again, because when __new__ called _init,
355-
# the _kwargs were not yet set
356-
self._init()
357389

358390
def __reduce__(self):
359391
kwargs = self._kwargs.copy()

upath/implementations/gcs.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def __init__(self, parsed_url, *args, **kwargs):
99

1010
def _format_path(self, s):
1111
"""
12-
netloc has already been set to project via `GCSPath._init`
12+
netloc has already been set to project via `GCSPath._from_parts`
1313
"""
1414
s = os.path.join(self._url.netloc, s.lstrip("/"))
1515
return s
@@ -19,12 +19,21 @@ def _format_path(self, s):
1919
class GCSPath(upath.core.UPath):
2020
_default_accessor = _GCSAccessor
2121

22-
def _init(self, *args, template=None, **kwargs):
23-
# ensure that the bucket is part of the netloc path
22+
@classmethod
23+
def _from_parts(cls, args, **kwargs):
24+
obj = super()._from_parts(args, **kwargs)
2425
if kwargs.get("bucket") and kwargs.get("_url"):
25-
bucket = kwargs.pop("bucket")
26-
kwargs["_url"] = kwargs["_url"]._replace(netloc=bucket)
27-
super()._init(*args, template=template, **kwargs)
26+
bucket = obj._kwargs.pop("bucket")
27+
obj._url = obj._url._replace(netloc=bucket)
28+
return obj
29+
30+
@classmethod
31+
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
32+
obj = super()._from_parsed_parts(drv, root, parts, **kwargs)
33+
if kwargs.get("bucket") and kwargs.get("_url"):
34+
bucket = obj._kwargs.pop("bucket")
35+
obj._url = obj._url._replace(netloc=bucket)
36+
return obj
2837

2938
def _sub_path(self, name):
3039
"""gcs returns path as `{bucket}/<path>` with listdir

upath/implementations/s3.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,21 @@ def _format_path(self, s):
1919
class S3Path(upath.core.UPath):
2020
_default_accessor = _S3Accessor
2121

22-
def _init(self, *args, template=None, **kwargs):
23-
# ensure that the bucket is part of the netloc path
22+
@classmethod
23+
def _from_parts(cls, args, **kwargs):
24+
obj = super()._from_parts(args, **kwargs)
2425
if kwargs.get("bucket") and kwargs.get("_url"):
25-
bucket = kwargs.pop("bucket")
26-
kwargs["_url"] = kwargs["_url"]._replace(netloc=bucket)
26+
bucket = obj._kwargs.pop("bucket")
27+
obj._url = obj._url._replace(netloc=bucket)
28+
return obj
2729

28-
super()._init(*args, template=template, **kwargs)
30+
@classmethod
31+
def _from_parsed_parts(cls, drv, root, parts, **kwargs):
32+
obj = super()._from_parsed_parts(drv, root, parts, **kwargs)
33+
if kwargs.get("bucket") and kwargs.get("_url"):
34+
bucket = obj._kwargs.pop("bucket")
35+
obj._url = obj._url._replace(netloc=bucket)
36+
return obj
2937

3038
def _sub_path(self, name):
3139
"""s3fs returns path as `{bucket}/<path>` with listdir

0 commit comments

Comments
 (0)