1- from abc import ABCMeta
21import os
32import pathlib
3+ import re
44import urllib
5+ from abc import ABCMeta
56
6- from fsspec .registry import known_implementations , registry
7+ from fsspec .registry import (
8+ get_filesystem_class ,
9+ known_implementations ,
10+ registry ,
11+ )
712from fsspec .utils import stringify_path
813
9- from upath .registry import _registry
14+ from upath .errors import NotDirectoryError
15+
16+
class _FSSpecAccessor:
    """Accessor that forwards attribute lookups to an fsspec filesystem.

    The filesystem class is looked up from the URL scheme and instantiated
    with the keyword arguments derived from the URL, updated with any
    explicitly supplied keyword arguments.  Every other attribute access is
    redirected to that filesystem instance, with path arguments rewritten by
    ``_transform_arg_paths`` before the call is made.
    """

    def __init__(self, parsed_url, *args, **kwargs):
        """Create the filesystem for ``parsed_url``.

        ``parsed_url`` must expose ``scheme`` and be accepted by
        ``urllib.parse.urlunparse``.  Extra positional arguments are
        accepted but not used; ``kwargs`` override the URL-derived options.
        """
        self._url = parsed_url
        cls = get_filesystem_class(self._url.scheme)
        url_kwargs = cls._get_kwargs_from_urls(
            urllib.parse.urlunparse(self._url))
        url_kwargs.update(kwargs)
        self._fs = cls(**url_kwargs)

    def transform_args_wrapper(self, func):
        """Modifies the arguments that get passed to the filesystem so that
        the UniversalPath instance gets stripped as the first argument. If a
        path keyword argument is not given, then `UniversalPath.path` is
        formatted for the filesystem and inserted as the first argument.
        If it is, then the path keyword argument is formatted properly for
        the filesystem.
        """
        def wrapper(*args, **kwargs):
            args, kwargs = self._transform_arg_paths(args, kwargs)
            return func(*args, **kwargs)

        return wrapper

    def _transform_arg_paths(self, args, kwargs):
        """Format the path properly for the filesystem backend.

        Returns ``(args, kwargs)`` where ``args`` is always a tuple.  With a
        truthy ``path`` keyword the leading positional argument (if any) is
        dropped and the keyword is formatted; otherwise a leading ``UPath``
        positional is replaced by its formatted ``path`` string.
        """
        args = list(args)
        if kwargs.get("path"):
            # The explicit ``path`` keyword wins; the leading positional is
            # discarded.  Guard the pop so keyword-only calls do not raise
            # IndexError.
            if args:
                args.pop(0)
            kwargs["path"] = self._format_path(kwargs["path"])
        elif args and isinstance(args[0], UPath):
            args[0] = self._format_path(args[0].path)
        return tuple(args), kwargs

    def _format_path(self, s):
        """placeholder method for subclassed filesystems"""
        return s

    def __getattribute__(self, item):
        """Resolve ``item`` on the accessor or on the wrapped filesystem.

        Raises AttributeError while no filesystem has been attached, and
        NotImplementedError when the filesystem has no truthy attribute
        named ``item``.
        """
        class_attrs = ("_url", "_fs", "__class__")
        if item in class_attrs:
            return super().__getattribute__(item)

        class_methods = (
            "__init__",
            "__getattribute__",
            "transform_args_wrapper",
            "_transform_arg_paths",
            "_format_path",
        )
        if item in class_methods:
            return lambda *args, **kwargs: getattr(self.__class__, item)(
                self, *args, **kwargs)

        # Read ``__dict__`` through ``object`` to avoid recursing into this
        # method while looking for the filesystem.
        d = object.__getattribute__(self, "__dict__")
        fs = d.get("_fs", None)
        if fs is None:
            # Previously this fell through and returned None for every name,
            # which made an uninitialised accessor look like it had
            # attributes whose value was None.
            raise AttributeError(
                f"{self.__class__.__name__!r} object has no attribute "
                f"{item!r} (no filesystem attached)")

        method = getattr(fs, item, None)
        if method:
            return lambda *args, **kwargs: (
                self.transform_args_wrapper(method)(*args, **kwargs))
        raise NotImplementedError(
            f"{fs.protocol} filesystem has no attribute {item}")
83+
84+
class PureUPath(pathlib.PurePath):
    """Pure path base class bound to pathlib's POSIX flavour."""

    # No per-instance attributes are added at this level.
    __slots__ = ()
    _flavour = pathlib._posix_flavour
1088
1189
1290class UPathMeta (ABCMeta ):
@@ -17,7 +95,28 @@ def __subclasscheck__(cls, subclass):
1795 return issubclass (subclass , pathlib .Path )
1896
1997
20- class UPath (pathlib .Path , metaclass = UPathMeta ):
98+ class UPath (pathlib .Path , PureUPath , metaclass = UPathMeta ):
99+
100+ __slots__ = ("_url" , "_kwargs" , "_closed" , "fs" )
101+
102+ not_implemented = [
103+ "cwd" ,
104+ "home" ,
105+ "expanduser" ,
106+ "group" ,
107+ "is_mount" ,
108+ "is_symlink" ,
109+ "is_socket" ,
110+ "is_fifo" ,
111+ "is_block_device" ,
112+ "is_char_device" ,
113+ "lchmod" ,
114+ "lstat" ,
115+ "owner" ,
116+ "readlink" ,
117+ ]
118+ _default_accessor = _FSSpecAccessor
119+
21120 def __new__ (cls , * args , ** kwargs ):
22121 if issubclass (cls , UPath ):
23122 args_list = list (args )
@@ -31,19 +130,18 @@ def __new__(cls, *args, **kwargs):
31130 # treat as local filesystem, return PosixPath or WindowsPath
32131 impls = list (registry ) + list (known_implementations .keys ())
33132 if not parsed_url .scheme or parsed_url .scheme not in impls :
34- cls = (
35- pathlib .WindowsPath
36- if os .name == "nt"
37- else pathlib .PosixPath
38- )
133+ cls = (pathlib .WindowsPath
134+ if os .name == "nt" else pathlib .PosixPath )
39135 self = cls ._from_parts (args , init = False )
40136 if not self ._flavour .is_supported :
41137 raise NotImplementedError (
42- "cannot instantiate %r on your system" % ( cls . __name__ ,)
43- )
138+ "cannot instantiate %r on your system" %
139+ ( cls . __name__ , ) )
44140 self ._init ()
45141 else :
46- cls = _registry [parsed_url .scheme ]
142+ import upath .registry
143+
144+ cls = upath .registry ._registry [parsed_url .scheme ]
47145 kwargs ["_url" ] = parsed_url
48146 args_list .insert (0 , parsed_url .path )
49147 args = tuple (args_list )
@@ -52,3 +150,166 @@ def __new__(cls, *args, **kwargs):
52150 else :
53151 self = super ().__new__ (* args , ** kwargs )
54152 return self
153+
def _init(self, *args, template=None, **kwargs):
    """Finish setting up the path: kwargs, URL, root and accessor.

    When ``kwargs`` are given they are stored (copied) on the instance;
    otherwise a copy of the previously stored ``_kwargs`` is used.  A truthy
    ``_url`` entry is removed from the working kwargs and kept as
    ``self._url``.  The accessor is shared with ``template`` when one is
    supplied, else built from ``_default_accessor``.
    """
    self._closed = False
    if kwargs:
        self._kwargs = dict(**kwargs)
    else:
        kwargs = dict(**self._kwargs)

    url = None
    if kwargs.get("_url"):
        url = kwargs.pop("_url")
    self._url = url

    if not self._root:
        # Without a root, default to "/" or lift a leading "/" out of the
        # parts list.
        if not self._parts:
            self._root = "/"
        elif self._parts[0] == "/":
            self._root = self._parts.pop(0)

    # Drop a stale, truthy ``_str`` value if one is present.
    if getattr(self, "_str", None):
        delattr(self, "_str")

    if template is None:
        self._accessor = self._default_accessor(self._url, *args, **kwargs)
    else:
        self._accessor = template._accessor
    self.fs = self._accessor._fs
174+
def __getattribute__(self, item):
    """Block access to names listed in the class's ``not_implemented``.

    ``__class__`` is always resolved normally (it is needed to read the
    list itself); any listed name raises NotImplementedError; everything
    else uses the default lookup.
    """
    if item != "__class__" and item in getattr(
            self.__class__, "not_implemented"):
        raise NotImplementedError(f"UniversalPath has no attribute {item}")
    return super().__getattribute__(item)
182+
def _format_parsed_parts(self, drv, root, parts):
    """Render drive, root and parts as ``scheme:[//netloc]path``.

    A leading ``"/"`` element in ``parts`` is not joined again; the scheme
    and network location are read from ``self._url``.
    """
    segments = []
    if parts:
        segments = parts[1:] if parts[0] == "/" else parts

    joined = self._flavour.join(segments)
    path = drv + root + joined if (drv or root) else joined

    url = self._url
    prefix = url.scheme + ":"
    authority = "//" + url.netloc if url.netloc else ""
    return prefix + authority + path
197+
@property
def path(self):
    """Path string without scheme or netloc: root plus the joined parts.

    Returns ``"/"`` when there are no parts; a leading ``"/"`` part is not
    joined a second time.
    """
    parts = self._parts
    if not parts:
        return "/"
    tail = parts[1:] if parts[0] == "/" else parts
    joined = self._flavour.join(tail)
    return self._root + joined
207+
def open(self, *args, **kwargs):
    """Open this path through the accessor, forwarding all arguments."""
    opener = self._accessor.open
    return opener(self, *args, **kwargs)
210+
def iterdir(self):
    """Yield a child path for every entry the accessor lists here.

    Entries may be plain names or dictionaries carrying a ``"name"`` key.
    The special names '.' and '..' are never yielded.
    """
    if self._closed:
        self._raise_closed()

    special = {".", ".."}
    for entry in self._accessor.listdir(self):
        # Listings may come back as dictionaries; keep only the name.
        name = entry.get("name") if isinstance(entry, dict) else entry
        if name not in special:
            # Reduce the listed name to the part below this path.
            yield self._make_child_relpath(self._sub_path(name))
            if self._closed:
                self._raise_closed()
229+
def glob(self, pattern):
    """Yield a child path for each accessor match of ``pattern`` below here.

    The pattern is joined onto this path and passed to the accessor as the
    ``path`` keyword; each result is reduced to its relative part and split
    on the flavour separator before the child is built.
    """
    target = self.joinpath(pattern).path
    for match in self._accessor.glob(self, path=target):
        relative = self._sub_path(match)
        yield self._make_child(relative.split(self._flavour.sep))
236+
def _sub_path(self, name):
    """Strip this path's prefix (with or without leading slash) from name.

    ``name`` is returned unchanged when it does not start with
    ``self.path`` (or ``self.path`` minus its first character) followed by
    ``"/"``.
    """
    sp = self.path
    # Escape both alternatives: the path is literal text, not a pattern, so
    # characters such as "+", "." or "(" must not act as regex operators.
    prefixes = "|".join(re.escape(prefix) for prefix in (sp, sp[1:]))
    return re.sub(f"^({prefixes})/", "", name)
241+
def exists(self):
    """
    Whether this path exists.

    Uses the accessor's ``exists`` when it provides one; otherwise falls
    back to ``stat`` and treats FileNotFoundError as "does not exist".
    """
    try:
        # A default is required here: without it a missing ``exists``
        # raises instead of reaching the ``stat`` fallback.
        checker = getattr(self._accessor, "exists", None)
    except NotImplementedError:
        # Some accessors signal a missing attribute with
        # NotImplementedError, which getattr's default does not absorb.
        checker = None

    if checker:
        return checker(self)

    try:
        self._accessor.stat(self)
    except FileNotFoundError:
        return False
    return True
254+
def is_dir(self):
    """Return True when the accessor reports this path's type as "directory"."""
    entry_type = self._accessor.info(self)["type"]
    return entry_type == "directory"
260+
def is_file(self):
    """Return True when the accessor reports this path's type as "file"."""
    entry_type = self._accessor.info(self)["type"]
    return entry_type == "file"
266+
def rename(self, target):
    """Renaming is not supported; always raises NotImplementedError.

    It could be implemented, but doing so may be tricky.
    """
    raise NotImplementedError
270+
def touch(self, trunicate=True, truncate=None, **kwargs):
    """Create the file at this path through the accessor.

    ``truncate`` is forwarded to the accessor's ``touch`` under that
    keyword.  ``trunicate`` is the historical, misspelt name of the same
    flag; it is kept so existing positional and keyword callers continue
    to work and is used whenever ``truncate`` is not given.  Any other
    keyword arguments are passed through unchanged.
    """
    if truncate is None:
        truncate = trunicate
    # Forward under the correctly spelt keyword; the misspelt one was never
    # seen by the filesystem as its truncate flag.
    self._accessor.touch(self, truncate=truncate, **kwargs)
273+
def unlink(self, missing_ok=False):
    """Remove this path non-recursively through the accessor.

    A path that does not exist raises FileNotFoundError unless
    ``missing_ok`` is true, in which case nothing happens.
    """
    if self.exists():
        self._accessor.rm(self, recursive=False)
    elif not missing_ok:
        raise FileNotFoundError
281+
def rmdir(self, recursive=True):
    """Remove this directory through the accessor.

    Raises NotDirectoryError when ``is_dir()`` is false.  ``recursive`` is
    forwarded to the accessor's ``rm``.

    TODO: add a warning if the directory is not empty.
    """
    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently skip this validation.
    if not self.is_dir():
        raise NotDirectoryError
    self._accessor.rm(self, recursive=recursive)
291+
@classmethod
def _from_parts_init(cls, args, init=False):
    """Build an instance via the parent class's ``_from_parts``.

    ``init`` is forwarded as a keyword and defaults to False here.
    """
    parent_from_parts = super()._from_parts
    return parent_from_parts(args, init=init)
295+
def _from_parts(self, args, init=True):
    """Create a sibling instance of the same class from unparsed ``args``.

    Parsing is done on the instance (not the class) so the right flavour
    is used.  When ``init`` is true the new object is initialised with this
    instance's stored ``_kwargs``.
    """
    sibling = object.__new__(self.__class__)
    sibling._drv, sibling._root, sibling._parts = self._parse_args(args)
    if init:
        sibling._init(**self._kwargs)
    return sibling
307+
def _from_parsed_parts(self, drv, root, parts, init=True):
    """Create a sibling instance of the same class from parsed components.

    When ``init`` is true the new object is initialised with this
    instance's stored ``_kwargs``.
    """
    sibling = object.__new__(self.__class__)
    sibling._drv, sibling._root, sibling._parts = drv, root, parts
    if init:
        sibling._init(**self._kwargs)
    return sibling
0 commit comments