11import os
22import pathlib
33import urllib
4- import re
54
6- from fsspec .registry import get_filesystem_class
7-
8- from upath .errors import NotDirectoryError
9-
10-
class _FSSpecAccessor:
    """Proxy that forwards attribute access to an fsspec filesystem.

    The filesystem class is looked up from the URL scheme and instantiated
    with keyword arguments derived from the URL, updated with any explicit
    ``kwargs``.  Every attribute other than the handful handled explicitly in
    ``__getattribute__`` is fetched from the filesystem and wrapped so that a
    leading ``UniversalPath`` argument is converted to a plain path string.
    """

    def __init__(self, parsed_url, *args, **kwargs):
        """Create the filesystem for ``parsed_url``.

        :param parsed_url: parsed URL (as produced by ``urllib.parse``) whose
            ``scheme`` selects the filesystem class.
        :param args: accepted for signature compatibility; not used here.
        :param kwargs: extra filesystem options; they override the options
            derived from the URL.
        """
        self._url = parsed_url
        cls = get_filesystem_class(self._url.scheme)
        url_kwargs = cls._get_kwargs_from_urls(
            urllib.parse.urlunparse(self._url)
        )
        url_kwargs.update(kwargs)
        self._fs = cls(**url_kwargs)
        if self._url.scheme in ["hdfs"]:
            self._fs.root_marker = "/"

    def argument_upath_self_to_filepath(self, func):
        """Wrap ``func`` so path arguments are normalised before the call.

        If positional arguments are passed to the wrapped function and the
        first one is a ``UniversalPath`` instance, that argument is replaced
        with the ``UniversalPath``'s ``path`` attribute.  If a truthy ``path``
        keyword is supplied together with positional arguments, the first
        positional argument is dropped in favour of the keyword.

        :param func: the filesystem callable to wrap.
        :returns: a callable with the same call signature as ``func``.
        """

        def strip_leading_slash(path):
            # Filesystems with an empty root marker expect paths without a
            # leading "/".
            if not self._fs.root_marker and path.startswith("/"):
                return path[1:]
            return path

        def wrapper(*args, **kwargs):
            if args:
                first_arg, *rest = args
                if not kwargs.get("path"):
                    if isinstance(first_arg, UniversalPath):
                        first_arg = strip_leading_slash(first_arg.path)
                    args = (first_arg, *rest)
                else:
                    # An explicit ``path=`` keyword replaces the leading
                    # positional argument.
                    args = tuple(rest)
            elif "path" in kwargs:
                # Only normalise ``path`` when it was actually supplied;
                # calls that take no path at all must pass through instead
                # of failing with KeyError.
                kwargs["path"] = strip_leading_slash(kwargs["path"])
            if self._url.scheme == "hdfs":
                # For hdfs the "trunicate" keyword is dropped and ``mkdir``
                # receives only its first positional argument.
                kwargs.pop("trunicate", None)
                if func.__name__ == "mkdir":
                    args = args[:1]

            return func(*args, **kwargs)

        return wrapper

    def __getattribute__(self, item):
        """Resolve ``item`` on the accessor itself or on the filesystem.

        :raises NotImplementedError: if a filesystem is attached and it has
            no (truthy) attribute called ``item``.
        """
        class_attrs = ("_url", "_fs")
        if item in class_attrs:
            return super().__getattribute__(item)
        class_methods = (
            "__init__",
            "__getattribute__",
            "argument_upath_self_to_filepath",
        )
        if item in class_methods:
            return lambda *args, **kwargs: getattr(_FSSpecAccessor, item)(
                self, *args, **kwargs
            )
        if item == "__class__":
            return _FSSpecAccessor
        # Read ``_fs`` straight from the instance dict so that a missing
        # filesystem does not raise.
        fs = object.__getattribute__(self, "__dict__").get("_fs", None)
        if fs is None:
            # No filesystem attached yet: nothing to delegate to.
            return None
        method = getattr(fs, item, None)
        if not method:
            raise NotImplementedError(
                f"{fs.protocol} filesystem has not attribute {item}"
            )
        return lambda *args, **kwargs: (
            self.argument_upath_self_to_filepath(method)(*args, **kwargs)
        )
87-
88-
class PureUniversalPath(pathlib.PurePath):
    """Pure path that uses pathlib's POSIX flavour for parsing."""

    # No per-instance attributes beyond those of PurePath.
    __slots__ = ()
    _flavour = pathlib._posix_flavour
5+ from upath .registry import _registry
6+ from upath .universal_path import UniversalPath
927
938
949class UPath (pathlib .Path ):
@@ -101,209 +16,27 @@ def __new__(cls, *args, **kwargs):
10116 val = kwargs .get (key )
10217 if val :
10318 parsed_url ._replace (** {key : val })
19+ # treat as local filesystem, return PosixPath or WindowsPath
10420 if not parsed_url .scheme :
10521 cls = (
10622 pathlib .WindowsPath
10723 if os .name == "nt"
10824 else pathlib .PosixPath
10925 )
26+ self = cls ._from_parts (args , init = False )
27+ if not self ._flavour .is_supported :
28+ raise NotImplementedError (
29+ "cannot instantiate %r on your system" % (cls .__name__ ,)
30+ )
31+ self ._init ()
11032 else :
111- cls = UniversalPath
112- # cls._url = parsed_url
33+ if parsed_url .scheme in _registry :
34+ cls = _registry [parsed_url .scheme ]
35+ else :
36+ cls = UniversalPath
11337 kwargs ["_url" ] = parsed_url
11438 new_args .insert (0 , parsed_url .path )
11539 args = tuple (new_args )
116-
117- if cls is UniversalPath :
118- self = cls ._from_parts_init (args , init = False )
119- else :
120- self = cls ._from_parts (args , init = False )
121- if not self ._flavour .is_supported :
122- raise NotImplementedError (
123- "cannot instantiate %r on your system" % (cls .__name__ ,)
124- )
125- if cls is UniversalPath :
126- self ._init (* args , ** kwargs )
127- else :
128- self ._init ()
40+ self = cls ._from_parts_init (args , init = False )
41+ self ._init (* args , ** kwargs )
12942 return self
130-
131-
class UniversalPath(UPath, PureUniversalPath):
    """Path whose filesystem operations are delegated to an accessor.

    The accessor is built from the parsed URL stored in ``_url`` (or shared
    from a template path) and its filesystem is exposed as ``fs``.
    """

    __slots__ = ("_url", "_kwargs", "_closed", "fs")

    # Attribute names for which ``__getattribute__`` raises
    # NotImplementedError instead of performing a lookup.
    not_implemented = [
        "cwd",
        "home",
        "expanduser",
        "group",
        "is_mount",
        "is_symlink",
        "is_socket",
        "is_fifo",
        "is_block_device",
        "is_char_device",
        "lchmod",
        "lstat",
        "owner",
        "readlink",
    ]

    def _init(self, *args, template=None, **kwargs):
        """Finish construction: store kwargs/URL and attach an accessor.

        :param template: optional path whose accessor is reused instead of
            creating a new one.
        :param kwargs: filesystem options; ``_url`` (the parsed URL) is
            popped from them.  When empty, the previously stored
            ``self._kwargs`` are used.
        """
        self._closed = False
        if not kwargs:
            kwargs = dict(**self._kwargs)
        else:
            self._kwargs = dict(**kwargs)
        self._url = kwargs.pop("_url") if kwargs.get("_url") else None

        if not self._root:
            if not self._parts:
                self._root = "/"
            elif self._parts[0] == "/":
                self._root = self._parts.pop(0)
        # Drop a cached string form, if any, so it is rebuilt on demand.
        if getattr(self, "_str", None):
            delattr(self, "_str")
        if template is not None:
            self._accessor = template._accessor
        else:
            self._accessor = _FSSpecAccessor(self._url, *args, **kwargs)
        self.fs = self._accessor._fs

    def __getattribute__(self, item):
        """Look up ``item``, rejecting names listed in ``not_implemented``.

        :raises NotImplementedError: if ``item`` is in ``not_implemented``.
        """
        if item == "__class__":
            return UniversalPath
        if item in getattr(UniversalPath, "not_implemented"):
            raise NotImplementedError(f"UniversalPath has no attribute {item}")
        return super().__getattribute__(item)

    def _format_parsed_parts(self, drv, root, parts):
        """Build the string form ``scheme:[//netloc]path`` from parsed parts."""
        # ``parts`` may be empty (``_init`` allows an empty ``_parts`` with
        # root "/"), so guard the index.
        join_parts = parts[1:] if parts and parts[0] == "/" else parts
        if drv or root:
            path = drv + root + self._flavour.join(join_parts)
        else:
            path = self._flavour.join(join_parts)
        scheme, netloc = self._url.scheme, self._url.netloc
        scheme = scheme + ":"
        netloc = "//" + netloc if netloc else ""
        return scheme + netloc + path

    @property
    def path(self):
        """The path portion (root plus joined parts), or "/" if no parts."""
        if not self._parts:
            return "/"
        join_parts = (
            self._parts[1:] if self._parts[0] == "/" else self._parts
        )
        return self._root + self._flavour.join(join_parts)

    def _own_prefix_pattern(self):
        """Compile a regex matching this path's own prefix in listed names.

        The prefix is matched with or without its first character (the
        leading slash), followed by "/".  The path is escaped so regex
        metacharacters in it are matched literally.
        """
        own_path = self.path
        return re.compile(
            f"^({re.escape(own_path)}|{re.escape(own_path[1:])})/"
        )

    def open(self, *args, **kwargs):
        """Open this path through the accessor and return its result."""
        return self._accessor.open(self, *args, **kwargs)

    def iterdir(self):
        """Iterate over the files in this directory. Does not yield any
        result for the special paths '.' and '..'.
        """
        if self._closed:
            self._raise_closed()
        own_prefix = self._own_prefix_pattern()
        for name in self._accessor.listdir(self):
            # fsspec returns dictionaries
            if isinstance(name, dict):
                name = name.get("name")
            if name in {".", ".."}:
                # Yielding a path object for these makes little sense
                continue
            # only want the path name with iterdir
            name = own_prefix.sub("", name)
            yield self._make_child_relpath(name)
            if self._closed:
                self._raise_closed()

    def exists(self):
        """
        Whether this path exists.
        """
        if not getattr(self._accessor, "exists"):
            # Fall back to ``stat`` when the accessor offers no ``exists``.
            try:
                self._accessor.stat(self)
            except FileNotFoundError:
                return False
            return True
        return self._accessor.exists(self)

    def is_dir(self):
        """Whether the accessor reports this path's type as "directory"."""
        return self._accessor.info(self)["type"] == "directory"

    def is_file(self):
        """Whether the accessor reports this path's type as "file"."""
        return self._accessor.info(self)["type"] == "file"

    def glob(self, pattern):
        """Yield child paths matching ``pattern`` joined onto this path."""
        path = self.joinpath(pattern)
        own_prefix = self._own_prefix_pattern()
        for name in self._accessor.glob(self, path=path.path):
            name = own_prefix.sub("", name)
            name = name.split(self._flavour.sep)
            yield self._make_child(self._parts + name)

    def rename(self, target):
        """Not supported.

        :raises NotImplementedError: always.
        """
        # can be implemented, but may be tricky
        raise NotImplementedError

    def touch(self, trunicate=True, **kwargs):
        """Call the accessor's ``touch`` for this path.

        NOTE(review): the keyword is spelled ``trunicate`` here and is
        forwarded under that name; fsspec's own keyword appears to be
        ``truncate`` -- confirm before renaming, since callers may pass it
        by name.
        """
        self._accessor.touch(self, trunicate=trunicate, **kwargs)

    def unlink(self, missing_ok=False):
        """Remove this path non-recursively.

        :param missing_ok: if true, a missing path is silently ignored.
        :raises FileNotFoundError: if the path does not exist and
            ``missing_ok`` is false.
        """
        if not self.exists():
            if not missing_ok:
                raise FileNotFoundError
            return
        self._accessor.rm(self, recursive=False)

    def rmdir(self, recursive=True):
        """Remove this directory.

        :param recursive: forwarded to the accessor's ``rm``.
        :raises NotDirectoryError: if this path is not a directory.
        """
        # Explicit check rather than ``assert``: assertions are stripped
        # under ``python -O``, which would let ``rm`` run on a non-directory.
        if not self.is_dir():
            raise NotDirectoryError
        self._accessor.rm(self, recursive=recursive)

    @classmethod
    def _from_parts_init(cls, args, init=False):
        """Class-level constructor delegating to the parent ``_from_parts``."""
        return super()._from_parts(args, init=init)

    def _from_parts(self, args, init=True):
        """Create a new ``UniversalPath`` from unparsed ``args``."""
        # We need to call _parse_args on the instance, so as to get the
        # right flavour.
        obj = object.__new__(UniversalPath)
        drv, root, parts = self._parse_args(args)
        obj._drv = drv
        obj._root = root
        obj._parts = parts
        if init:
            # Reuse this path's stored kwargs for the new object.
            obj._init(**self._kwargs)
        return obj

    def _from_parsed_parts(self, drv, root, parts, init=True):
        """Create a new ``UniversalPath`` from already-parsed components."""
        obj = object.__new__(UniversalPath)
        obj._drv = drv
        obj._root = root
        obj._parts = parts
        if init:
            # Reuse this path's stored kwargs for the new object.
            obj._init(**self._kwargs)
        return obj
0 commit comments