11import os
22import pathlib
33import urllib
4- import re
54
6- from fsspec .registry import get_filesystem_class
7-
8- from upath .errors import NotDirectoryError
9-
10-
class _FSSpecAccessor:
    """Accessor that forwards attribute access to an fsspec filesystem.

    The filesystem class is looked up from the scheme of ``parsed_url``
    and instantiated with the keyword arguments it derives from the URL,
    updated with any explicitly passed keyword arguments. Every attribute
    that the accessor does not define itself is fetched from that
    filesystem instance; callables are wrapped so that a leading
    ``UniversalPath`` argument is converted to a plain path string.
    """

    def __init__(self, parsed_url, *args, **kwargs):
        """Create the filesystem instance for ``parsed_url``.

        :param parsed_url: parsed URL; its ``scheme`` selects the
            filesystem class and the re-assembled URL supplies the
            default constructor keyword arguments
        :param args: accepted but not used
        :param kwargs: override the keyword arguments taken from the URL
        """
        # ``import urllib`` alone does not guarantee that the ``parse``
        # submodule has been loaded, so import it explicitly.
        import urllib.parse

        self._url = parsed_url
        cls = get_filesystem_class(self._url.scheme)
        url_kwargs = cls._get_kwargs_from_urls(
            urllib.parse.urlunparse(self._url)
        )
        url_kwargs.update(kwargs)
        self._fs = cls(**url_kwargs)
        if self._url.scheme == "hdfs":
            self._fs.root_marker = "/"

    def argument_upath_self_to_filepath(self, func):
        """Wrap ``func`` so path arguments are passed as plain strings.

        If positional arguments are passed to the wrapped function and
        the first one is a ``UniversalPath`` instance, it is replaced
        with that instance's ``path`` attribute. If a truthy ``path``
        keyword argument is given instead, the first positional argument
        is dropped and the keyword value is used. In both cases a leading
        "/" is removed when the filesystem has an empty ``root_marker``.

        :param func: callable to wrap
        :return: the wrapping function
        """

        def strip_root(path):
            # Filesystems without a root marker expect paths without a
            # leading slash.
            if not self._fs.root_marker and path.startswith("/"):
                return path[1:]
            return path

        def wrapper(*args, **kwargs):
            if args:
                first_arg, *rest = args
                if not kwargs.get("path"):
                    if isinstance(first_arg, UniversalPath):
                        first_arg = strip_root(first_arg.path)
                    args = (first_arg, *rest)
                else:
                    # An explicit ``path`` keyword takes the place of the
                    # leading positional argument.
                    kwargs["path"] = strip_root(kwargs["path"])
                    args = tuple(rest)
            if self._url.scheme == "hdfs":
                # For hdfs the "trunicate" keyword is discarded and
                # ``mkdir`` receives only its first positional argument.
                kwargs.pop("trunicate", None)
                if func.__name__ == "mkdir":
                    args = args[:1]

            return func(*args, **kwargs)

        return wrapper

    def __getattribute__(self, item):
        """Resolve ``item`` on the accessor or on the filesystem.

        ``_url``, ``_fs`` and the accessor's own methods are looked up on
        the accessor. Everything else is taken from the filesystem
        instance: callables are returned wrapped by
        ``argument_upath_self_to_filepath``, other values are returned
        unchanged.

        :raises AttributeError: if the filesystem has not been created
        :raises NotImplementedError: if the filesystem lacks ``item``
        """
        if item == "__class__":
            return _FSSpecAccessor
        own_names = (
            "_url",
            "_fs",
            "__init__",
            "__getattribute__",
            "argument_upath_self_to_filepath",
        )
        if item in own_names:
            return object.__getattribute__(self, item)

        fs = object.__getattribute__(self, "__dict__").get("_fs")
        if fs is None:
            # Fail loudly instead of returning None for every name.
            raise AttributeError(
                f"_FSSpecAccessor has no filesystem to resolve {item!r}"
            )
        try:
            attr = getattr(fs, item)
        except AttributeError:
            raise NotImplementedError(
                f"{fs.protocol} filesystem has no attribute {item}"
            ) from None
        if not callable(attr):
            # Plain values (including falsy ones) are returned as they are.
            return attr
        return self.argument_upath_self_to_filepath(attr)
87-
88-
class PureUniversalPath(pathlib.PurePath):
    """Pure path class that uses pathlib's POSIX flavour.

    Declares no slots of its own.
    """

    __slots__ = ()
    _flavour = pathlib._posix_flavour
5+ from upath .registry import _registry
6+ from upath .universal_path import UniversalPath
927
938
949class UPath (pathlib .Path ):
@@ -101,210 +16,27 @@ def __new__(cls, *args, **kwargs):
10116 val = kwargs .get (key )
10217 if val :
10318 parsed_url ._replace (** {key : val })
19+ # treat as local filesystem, return PosixPath or WindowsPath
10420 if not parsed_url .scheme :
10521 cls = (
10622 pathlib .WindowsPath
10723 if os .name == "nt"
10824 else pathlib .PosixPath
10925 )
26+ self = cls ._from_parts (args , init = False )
27+ if not self ._flavour .is_supported :
28+ raise NotImplementedError (
29+ "cannot instantiate %r on your system" % (cls .__name__ ,)
30+ )
31+ self ._init ()
11032 else :
111- cls = UniversalPath
112- # cls._url = parsed_url
33+ if parsed_url .scheme in _registry :
34+ cls = _registry [parsed_url .scheme ]
35+ else :
36+ cls = UniversalPath
11337 kwargs ["_url" ] = parsed_url
11438 new_args .insert (0 , parsed_url .path )
11539 args = tuple (new_args )
116-
117- if cls is UniversalPath :
118- self = cls ._from_parts_init (args , init = False )
119- else :
120- self = cls ._from_parts (args , init = False )
121- if not self ._flavour .is_supported :
122- raise NotImplementedError (
123- "cannot instantiate %r on your system" % (cls .__name__ ,)
124- )
125- if cls is UniversalPath :
126- self ._init (* args , ** kwargs )
127- else :
128- self ._init ()
40+ self = cls ._from_parts_init (args , init = False )
41+ self ._init (* args , ** kwargs )
12942 return self
130-
131-
class UniversalPath(UPath, PureUniversalPath):
    """Path whose filesystem operations are delegated to an accessor.

    The accessor is created from the URL passed as the ``_url`` keyword
    argument (or shared with a template path) and its filesystem
    instance is exposed as ``fs``.
    """

    __slots__ = ("_url", "_kwargs", "_closed", "fs")

    # Attribute names for which ``__getattribute__`` raises
    # NotImplementedError.
    not_implemented = [
        "cwd",
        "home",
        "expanduser",
        "group",
        "is_mount",
        "is_symlink",
        "is_socket",
        "is_fifo",
        "is_block_device",
        "is_char_device",
        "lchmod",
        "lstat",
        "owner",
        "readlink",
    ]
    _default_accessor = _FSSpecAccessor

    def _init(self, *args, template=None, **kwargs):
        """Initialise URL, root and accessor of a freshly created path.

        :param args: forwarded to the accessor constructor
        :param template: optional path whose accessor is re-used
        :param kwargs: accessor keyword arguments; when empty, the
            keyword arguments stored in ``self._kwargs`` are used
            instead, otherwise they are stored there. ``_url`` is
            removed and kept as ``self._url``.
        """
        self._closed = False
        if not kwargs:
            kwargs = dict(**self._kwargs)
        else:
            self._kwargs = dict(**kwargs)
        self._url = kwargs.pop("_url") if kwargs.get("_url") else None

        if not self._root:
            if not self._parts:
                self._root = "/"
            elif self._parts[0] == "/":
                self._root = self._parts.pop(0)
        # Drop a cached string form, if one is set.
        if getattr(self, "_str", None):
            delattr(self, "_str")
        if template is not None:
            self._accessor = template._accessor
        else:
            self._accessor = self._default_accessor(self._url, *args, **kwargs)
        self.fs = self._accessor._fs

    def __getattribute__(self, item):
        """Report ``UniversalPath`` as the class and block unsupported names.

        :raises NotImplementedError: if ``item`` is listed in
            ``not_implemented``
        """
        if item == "__class__":
            return UniversalPath
        if item in getattr(UniversalPath, "not_implemented"):
            raise NotImplementedError(f"UniversalPath has no attribute {item}")
        return super().__getattribute__(item)

    def _format_parsed_parts(self, drv, root, parts):
        """Return the URL string ``scheme:[//netloc]path`` for the parts."""
        # ``parts`` may be empty (root-only path); guard before indexing.
        join_parts = parts[1:] if parts and parts[0] == "/" else parts
        path = drv + root + self._flavour.join(join_parts)
        scheme, netloc = self._url.scheme, self._url.netloc
        netloc = "//" + netloc if netloc else ""
        return scheme + ":" + netloc + path

    @property
    def path(self):
        """Path component without scheme and netloc; "/" if there are no parts."""
        if not self._parts:
            return "/"
        if self._parts[0] == "/":
            join_parts = self._parts[1:]
        else:
            join_parts = self._parts
        return self._root + self._flavour.join(join_parts)

    def _own_prefix_pattern(self):
        """Compile a pattern matching this path, with or without its first
        character, followed by "/" at the start of a name."""
        sp = self.path
        # Escape the path so regex metacharacters in it match literally.
        return re.compile(f"^({re.escape(sp)}|{re.escape(sp[1:])})/")

    def open(self, *args, **kwargs):
        """Open this path through the accessor and return its result."""
        return self._accessor.open(self, *args, **kwargs)

    def iterdir(self):
        """Iterate over the files in this directory. Does not yield any
        result for the special paths '.' and '..'.
        """
        if self._closed:
            self._raise_closed()
        own_prefix = self._own_prefix_pattern()
        for name in self._accessor.listdir(self):
            # fsspec returns dictionaries
            if isinstance(name, dict):
                name = name.get("name")
            if name in {".", ".."}:
                # Yielding a path object for these makes little sense
                continue
            # only want the path name with iterdir
            name = own_prefix.sub("", name)
            yield self._make_child_relpath(name)
            if self._closed:
                self._raise_closed()

    def exists(self):
        """
        Whether this path exists.
        """
        try:
            accessor_exists = self._accessor.exists
        except (AttributeError, NotImplementedError):
            # No ``exists`` available: probe with ``stat`` instead.
            try:
                self._accessor.stat(self)
            except FileNotFoundError:
                return False
            return True
        return accessor_exists(self)

    def is_dir(self):
        """Whether the accessor's info reports type "directory"."""
        return self._accessor.info(self)["type"] == "directory"

    def is_file(self):
        """Whether the accessor's info reports type "file"."""
        return self._accessor.info(self)["type"] == "file"

    def glob(self, pattern):
        """Yield child paths for the accessor's glob of ``pattern``
        joined onto this path."""
        path = self.joinpath(pattern)
        own_prefix = self._own_prefix_pattern()
        for name in self._accessor.glob(self, path=path.path):
            name = own_prefix.sub("", name)
            name = name.split(self._flavour.sep)
            yield self._make_child(self._parts + name)

    def rename(self, target):
        """Not supported.

        :raises NotImplementedError: always
        """
        # can be implemented, but may be tricky
        raise NotImplementedError

    def touch(self, trunicate=True, **kwargs):
        """Call the accessor's ``touch`` for this path.

        The ``trunicate`` spelling is part of this method's signature
        and is forwarded under the same keyword name.
        """
        self._accessor.touch(self, trunicate=trunicate, **kwargs)

    def unlink(self, missing_ok=False):
        """Remove this path non-recursively.

        :param missing_ok: when true, a missing path is silently ignored
        :raises FileNotFoundError: if the path does not exist and
            ``missing_ok`` is false
        """
        if not self.exists():
            if missing_ok:
                return
            raise FileNotFoundError
        self._accessor.rm(self, recursive=False)

    def rmdir(self, recursive=True):
        """Remove this directory.

        :param recursive: forwarded to the accessor's ``rm``
        :raises NotDirectoryError: if this path is not a directory
        """
        # Explicit check instead of ``assert``, which is skipped when
        # Python runs with -O.
        if not self.is_dir():
            raise NotDirectoryError
        self._accessor.rm(self, recursive=recursive)

    @classmethod
    def _from_parts_init(cls, args, init=False):
        """Class-level constructor delegating to the parent ``_from_parts``."""
        return super()._from_parts(args, init=init)

    def _from_parts(self, args, init=True):
        """Build a new ``UniversalPath`` from unparsed ``args``.

        When ``init`` is true the new object is initialised with this
        instance's stored keyword arguments.
        """
        # We need to call _parse_args on the instance, so as to get the
        # right flavour.
        obj = object.__new__(UniversalPath)
        drv, root, parts = self._parse_args(args)
        obj._drv = drv
        obj._root = root
        obj._parts = parts
        if init:
            obj._init(**self._kwargs)
        return obj

    def _from_parsed_parts(self, drv, root, parts, init=True):
        """Build a new ``UniversalPath`` from already parsed parts.

        When ``init`` is true the new object is initialised with this
        instance's stored keyword arguments.
        """
        obj = object.__new__(UniversalPath)
        obj._drv = drv
        obj._root = root
        obj._parts = parts
        if init:
            obj._init(**self._kwargs)
        return obj
0 commit comments