Skip to content

Commit 59c5933

Browse files
authored
handle broken unicode paths (#167)
* handle broken unicode paths * docs * version bump
1 parent e231315 commit 59c5933

File tree

14 files changed

+148
-59
lines changed

14 files changed

+148
-59
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/)
55
and this project adheres to [Semantic Versioning](http://semver.org/).
66

7+
## [2.0.22] - Unreleased
8+
9+
### Fixed
10+
11+
- Handling of broken unicode on Python2.7
12+
13+
### Added
14+
15+
- Added fs.getospath
16+
717
## [2.0.21] - 2018-05-02
818

919
### Added

fs/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@
66
from ._version import __version__
77
from .enums import ResourceType, Seek
88
from .opener import open_fs
9+
from ._fscompat import fsencode, fsdecode
910

1011
__all__ = ['__version__', 'ResourceType', 'Seek', 'open_fs']

fs/_fscompat.py

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,40 +5,7 @@
55
try:
66
from os import fsencode, fsdecode
77
except ImportError:
8-
def _fscodec():
9-
encoding = sys.getfilesystemencoding()
10-
errors = 'strict' if encoding == 'mbcs' else 'surrogateescape'
11-
12-
def fsencode(filename):
13-
"""
14-
Encode filename to the filesystem encoding with 'surrogateescape' error
15-
handler, return bytes unchanged. On Windows, use 'strict' error handler if
16-
the file system encoding is 'mbcs' (which is the default encoding).
17-
"""
18-
if isinstance(filename, bytes):
19-
return filename
20-
elif isinstance(filename, six.text_type):
21-
return filename.encode(encoding, errors)
22-
else:
23-
raise TypeError("expect string type, not %s" % type(filename).__name__)
24-
25-
def fsdecode(filename):
26-
"""
27-
Decode filename from the filesystem encoding with 'surrogateescape' error
28-
handler, return str unchanged. On Windows, use 'strict' error handler if
29-
the file system encoding is 'mbcs' (which is the default encoding).
30-
"""
31-
if isinstance(filename, six.text_type):
32-
return filename
33-
elif isinstance(filename, bytes):
34-
return filename.decode(encoding, errors)
35-
else:
36-
raise TypeError("expect string type, not %s" % type(filename).__name__)
37-
38-
return fsencode, fsdecode
39-
40-
fsencode, fsdecode = _fscodec()
41-
del _fscodec
8+
from backports.os import fsencode, fsdecode
429

4310
try:
4411
from os import fspath

fs/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Version, used in module and setup.py.
22
"""
3-
__version__ = "2.0.21"
3+
__version__ = "2.0.22a0"

fs/base.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from . import copy
2626
from . import errors
27+
from . import fsencode
2728
from . import iotools
2829
from . import move
2930
from . import tools
@@ -732,6 +733,10 @@ def getsyspath(self, path):
732733
data anywhere the OS knows about. It is also possible for some
733734
paths to have a system path, whereas others don't.
734735
736+
This method will always return a str on Py3.* and unicode
737+
on Py2.7. See `~getospath` if you need to encode the path as
738+
bytes.
739+
735740
If ``path`` doesn't have a system path, a `~fs.errors.NoSysPath`
736741
exception will be thrown.
737742
@@ -743,6 +748,36 @@ def getsyspath(self, path):
743748
"""
744749
raise errors.NoSysPath(path=path)
745750

751+
def getospath(self, path):
752+
# type: (Text) -> bytes
753+
"""Get a *system path* to a resource, encoded in the operating
754+
system's preferred encoding.
755+
756+
Parameters:
757+
path (str): A path on the filesystem.
758+
759+
Returns:
760+
bytes: the *system path* of the resource, if any.
761+
762+
Raises:
763+
fs.errors.NoSysPath: If there is no corresponding system path.
764+
765+
This method takes the output of `~getsyspath` and encodes it to
766+
the filesystem's preferred encoding. In Python3 this step is
767+
not required, as the `os` module will do it automatically. In
768+
Python2.7, the encoding step is required to support filenames
769+
on the filesystem that don't encode correctly.
770+
771+
Note:
772+
If you want your code to work in Python2.7 and Python3 then
773+
use this method if you want to work with the OS filesystem
774+
outside of the OSFS interface.
775+
776+
"""
777+
syspath = self.getsyspath(path)
778+
ospath = fsencode(syspath)
779+
return ospath
780+
746781
def gettype(self, path):
747782
# type: (Text) -> ResourceType
748783
"""Get the type of a resource.

fs/info.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import typing
99
from copy import deepcopy
1010

11+
import six
12+
1113
from .path import join
1214
from .enums import ResourceType
1315
from .errors import MissingInfoNamespace
@@ -24,6 +26,7 @@
2426
T = typing.TypeVar("T")
2527

2628

29+
@six.python_2_unicode_compatible
2730
class Info(object):
2831
"""Container for :ref:`info`.
2932
@@ -49,13 +52,15 @@ def __init__(self, raw_info, to_datetime=epoch_to_datetime):
4952
self._to_datetime = to_datetime
5053
self.namespaces = frozenset(self.raw.keys())
5154

52-
def __repr__(self):
55+
def __str__(self):
5356
# type: () -> str
5457
if self.is_dir:
5558
return "<dir '{}'>".format(self.name)
5659
else:
5760
return "<file '{}'>".format(self.name)
5861

62+
__repr__ = __str__
63+
5964
def __eq__(self, other):
6065
# type: (object) -> bool
6166
return self.raw == getattr(other, 'raw', None)

fs/osfs.py

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,13 @@ def __init__(self,
9292
"""Create an OSFS instance.
9393
"""
9494
super(OSFS, self).__init__()
95-
root_path = fsdecode(fspath(root_path))
95+
if isinstance(root_path, bytes):
96+
root_path = fsdecode(root_path)
97+
self.root_path = root_path
98+
_root_path = fsdecode(fspath(root_path))
9699
_root_path = os.path.expanduser(os.path.expandvars(root_path))
97100
_root_path = os.path.normpath(os.path.abspath(_root_path))
98-
self.root_path = _root_path
101+
self._root_path = _root_path
99102

100103
if create:
101104
try:
@@ -137,22 +140,24 @@ def __init__(self,
137140
def __repr__(self):
138141
# type: () -> str
139142
_fmt = "{}({!r})"
140-
return _fmt.format(self.__class__.__name__,
141-
self.root_path)
143+
_class_name = self.__class__.__name__
144+
return _fmt.format(_class_name, self.root_path)
142145

143146
def __str__(self):
144147
# type: () -> str
145148
fmt = "<{} '{}'>"
146-
return fmt.format(self.__class__.__name__.lower(),
147-
self.root_path)
149+
_class_name = self.__class__.__name__
150+
return fmt.format(_class_name.lower(), self.root_path)
148151

149152
def _to_sys_path(self, path):
150153
# type: (Text) -> Text
151154
"""Convert a FS path to a path on the OS.
152155
"""
153-
sys_path = os.path.join(
154-
self.root_path,
155-
path.lstrip('/').replace('/', os.sep)
156+
sys_path = fsencode(
157+
os.path.join(
158+
self._root_path,
159+
path.lstrip('/').replace('/', os.sep)
160+
)
156161
)
157162
return sys_path
158163

@@ -231,7 +236,7 @@ def _get_type_from_stat(cls, _stat):
231236
def _gettarget(self, sys_path):
232237
# type: (Text) -> Optional[Text]
233238
try:
234-
target = os.readlink(sys_path)
239+
target = os.readlink(fsencode(sys_path))
235240
except OSError:
236241
return None
237242
else:
@@ -250,9 +255,9 @@ def getinfo(self, path, namespaces=None):
250255
sys_path = self.getsyspath(_path)
251256
_lstat = None
252257
with convert_os_errors('getinfo', path):
253-
_stat = os.stat(sys_path)
258+
_stat = os.stat(fsencode(sys_path))
254259
if 'lstat' in namespaces:
255-
_stat = os.lstat(sys_path)
260+
_stat = os.lstat(fsencode(sys_path))
256261

257262
info = {
258263
'basic': {
@@ -285,8 +290,9 @@ def listdir(self, path):
285290
_path = self.validatepath(path)
286291
sys_path = self._to_sys_path(_path)
287292
with convert_os_errors('listdir', path, directory=True):
288-
names = os.listdir(sys_path)
289-
return names
293+
names = os.listdir(fsencode(sys_path))
294+
return [fsdecode(name) for name in names]
295+
#return names
290296

291297
def makedir(self, # type: _O
292298
path, # type: Text
@@ -368,7 +374,10 @@ def opendir(self, path, factory=None):
368374

369375
def getsyspath(self, path):
370376
# type: (Text) -> Text
371-
sys_path = self._to_sys_path(path)
377+
sys_path = os.path.join(
378+
self._root_path,
379+
path.lstrip('/').replace('/', os.sep)
380+
)
372381
return sys_path
373382

374383
def geturl(self, path, purpose='download'):
@@ -455,7 +464,7 @@ def _scandir(self, path, namespaces=None):
455464
for dir_entry in scandir(sys_path):
456465
info = {
457466
"basic": {
458-
"name": dir_entry.name,
467+
"name": fsdecode(dir_entry.name),
459468
"is_dir": dir_entry.is_dir()
460469
}
461470
}
@@ -493,14 +502,16 @@ def _scandir(self, path, namespaces=None):
493502
self.check()
494503
namespaces = namespaces or ()
495504
_path = self.validatepath(path)
496-
sys_path = self._to_sys_path(_path)
505+
sys_path = self.getsyspath(_path)
506+
_sys_path = fsencode(sys_path)
497507
with convert_os_errors('scandir', path, directory=True):
498508
for entry_name in os.listdir(sys_path):
499-
entry_path = os.path.join(sys_path, entry_name)
500-
stat_result = os.stat(entry_path)
509+
_entry_name = fsdecode(entry_name)
510+
entry_path = os.path.join(sys_path, _entry_name)
511+
stat_result = os.stat(fsencode(entry_path))
501512
info = {
502513
"basic": {
503-
"name": entry_name,
514+
"name": _entry_name,
504515
"is_dir": stat.S_ISDIR(stat_result.st_mode),
505516
}
506517
} # type: Dict[Text, Dict[Text, Any]]

fs/permissions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def __set__(self, obj, value):
4646

4747

4848
@six.python_2_unicode_compatible
49-
class Permissions(Container[Text], Iterable[Text]):
49+
class Permissions(object):
5050
"""An abstraction for file system permissions.
5151
5252
Permissions objects store information regarding the permissions

fs/test.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,10 +396,12 @@ def test_getsize(self):
396396
def test_getsyspath(self):
397397
self.fs.create('foo')
398398
try:
399-
self.fs.getsyspath('foo')
399+
syspath = self.fs.getsyspath('foo')
400400
except errors.NoSysPath:
401401
self.assertFalse(self.fs.hassyspath('foo'))
402402
else:
403+
self.assertIsInstance(syspath, text_type)
404+
self.assertIsInstance(self.fs.getospath('foo'), bytes)
403405
self.assertTrue(self.fs.hassyspath('foo'))
404406
# Should not throw an error
405407
self.fs.hassyspath('a/b/c/foo/bar')

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
appdirs==1.4.0
2+
backports.os==0.1.1; python_version == '2.7'
23
enum34==1.1.6 ; python_version < '3.4'
34
pytz
45
setuptools

0 commit comments

Comments
 (0)