Skip to content

Commit c36462c

Browse files
authored
Merge pull request #414 from nexB/413-no-crash-on-weird-file-names
#413 no crash on weird file names * This is a fairly significant change, in particular on the extractcode side. * We can now properly handle files that could otherwise not be processed on some OSes, such as Windows, because their names are illegal on that OS.
2 parents 4cfeaf4 + 3c6d906 commit c36462c

File tree

105 files changed

+7052
-1242
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+7052
-1242
lines changed

.travis.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@ os:
66

77
env:
88
matrix:
9-
- TEST_SUITE="bin/py.test -n 2 -s --ignore=tests/scancode"
10-
- TEST_SUITE="bin/py.test -vvs tests/scancode"
9+
- TEST_SUITE="bin/py.test -n 2 -vvs tests/scancode"
10+
- TEST_SUITE="bin/py.test -n 2 -vvs tests/extractcode"
11+
- TEST_SUITE="bin/py.test -n 2 -s tests/licensedcode"
12+
- TEST_SUITE="bin/py.test -n 2 -s tests/cluecode"
13+
- TEST_SUITE="bin/py.test -n 2 -s tests/packagedcode"
14+
- TEST_SUITE="bin/py.test -n 2 -s --ignore=tests/scancode --ignore=tests/extractcode --ignore=tests/licensedcode --ignore=tests/cluecode --ignore=tests/packagedcode"
1115
- TEST_SUITE="./etc/release/release.sh"
1216

1317
install:

appveyor.yml

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,26 @@
11
version: '{build}'
22

3+
environment:
4+
matrix:
5+
- TEST_SUITE: "py.test -n 2 -vvs tests/scancode"
6+
- TEST_SUITE: "py.test -n 2 -vvs tests/extractcode"
7+
- TEST_SUITE: "py.test -n 2 -s tests/licensedcode"
8+
- TEST_SUITE: "py.test -n 2 -s tests/cluecode"
9+
- TEST_SUITE: "py.test -n 2 -s tests/packagedcode"
10+
- TEST_SUITE: "py.test -n 2 -s --ignore=tests/scancode --ignore=tests/extractcode --ignore=tests/licensedcode --ignore=tests/cluecode --ignore=tests/packagedcode"
11+
312
install:
413
- configure
514

615
build: off
716

17+
818
test_script:
919
- python -c "import sys;print sys.getdefaultencoding()"
10-
- "py.test -n 2 -vvs tests/scancode"
11-
- "py.test -n 2 -s --ignore=tests/scancode"
20+
- cmd: "%TEST_SUITE%"
1221

1322
on_success:
14-
- "python etc/scripts/irc-notify.py aboutcode [{project_name}:{branch}] {short_commit}: \"{message}\" ({author}) {color_green}Succeeded,Details: {build_url},Commit: {commit_url}"
23+
- "python etc/scripts/irc-notify.py aboutcode [{project_name}:{branch}] {short_commit}: \"{message}\" ({author}) {color_green}Succeeded,Details: {build_url},Commit: {commit_url}"
1524

1625
on_failure:
17-
- "python etc/scripts/irc-notify.py aboutcode [{project_name}:{branch}] {short_commit}: \"{message}\" ({author}) {color_red}Failed,Details: {build_url},Commit: {commit_url}"
26+
- "python etc/scripts/irc-notify.py aboutcode [{project_name}:{branch}] {short_commit}: \"{message}\" ({author}) {color_red}Failed,Details: {build_url},Commit: {commit_url}"

etc/conf/dev/base.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# Testing
2+
apipkg
3+
py
24
pytest
35
colorama
46
execnet
5-
py
67
pytest-xdist
78
bumpversion

setup.cfg

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,19 @@ license_file = NOTICE
77

88
release = clean --all sdist --formats=gztar,bztar,zip bdist_wheel
99

10-
[pytest]
10+
[tool:pytest]
11+
testpaths =
12+
src
13+
tests
14+
etc
15+
1116
norecursedirs =
1217
.git
1318
bin
1419
dist
1520
build
1621
_build
17-
dist
1822
local
19-
ci
2023
docs
2124
man
2225
share
@@ -27,7 +30,6 @@ norecursedirs =
2730
include
2831
Lib
2932
lib
30-
Scripts
3133
thirdparty
3234
tmp
3335
src/*/data
@@ -41,6 +43,4 @@ python_functions=test
4143
addopts =
4244
-rfEsxXw
4345
--strict
44-
--ignore docs/conf.py
45-
--ignore setup.py
4646
--doctest-modules

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def read(*names, **kwargs):
6868
# to work around bug http://bugs.python.org/issue19839
6969
# on multistream bzip2 files: this can be removed in Python 3.
7070
'bz2file >= 0.98',
71+
'text-unidecode >= 1.0, < 2.0',
7172

7273
# licensedcode
7374
'PyYAML >= 3.0, <4.0',

src/commoncode/fetch.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,25 +40,26 @@
4040
# logger.setLevel(logging.DEBUG)
4141

4242

43-
def download_url(url, file_name=None, verify=True):
43+
def download_url(url, file_name=None, verify=True, timeout=10):
4444
"""
45-
Return the temporary location of the file fetched at the remote url. Use
46-
file_name if provided or create a file name base on the last url segment. If
47-
verify is True, SSL certification is performed. Otherwise, no verification
48-
is done but a warning will be printed.
45+
Fetch `url` and return the temporary location where the fetched content was
46+
saved. Use `file_name` if provided or create a new `file_name` based on the last
47+
url segment. If `verify` is True, SSL certification is performed. Otherwise, no
48+
verification is done but a warning will be printed.
49+
`timeout` is the timeout in seconds.
4950
"""
50-
requests_args = dict(timeout=10, verify=verify)
51+
requests_args = dict(timeout=timeout, verify=verify)
5152
file_name = file_name or fileutils.file_name(url)
5253

5354
try:
5455
response = requests.get(url, **requests_args)
5556
except (ConnectionError, InvalidSchema) as e:
56-
logger.error('fetch: Download failed for %(url)r' % locals())
57+
logger.error('download_url: Download failed for %(url)r' % locals())
5758
raise
5859

5960
status = response.status_code
6061
if status != 200:
61-
msg = 'fetch: Download failed for %(url)r with %(status)r' % locals()
62+
msg = 'download_url: Download failed for %(url)r with %(status)r' % locals()
6263
logger.error(msg)
6364
raise Exception(msg)
6465

@@ -72,10 +73,11 @@ def download_url(url, file_name=None, verify=True):
7273

7374
def ping_url(url):
7475
"""
75-
Returns True is the URL is reachable.
76+
Return True if `url` is reachable.
7677
"""
7778
import urllib2
7879

80+
# FIXME: if there is no 200 HTTP status, then the URL may not be reachable.
7981
try:
8082
urllib2.urlopen(url)
8183
except Exception:

src/commoncode/filetype.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
# ScanCode is a free software code scanning tool from nexB Inc. and others.
2323
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
2424

25-
from __future__ import absolute_import, print_function
25+
from __future__ import absolute_import
26+
from __future__ import print_function
2627

2728
import os
2829
from collections import OrderedDict
@@ -32,6 +33,10 @@
3233
from commoncode.functional import memoize
3334

3435

36+
"""
37+
Low level file type utilities, essentially a wrapper around os.path and stat.
38+
"""
39+
3540
def is_link(location):
3641
"""
3742
Return True if `location` is a symbolic link.
@@ -93,19 +98,21 @@ def get_link_target(location):
9398
if on_posix and is_link(location):
9499
try:
95100
# return false on OSes not supporting links
96-
target = os.readlink(location) # @UndefinedVariable
97-
except UnicodeEncodeError: # @UnusedVariable
101+
target = os.readlink(location)
102+
except UnicodeEncodeError:
98103
# location is unicode but readlink can fail in some cases
99104
pass
100105
return target
101106

102107

103108
# Map of type checker function -> short type code
104109
# The order of types check matters: link -> file -> directory -> special
105-
TYPES = OrderedDict([(is_link, ('l', 'link',)),
106-
(is_file, ('f', 'file',)),
107-
(is_dir, ('d', 'directory',)),
108-
(is_special, ('s', 'special',))])
110+
TYPES = OrderedDict([
111+
(is_link, ('l', 'link',)),
112+
(is_file, ('f', 'file',)),
113+
(is_dir, ('d', 'directory',)),
114+
(is_special, ('s', 'special',))
115+
])
109116

110117

111118
def get_type(location, short=True):
@@ -144,6 +151,7 @@ def is_writable(location):
144151
else:
145152
return os.access(location, os.R_OK | os.W_OK)
146153

154+
147155
def is_executable(location):
148156
"""
149157
Return True if the file at location has executable permission set.

src/commoncode/fileutils.py

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
2+
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
33
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
44
# The ScanCode software is licensed under the Apache License version 2.0.
55
# Data generated with ScanCode require an acknowledgment.
@@ -59,8 +59,8 @@
5959

6060
def create_dir(location):
6161
"""
62-
Create directory and all sub-directories recursively at location ensuring
63-
these are readable and writeable.
62+
Create directory and all sub-directories recursively at location ensuring these
63+
are readable and writeable.
6464
Raise Exceptions if it fails to create the directory.
6565
"""
6666
if os.path.exists(location):
@@ -162,23 +162,65 @@ def read_text_file(location, universal_new_lines=True):
162162
# PATHS AND NAMES MANIPULATIONS
163163
#
164164

165+
# TODO: move these functions to paths.py
166+
167+
def is_posixpath(location):
168+
"""
169+
Return True if the `location` path is likely a POSIX-like path using POSIX path
170+
separators (slash or "/") or has no path separator.
171+
172+
Return False if the `location` path is likely a Windows-like path using backslash
173+
as path separators (e.g. "\").
174+
"""
175+
has_slashes = '/' in location
176+
has_backslashes = '\\' in location
177+
# windows paths with drive
178+
if location:
179+
drive, _ = ntpath.splitdrive(location)
180+
if drive:
181+
return False
182+
183+
184+
# a path is always POSIX unless it contains ONLY backslashes
185+
# which is a rough approximation (it could still be posix)
186+
is_posix = True
187+
if has_backslashes and not has_slashes:
188+
is_posix = False
189+
return is_posix
190+
191+
165192
def as_posixpath(location):
166193
"""
167-
Return a posix-like path using posix path separators (slash or "/") for a
168-
`location` path. This converts Windows paths to look like posix paths that
169-
Python accepts gracefully on Windows for path handling.
194+
Return a POSIX-like path using POSIX path separators (slash or "/") for a
195+
`location` path. This converts Windows paths to look like POSIX paths: Python
196+
gracefully accepts POSIX paths on Windows.
170197
"""
171198
return location.replace(ntpath.sep, posixpath.sep)
172199

173200

201+
def as_winpath(location):
202+
"""
203+
Return a Windows-like path using Windows path separators (backslash or "\") for a
204+
`location` path.
205+
"""
206+
return location.replace(posixpath.sep, ntpath.sep)
207+
208+
209+
def split_parent_resource(path, force_posix=False):
210+
"""
211+
Return a tuple of (parent directory path, resource name).
212+
"""
213+
splitter = is_posixpath(path) and posixpath or ntpath
214+
path = path.rstrip('/\\')
215+
return splitter.split(path)
216+
217+
174218
def resource_name(path):
175219
"""
176220
Return the resource name (file name or directory name) from `path` which
177221
is the last path segment.
178222
"""
179-
path = as_posixpath(path)
180-
path = path.rstrip('/')
181-
_left, right = posixpath.split(path)
223+
_left, right = split_parent_resource(path)
182224
return right or ''
183225

184226

@@ -191,12 +233,11 @@ def file_name(path):
191233

192234
def parent_directory(path):
193235
"""
194-
Return the parent directory of a file or directory path.
236+
Return the parent directory path of a file or directory `path`.
195237
"""
196-
path = as_posixpath(path)
197-
path = path.rstrip('/')
198-
left, _ = posixpath.split(path)
199-
trail = '/' if left != '/' else ''
238+
left, _right = split_parent_resource(path)
239+
sep = is_posixpath(path) and '/' or '\\'
240+
trail = sep if left != sep else ''
200241
return left + trail
201242

202243

@@ -267,7 +308,7 @@ def walk(location, ignored=ignore_nothing):
267308

268309
if filetype.is_file(location) :
269310
yield parent_directory(location), [], [file_name(location)]
270-
311+
271312
elif filetype.is_dir(location):
272313
dirs = []
273314
files = []
@@ -326,7 +367,7 @@ def resource_iter(location, ignored=ignore_nothing, with_files=True, with_dirs=T
326367
:param with_files: If True, include the files.
327368
:return: an iterable of file and directory locations.
328369
"""
329-
assert with_dirs or with_files, "One or both of 'with_dirs' and 'with_files' is required"
370+
assert with_dirs or with_files, "fileutils.resource_iter: One or both of 'with_dirs' and 'with_files' is required"
330371
for top, dirs, files in walk(location, ignored):
331372
if with_files:
332373
for f in files:

0 commit comments

Comments
 (0)