From 0150d3bfd32258e7e574c476b4267ba580fa158a Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Wed, 22 Jan 2020 16:54:33 +0800 Subject: [PATCH 01/12] Add Python script encoding config support (--encoding option), to handle errors such as `UnicodeDecodeError: 'gbk' codec can't decode byte 0xac in position 797: illegal multibyte sequence` --- pip_check_reqs/common.py | 2 +- pip_check_reqs/find_extra_reqs.py | 1 + pip_check_reqs/find_missing_reqs.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pip_check_reqs/common.py b/pip_check_reqs/common.py index 4298c31..b060ebc 100644 --- a/pip_check_reqs/common.py +++ b/pip_check_reqs/common.py @@ -115,7 +115,7 @@ def find_imported_modules(options): log.info('ignoring: %s', os.path.relpath(filename)) continue log.debug('scanning: %s', os.path.relpath(filename)) - with open(filename) as f: + with open(filename, options.encoding) as f: content = f.read() vis.set_location(filename) vis.visit(ast.parse(content)) diff --git a/pip_check_reqs/find_extra_reqs.py b/pip_check_reqs/find_extra_reqs.py index 4aba27e..addab9e 100644 --- a/pip_check_reqs/find_extra_reqs.py +++ b/pip_check_reqs/find_extra_reqs.py @@ -74,6 +74,7 @@ def main(): action="store_true", default=False, help="be *really* verbose") parser.add_option("--version", dest="version", action="store_true", default=False, help="display version information") + parser.add_option("--encoding", default=None, help="python script file encoding") (options, args) = parser.parse_args() diff --git a/pip_check_reqs/find_missing_reqs.py b/pip_check_reqs/find_missing_reqs.py index 71db4ce..3534d24 100644 --- a/pip_check_reqs/find_missing_reqs.py +++ b/pip_check_reqs/find_missing_reqs.py @@ -79,6 +79,7 @@ def main(): action="store_true", default=False, help="be *really* verbose") parser.add_option("--version", dest="version", action="store_true", default=False, help="display version information") + parser.add_option("--encoding", default=None, help="python script file encoding") 
(options, args) = parser.parse_args() From 9e268fd626c36377d82208f717926d39558864c9 Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Wed, 22 Jan 2020 17:46:46 +0800 Subject: [PATCH 02/12] use codecs to support python2 --- pip_check_reqs/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pip_check_reqs/common.py b/pip_check_reqs/common.py index b060ebc..26d2901 100644 --- a/pip_check_reqs/common.py +++ b/pip_check_reqs/common.py @@ -5,6 +5,7 @@ import os import pkg_resources import re +import codecs from packaging.utils import canonicalize_name from pip._internal.download import PipSession @@ -115,7 +116,10 @@ def find_imported_modules(options): log.info('ignoring: %s', os.path.relpath(filename)) continue log.debug('scanning: %s', os.path.relpath(filename)) - with open(filename, options.encoding) as f: + charset = None + if hasattr(options, 'encoding'): + charset = options.encoding + with codecs.open(filename, encoding=charset) as f: content = f.read() vis.set_location(filename) vis.visit(ast.parse(content)) From 48febc11d7ce8fb79a56a1e69d53642304455acd Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Mon, 22 Jun 2020 17:28:53 +0800 Subject: [PATCH 03/12] test that will fail on windows --- tests/gbk.py | 6 ++++++ tests/test_common.py | 33 +++++++++++++++++++++++++++++++++ tests/utf8.py | 6 ++++++ 3 files changed, 45 insertions(+) create mode 100644 tests/gbk.py create mode 100644 tests/utf8.py diff --git a/tests/gbk.py b/tests/gbk.py new file mode 100644 index 0000000..0b63d94 --- /dev/null +++ b/tests/gbk.py @@ -0,0 +1,6 @@ +from os import path +import ast, hashlib +from __future__ import braces +import ast, sys +from . 
import friend +print("ÊǺº×Ö") \ No newline at end of file diff --git a/tests/test_common.py b/tests/test_common.py index b1c9cf6..5f4a98e 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -145,6 +145,39 @@ def ignore_mods(module): assert caplog.records[0].message == 'ignoring: ham.py' +@pytest.mark.parametrize(["files","expect"], [ + (['utf8.py'],['ast', 'os', 'hashlib']), + (['gbk.py'],['ast', 'os', 'hashlib']) +]) +def test_find_imported_modules_charset(monkeypatch, caplog, + files, expect): + monkeypatch.setattr(common, 'pyfiles', + pretend.call_recorder(lambda x: files)) + + if sys.version_info[0] == 2: + # py2 will find sys module but py3k won't + expect.append('sys') + + + + caplog.set_level(logging.INFO) + + class options: + paths = ['.'] + verbose = True + + @staticmethod + def ignore_files(path): + return False + + @staticmethod + def ignore_mods(module): + return False + + result = common.find_imported_modules(options) + assert set(result) == set(expect) + + @pytest.mark.parametrize(["ignore_cfg", "candidate", "result"], [ ([], 'spam', False), ([], 'ham', False), diff --git a/tests/utf8.py b/tests/utf8.py new file mode 100644 index 0000000..4ecb963 --- /dev/null +++ b/tests/utf8.py @@ -0,0 +1,6 @@ +from os import path +import ast, hashlib +from __future__ import braces +import ast, sys +from . 
import friend +print("是汉字") \ No newline at end of file From 564ffbfdcf89aa4985730b26e7adf95dbd0ff520 Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Mon, 22 Jun 2020 17:43:13 +0800 Subject: [PATCH 04/12] update and fix --- tests/test_common.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_common.py b/tests/test_common.py index dee9ec7..d3216f2 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -155,11 +155,6 @@ def test_find_imported_modules_charset(monkeypatch, caplog, monkeypatch.setattr(common, 'pyfiles', pretend.call_recorder(lambda x: files)) - if sys.version_info[0] == 2: - # py2 will find sys module but py3k won't - expect.append('sys') - - caplog.set_level(logging.INFO) From f895effb9664f71144fb95c38b172b5847cff246 Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Mon, 22 Jun 2020 17:53:05 +0800 Subject: [PATCH 05/12] rebase --- pip_check_reqs/common.py | 2 +- pip_check_reqs/find_extra_reqs.py | 3 +++ pip_check_reqs/find_missing_reqs.py | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pip_check_reqs/common.py b/pip_check_reqs/common.py index 6b6e370..1da479a 100644 --- a/pip_check_reqs/common.py +++ b/pip_check_reqs/common.py @@ -118,7 +118,7 @@ def find_imported_modules(options): log.info('ignoring: %s', os.path.relpath(filename)) continue log.debug('scanning: %s', os.path.relpath(filename)) - with open(filename) as f: + with open(filename, options.encoding) as f: content = f.read() vis.set_location(filename) vis.visit(ast.parse(content)) diff --git a/pip_check_reqs/find_extra_reqs.py b/pip_check_reqs/find_extra_reqs.py index 5549d89..d7c36d7 100644 --- a/pip_check_reqs/find_extra_reqs.py +++ b/pip_check_reqs/find_extra_reqs.py @@ -98,6 +98,9 @@ def main(): action="store_true", default=False, help="display version information") + parser.add_option("--encoding", + default=None, + help="python script file encoding") (options, args) = parser.parse_args() diff --git 
a/pip_check_reqs/find_missing_reqs.py b/pip_check_reqs/find_missing_reqs.py index a20296a..df4827a 100644 --- a/pip_check_reqs/find_missing_reqs.py +++ b/pip_check_reqs/find_missing_reqs.py @@ -113,6 +113,9 @@ def main(): action="store_true", default=False, help="display version information") + parser.add_option("--encoding", + default=None, + help="python script file encoding") (options, args) = parser.parse_args() From ebf552462f34a9893cb18e9efc3b060998fd9e3f Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Wed, 22 Jan 2020 17:46:46 +0800 Subject: [PATCH 06/12] use codecs to support python2 --- pip_check_reqs/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pip_check_reqs/common.py b/pip_check_reqs/common.py index 1da479a..18bcb45 100644 --- a/pip_check_reqs/common.py +++ b/pip_check_reqs/common.py @@ -4,6 +4,7 @@ import logging import os import re +import codecs from packaging.utils import canonicalize_name # Between different versions of pip the location of PipSession has changed. 
@@ -118,7 +119,10 @@ def find_imported_modules(options): log.info('ignoring: %s', os.path.relpath(filename)) continue log.debug('scanning: %s', os.path.relpath(filename)) - with open(filename, options.encoding) as f: + charset = None + if hasattr(options, 'encoding'): + charset = options.encoding + with codecs.open(filename, encoding=charset) as f: content = f.read() vis.set_location(filename) vis.visit(ast.parse(content)) From b5ef10a8d04f5d3b86f779c0c9f39961e6639af8 Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Mon, 22 Jun 2020 18:20:36 +0800 Subject: [PATCH 07/12] test fix --- pip_check_reqs/common.py | 14 +++++++++----- tests/test_common.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pip_check_reqs/common.py b/pip_check_reqs/common.py index 18bcb45..77d2b28 100644 --- a/pip_check_reqs/common.py +++ b/pip_check_reqs/common.py @@ -110,6 +110,14 @@ def pyfiles(root): if ext == '.py': yield os.path.join(root, f) +def openAndReadFile(fileName, options): + charset = None + if hasattr(options, 'encoding'): + charset = options.encoding + with codecs.open(fileName, encoding=charset) as f: + content = f.read() + return content + def find_imported_modules(options): vis = ImportVisitor(options) @@ -119,11 +127,7 @@ def find_imported_modules(options): log.info('ignoring: %s', os.path.relpath(filename)) continue log.debug('scanning: %s', os.path.relpath(filename)) - charset = None - if hasattr(options, 'encoding'): - charset = options.encoding - with codecs.open(filename, encoding=charset) as f: - content = f.read() + content = openAndReadFile(filename, options) vis.set_location(filename) vis.visit(ast.parse(content)) return vis.finalise() diff --git a/tests/test_common.py b/tests/test_common.py index d3216f2..783b18e 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -118,7 +118,7 @@ def __enter__(self): def __exit__(self, *args): pass - monkeypatch.setattr(common, 'open', FakeFile, raising=False) + 
monkeypatch.setattr(common, 'openAndReadFile', lambda x,y: FakeFile('').read(), raising=False) caplog.set_level(logging.INFO) From 77d580b1b229fd2cbf911d69eb33af66e18f0a46 Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Mon, 22 Jun 2020 18:20:36 +0800 Subject: [PATCH 08/12] test fix --- pip_check_reqs/common.py | 14 +++++++++----- tests/test_common.py | 11 ++++++----- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pip_check_reqs/common.py b/pip_check_reqs/common.py index 18bcb45..77d2b28 100644 --- a/pip_check_reqs/common.py +++ b/pip_check_reqs/common.py @@ -110,6 +110,14 @@ def pyfiles(root): if ext == '.py': yield os.path.join(root, f) +def openAndReadFile(fileName, options): + charset = None + if hasattr(options, 'encoding'): + charset = options.encoding + with codecs.open(fileName, encoding=charset) as f: + content = f.read() + return content + def find_imported_modules(options): vis = ImportVisitor(options) @@ -119,11 +127,7 @@ def find_imported_modules(options): log.info('ignoring: %s', os.path.relpath(filename)) continue log.debug('scanning: %s', os.path.relpath(filename)) - charset = None - if hasattr(options, 'encoding'): - charset = options.encoding - with codecs.open(filename, encoding=charset) as f: - content = f.read() + content = openAndReadFile(filename, options) vis.set_location(filename) vis.visit(ast.parse(content)) return vis.finalise() diff --git a/tests/test_common.py b/tests/test_common.py index d3216f2..7722dea 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -118,7 +118,7 @@ def __enter__(self): def __exit__(self, *args): pass - monkeypatch.setattr(common, 'open', FakeFile, raising=False) + monkeypatch.setattr(common, 'openAndReadFile', lambda x,y: FakeFile('').read(), raising=False) caplog.set_level(logging.INFO) @@ -146,12 +146,12 @@ def ignore_mods(module): assert caplog.records[0].message == 'ignoring: ham.py' -@pytest.mark.parametrize(["files","expect"], [ - (['utf8.py'],['ast', 'os', 
'hashlib']), - (['gbk.py'],['ast', 'os', 'hashlib']) +@pytest.mark.parametrize(["files","encodingArg","expect"], [ + (['utf8.py'],'utf-8',['ast', 'os', 'hashlib']), + (['gbk.py'],'gbk',['ast', 'os', 'hashlib']) ]) def test_find_imported_modules_charset(monkeypatch, caplog, - files, expect): + files, encodingArg, expect): monkeypatch.setattr(common, 'pyfiles', pretend.call_recorder(lambda x: files)) @@ -161,6 +161,7 @@ def test_find_imported_modules_charset(monkeypatch, caplog, class options: paths = ['.'] verbose = True + encoding = encodingArg @staticmethod def ignore_files(path): From 46b63a93473ed4b501f7488bdec04b9110d047ba Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Mon, 22 Jun 2020 18:38:24 +0800 Subject: [PATCH 09/12] fix lint check --- tests/gbk.py | 11 +++++++---- tests/test_common.py | 1 - tests/utf8.py | 11 +++++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/gbk.py b/tests/gbk.py index 0b63d94..10e0b0f 100644 --- a/tests/gbk.py +++ b/tests/gbk.py @@ -1,6 +1,9 @@ from os import path -import ast, hashlib -from __future__ import braces -import ast, sys -from . import friend +import ast +import hashlib + +print(ast.Add) +path.exists(".") +print(hashlib.md5(bytearray())) + print("ÊǺº×Ö") \ No newline at end of file diff --git a/tests/test_common.py b/tests/test_common.py index 7722dea..4c23ed3 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -155,7 +155,6 @@ def test_find_imported_modules_charset(monkeypatch, caplog, monkeypatch.setattr(common, 'pyfiles', pretend.call_recorder(lambda x: files)) - caplog.set_level(logging.INFO) class options: diff --git a/tests/utf8.py b/tests/utf8.py index 4ecb963..08f5ca4 100644 --- a/tests/utf8.py +++ b/tests/utf8.py @@ -1,6 +1,9 @@ from os import path -import ast, hashlib -from __future__ import braces -import ast, sys -from . 
import friend +import ast +import hashlib + +print(ast.Add) +path.exists(".") +print(hashlib.md5(bytearray()).digest()) + print("是汉字") \ No newline at end of file From 4b73439e30ae4ad0877ce1797e0840ed13a6c823 Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Mon, 22 Jun 2020 18:42:21 +0800 Subject: [PATCH 10/12] fix lint check --- pip_check_reqs/common.py | 1 + tests/gbk.py | 2 +- tests/test_common.py | 16 +++++++++------- tests/utf8.py | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pip_check_reqs/common.py b/pip_check_reqs/common.py index 77d2b28..00cb9f4 100644 --- a/pip_check_reqs/common.py +++ b/pip_check_reqs/common.py @@ -110,6 +110,7 @@ def pyfiles(root): if ext == '.py': yield os.path.join(root, f) + def openAndReadFile(fileName, options): charset = None if hasattr(options, 'encoding'): diff --git a/tests/gbk.py b/tests/gbk.py index 10e0b0f..c8eb0ad 100644 --- a/tests/gbk.py +++ b/tests/gbk.py @@ -6,4 +6,4 @@ path.exists(".") print(hashlib.md5(bytearray())) -print("ÊǺº×Ö") \ No newline at end of file +print("ÊǺº×Ö") diff --git a/tests/test_common.py b/tests/test_common.py index 4c23ed3..03b17f1 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -85,7 +85,7 @@ def test_pyfiles_package(monkeypatch): pretend.call_recorder(lambda x: walk_results)) assert list(common.pyfiles('spam')) == \ - ['spam/__init__.py', 'spam/ham.py', 'spam/dub/bass.py'] + ['spam/__init__.py', 'spam/ham.py', 'spam/dub/bass.py'] @pytest.mark.parametrize(["ignore_ham", "ignore_hashlib", "expect", "locs"], [ @@ -118,7 +118,9 @@ def __enter__(self): def __exit__(self, *args): pass - monkeypatch.setattr(common, 'openAndReadFile', lambda x,y: FakeFile('').read(), raising=False) + monkeypatch.setattr(common, 'openAndReadFile', + lambda x, y: FakeFile('').read(), + raising=False) caplog.set_level(logging.INFO) @@ -146,14 +148,14 @@ def ignore_mods(module): assert caplog.records[0].message == 'ignoring: ham.py' 
-@pytest.mark.parametrize(["files","encodingArg","expect"], [ - (['utf8.py'],'utf-8',['ast', 'os', 'hashlib']), - (['gbk.py'],'gbk',['ast', 'os', 'hashlib']) +@pytest.mark.parametrize(["files", "encodingArg", "expect"], [ + (['utf8.py'], 'utf-8', ['ast', 'os', 'hashlib']), + (['gbk.py'], 'gbk', ['ast', 'os', 'hashlib']) ]) def test_find_imported_modules_charset(monkeypatch, caplog, - files, encodingArg, expect): + files, encodingArg, expect): monkeypatch.setattr(common, 'pyfiles', - pretend.call_recorder(lambda x: files)) + pretend.call_recorder(lambda x: files)) caplog.set_level(logging.INFO) diff --git a/tests/utf8.py b/tests/utf8.py index 08f5ca4..66a690d 100644 --- a/tests/utf8.py +++ b/tests/utf8.py @@ -6,4 +6,4 @@ path.exists(".") print(hashlib.md5(bytearray()).digest()) -print("是汉字") \ No newline at end of file +print("是汉字") From 3c19d6ea29eaa31f3ad92718e53707f47c1d3ecd Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Mon, 22 Jun 2020 18:45:50 +0800 Subject: [PATCH 11/12] path fix --- tests/{ => anylizeFiles}/gbk.py | 0 tests/{ => anylizeFiles}/utf8.py | 2 +- tests/test_common.py | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename tests/{ => anylizeFiles}/gbk.py (100%) rename tests/{ => anylizeFiles}/utf8.py (87%) diff --git a/tests/gbk.py b/tests/anylizeFiles/gbk.py similarity index 100% rename from tests/gbk.py rename to tests/anylizeFiles/gbk.py diff --git a/tests/utf8.py b/tests/anylizeFiles/utf8.py similarity index 87% rename from tests/utf8.py rename to tests/anylizeFiles/utf8.py index 66a690d..b235b45 100644 --- a/tests/utf8.py +++ b/tests/anylizeFiles/utf8.py @@ -3,7 +3,7 @@ import hashlib print(ast.Add) -path.exists(".") +path.exists("..") print(hashlib.md5(bytearray()).digest()) print("是汉字") diff --git a/tests/test_common.py b/tests/test_common.py index 03b17f1..f44e956 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -149,8 +149,8 @@ def ignore_mods(module): @pytest.mark.parametrize(["files", "encodingArg", 
"expect"], [ - (['utf8.py'], 'utf-8', ['ast', 'os', 'hashlib']), - (['gbk.py'], 'gbk', ['ast', 'os', 'hashlib']) + (['tests/anylizeFiles/utf8.py'], 'utf-8', ['ast', 'os', 'hashlib']), + (['tests/anylizeFiles/gbk.py'], 'gbk', ['ast', 'os', 'hashlib']) ]) def test_find_imported_modules_charset(monkeypatch, caplog, files, encodingArg, expect): From b83e75532a8e48abf92ebb4fbddc1db73ce48091 Mon Sep 17 00:00:00 2001 From: "Liu Qiang(BSS-HZ)" Date: Tue, 23 Jun 2020 09:54:30 +0800 Subject: [PATCH 12/12] improve test coverage --- tests/test_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_common.py b/tests/test_common.py index f44e956..43a8767 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -150,7 +150,8 @@ def ignore_mods(module): @pytest.mark.parametrize(["files", "encodingArg", "expect"], [ (['tests/anylizeFiles/utf8.py'], 'utf-8', ['ast', 'os', 'hashlib']), - (['tests/anylizeFiles/gbk.py'], 'gbk', ['ast', 'os', 'hashlib']) + (['tests/anylizeFiles/gbk.py'], 'gbk', ['ast', 'os', 'hashlib']), + (['tests/anylizeFiles/utf8.py'], None, ['ast', 'os', 'hashlib']) ]) def test_find_imported_modules_charset(monkeypatch, caplog, files, encodingArg, expect):