Skip to content

Commit aa9c202

Browse files
authored
Merge pull request #374 from pre-commit/check_docstring_first_no_encoding
Don't require an encoding for check-docstring-first
2 parents cbc17d1 + 2f6a251 commit aa9c202

File tree

2 files changed

+35
-18
lines changed

2 files changed

+35
-18
lines changed

pre_commit_hooks/check_docstring_first.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,14 +8,23 @@
88
from typing import Optional
99
from typing import Sequence
1010

11+
import six
1112

12-
NON_CODE_TOKENS = frozenset((
13-
tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
14-
))
13+
if six.PY2: # pragma: no cover (PY2)
14+
from tokenize import generate_tokens as tokenize_tokenize
15+
OTHER_NON_CODE = ()
16+
else: # pragma: no cover (PY3)
17+
from tokenize import tokenize as tokenize_tokenize
18+
OTHER_NON_CODE = (tokenize.ENCODING,)
19+
20+
NON_CODE_TOKENS = frozenset(
21+
(tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL) +
22+
OTHER_NON_CODE,
23+
)
1524

1625

1726
def check_docstring_first(src, filename='<unknown>'):
18-
# type: (str, str) -> int
27+
# type: (bytes, str) -> int
1928
"""Returns nonzero if the source has what looks like a docstring that is
2029
not at the beginning of the source.
2130
@@ -25,7 +34,7 @@ def check_docstring_first(src, filename='<unknown>'):
2534
found_docstring_line = None
2635
found_code_line = None
2736

28-
tok_gen = tokenize.generate_tokens(io.StringIO(src).readline)
37+
tok_gen = tokenize_tokenize(io.BytesIO(src).readline)
2938
for tok_type, _, (sline, scol), _, _ in tok_gen:
3039
# Looks like a docstring!
3140
if tok_type == tokenize.STRING and scol == 0:
@@ -61,7 +70,7 @@ def main(argv=None): # type: (Optional[Sequence[str]]) -> int
6170
retv = 0
6271

6372
for filename in args.filenames:
64-
with io.open(filename, encoding='UTF-8') as f:
73+
with open(filename, 'rb') as f:
6574
contents = f.read()
6675
retv |= check_docstring_first(contents, filename=filename)
6776

tests/check_docstring_first_test.py

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
from __future__ import absolute_import
23
from __future__ import unicode_literals
34

@@ -10,37 +11,37 @@
1011
# Contents, expected, expected_output
1112
TESTS = (
1213
# trivial
13-
('', 0, ''),
14+
(b'', 0, ''),
1415
# Acceptable
15-
('"foo"', 0, ''),
16+
(b'"foo"', 0, ''),
1617
# Docstring after code
1718
(
18-
'from __future__ import unicode_literals\n'
19-
'"foo"\n',
19+
b'from __future__ import unicode_literals\n'
20+
b'"foo"\n',
2021
1,
2122
'{filename}:2 Module docstring appears after code '
2223
'(code seen on line 1).\n',
2324
),
2425
# Test double docstring
2526
(
26-
'"The real docstring"\n'
27-
'from __future__ import absolute_import\n'
28-
'"fake docstring"\n',
27+
b'"The real docstring"\n'
28+
b'from __future__ import absolute_import\n'
29+
b'"fake docstring"\n',
2930
1,
3031
'{filename}:3 Multiple module docstrings '
3132
'(first docstring on line 1).\n',
3233
),
3334
# Test multiple lines of code above
3435
(
35-
'import os\n'
36-
'import sys\n'
37-
'"docstring"\n',
36+
b'import os\n'
37+
b'import sys\n'
38+
b'"docstring"\n',
3839
1,
3940
'{filename}:3 Module docstring appears after code '
4041
'(code seen on line 1).\n',
4142
),
4243
# String literals in expressions are ok.
43-
('x = "foo"\n', 0, ''),
44+
(b'x = "foo"\n', 0, ''),
4445
)
4546

4647

@@ -58,6 +59,13 @@ def test_unit(capsys, contents, expected, expected_out):
5859
@all_tests
5960
def test_integration(tmpdir, capsys, contents, expected, expected_out):
6061
f = tmpdir.join('test.py')
61-
f.write(contents)
62+
f.write_binary(contents)
6263
assert main([f.strpath]) == expected
6364
assert capsys.readouterr()[0] == expected_out.format(filename=f.strpath)
65+
66+
67+
def test_arbitrary_encoding(tmpdir):
68+
f = tmpdir.join('f.py')
69+
contents = '# -*- coding: cp1252\nx = "£"'.encode('cp1252')
70+
f.write_binary(contents)
71+
assert main([f.strpath]) == 0

0 commit comments

Comments
 (0)