Skip to content

Commit 4faed34

Browse files
pawamoy and asottile
authored and committed
Fix parsing of git output with unusual characters
On Windows, all files are "executable". Therefore, to know whether a file is supposed to be executable, we check how its attributes were recorded by git: we run a `git ls-files` command in a subprocess. By default, this command outputs its information on multiple lines (each file and its data form one entry, and entries are separated by newlines). When a filename contains an unusual character, the character is escaped as an octal byte sequence (such as `\303\261`), and git wraps the whole filename in double quotes because of the backslashes. This breaks the current code because we try to open the filename including the double quotes: it doesn't exist, of course. Instead of trying to fix this special case by removing the double quotes, and breaking other cases (a double quote is a valid filename character on Linux), we tell git to separate each item with the null character `\0` instead of a newline `\n`, using the `-z` option. With this option, git doesn't escape unusual characters with octal sequences, so the output is fixed, and we parse it by splitting on `\0` instead of `\n`. Fixes #508.
1 parent 5372f44 commit 4faed34

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

pre_commit_hooks/check_executables_have_shebangs.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@
1212
EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))
1313

1414

15+
def zsplit(s: str) -> List[str]:
    """Split NUL-delimited text into a list of its fields.

    Leading and trailing NUL characters are removed before splitting, so a
    terminating ``\\0`` does not produce an empty field.  Input that is
    empty, or that consists only of NUL characters, yields an empty list.
    """
    trimmed = s.strip('\0')
    if not trimmed:
        return []
    return trimmed.split('\0')
21+
22+
1523
def check_executables(paths: List[str]) -> int:
1624
if sys.platform == 'win32': # pragma: win32 cover
1725
return _check_git_filemode(paths)
@@ -26,9 +34,9 @@ def check_executables(paths: List[str]) -> int:
2634

2735

2836
def _check_git_filemode(paths: Sequence[str]) -> int:
29-
outs = cmd_output('git', 'ls-files', '--stage', '--', *paths)
37+
outs = cmd_output('git', 'ls-files', '-z', '--stage', '--', *paths)
3038
seen: Set[str] = set()
31-
for out in outs.splitlines():
39+
for out in zsplit(outs):
3240
metadata, path = out.split('\t')
3341
tagmode = metadata.split(' ', 1)[0]
3442

tests/check_executables_have_shebangs_test.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,21 @@ def test_check_git_filemode_passing(tmpdir):
7373
assert check_executables_have_shebangs._check_git_filemode(files) == 0
7474

7575

76+
def test_check_git_filemode_passing_unusual_characters(tmpdir):
    """A tracked executable with a shebang and a non-ASCII name passes."""
    with tmpdir.as_cwd():
        cmd_output('git', 'init', '.')

        script = tmpdir.join('mañana.txt')
        script.write('#!/usr/bin/env bash')
        script_path = str(script)
        # Flag the file as executable on disk, stage it, then record the
        # executable bit in the git index as well.
        for command in (
                ('chmod', '+x'),
                ('git', 'add'),
                ('git', 'update-index', '--chmod=+x'),
        ):
            cmd_output(*command, script_path)

        retval = check_executables_have_shebangs._check_git_filemode(
            (script_path,),
        )
        assert retval == 0
89+
90+
7691
def test_check_git_filemode_failing(tmpdir):
7792
with tmpdir.as_cwd():
7893
cmd_output('git', 'init', '.')
@@ -87,6 +102,16 @@ def test_check_git_filemode_failing(tmpdir):
87102
assert check_executables_have_shebangs._check_git_filemode(files) == 1
88103

89104

105+
@pytest.mark.parametrize(
    'out',
    (
        '\0f1\0f2\0',
        '\0f1\0f2',
        'f1\0f2\0',
    ),
)
def test_check_zsplits_correctly(out):
    """Leading or trailing NUL characters do not change the parsed fields."""
    fields = check_executables_have_shebangs.zsplit(out)
    assert fields == ['f1', 'f2']
108+
109+
110+
@pytest.mark.parametrize(
    'out',
    (
        '\0\0',
        '\0',
        '',
    ),
)
def test_check_zsplit_returns_empty(out):
    """Empty input, or input made only of NUL characters, gives no fields."""
    fields = check_executables_have_shebangs.zsplit(out)
    assert fields == []
113+
114+
90115
@pytest.mark.parametrize(
91116
('content', 'mode', 'expected'),
92117
(

0 commit comments

Comments
 (0)