Skip to content

Commit 36af62c

Browse files
authored
Merge pull request #233 from pre-commit/mixed-line-ending
Add mixed-line-ending hook
2 parents 4eba5d0 + fbcd096 commit 36af62c

File tree

6 files changed

+207
-0
lines changed

6 files changed

+207
-0
lines changed

.pre-commit-hooks.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,15 @@
191191
# for backward compatibility
192192
files: ''
193193
minimum_pre_commit_version: 0.15.0
194+
- id: mixed-line-ending
195+
name: Mixed line ending
196+
description: Replaces or checks mixed line ending
197+
entry: mixed-line-ending
198+
language: python
199+
types: [text]
200+
# for backward compatibility
201+
files: ''
202+
minimum_pre_commit_version: 0.15.0
194203
- id: name-tests-test
195204
name: Tests should end in _test.py
196205
description: This verifies that test files are named correctly

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ Add this to your `.pre-commit-config.yaml`
5858
- `file-contents-sorter` - Sort the lines in specified files (defaults to alphabetical). You must provide list of target files as input to it. Note that this hook WILL remove blank lines and does NOT respect any comments.
5959
- `flake8` - Run flake8 on your python files.
6060
- `forbid-new-submodules` - Prevent addition of new git submodules.
61+
- `mixed-line-ending` - Replaces or checks mixed line endings.
62+
- `--fix={auto,crlf,lf,no}`
63+
- `auto` - Automatically replaces all line endings with the most frequent one. This is the default.
64+
- `crlf`, `lf` - Forces replacing line endings with CRLF and LF, respectively.
65+
- `no` - Checks if there is any mixed line ending without modifying any file.
6166
- `name-tests-test` - Assert that files in tests/ end in `_test.py`.
6267
- Use `args: ['--django']` to match `test*.py` instead.
6368
- `no-commit-to-branch` - Protect specific branches from direct checkins.

hooks.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,12 @@
130130
entry: upgrade-your-pre-commit-version
131131
files: ''
132132
minimum_pre_commit_version: 0.15.0
133+
- id: mixed-line-ending
134+
language: system
135+
name: upgrade-your-pre-commit-version
136+
entry: upgrade-your-pre-commit-version
137+
files: ''
138+
minimum_pre_commit_version: 0.15.0
133139
- id: name-tests-test
134140
language: system
135141
name: upgrade-your-pre-commit-version
pre_commit_hooks/mixed_line_ending.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
from __future__ import absolute_import
2+
from __future__ import print_function
3+
from __future__ import unicode_literals
4+
5+
import argparse
6+
import collections
7+
8+
9+
# The three line-ending byte sequences this hook knows about.
CR = b'\r'
LF = b'\n'
CRLF = CR + LF
# Listed from least to most preferred (CR, then CRLF, then LF).  CRLF has to
# come before LF so that a line ending in b'\r\n' is classified as CRLF
# rather than LF when the endings are tried in this order.
ALL_ENDINGS = (CR, CRLF, LF)
# Maps each forcing ``--fix`` value to the ending it enforces.
FIX_TO_LINE_ENDING = {'cr': CR, 'crlf': CRLF, 'lf': LF}
15+
16+
17+
def _fix(filename, contents, ending):
    """Overwrite *filename* with *contents* normalised to a single *ending*.

    Every line of *contents* (bytes) has its trailing CR/LF characters
    removed and *ending* appended, so a final line without a terminator
    gains one as well.
    """
    normalised = []
    for raw_line in contents.splitlines(True):
        normalised.append(raw_line.rstrip(b'\r\n') + ending)
    with open(filename, 'wb') as out:
        out.write(b''.join(normalised))
23+
24+
25+
def fix_filename(filename, fix):
    """Check one file for mixed line endings and optionally rewrite it.

    Args:
        filename: path of the file to inspect; it is read in binary mode and,
            when a fix is applied, rewritten in binary mode.
        fix: ``'no'`` to only report, ``'auto'`` to convert a mixed file to
            its most frequent ending, or a key of ``FIX_TO_LINE_ENDING`` to
            force that ending.

    Returns:
        A truthy value when the file has mixed endings (``'no'``) or was
        rewritten (every other mode); a falsy value otherwise.
    """
    with open(filename, 'rb') as f:
        contents = f.read()

    counts = collections.defaultdict(int)

    for line in contents.splitlines(True):
        # ALL_ENDINGS tries CRLF before LF, so b'\r\n' is counted as CRLF.
        # A final line without a terminator matches nothing and is skipped.
        for ending in ALL_ENDINGS:
            if line.endswith(ending):
                counts[ending] += 1
                break

    # Some amount of mixed line endings
    mixed = sum(bool(x) for x in counts.values()) > 1

    if fix == 'no' or (fix == 'auto' and not mixed):
        return mixed

    if fix == 'auto':
        max_ending = LF
        max_lines = 0
        # ordering is important here such that lf > crlf > cr
        for ending_type in ALL_ENDINGS:
            # also important, using >= to find a max that prefers the last
            if counts[ending_type] >= max_lines:
                max_ending = ending_type
                max_lines = counts[ending_type]

        _fix(filename, contents, max_ending)
        return 1

    target_ending = FIX_TO_LINE_ENDING[fix]
    # Find out whether any line uses an ending *other* than the target.
    # The file may contain no line with the target ending at all (or no
    # lines whatsoever), so drop the key tolerantly: ``del`` would raise
    # KeyError because a defaultdict only creates keys on item lookup.
    counts.pop(target_ending, None)
    other_endings = bool(sum(counts.values()))
    if other_endings:
        _fix(filename, contents, target_ending)
    return other_endings
63+
64+
65+
def main(argv=None):
    """Run the hook on the files named in *argv* and return its exit status.

    The status is the bitwise OR of the ``fix_filename`` results, so it is
    ``0`` only when no file was reported.
    """
    fix_choices = ('auto', 'no') + tuple(FIX_TO_LINE_ENDING)

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-f', '--fix',
        choices=fix_choices,
        default='auto',
        help='Replace line ending with the specified. Default is "auto"',
    )
    arg_parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    options = arg_parser.parse_args(argv)

    status = 0
    for path in options.filenames:
        status = status | fix_filename(path, options.fix)
    return status
80+
81+
82+
if __name__ == '__main__':
    # ``exit`` is a helper injected by the ``site`` module for interactive
    # sessions and is missing when Python runs with ``-S``; raising
    # SystemExit directly always works and carries main()'s status.
    raise SystemExit(main())

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
'file-contents-sorter = pre_commit_hooks.file_contents_sorter:main',
5454
'fix-encoding-pragma = pre_commit_hooks.fix_encoding_pragma:main',
5555
'forbid-new-submodules = pre_commit_hooks.forbid_new_submodules:main',
56+
# The target after the colon must be a callable defined by the module;
# pre_commit_hooks.mixed_line_ending exposes ``main``.
'mixed-line-ending = pre_commit_hooks.mixed_line_ending:main',
5657
'name-tests-test = pre_commit_hooks.tests_should_end_in_test:validate_files',
5758
'no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main',
5859
'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json',

tests/mixed_line_ending_test.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
from __future__ import absolute_import
2+
from __future__ import unicode_literals
3+
4+
import pytest
5+
6+
from pre_commit_hooks.mixed_line_ending import main
7+
8+
9+
@pytest.mark.parametrize(
    ('input_s', 'output'),
    (
        # LF is the most frequent ending
        (b'foo\r\nbar\nbaz\n', b'foo\nbar\nbaz\n'),
        # CRLF is the most frequent ending
        (b'foo\r\nbar\nbaz\r\n', b'foo\r\nbar\r\nbaz\r\n'),
        # CR is the most frequent ending
        (b'foo\rbar\nbaz\r', b'foo\rbar\rbaz\r'),
        # tie between LF and CRLF
        (b'foo\r\nbar\n', b'foo\nbar\n'),
        # tie between LF and CR
        (b'foo\rbar\n', b'foo\nbar\n'),
        # tie between CRLF and CR
        (b'foo\r\nbar\r', b'foo\r\nbar\r\n'),
        # three-way tie between CRLF, LF and CR
        (b'foo\r\nbar\nbaz\r', b'foo\nbar\nbaz\n'),
    ),
)
def test_mixed_line_ending_fixes_auto(input_s, output, tmpdir):
    """The default mode rewrites a mixed file and reports it with status 1."""
    target = tmpdir.join('file.txt')
    target.write_binary(input_s)

    exit_code = main((target.strpath,))

    assert exit_code == 1
    assert target.read_binary() == output
35+
36+
37+
def test_non_mixed_no_newline_end_of_file(tmpdir):
    """A uniform file lacking a final newline passes and is left untouched."""
    original = b'foo\nbar\nbaz'
    target = tmpdir.join('f.txt')
    target.write_binary(original)

    assert not main((target.strpath,))
    # Documents the current behaviour: the hook *could* add the missing
    # final newline, but it leaves the file alone.
    assert target.read_binary() == original
44+
45+
46+
def test_mixed_no_newline_end_of_file(tmpdir):
    """A mixed file lacking a final newline is rewritten and gains one."""
    target = tmpdir.join('f.txt')
    target.write_binary(b'foo\r\nbar\nbaz')

    assert main((target.strpath,))
    # Documents the current behaviour: the rewrite also terminates the last
    # line, which is slightly inconsistent with the non-mixed case but is
    # considered the better behaviour.
    assert target.read_binary() == b'foo\nbar\nbaz\n'
54+
55+
56+
@pytest.mark.parametrize(
    ('fix_option', 'input_s'),
    (
        # --fix=auto accepts any file that uses a single kind of ending
        ('--fix=auto', b'foo\r\nbar\r\nbaz\r\n'),
        ('--fix=auto', b'foo\rbar\rbaz\r'),
        ('--fix=auto', b'foo\nbar\nbaz\n'),
        # forcing CRLF on a file that is already all CRLF
        ('--fix=crlf', b'foo\r\nbar\r\nbaz\r\n'),
        # forcing LF on a file that is already all LF
        ('--fix=lf', b'foo\nbar\nbaz\n'),
    ),
)
def test_line_endings_ok(fix_option, input_s, tmpdir):
    """Files that already satisfy the requested mode pass and stay unchanged."""
    target = tmpdir.join('input.txt')
    target.write_binary(input_s)

    exit_code = main((fix_option, target.strpath))

    assert exit_code == 0
    assert target.read_binary() == input_s
76+
77+
78+
def test_no_fix_does_not_modify(tmpdir):
    """--fix=no reports a mixed file with status 1 but never rewrites it."""
    original = b'foo\r\nbar\rbaz\nwomp\n'
    target = tmpdir.join('input.txt')
    target.write_binary(original)

    exit_code = main(('--fix=no', target.strpath))

    assert exit_code == 1
    assert target.read_binary() == original
86+
87+
88+
def test_fix_lf(tmpdir):
    """--fix=lf converts every ending of a mixed file to LF."""
    target = tmpdir.join('input.txt')
    target.write_binary(b'foo\r\nbar\rbaz\n')

    exit_code = main(('--fix=lf', target.strpath))

    assert exit_code == 1
    assert target.read_binary() == b'foo\nbar\nbaz\n'
95+
96+
97+
def test_fix_crlf(tmpdir):
    """--fix=crlf converts every ending of a mixed file to CRLF."""
    target = tmpdir.join('input.txt')
    target.write_binary(b'foo\r\nbar\rbaz\n')

    exit_code = main(('--fix=crlf', target.strpath))

    assert exit_code == 1
    assert target.read_binary() == b'foo\r\nbar\r\nbaz\r\n'

0 commit comments

Comments
 (0)