Skip to content

Commit fc8a5b2

Browse files
nagromcasottile
authored and committed
Add mixed-line-ending hook
1 parent 78dffcc commit fc8a5b2

File tree

6 files changed

+388
-0
lines changed

6 files changed

+388
-0
lines changed

.pre-commit-hooks.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,15 @@
191191
# for backward compatibility
192192
files: ''
193193
minimum_pre_commit_version: 0.15.0
194+
- id: mixed-line-ending
195+
name: Mixed line ending
196+
description: Replaces or checks mixed line ending
197+
entry: mixed-line-ending
198+
language: python
199+
types: [text]
200+
# for backward compatibility
201+
files: ''
202+
minimum_pre_commit_version: 0.15.0
194203
- id: name-tests-test
195204
name: Tests should end in _test.py
196205
description: This verifies that test files are named correctly

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ Add this to your `.pre-commit-config.yaml`
5858
- `file-contents-sorter` - Sort the lines in specified files (defaults to alphabetical). You must provide list of target files as input to it. Note that this hook WILL remove blank lines and does NOT respect any comments.
5959
- `flake8` - Run flake8 on your python files.
6060
- `forbid-new-submodules` - Prevent addition of new git submodules.
61+
- `mixed-line-ending` - Replaces or checks mixed line ending.
62+
- `--fix={auto,crlf,lf,no}`
63+
- `auto` - Automatically replaces all line endings with the most frequent one found in the file. This is the default argument.
64+
- `crlf`, `lf` - Forces every line ending to be replaced by CRLF or LF, respectively.
65+
- `no` - Checks if there is any mixed line ending without modifying any file.
6166
- `name-tests-test` - Assert that files in tests/ end in `_test.py`.
6267
- Use `args: ['--django']` to match `test*.py` instead.
6368
- `no-commit-to-branch` - Protect specific branches from direct checkins.

hooks.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,12 @@
130130
entry: upgrade-your-pre-commit-version
131131
files: ''
132132
minimum_pre_commit_version: 0.15.0
133+
- id: mixed-line-ending
134+
language: system
135+
name: upgrade-your-pre-commit-version
136+
entry: upgrade-your-pre-commit-version
137+
files: ''
138+
minimum_pre_commit_version: 0.15.0
133139
- id: name-tests-test
134140
language: system
135141
name: upgrade-your-pre-commit-version
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
import argparse
2+
import re
3+
import sys
4+
5+
from enum import Enum
6+
7+
8+
class LineEnding(Enum):
    """Line-ending flavours the hook knows about.

    Every member value is a ``(string, opt_name, regex)`` tuple which
    ``__init__`` unpacks into the attributes:

    * ``string``    -- the raw bytes of the line ending
    * ``str_print`` -- ``repr()`` of those bytes, for display
    * ``opt_name``  -- short lowercase name of the ending
    * ``regex``     -- compiled bytes pattern matching this ending only
    """

    # Carriage return that is NOT immediately followed by a line feed.
    CR = b'\r', 'cr', re.compile(b'\r(?!\n)', re.DOTALL)
    # The two-byte carriage return + line feed sequence.
    CRLF = b'\r\n', 'crlf', re.compile(b'\r\n', re.DOTALL)
    # Line feed that is NOT immediately preceded by a carriage return.
    LF = b'\n', 'lf', re.compile(b'(?<!\r)\n', re.DOTALL)

    def __init__(self, raw_bytes, short_name, matcher):
        self.regex = matcher
        self.opt_name = short_name
        self.string = raw_bytes
        self.str_print = repr(raw_bytes)
18+
19+
20+
class MixedLineEndingOption(Enum):
    """Possible values of the ``--fix`` command-line option.

    Every member value is an ``(opt_name, line_ending_enum)`` tuple:

    * ``opt_name``         -- the text accepted on the command line
    * ``line_ending_enum`` -- the ``LineEnding`` to force, or ``None``
      for the options that do not force a specific ending
    """

    AUTO = 'auto', None
    NO = 'no', None
    CRLF = LineEnding.CRLF.opt_name, LineEnding.CRLF
    LF = LineEnding.LF.opt_name, LineEnding.LF

    def __init__(self, cli_name, forced_ending):
        self.line_ending_enum = forced_ending
        self.opt_name = cli_name
29+
30+
31+
class MixedLineDetection(Enum):
    """Outcome of inspecting a file for mixed line endings.

    Every member value is an ``(index, mle_found, line_ending_enum)``
    tuple:

    * ``index``            -- unique number per member (see ``__init__``)
    * ``mle_found``        -- ``True`` for the ``MIXED_*`` members
    * ``line_ending_enum`` -- the dominant ``LineEnding`` for the
      ``MIXED_*`` members, ``None`` otherwise
    """

    NOT_MIXED = 1, False, None
    UNKNOWN = 2, False, None
    MIXED_MOSTLY_CRLF = 3, True, LineEnding.CRLF
    MIXED_MOSTLY_LF = 4, True, LineEnding.LF
    MIXED_MOSTLY_CR = 5, True, LineEnding.CR

    def __init__(self, unique_id, is_mixed, dominant_ending):
        # The index only exists to keep member values distinct: without
        # it NOT_MIXED and UNKNOWN would share the value (False, None)
        # and Enum would turn the second into an alias of the first.
        self.index = unique_id
        self.line_ending_enum = dominant_ending
        self.mle_found = is_mixed
43+
44+
45+
# Matches any one line ending: the CRLF, LF and CR patterns of
# ``LineEnding`` joined (in that order) as alternatives of a single group.
ANY_LINE_ENDING_PATTERN = re.compile(
    b'(' + b'|'.join(
        ending.regex.pattern
        for ending in (LineEnding.CRLF, LineEnding.LF, LineEnding.CR)
    ) + b')',
)
50+
51+
52+
def mixed_line_ending(argv=None):
    """Entry point of the hook.

    Parses ``argv`` and dispatches to the handler matching the ``--fix``
    option.

    :param argv: argument list handed to the argument parser; ``None``
        lets argparse fall back to ``sys.argv``.
    :return: the integer status returned by the selected handler.
    """
    parsed = _parse_arguments(argv)
    paths = parsed['filenames']
    mode = parsed['fix']

    if mode == MixedLineEndingOption.NO:
        # check only, never modify
        return _process_no_fix(paths)

    if mode == MixedLineEndingOption.AUTO:
        # convert to whichever ending is most frequent in each file
        return _process_fix_auto(paths)

    # a specific line ending was requested through --fix
    return _process_fix_force(paths, mode.line_ending_enum)
65+
66+
67+
def _parse_arguments(argv=None):
    """Parse the command line of the hook.

    :param argv: argument list to parse; ``None`` lets argparse use
        ``sys.argv``.
    :return: a dict with two keys: ``'fix'`` (the
        ``MixedLineEndingOption`` member selected by ``--fix``) and
        ``'filenames'`` (the list of positional filenames).
    """
    fix_choices = [option.opt_name for option in MixedLineEndingOption]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--fix',
        choices=fix_choices,
        default=MixedLineEndingOption.AUTO.opt_name,
        help='Replace line ending with the specified. Default is "auto"',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    parsed = parser.parse_args(argv)

    # Map the textual option back to its enum member; the single-element
    # unpacking insists on exactly one match.
    [selected] = [
        member
        for member in MixedLineEndingOption.__members__.values()
        if member.opt_name == parsed.fix
    ]

    return {'fix': selected, 'filenames': parsed.filenames}
90+
91+
92+
def _detect_line_ending(filename):
    """Classify the line endings used in ``filename``.

    The file is read as bytes and the occurrences of each ``LineEnding``
    are counted.

    :param filename: path of the file to inspect.
    :return: ``MixedLineDetection.NOT_MIXED`` when fewer than two kinds
        of line ending occur; the ``MIXED_MOSTLY_*`` member of the most
        frequent ending when there is a single winner;
        ``MixedLineDetection.UNKNOWN`` when several endings tie for the
        highest count.
    """
    with open(filename, 'rb') as f:
        contents = f.read()

    occurrences = {
        ending: len(ending.regex.findall(contents))
        for ending in LineEnding
    }

    # "Mixed" means at least two different endings are present.
    kinds_present = sum(1 for count in occurrences.values() if count > 0)
    if kinds_present < 2:
        return MixedLineDetection.NOT_MIXED

    highest = max(occurrences.values())
    leaders = [
        ending for ending, count in occurrences.items() if count == highest
    ]
    if len(leaders) != 1:
        # a tie: no single most frequent ending
        return MixedLineDetection.UNKNOWN

    dominant = leaders[0]
    for outcome in MixedLineDetection:
        if outcome.line_ending_enum == dominant:
            return outcome

    return MixedLineDetection.UNKNOWN
129+
130+
131+
def _process_no_fix(filenames):
    """Report files with mixed line endings without modifying them.

    :param filenames: iterable of paths to check.
    :return: ``1`` if at least one file has mixed line endings,
        ``0`` otherwise.
    """
    print('Checking if the files have mixed line ending.')

    mle_filenames = [
        filename for filename in filenames
        if _detect_line_ending(filename).mle_found
    ]

    if not mle_filenames:
        return 0

    # Interpolate with % -- passing the list as a second argument to
    # print() would emit the literal "%s" instead of the filenames.
    print(
        'The following files have mixed line endings:\n\t%s' %
        '\n\t'.join(mle_filenames)
    )

    return 1
150+
151+
152+
def _process_fix_auto(filenames):
    """Convert each mixed file to its most frequent line ending.

    Files that are not mixed are left alone.  Files that are mixed but
    have no single most frequent ending are reported and skipped.

    :param filenames: iterable of paths to process.
    :return: ``1`` if at least one file had mixed line endings (whether
        it was converted or skipped), ``0`` otherwise.
    """
    mle_found = False

    for filename in filenames:
        detect_result = _detect_line_ending(filename)

        # Every message below is built with % so that the values are
        # interpolated; handing them to print() as extra arguments would
        # leave the literal "%s" in the output.
        if detect_result == MixedLineDetection.NOT_MIXED:
            print('The file %s has no mixed line ending' % (filename,))
        elif detect_result == MixedLineDetection.UNKNOWN:
            print(
                'Could not define most frequent line ending in '
                'file %s. File skiped.' % (filename,)
            )

            mle_found = True
        else:
            le_enum = detect_result.line_ending_enum

            print(
                'The file %s has mixed line ending with a '
                'majority of %s. Converting...' %
                (filename, le_enum.str_print)
            )

            _convert_line_ending(filename, le_enum.string)
            mle_found = True

            print(
                'The file %s has been converted to %s line ending.' %
                (filename, le_enum.str_print)
            )

    return 1 if mle_found else 0
184+
185+
186+
def _process_fix_force(filenames, line_ending_enum):
    """Rewrite every file so that it only uses the given line ending.

    :param filenames: iterable of paths to convert.
    :param line_ending_enum: the ``LineEnding`` member to convert to.
    :return: always ``1``.
    """
    for filename in filenames:
        _convert_line_ending(filename, line_ending_enum.string)

        # Interpolate with % -- extra print() arguments would leave the
        # literal "%s" placeholders in the message.
        print(
            'The file %s has been forced to %s line ending.' %
            (filename, line_ending_enum.str_print)
        )

    return 1
196+
197+
198+
def _convert_line_ending(filename, line_ending):
    """Replace every line ending of ``filename`` with ``line_ending``.

    The file is rewritten in place.

    :param filename: path of the file to convert.
    :param line_ending: bytes to substitute for each match of
        ``ANY_LINE_ENDING_PATTERN``.
    """
    with open(filename, 'rb+') as handle:
        original = handle.read()
        converted = ANY_LINE_ENDING_PATTERN.sub(line_ending, original)

        # Overwrite from the start, then drop whatever is left of the
        # old content past the end of the new content.
        handle.seek(0)
        handle.write(converted)
        handle.truncate()
209+
210+
211+
# Run the hook when executed as a script; its return value becomes the
# process exit status.
if __name__ == '__main__':
    exit_status = mixed_line_ending()
    sys.exit(exit_status)

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
'simplejson',
3232
'six',
3333
],
34+
extras_require={':python_version=="2.7"': ['enum34']},
3435
entry_points={
3536
'console_scripts': [
3637
'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main',
@@ -53,6 +54,7 @@
5354
'file-contents-sorter = pre_commit_hooks.file_contents_sorter:main',
5455
'fix-encoding-pragma = pre_commit_hooks.fix_encoding_pragma:main',
5556
'forbid-new-submodules = pre_commit_hooks.forbid_new_submodules:main',
57+
'mixed-line-ending = pre_commit_hooks.mixed_line_ending:mixed_line_ending',
5658
'name-tests-test = pre_commit_hooks.tests_should_end_in_test:validate_files',
5759
'no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main',
5860
'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json',

0 commit comments

Comments
 (0)