Skip to content

Commit fbcd096

Browse files
committed
Simplify mixed-line-ending hook
1 parent 47c4d9e commit fbcd096

File tree

3 files changed

+139
-320
lines changed

3 files changed

+139
-320
lines changed
Lines changed: 63 additions & 192 deletions
Original file line number | Diff line number | Diff line change
@@ -1,212 +1,83 @@
1-
import argparse
2-
import re
3-
import sys
4-
5-
from enum import Enum
6-
7-
8-
class LineEnding(Enum):
9-
CR = b'\r', 'cr', re.compile(b'\r(?!\n)', re.DOTALL)
10-
CRLF = b'\r\n', 'crlf', re.compile(b'\r\n', re.DOTALL)
11-
LF = b'\n', 'lf', re.compile(b'(?<!\r)\n', re.DOTALL)
12-
13-
def __init__(self, string, opt_name, regex):
14-
self.string = string
15-
self.str_print = repr(string)
16-
self.opt_name = opt_name
17-
self.regex = regex
18-
19-
20-
class MixedLineEndingOption(Enum):
21-
AUTO = 'auto', None
22-
NO = 'no', None
23-
CRLF = LineEnding.CRLF.opt_name, LineEnding.CRLF
24-
LF = LineEnding.LF.opt_name, LineEnding.LF
1+
from __future__ import absolute_import
2+
from __future__ import print_function
3+
from __future__ import unicode_literals
254

26-
def __init__(self, opt_name, line_ending_enum):
27-
self.opt_name = opt_name
28-
self.line_ending_enum = line_ending_enum
29-
30-
31-
class MixedLineDetection(Enum):
32-
NOT_MIXED = 1, False, None
33-
UNKNOWN = 2, False, None
34-
MIXED_MOSTLY_CRLF = 3, True, LineEnding.CRLF
35-
MIXED_MOSTLY_LF = 4, True, LineEnding.LF
36-
MIXED_MOSTLY_CR = 5, True, LineEnding.CR
37-
38-
def __init__(self, index, mle_found, line_ending_enum):
39-
# TODO hack to prevent enum overriding
40-
self.index = index
41-
self.mle_found = mle_found
42-
self.line_ending_enum = line_ending_enum
5+
import argparse
6+
import collections
437

448

45-
ANY_LINE_ENDING_PATTERN = re.compile(
46-
b'(' + LineEnding.CRLF.regex.pattern +
47-
b'|' + LineEnding.LF.regex.pattern +
48-
b'|' + LineEnding.CR.regex.pattern + b')',
49-
)
9+
# The three line terminators this hook recognises, as raw bytes.
CR, CRLF, LF = b'\r', b'\r\n', b'\n'
# Tuple order matters twice over: CRLF comes before LF so that a b'\r\n' line
# is matched as CRLF rather than as a bare LF, and when counts tie the entry
# listed last wins, giving the preference LF over CRLF over CR.
ALL_ENDINGS = (CR, CRLF, LF)
# Maps each explicit `--fix` choice to the bytes written for it.
FIX_TO_LINE_ENDING = {
    'cr': CR,
    'crlf': CRLF,
    'lf': LF,
}
5015

5116

52-
def mixed_line_ending(argv=None):
53-
options = _parse_arguments(argv)
17+
def _fix(filename, contents, ending):
    """Overwrite ``filename`` with ``contents`` normalised to one line ending.

    ``contents`` is the current content of the file as bytes and ``ending``
    is the terminator (bytes) that every terminated line should end with.
    The file is rewritten in binary mode; nothing is returned.

    A final line that has no terminator is written back unterminated, so
    this function only ever replaces endings and never adds one at the end
    of the file.
    """
    fixed_lines = []
    for line in contents.splitlines(True):
        body = line.rstrip(b'\r\n')
        # Nothing stripped means the line had no terminator at all (only
        # possible for the last line); keep it as-is instead of appending
        # `ending`, which would change more than the line endings.
        fixed_lines.append(line if body == line else body + ending)

    with open(filename, 'wb') as f:
        f.write(b''.join(fixed_lines))
5423

55-
filenames = options['filenames']
56-
fix_option = options['fix']
5724

58-
if fix_option == MixedLineEndingOption.NO:
59-
return _process_no_fix(filenames)
60-
elif fix_option == MixedLineEndingOption.AUTO:
61-
return _process_fix_auto(filenames)
62-
# when a line ending character is forced with --fix option
25+
def fix_filename(filename, fix):
    """Detect and, depending on ``fix``, repair line endings in one file.

    ``fix`` is ``'no'``, ``'auto'`` or one of the keys of
    ``FIX_TO_LINE_ENDING``:

    * ``'no'`` -- only report; the file is never written.
    * ``'auto'`` -- if the file mixes endings, rewrite it with the most
      frequent one (on a tie the ending listed last in ``ALL_ENDINGS``
      wins).
    * an explicit ending -- rewrite the file with that ending if any line
      uses a different one.

    Returns a truthy value when the file had mixed endings (``'no'`` and
    ``'auto'``) or contained endings other than the requested one (explicit
    ending), and a falsy value otherwise.
    """
    with open(filename, 'rb') as f:
        contents = f.read()

    counts = collections.defaultdict(int)

    for line in contents.splitlines(True):
        # First match wins, so the order of ALL_ENDINGS decides how a line
        # ending in b'\r\n' is classified (CRLF, not LF).
        for ending in ALL_ENDINGS:
            if line.endswith(ending):
                counts[ending] += 1
                break

    # Some amount of mixed line endings
    mixed = sum(bool(x) for x in counts.values()) > 1

    if fix == 'no' or (fix == 'auto' and not mixed):
        return mixed

    if fix == 'auto':
        max_ending = LF
        max_lines = 0
        # ordering is important here such that lf > crlf > cr
        for ending_type in ALL_ENDINGS:
            # also important, using >= to find a max that prefers the last
            if counts[ending_type] >= max_lines:
                max_ending = ending_type
                max_lines = counts[ending_type]

        _fix(filename, contents, max_ending)
        return 1
    else:
        target_ending = FIX_TO_LINE_ENDING[fix]
        # find if there are lines with *other* endings
        # The file may contain no line with the target ending at all.
        # Deleting a key does not go through defaultdict's default factory,
        # so `del counts[target_ending]` raised KeyError in that case; pop
        # with a default tolerates the missing key.
        counts.pop(target_ending, None)
        other_endings = bool(sum(counts.values()))
        if other_endings:
            _fix(filename, contents, target_ending)
        return other_endings
6563

6664

67-
def _parse_arguments(argv=None):
65+
def main(argv=None):
6866
parser = argparse.ArgumentParser()
6967
parser.add_argument(
70-
'-f',
71-
'--fix',
72-
choices=[m.opt_name for m in MixedLineEndingOption],
73-
default=MixedLineEndingOption.AUTO.opt_name,
68+
'-f', '--fix',
69+
choices=('auto', 'no') + tuple(FIX_TO_LINE_ENDING),
70+
default='auto',
7471
help='Replace line ending with the specified. Default is "auto"',
7572
)
7673
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
7774
args = parser.parse_args(argv)
7875

79-
fix, = (
80-
member for name, member
81-
in MixedLineEndingOption.__members__.items()
82-
if member.opt_name == args.fix
83-
)
84-
85-
options = {
86-
'fix': fix, 'filenames': args.filenames,
87-
}
88-
89-
return options
90-
91-
92-
def _detect_line_ending(filename):
93-
with open(filename, 'rb') as f:
94-
buf = f.read()
95-
96-
le_counts = {}
97-
98-
for le_enum in LineEnding:
99-
le_counts[le_enum] = len(le_enum.regex.findall(buf))
100-
101-
mixed = False
102-
le_found_previously = False
103-
most_le = None
104-
max_le_count = 0
105-
106-
for le, le_count in le_counts.items():
107-
le_found_cur = le_count > 0
108-
109-
mixed |= le_found_previously and le_found_cur
110-
le_found_previously |= le_found_cur
111-
112-
if le_count == max_le_count:
113-
most_le = None
114-
elif le_count > max_le_count:
115-
max_le_count = le_count
116-
most_le = le
117-
118-
if not mixed:
119-
return MixedLineDetection.NOT_MIXED
120-
121-
for mld in MixedLineDetection:
122-
if (
123-
mld.line_ending_enum is not None and
124-
mld.line_ending_enum == most_le
125-
):
126-
return mld
127-
128-
return MixedLineDetection.UNKNOWN
129-
130-
131-
def _process_no_fix(filenames):
132-
print('Checking if the files have mixed line ending.')
133-
134-
mle_filenames = []
135-
for filename in filenames:
136-
detect_result = _detect_line_ending(filename)
137-
138-
if detect_result.mle_found:
139-
mle_filenames.append(filename)
140-
141-
mle_found = len(mle_filenames) > 0
142-
143-
if mle_found:
144-
print(
145-
'The following files have mixed line endings:\n\t%s',
146-
'\n\t'.join(mle_filenames),
147-
)
148-
149-
return 1 if mle_found else 0
150-
151-
152-
def _process_fix_auto(filenames):
153-
mle_found = False
154-
155-
for filename in filenames:
156-
detect_result = _detect_line_ending(filename)
157-
158-
if detect_result == MixedLineDetection.NOT_MIXED:
159-
print('The file %s has no mixed line ending', filename)
160-
elif detect_result == MixedLineDetection.UNKNOWN:
161-
print(
162-
'Could not define most frequent line ending in '
163-
'file %s. File skiped.', filename,
164-
)
165-
166-
mle_found = True
167-
else:
168-
le_enum = detect_result.line_ending_enum
169-
170-
print(
171-
'The file %s has mixed line ending with a '
172-
'majority of %s. Converting...', filename, le_enum.str_print,
173-
)
174-
175-
_convert_line_ending(filename, le_enum.string)
176-
mle_found = True
177-
178-
print(
179-
'The file %s has been converted to %s line ending.',
180-
filename, le_enum.str_print,
181-
)
182-
183-
return 1 if mle_found else 0
184-
185-
186-
def _process_fix_force(filenames, line_ending_enum):
187-
for filename in filenames:
188-
_convert_line_ending(filename, line_ending_enum.string)
189-
190-
print(
191-
'The file %s has been forced to %s line ending.',
192-
filename, line_ending_enum.str_print,
193-
)
194-
195-
return 1
196-
197-
198-
def _convert_line_ending(filename, line_ending):
199-
with open(filename, 'rb+') as f:
200-
bufin = f.read()
201-
202-
# convert line ending
203-
bufout = ANY_LINE_ENDING_PATTERN.sub(line_ending, bufin)
204-
205-
# write the result in the file replacing the existing content
206-
f.seek(0)
207-
f.write(bufout)
208-
f.truncate()
76+
retv = 0
77+
for filename in args.filenames:
78+
retv |= fix_filename(filename, args.fix)
79+
return retv
20980

21081

21182
if __name__ == '__main__':
    # The bare `exit` helper is installed by the `site` module and is not
    # available under `python -S` or in embedded interpreters. Raising
    # SystemExit gives the same exit status without needing any import.
    raise SystemExit(main())

setup.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131
'simplejson',
3232
'six',
3333
],
34-
extras_require={':python_version=="2.7"': ['enum34']},
3534
entry_points={
3635
'console_scripts': [
3736
'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main',

0 commit comments

Comments
 (0)