|
1 | | -import argparse |
2 | | -import re |
3 | | -import sys |
4 | | - |
5 | | -from enum import Enum |
6 | | - |
7 | | - |
8 | | -class LineEnding(Enum): |
9 | | - CR = b'\r', 'cr', re.compile(b'\r(?!\n)', re.DOTALL) |
10 | | - CRLF = b'\r\n', 'crlf', re.compile(b'\r\n', re.DOTALL) |
11 | | - LF = b'\n', 'lf', re.compile(b'(?<!\r)\n', re.DOTALL) |
12 | | - |
13 | | - def __init__(self, string, opt_name, regex): |
14 | | - self.string = string |
15 | | - self.str_print = repr(string) |
16 | | - self.opt_name = opt_name |
17 | | - self.regex = regex |
18 | | - |
19 | | - |
20 | | -class MixedLineEndingOption(Enum): |
21 | | - AUTO = 'auto', None |
22 | | - NO = 'no', None |
23 | | - CRLF = LineEnding.CRLF.opt_name, LineEnding.CRLF |
24 | | - LF = LineEnding.LF.opt_name, LineEnding.LF |
| 1 | +from __future__ import absolute_import |
| 2 | +from __future__ import print_function |
| 3 | +from __future__ import unicode_literals |
25 | 4 |
|
26 | | - def __init__(self, opt_name, line_ending_enum): |
27 | | - self.opt_name = opt_name |
28 | | - self.line_ending_enum = line_ending_enum |
29 | | - |
30 | | - |
31 | | -class MixedLineDetection(Enum): |
32 | | - NOT_MIXED = 1, False, None |
33 | | - UNKNOWN = 2, False, None |
34 | | - MIXED_MOSTLY_CRLF = 3, True, LineEnding.CRLF |
35 | | - MIXED_MOSTLY_LF = 4, True, LineEnding.LF |
36 | | - MIXED_MOSTLY_CR = 5, True, LineEnding.CR |
37 | | - |
38 | | - def __init__(self, index, mle_found, line_ending_enum): |
39 | | - # TODO hack to prevent enum overriding |
40 | | - self.index = index |
41 | | - self.mle_found = mle_found |
42 | | - self.line_ending_enum = line_ending_enum |
| 5 | +import argparse |
| 6 | +import collections |
43 | 7 |
|
44 | 8 |
|
45 | | -ANY_LINE_ENDING_PATTERN = re.compile( |
46 | | - b'(' + LineEnding.CRLF.regex.pattern + |
47 | | - b'|' + LineEnding.LF.regex.pattern + |
48 | | - b'|' + LineEnding.CR.regex.pattern + b')', |
49 | | -) |
# The three line terminators this hook recognises, as raw bytes.
CR = b'\r'
LF = b'\n'
CRLF = b'\r\n'
# Order matters twice: detection stops at the first suffix that matches, so
# CRLF has to be tried before LF; and when counts tie, the entry listed last
# wins, which gives the preference lf > crlf > cr.
ALL_ENDINGS = (CR, CRLF, LF)
# Maps each forced `--fix` choice to the terminator bytes it writes.
FIX_TO_LINE_ENDING = {'cr': CR, 'crlf': CRLF, 'lf': LF}
50 | 15 |
|
51 | 16 |
|
52 | | -def mixed_line_ending(argv=None): |
53 | | - options = _parse_arguments(argv) |
| 17 | +def _fix(filename, contents, ending): |
| 18 | + new_contents = b''.join( |
| 19 | + line.rstrip(b'\r\n') + ending for line in contents.splitlines(True) |
| 20 | + ) |
| 21 | + with open(filename, 'wb') as f: |
| 22 | + f.write(new_contents) |
54 | 23 |
|
55 | | - filenames = options['filenames'] |
56 | | - fix_option = options['fix'] |
57 | 24 |
|
58 | | - if fix_option == MixedLineEndingOption.NO: |
59 | | - return _process_no_fix(filenames) |
60 | | - elif fix_option == MixedLineEndingOption.AUTO: |
61 | | - return _process_fix_auto(filenames) |
62 | | - # when a line ending character is forced with --fix option |
def fix_filename(filename, fix):
    """Detect, and optionally normalise, the line endings of one file.

    The file is read as bytes and every line is classified by its terminator
    (CR, CRLF or LF). A final line without a terminator is not counted.

    Args:
        filename: path of the file to inspect and, if needed, rewrite.
        fix: 'no' to only report, 'auto' to convert a mixed file to its most
            common ending (ties resolved as lf > crlf > cr), or a key of
            FIX_TO_LINE_ENDING to force that ending.

    Returns:
        A truthy value when the file had a problem: for 'no' and 'auto',
        whether more than one kind of ending was present; for a forced
        ending, whether any line used a different ending (in which case the
        file has been rewritten). Falsy otherwise.
    """
    with open(filename, 'rb') as f:
        contents = f.read()

    counts = collections.defaultdict(int)

    for line in contents.splitlines(True):
        # ALL_ENDINGS lists CRLF ahead of LF, so a CRLF line is never
        # misclassified as LF.
        for ending in ALL_ENDINGS:
            if line.endswith(ending):
                counts[ending] += 1
                break

    # Some amount of mixed line endings
    mixed = sum(bool(x) for x in counts.values()) > 1

    if fix == 'no' or (fix == 'auto' and not mixed):
        return mixed

    if fix == 'auto':
        max_ending = LF
        max_lines = 0
        # ordering is important here such that lf > crlf > cr
        for ending_type in ALL_ENDINGS:
            # also important, using >= to find a max that prefers the last
            if counts[ending_type] >= max_lines:
                max_ending = ending_type
                max_lines = counts[ending_type]

        _fix(filename, contents, max_ending)
        return 1

    target_ending = FIX_TO_LINE_ENDING[fix]
    # Find out whether any line uses an ending *other* than the target.
    # The file may contain no line with the target ending at all, so the key
    # can be absent: `del` would raise KeyError there, `pop` does not.
    counts.pop(target_ending, None)
    other_endings = bool(sum(counts.values()))
    if other_endings:
        _fix(filename, contents, target_ending)
    return other_endings
65 | 63 |
|
66 | 64 |
|
67 | | -def _parse_arguments(argv=None): |
def main(argv=None):
    """Command-line entry point: check or fix each file named in `argv`.

    Args:
        argv: argument list to parse; None lets argparse use sys.argv.

    Returns:
        1 if `fix_filename` reported a problem for at least one file,
        otherwise 0.
    """
    fix_choices = ('auto', 'no') + tuple(FIX_TO_LINE_ENDING)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--fix',
        choices=fix_choices,
        default='auto',
        help='Replace line ending with the specified. Default is "auto"',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    # Build the whole list before reducing so that every file is processed
    # even after an earlier one has already been flagged.
    results = [fix_filename(name, args.fix) for name in args.filenames]
    return int(any(results))
209 | 80 |
|
210 | 81 |
|
if __name__ == '__main__':
    # The bare `exit` builtin is installed by the `site` module for
    # interactive use and is missing under `python -S` or in some embedded
    # interpreters. Raising SystemExit directly always works and needs no
    # import.
    raise SystemExit(main())
0 commit comments