@@ -227,12 +227,14 @@ def init_chardet(self) -> None:
227227
228228 self .encdetector = UniversalDetector ()
229229
def open(self, filename: str) -> tuple[list[tuple[bool, int, list[str]]], str]:
    """Read *filename* using the configured decoding strategy.

    Dispatches to ``open_with_chardet`` when ``self.use_chardet`` is truthy
    and to ``open_with_internal`` otherwise, returning whatever the chosen
    reader returns: a ``(fragments, encoding)`` pair, where each fragment is
    an ``(ignore, first_line_index, lines)`` tuple.
    """
    reader = self.open_with_chardet if self.use_chardet else self.open_with_internal
    return reader(filename)
234234
235- def open_with_chardet (self , filename : str ) -> tuple [list [str ], str ]:
235+ def open_with_chardet (
236+ self , filename : str
237+ ) -> tuple [list [tuple [bool , int , list [str ]]], str ]:
236238 self .encdetector .reset ()
237239 with open (filename , "rb" ) as fb :
238240 for line in fb :
@@ -259,7 +261,9 @@ def open_with_chardet(self, filename: str) -> tuple[list[str], str]:
259261
260262 return lines , f .encoding
261263
262- def open_with_internal (self , filename : str ) -> tuple [list [str ], str ]:
264+ def open_with_internal (
265+ self , filename : str
266+ ) -> tuple [list [tuple [bool , int , list [str ]]], str ]:
263267 encoding = None
264268 first_try = True
265269 for encoding in ("utf-8" , "iso-8859-1" ):
@@ -286,21 +290,25 @@ def open_with_internal(self, filename: str) -> tuple[list[str], str]:
286290
287291 return lines , encoding
288292
def get_lines(self, f: TextIO) -> list[tuple[bool, int, list[str]]]:
    """Split the contents of *f* into line fragments.

    Returns a list of ``(ignore, first_line_index, lines)`` tuples:

    * ``ignore`` is True for text matched by ``self.ignore_multiline_regex``
      and False for everything else.
    * ``first_line_index`` is the 0-based index, within the whole text, of
      the line on which the fragment starts.
    * ``lines`` holds the fragment's lines with their line endings kept, so
      concatenating the lines of every fragment reproduces the input.

    Without an ignore regex the whole file is a single non-ignored fragment.
    A fragment may begin or end in the middle of a line (when a match starts
    or stops mid-line); its first or last entry is then a partial line, and
    ``first_line_index`` still names the line that partial text belongs to.
    """
    if not self.ignore_multiline_regex:
        return [(False, 0, f.readlines())]

    text = f.read()

    def split_segment(start: int, end: int) -> tuple[list[str], int]:
        """Split ``text[start:end]`` into lines; also count its line breaks."""
        pieces = text[start:end].splitlines(True)
        breaks = len(pieces)
        if pieces:
            last = pieces[-1]
            if last == last.splitlines()[0]:
                # The final piece has no terminator: it is a partial line
                # that continues in the next segment, not a completed line.
                breaks -= 1
            elif last.endswith("\r") and text[end : end + 1] == "\n":
                # The boundary falls inside a CRLF pair; the line break is
                # counted once, with the "\n" that opens the next segment.
                breaks -= 1
        return pieces, breaks

    fragments = []
    line_number = 0
    pos = 0
    for m in re.finditer(self.ignore_multiline_regex, text):
        # Text before the match is checked; the match itself is ignored.
        for ignore, start, end in (
            (False, pos, m.start()),
            (True, m.start(), m.end()),
        ):
            lines, breaks = split_segment(start, end)
            fragments.append((ignore, line_number, lines))
            # Advance by the line breaks actually consumed, so the next
            # fragment starts on the right line even when this one stops
            # mid-line or ends exactly on a line break.
            line_number += breaks
        pos = m.end()

    lines, _ = split_segment(pos, len(text))
    fragments.append((False, line_number, lines))
    return fragments
304312
305313
306314# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-
@@ -869,86 +877,31 @@ def apply_uri_ignore_words(
869877 return check_matches
870878
871879
872- def parse_file (
880+ def parse_lines (
881+ fragment : tuple [bool , int , list [str ]],
873882 filename : str ,
874883 colors : TermColors ,
875884 summary : Optional [Summary ],
876885 misspellings : dict [str , Misspelling ],
877886 ignore_words_cased : set [str ],
878887 exclude_lines : set [str ],
879- file_opener : FileOpener ,
880888 word_regex : Pattern [str ],
881889 ignore_word_regex : Optional [Pattern [str ]],
882890 uri_regex : Pattern [str ],
883891 uri_ignore_words : set [str ],
884892 context : Optional [tuple [int , int ]],
885893 options : argparse .Namespace ,
886- ) -> int :
894+ ) -> tuple [ int , bool ] :
887895 bad_count = 0
888- lines = None
889896 changed = False
890897
891- if filename == "-" :
892- f = sys .stdin
893- encoding = "utf-8"
894- lines = f .readlines ()
895- else :
896- if options .check_filenames :
897- for word in extract_words (filename , word_regex , ignore_word_regex ):
898- if word in ignore_words_cased :
899- continue
900- lword = word .lower ()
901- if lword not in misspellings :
902- continue
903- fix = misspellings [lword ].fix
904- fixword = fix_case (word , misspellings [lword ].data )
905-
906- if summary and fix :
907- summary .update (lword )
908-
909- cfilename = f"{ colors .FILE } { filename } { colors .DISABLE } "
910- cwrongword = f"{ colors .WWORD } { word } { colors .DISABLE } "
911- crightword = f"{ colors .FWORD } { fixword } { colors .DISABLE } "
912-
913- reason = misspellings [lword ].reason
914- if reason :
915- if options .quiet_level & QuietLevels .DISABLED_FIXES :
916- continue
917- creason = f" | { colors .FILE } { reason } { colors .DISABLE } "
918- else :
919- if options .quiet_level & QuietLevels .NON_AUTOMATIC_FIXES :
920- continue
921- creason = ""
922-
923- bad_count += 1
924-
925- print (f"{ cfilename } : { cwrongword } ==> { crightword } { creason } " )
926-
927- # ignore irregular files
928- if not os .path .isfile (filename ):
929- return bad_count
930-
931- try :
932- text = is_text_file (filename )
933- except PermissionError as e :
934- print (f"WARNING: { e .strerror } : { filename } " , file = sys .stderr )
935- return bad_count
936- except OSError :
937- return bad_count
938-
939- if not text :
940- if not options .quiet_level & QuietLevels .BINARY_FILE :
941- print (f"WARNING: Binary file: { filename } " , file = sys .stderr )
942- return bad_count
943- try :
944- lines , encoding = file_opener .open (filename )
945- except OSError :
946- return bad_count
898+ _ , fragment_line_number , lines = fragment
947899
948900 for i , line in enumerate (lines ):
949901 line = line .rstrip ()
950902 if not line or line in exclude_lines :
951903 continue
904+ line_number = fragment_line_number + i
952905
953906 extra_words_to_ignore = set ()
954907 match = inline_ignore_regex .search (line )
@@ -1035,7 +988,7 @@ def parse_file(
1035988 continue
1036989
1037990 cfilename = f"{ colors .FILE } { filename } { colors .DISABLE } "
1038- cline = f"{ colors .FILE } { i + 1 } { colors .DISABLE } "
991+ cline = f"{ colors .FILE } { line_number + 1 } { colors .DISABLE } "
1039992 cwrongword = f"{ colors .WWORD } { word } { colors .DISABLE } "
1040993 crightword = f"{ colors .FWORD } { fixword } { colors .DISABLE } "
1041994
@@ -1067,19 +1020,127 @@ def parse_file(
10671020 f"==> { crightword } { creason } "
10681021 )
10691022
1023+ return bad_count , changed
1024+
1025+
def parse_file(
    filename: str,
    colors: TermColors,
    summary: Optional[Summary],
    misspellings: dict[str, Misspelling],
    ignore_words_cased: set[str],
    exclude_lines: set[str],
    file_opener: FileOpener,
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
    uri_regex: Pattern[str],
    uri_ignore_words: set[str],
    context: Optional[tuple[int, int]],
    options: argparse.Namespace,
) -> int:
    """Check one file, or stdin when *filename* is ``"-"``, for misspellings.

    The input is read as a list of ``(ignore, first_line_index, lines)``
    fragments. Every fragment whose ``ignore`` flag is false is handed to
    ``parse_lines``; if any call reports a change, all fragments (ignored
    ones included) are written back: to stdout after a ``---`` separator
    for stdin, otherwise to *filename* using the encoding it was read with.

    For real files, the file name itself is checked first when
    ``options.check_filenames`` is set. Non-regular files, binary files and
    files that cannot be opened are skipped.

    Returns the number of misspellings counted for this file, including any
    found in the file name.
    """
    from_stdin = filename == "-"
    bad_count = 0

    # Read lines.
    if from_stdin:
        encoding = "utf-8"
        fragments = file_opener.get_lines(sys.stdin)
    else:
        if options.check_filenames:
            bad_count += _report_filename_misspellings(
                filename,
                colors,
                summary,
                misspellings,
                ignore_words_cased,
                word_regex,
                ignore_word_regex,
                options,
            )

        # Ignore irregular files.
        if not os.path.isfile(filename):
            return bad_count

        try:
            is_text = is_text_file(filename)
        except PermissionError as err:
            print(f"WARNING: {err.strerror}: {filename}", file=sys.stderr)
            return bad_count
        except OSError:
            return bad_count

        if not is_text:
            if not options.quiet_level & QuietLevels.BINARY_FILE:
                print(f"WARNING: Binary file: {filename}", file=sys.stderr)
            return bad_count

        try:
            fragments, encoding = file_opener.open(filename)
        except OSError:
            return bad_count

    # Parse lines, skipping the fragments flagged as ignored.
    changed = False
    for fragment in fragments:
        if fragment[0]:
            continue

        found, modified = parse_lines(
            fragment,
            filename,
            colors,
            summary,
            misspellings,
            ignore_words_cased,
            exclude_lines,
            word_regex,
            ignore_word_regex,
            uri_regex,
            uri_ignore_words,
            context,
            options,
        )
        bad_count += found
        if modified:
            changed = True

    if not changed:
        return bad_count

    # Write out lines; ignored fragments are emitted too so that the
    # complete text is reproduced.
    if from_stdin:
        print("---")
        for _, _, lines in fragments:
            for line in lines:
                print(line, end="")
        return bad_count

    if not options.quiet_level & QuietLevels.FIXES:
        print(
            f"{colors.FWORD}FIXED:{colors.DISABLE} {filename}",
            file=sys.stderr,
        )
    with open(filename, "w", encoding=encoding, newline="") as out:
        out.writelines(line for _, _, lines in fragments for line in lines)

    return bad_count


def _report_filename_misspellings(
    filename: str,
    colors: TermColors,
    summary: Optional[Summary],
    misspellings: dict[str, Misspelling],
    ignore_words_cased: set[str],
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
    options: argparse.Namespace,
) -> int:
    """Print misspellings found in *filename* itself; return how many."""
    reported = 0
    for word in extract_words(filename, word_regex, ignore_word_regex):
        if word in ignore_words_cased:
            continue
        lword = word.lower()
        if lword not in misspellings:
            continue

        entry = misspellings[lword]
        fix = entry.fix
        fixword = fix_case(word, entry.data)

        # The summary is updated before the quiet-level filtering below.
        if summary and fix:
            summary.update(lword)

        reason = entry.reason
        if reason:
            if options.quiet_level & QuietLevels.DISABLED_FIXES:
                continue
            creason = f" | {colors.FILE}{reason}{colors.DISABLE}"
        else:
            if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
                continue
            creason = ""

        cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
        cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
        crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"

        reported += 1
        print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}")

    return reported
10841145
10851146
0 commit comments