Skip to content

Commit 200c31b

Browse files
authored
Fix three problems with --ignore-multiline-regex (#3832)
1 parent ff4bac3 commit 200c31b

File tree

2 files changed

+187
-88
lines changed

2 files changed

+187
-88
lines changed

codespell_lib/_codespell.py

Lines changed: 137 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,14 @@ def init_chardet(self) -> None:
227227

228228
self.encdetector = UniversalDetector()
229229

230-
def open(self, filename: str) -> tuple[list[tuple[bool, int, list[str]]], str]:
    """Read *filename* and return ``(fragments, encoding)``.

    The work is delegated to ``open_with_chardet`` when ``self.use_chardet``
    is truthy and to ``open_with_internal`` otherwise; the result of the
    chosen method is returned unchanged.
    """
    if self.use_chardet:
        return self.open_with_chardet(filename)
    return self.open_with_internal(filename)
234234

235-
def open_with_chardet(self, filename: str) -> tuple[list[str], str]:
235+
def open_with_chardet(
236+
self, filename: str
237+
) -> tuple[list[tuple[bool, int, list[str]]], str]:
236238
self.encdetector.reset()
237239
with open(filename, "rb") as fb:
238240
for line in fb:
@@ -259,7 +261,9 @@ def open_with_chardet(self, filename: str) -> tuple[list[str], str]:
259261

260262
return lines, f.encoding
261263

262-
def open_with_internal(self, filename: str) -> tuple[list[str], str]:
264+
def open_with_internal(
265+
self, filename: str
266+
) -> tuple[list[tuple[bool, int, list[str]]], str]:
263267
encoding = None
264268
first_try = True
265269
for encoding in ("utf-8", "iso-8859-1"):
@@ -286,21 +290,25 @@ def open_with_internal(self, filename: str) -> tuple[list[str], str]:
286290

287291
return lines, encoding
288292

289-
def get_lines(self, f: TextIO) -> list[tuple[bool, int, list[str]]]:
    """Read *f* and split its content into fragments for spell checking.

    Each fragment is a tuple ``(ignored, line_number, lines)``:

    * ``ignored`` is True for text matched by ``self.ignore_multiline_regex``
      and False for everything else;
    * ``line_number`` is the zero-based index of the input line on which the
      fragment starts;
    * ``lines`` is the fragment split with line endings kept, so that
      concatenating the lines of all fragments reproduces the input.

    When ``self.ignore_multiline_regex`` is falsy the whole input is returned
    as a single non-ignored fragment built from ``f.readlines()``.
    """
    if not self.ignore_multiline_regex:
        return [(False, 0, f.readlines())]

    text = f.read()
    fragments: list[tuple[bool, int, list[str]]] = []
    line_number = 0

    def add_fragment(ignored: bool, start: int, end: int) -> None:
        nonlocal line_number
        lines = text[start:end].splitlines(True)
        fragments.append((ignored, line_number, lines))
        if not lines:
            return
        last = lines[-1]
        # A match may begin or end in the middle of a line.  The trailing
        # partial line is continued by the next fragment, so it must not be
        # counted as a completed line here.
        ends_line = last.splitlines() != [last]
        if ends_line and last.endswith("\r") and text.startswith("\n", end):
            # The cut fell inside a "\r\n" pair: the line break is only
            # completed by the "\n" that opens the next fragment.
            ends_line = False
        line_number += len(lines) if ends_line else len(lines) - 1

    pos = 0
    for m in re.finditer(self.ignore_multiline_regex, text):
        add_fragment(False, pos, m.start())
        add_fragment(True, m.start(), m.end())
        pos = m.end()
    add_fragment(False, pos, len(text))
    return fragments
304312

305313

306314
# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-
@@ -869,86 +877,31 @@ def apply_uri_ignore_words(
869877
return check_matches
870878

871879

872-
def parse_file(
880+
def parse_lines(
881+
fragment: tuple[bool, int, list[str]],
873882
filename: str,
874883
colors: TermColors,
875884
summary: Optional[Summary],
876885
misspellings: dict[str, Misspelling],
877886
ignore_words_cased: set[str],
878887
exclude_lines: set[str],
879-
file_opener: FileOpener,
880888
word_regex: Pattern[str],
881889
ignore_word_regex: Optional[Pattern[str]],
882890
uri_regex: Pattern[str],
883891
uri_ignore_words: set[str],
884892
context: Optional[tuple[int, int]],
885893
options: argparse.Namespace,
886-
) -> int:
894+
) -> tuple[int, bool]:
887895
bad_count = 0
888-
lines = None
889896
changed = False
890897

891-
if filename == "-":
892-
f = sys.stdin
893-
encoding = "utf-8"
894-
lines = f.readlines()
895-
else:
896-
if options.check_filenames:
897-
for word in extract_words(filename, word_regex, ignore_word_regex):
898-
if word in ignore_words_cased:
899-
continue
900-
lword = word.lower()
901-
if lword not in misspellings:
902-
continue
903-
fix = misspellings[lword].fix
904-
fixword = fix_case(word, misspellings[lword].data)
905-
906-
if summary and fix:
907-
summary.update(lword)
908-
909-
cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
910-
cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
911-
crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"
912-
913-
reason = misspellings[lword].reason
914-
if reason:
915-
if options.quiet_level & QuietLevels.DISABLED_FIXES:
916-
continue
917-
creason = f" | {colors.FILE}{reason}{colors.DISABLE}"
918-
else:
919-
if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
920-
continue
921-
creason = ""
922-
923-
bad_count += 1
924-
925-
print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}")
926-
927-
# ignore irregular files
928-
if not os.path.isfile(filename):
929-
return bad_count
930-
931-
try:
932-
text = is_text_file(filename)
933-
except PermissionError as e:
934-
print(f"WARNING: {e.strerror}: {filename}", file=sys.stderr)
935-
return bad_count
936-
except OSError:
937-
return bad_count
938-
939-
if not text:
940-
if not options.quiet_level & QuietLevels.BINARY_FILE:
941-
print(f"WARNING: Binary file: {filename}", file=sys.stderr)
942-
return bad_count
943-
try:
944-
lines, encoding = file_opener.open(filename)
945-
except OSError:
946-
return bad_count
898+
_, fragment_line_number, lines = fragment
947899

948900
for i, line in enumerate(lines):
949901
line = line.rstrip()
950902
if not line or line in exclude_lines:
951903
continue
904+
line_number = fragment_line_number + i
952905

953906
extra_words_to_ignore = set()
954907
match = inline_ignore_regex.search(line)
@@ -1035,7 +988,7 @@ def parse_file(
1035988
continue
1036989

1037990
cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
1038-
cline = f"{colors.FILE}{i + 1}{colors.DISABLE}"
991+
cline = f"{colors.FILE}{line_number + 1}{colors.DISABLE}"
1039992
cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
1040993
crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"
1041994

@@ -1067,19 +1020,127 @@ def parse_file(
10671020
f"==> {crightword}{creason}"
10681021
)
10691022

1023+
return bad_count, changed
1024+
1025+
1026+
def _report_filename_misspellings(
    filename: str,
    colors: TermColors,
    summary: Optional[Summary],
    misspellings: dict[str, Misspelling],
    ignore_words_cased: set[str],
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
    options: argparse.Namespace,
) -> int:
    """Print misspelled words found in *filename* itself; return how many."""
    bad_count = 0
    for word in extract_words(filename, word_regex, ignore_word_regex):
        if word in ignore_words_cased:
            continue
        lword = word.lower()
        if lword not in misspellings:
            continue
        misspelling = misspellings[lword]
        fixword = fix_case(word, misspelling.data)

        # The summary is updated before the quiet-level checks below, so a
        # suppressed report is still recorded there.
        if summary and misspelling.fix:
            summary.update(lword)

        cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
        cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
        crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"

        reason = misspelling.reason
        if reason:
            if options.quiet_level & QuietLevels.DISABLED_FIXES:
                continue
            creason = f" | {colors.FILE}{reason}{colors.DISABLE}"
        else:
            if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
                continue
            creason = ""

        bad_count += 1
        print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}")
    return bad_count


def parse_file(
    filename: str,
    colors: TermColors,
    summary: Optional[Summary],
    misspellings: dict[str, Misspelling],
    ignore_words_cased: set[str],
    exclude_lines: set[str],
    file_opener: FileOpener,
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
    uri_regex: Pattern[str],
    uri_ignore_words: set[str],
    context: Optional[tuple[int, int]],
    options: argparse.Namespace,
) -> int:
    """Spell check one file (or stdin when *filename* is ``"-"``).

    The input is read as a list of fragments; fragments flagged as ignored
    are skipped and every other fragment is handed to ``parse_lines``.  If
    any call reports a change, all fragments (ignored ones included) are
    written back: to stdout after a ``---`` line for stdin, otherwise to
    *filename* using the encoding it was read with.

    Returns the number of misspellings counted, including those found in the
    file name itself when ``options.check_filenames`` is set.  Irregular,
    unreadable and binary files yield only the file-name count.
    """
    bad_count = 0

    # Read lines.
    if filename == "-":
        encoding = "utf-8"
        fragments = file_opener.get_lines(sys.stdin)
    else:
        if options.check_filenames:
            bad_count += _report_filename_misspellings(
                filename,
                colors,
                summary,
                misspellings,
                ignore_words_cased,
                word_regex,
                ignore_word_regex,
                options,
            )

        # ignore irregular files
        if not os.path.isfile(filename):
            return bad_count

        try:
            text = is_text_file(filename)
        except PermissionError as e:
            print(f"WARNING: {e.strerror}: {filename}", file=sys.stderr)
            return bad_count
        except OSError:
            return bad_count

        if not text:
            if not options.quiet_level & QuietLevels.BINARY_FILE:
                print(f"WARNING: Binary file: {filename}", file=sys.stderr)
            return bad_count

        try:
            fragments, encoding = file_opener.open(filename)
        except OSError:
            return bad_count

    # Parse lines.
    changed = False
    for fragment in fragments:
        ignore, _, _ = fragment
        if ignore:
            continue

        bad_count_update, changed_update = parse_lines(
            fragment,
            filename,
            colors,
            summary,
            misspellings,
            ignore_words_cased,
            exclude_lines,
            word_regex,
            ignore_word_regex,
            uri_regex,
            uri_ignore_words,
            context,
            options,
        )
        bad_count += bad_count_update
        changed = changed or changed_update

    # Write out lines, if changed.
    if changed:
        if filename == "-":
            print("---")
            for _, _, lines in fragments:
                for line in lines:
                    print(line, end="")
        else:
            if not options.quiet_level & QuietLevels.FIXES:
                print(
                    f"{colors.FWORD}FIXED:{colors.DISABLE} {filename}",
                    file=sys.stderr,
                )
            # Ignored fragments are written too, so the file keeps the text
            # that was excluded from checking.
            with open(filename, "w", encoding=encoding, newline="") as f:
                for _, _, lines in fragments:
                    f.writelines(lines)

    return bad_count
10841145

10851146

codespell_lib/tests/test_basic.py

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -952,19 +952,19 @@ def test_ignore_multiline_regex_option(
952952
assert code == EX_USAGE
953953
assert "usage:" in stdout
954954

955+
text = """
956+
Please see http://example.com/abandonned for info
957+
# codespell:ignore-begin
958+
'''
959+
abandonned
960+
abandonned
961+
'''
962+
# codespell:ignore-end
963+
abandonned
964+
"""
965+
955966
fname = tmp_path / "flag.txt"
956-
fname.write_text(
957-
"""
958-
Please see http://example.com/abandonned for info
959-
# codespell:ignore-begin
960-
'''
961-
abandonned
962-
abandonned
963-
'''
964-
# codespell:ignore-end
965-
abandonned
966-
"""
967-
)
967+
fname.write_text(text)
968968
assert cs.main(fname) == 4
969969
assert (
970970
cs.main(
@@ -975,6 +975,44 @@ def test_ignore_multiline_regex_option(
975975
== 2
976976
)
977977

978+
with FakeStdin(text):
979+
assert (
980+
cs.main(
981+
"-",
982+
"--ignore-multiline-regex",
983+
"codespell:ignore-begin.*codespell:ignore-end",
984+
)
985+
== 2
986+
)
987+
988+
fname.write_text("This\nThsi")
989+
cs.main(
990+
fname,
991+
"-w",
992+
"--ignore-multiline-regex",
993+
"codespell:ignore-begin.*codespell:ignore-end",
994+
)
995+
assert fname.read_text() == "This\nThis"
996+
997+
fname.write_text(text)
998+
cs.main(
999+
fname,
1000+
"-w",
1001+
"--ignore-multiline-regex",
1002+
"codespell:ignore-begin.*codespell:ignore-end",
1003+
)
1004+
fixed_text = """
1005+
Please see http://example.com/abandoned for info
1006+
# codespell:ignore-begin
1007+
'''
1008+
abandonned
1009+
abandonned
1010+
'''
1011+
# codespell:ignore-end
1012+
abandoned
1013+
"""
1014+
assert fname.read_text() == fixed_text
1015+
9781016

9791017
def test_uri_regex_option(
9801018
tmp_path: Path,

0 commit comments

Comments
 (0)