Skip to content

Commit ed34048

Browse files
committed
Fix #1897 #3300 Add --git-only
1 parent a3c8113 commit ed34048

File tree

2 files changed

+282
-61
lines changed

2 files changed

+282
-61
lines changed

codespell_lib/_codespell.py

Lines changed: 108 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import itertools
2424
import os
2525
import re
26+
import subprocess
2627
import sys
2728
import textwrap
2829
from collections.abc import Iterable, Sequence
@@ -611,6 +612,11 @@ def parse_options(
611612
action="store_true",
612613
help="output just a single line for each misspelling in stdin mode",
613614
)
615+
parser.add_argument(
616+
"--git-only",
617+
action="store_true",
618+
help="When selected, only check files under git control",
619+
)
614620
parser.add_argument("--config", type=str, help="path to config file.")
615621
parser.add_argument("--toml", type=str, help="path to a pyproject.toml file.")
616622
parser.add_argument("files", nargs="*", help="files or directories to check")
@@ -1088,6 +1094,82 @@ def flatten_clean_comma_separated_arguments(
10881094
]
10891095

10901096

1097+
def get_git_tracked_files(
    root: str, files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """Return the git-tracked files selected by *files*.

    ``git ls-files`` is run with *root* as its working directory, so the
    returned paths are the ones git prints relative to *root*.

    Args:
        root: Directory in which git is executed.
        files: Files or directories to look up; a directory selects
            everything tracked beneath it.
        glob_match: Provides ``pattern_list``, the glob patterns of paths
            that must be left out of the result.
        check_hidden: When false, paths with a component starting with a
            dot are left out as well.

    Returns:
        A set of tracked file paths. The set is empty when git cannot be
        started or exits with an error (for example outside a repository).
    """
    # Turn every requested path into a pathspec for git ls-files.
    file_args = []
    for filename in files:
        # Relative names are resolved against root, because that is the
        # directory git runs in (not necessarily the process cwd).
        if os.path.isdir(os.path.join(root, filename)):
            file_args.append(f"{filename}/**")
        else:
            file_args.append(filename)

    # Add the glob patterns to exclude; tolerate an unset pattern list.
    exclude_patterns = [
        f":(exclude)**/{pattern}" for pattern in (glob_match.pattern_list or ())
    ]

    # Add patterns to exclude hidden files if check_hidden is False
    if not check_hidden:
        exclude_patterns.append(":(exclude)**/.*")
        exclude_patterns.append(":(exclude).*")

    git_executable = "git"  # Could be future option

    command = [
        git_executable,
        "ls-files",
        "-z",  # NUL-separated output; stops git from C-quoting unusual names
        "--",  # everything after this is a pathspec, never an option
        *file_args,
        *exclude_patterns,
    ]
    try:
        result = subprocess.run(  # noqa: S603
            command,
            cwd=root,
            capture_output=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # If git is missing or the command fails, assume no files are tracked
        return set()
    # Decode names the same way Python decodes paths from the file system.
    return {os.fsdecode(name) for name in result.stdout.split(b"\0") if name}
1133+
1134+
1135+
def build_file_list_with_os_walk(
    files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """Collect the files to check by walking the file system.

    Each entry of *files* is either added directly (plain file) or walked
    recursively with ``os.walk`` (directory). Entries, directories and files
    are dropped when ``is_hidden`` reports them as hidden for the given
    *check_hidden* setting or when *glob_match* matches them.

    Returns:
        The list of selected file paths, in discovery order.
    """
    collected = []
    for entry in files:
        # Hidden top-level entries are ignored outright.
        if is_hidden(entry, check_hidden):
            continue

        if not os.path.isdir(entry):
            # Plain file: keep it unless it is skipped or hidden.
            if not (glob_match.match(entry) or is_hidden(entry, check_hidden)):
                collected.append(entry)
            continue

        for current_dir, subdirs, names in os.walk(entry):
            if glob_match.match(current_dir):
                # Skipped (absolute) directory: do not descend any further.
                subdirs.clear()
                continue
            if is_hidden(current_dir, check_hidden):
                # The directory itself is hidden; its files are not collected.
                continue

            for name in names:
                # Drop hidden files and files skipped by bare name.
                if is_hidden(name, check_hidden) or glob_match.match(name):
                    continue
                path = os.path.join(current_dir, name)
                # Drop files skipped by their full path.
                if not glob_match.match(path):
                    collected.append(path)

            # Prune skipped or hidden (relative) directories in place so that
            # os.walk does not visit them.
            subdirs[:] = [
                subdir
                for subdir in subdirs
                if not (glob_match.match(subdir) or is_hidden(subdir, check_hidden))
            ]
    return collected
1171+
1172+
10911173
def _script_main() -> int:
10921174
"""Wrap to main() for setuptools."""
10931175
try:
@@ -1270,68 +1352,33 @@ def main(*args: str) -> int:
12701352
"try escaping special characters",
12711353
)
12721354

1273-
bad_count = 0
1274-
for filename in sorted(options.files):
1275-
# ignore hidden files
1276-
if is_hidden(filename, options.check_hidden):
1277-
continue
1278-
1279-
if os.path.isdir(filename):
1280-
for root, dirs, files in os.walk(filename):
1281-
if glob_match.match(root): # skip (absolute) directories
1282-
dirs.clear()
1283-
continue
1284-
if is_hidden(root, options.check_hidden): # dir itself hidden
1285-
continue
1286-
for file_ in sorted(files):
1287-
# ignore hidden files in directories
1288-
if is_hidden(file_, options.check_hidden):
1289-
continue
1290-
if glob_match.match(file_): # skip files
1291-
continue
1292-
fname = os.path.join(root, file_)
1293-
if glob_match.match(fname): # skip paths
1294-
continue
1295-
bad_count += parse_file(
1296-
fname,
1297-
colors,
1298-
summary,
1299-
misspellings,
1300-
ignore_words_cased,
1301-
exclude_lines,
1302-
file_opener,
1303-
word_regex,
1304-
ignore_word_regex,
1305-
uri_regex,
1306-
uri_ignore_words,
1307-
context,
1308-
options,
1309-
)
1310-
1311-
# skip (relative) directories
1312-
dirs[:] = [
1313-
dir_
1314-
for dir_ in dirs
1315-
if not glob_match.match(dir_)
1316-
and not is_hidden(dir_, options.check_hidden)
1317-
]
1355+
# Build the list of all files based on the git_only option
1356+
if options.git_only:
1357+
all_files = get_git_tracked_files(
1358+
os.getcwd(), options.files, glob_match, options.check_hidden
1359+
)
1360+
else:
1361+
all_files = build_file_list_with_os_walk(
1362+
options.files, glob_match, options.check_hidden
1363+
)
13181364

1319-
elif not glob_match.match(filename): # skip files
1320-
bad_count += parse_file(
1321-
filename,
1322-
colors,
1323-
summary,
1324-
misspellings,
1325-
ignore_words_cased,
1326-
exclude_lines,
1327-
file_opener,
1328-
word_regex,
1329-
ignore_word_regex,
1330-
uri_regex,
1331-
uri_ignore_words,
1332-
context,
1333-
options,
1334-
)
1365+
bad_count = 0
1366+
for filename in sorted(all_files):
1367+
bad_count += parse_file(
1368+
filename,
1369+
colors,
1370+
summary,
1371+
misspellings,
1372+
ignore_words_cased,
1373+
exclude_lines,
1374+
file_opener,
1375+
word_regex,
1376+
ignore_word_regex,
1377+
uri_regex,
1378+
uri_ignore_words,
1379+
context,
1380+
options,
1381+
)
13351382

13361383
if summary:
13371384
print("\n-------8<-------\nSUMMARY:")

codespell_lib/tests/test_basic.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ def test_exclude_file(
500500
bad_name.write_bytes(
501501
(combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
502502
)
503+
503504
assert cs.main(bad_name) == 18
504505
fname = tmp_path / "tmp.txt"
505506
fname.write_bytes(
@@ -520,6 +521,77 @@ def test_exclude_file(
520521
assert cs.main("-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name) == 1
521522

522523

524+
def run_git(path: Path, *args: Union[Path, str]) -> None:
    """Run ``git -C <path> <args...>``.

    Output is not captured. ``subprocess.CalledProcessError`` is raised when
    git exits with a non-zero status.
    """
    subprocess.run(  # noqa: S603
        ["git", "-C", path, *args],  # noqa: S607
        text=True,
        check=True,
        capture_output=False,
    )
531+
532+
533+
def test_git_only_exclude_file(
    tmp_path: Path, capsys: pytest.CaptureFixture[str], monkeypatch: pytest.MonkeyPatch
) -> None:
    """Test exclude file functionality combined with --git-only."""
    # Run from inside the repository so that git resolves paths against it.
    monkeypatch.chdir(tmp_path)
    bad_name = tmp_path / "bad.txt"
    # check all possible combinations of lines to ignore and ignores
    combinations = "".join(
        f"{n} abandonned {n}\n"
        f"{n} abandonned {n}\r\n"
        f"{n} abandonned {n} \n"
        f"{n} abandonned {n} \r\n"
        for n in range(1, 5)
    )
    bad_name.write_bytes(
        (combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
    )

    run_git(tmp_path, "init")
    run_git(tmp_path, "add", bad_name)

    assert cs.main(bad_name) == 18
    fname = tmp_path / "tmp.txt"
    fname.write_bytes(
        b"1 abandonned 1\n"
        b"2 abandonned 2\r\n"
        b"3 abandonned 3 \n"
        b"4 abandonned 4 \r\n"
        b"6 abandonned 6\n"
    )

    # Not adding fname to git to exclude it

    # Should have 23 total errors (bad_name + fname)
    assert cs.main(tmp_path) == 23

    # Before adding to git, should not report on fname, only 18 error in bad.txt
    assert cs.main("--git-only", tmp_path) == 18
    run_git(tmp_path, "add", fname)
    assert cs.main(tmp_path) == 23
    # After adding to git, should report on fname
    assert cs.main("--git-only", tmp_path) == 23
    # After adding to git, should not report on excluded file
    assert cs.main("--git-only", "-x", fname, tmp_path) == 1
    # comma-separated list of files
    fname_dummy1 = tmp_path / "dummy1.txt"
    fname_dummy1.touch()
    fname_dummy2 = tmp_path / "dummy2.txt"
    fname_dummy2.touch()
    run_git(tmp_path, "add", fname_dummy1, fname_dummy2)
    assert (
        cs.main(
            "--git-only", "-x", fname_dummy1, "-x", fname, "-x", fname_dummy2, bad_name
        )
        == 1
    )
    assert (
        cs.main("--git-only", "-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name)
        == 1
    )
593+
594+
523595
def test_encoding(
524596
tmp_path: Path,
525597
capsys: pytest.CaptureFixture[str],
@@ -637,6 +709,108 @@ def test_check_filename_irregular_file(
637709
assert cs.main("-f", tmp_path) == 1
638710

639711

712+
def test_check_hidden_git(
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test ignoring of hidden files when --git-only is used."""
    # Flag combinations exercised repeatedly below.
    git_only = ("--git-only",)
    with_hidden = ("--git-only", "--check-hidden")
    with_names = ("--git-only", "--check-hidden", "--check-filenames")

    monkeypatch.chdir(tmp_path)
    run_git(tmp_path, "init")

    # visible file
    #
    #         tmp_path
    #         └── test.txt
    #
    visible = tmp_path / "test.txt"
    visible.write_text("erorr\n")
    run_git(tmp_path, "add", ".")
    assert cs.main(*git_only, visible) == 1
    assert cs.main(*git_only, tmp_path) == 1

    # hidden file
    #
    #         tmp_path
    #         └── .test.txt
    #
    dot_file = tmp_path / ".test.txt"
    visible.rename(dot_file)
    run_git(tmp_path, "add", ".")
    assert cs.main(*git_only, dot_file) == 0
    assert cs.main(*git_only, tmp_path) == 0
    assert cs.main(*with_hidden, dot_file) == 1
    assert cs.main(*with_hidden, tmp_path) == 1

    # hidden file with typo in name
    #
    #         tmp_path
    #         └── .abandonned.txt
    #
    typo_file = tmp_path / ".abandonned.txt"
    dot_file.rename(typo_file)
    run_git(tmp_path, "add", ".")
    assert cs.main(*git_only, typo_file) == 0
    assert cs.main(*git_only, tmp_path) == 0
    assert cs.main(*with_hidden, typo_file) == 1
    assert cs.main(*with_hidden, tmp_path) == 1
    assert cs.main(*with_names, typo_file) == 2
    assert cs.main(*with_names, tmp_path) == 2

    # hidden directory
    #
    #         tmp_path
    #         ├── .abandonned
    #         │   ├── .abandonned.txt
    #         │   └── subdir
    #         │       └── .abandonned.txt
    #         └── .abandonned.txt
    #
    # Counts before the hidden directory is created.
    assert cs.main(*git_only, tmp_path) == 0
    assert cs.main(*with_hidden, tmp_path) == 1
    assert cs.main(*with_names, tmp_path) == 2
    hidden_dir = tmp_path / ".abandonned"
    hidden_dir.mkdir()
    copyfile(typo_file, hidden_dir / typo_file.name)
    nested_dir = hidden_dir / "subdir"
    nested_dir.mkdir()
    copyfile(typo_file, nested_dir / typo_file.name)
    run_git(tmp_path, "add", ".")
    assert cs.main(*git_only, tmp_path) == 0
    assert cs.main(*with_hidden, tmp_path) == 3
    assert cs.main(*with_names, tmp_path) == 8
    # check again with a relative path
    try:
        rel = op.relpath(tmp_path)
    except ValueError:
        # Windows: path is on mount 'C:', start on mount 'D:'
        pass
    else:
        assert cs.main(*git_only, rel) == 0
        assert cs.main(*with_hidden, rel) == 3
        assert cs.main(*with_names, rel) == 8

    # hidden subdirectory
    #
    #         tmp_path
    #         ├── .abandonned
    #         │   ├── .abandonned.txt
    #         │   └── subdir
    #         │       └── .abandonned.txt
    #         ├── .abandonned.txt
    #         └── subdir
    #             └── .abandonned
    #                 └── .abandonned.txt
    visible_dir = tmp_path / "subdir"
    visible_dir.mkdir()
    hidden_subdir = visible_dir / ".abandonned"
    hidden_subdir.mkdir()
    copyfile(typo_file, hidden_subdir / typo_file.name)
    run_git(tmp_path, "add", ".")
    assert cs.main(*git_only, tmp_path) == 0
    assert cs.main(*with_hidden, tmp_path) == 4
    assert cs.main(*with_names, tmp_path) == 11
812+
813+
640814
def test_check_hidden(
641815
tmp_path: Path,
642816
capsys: pytest.CaptureFixture[str],

0 commit comments

Comments
 (0)