Skip to content

Commit 868ae53

Browse files
committed
Fix #1897 #3300 Add --git-only
1 parent 75bab5c commit 868ae53

File tree

2 files changed

+282
-61
lines changed

2 files changed

+282
-61
lines changed

codespell_lib/_codespell.py

Lines changed: 108 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import itertools
2424
import os
2525
import re
26+
import subprocess
2627
import sys
2728
import textwrap
2829
from collections.abc import Iterable, Sequence
@@ -614,6 +615,11 @@ def parse_options(
614615
action="store_true",
615616
help="output just a single line for each misspelling in stdin mode",
616617
)
618+
parser.add_argument(
619+
"--git-only",
620+
action="store_true",
621+
help="When selected, only check files under git control",
622+
)
617623
parser.add_argument("--config", type=str, help="path to config file.")
618624
parser.add_argument("--toml", type=str, help="path to a pyproject.toml file.")
619625
parser.add_argument("files", nargs="*", help="files or directories to check")
@@ -1091,6 +1097,82 @@ def flatten_clean_comma_separated_arguments(
10911097
]
10921098

10931099

1100+
def get_git_tracked_files(
    root: str, files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """List the git-tracked files selected by *files*, minus skipped ones.

    Runs ``git ls-files`` with *root* as the working directory.

    Args:
        root: Directory in which git is executed.
        files: Files or directories to restrict the listing to. Directories
            are passed to git as ``<dir>/**`` so their contents are listed.
        glob_match: Skip patterns; every entry of ``glob_match.pattern_list``
            becomes an ``:(exclude)**/<pattern>`` pathspec.
        check_hidden: When false, dot-files and anything below a
            dot-directory are excluded as well.

    Returns:
        A set with the paths exactly as git prints them. The set is empty
        when git exits with a non-zero status (a warning is then written to
        stderr).
    """
    file_args = [
        f"{filename}/**" if os.path.isdir(filename) else filename
        for filename in files
    ]

    # Skip patterns are handed to git as "exclude" pathspecs.
    exclude_patterns = [
        f":(exclude)**/{pattern}" for pattern in glob_match.pattern_list
    ]

    if not check_hidden:
        # One pattern for hidden entries below a directory, one for hidden
        # entries directly in the working directory.
        exclude_patterns.append(":(exclude)**/.*")
        exclude_patterns.append(":(exclude).*")

    git_executable = "git"  # Could be future option

    # "-z" makes git emit NUL-terminated, unquoted paths, so names containing
    # non-ASCII or special characters come back verbatim.  "--" ends option
    # parsing, so a file argument starting with "-" is never read as a flag.
    command = [
        git_executable,
        "ls-files",
        "-z",
        "--",
        *file_args,
        *exclude_patterns,
    ]

    try:
        # Line-level suppression only: a "# ruff: noqa: S603" comment would
        # switch the rule off for the whole file.
        result = subprocess.run(  # noqa: S603
            command,
            cwd=root,
            capture_output=True,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        # Best effort: if the command fails, assume no files are tracked,
        # but say so instead of silently checking nothing.
        details = (exc.stderr or "").strip()
        print(
            f"WARNING: 'git ls-files' failed in {root}: {details}",
            file=sys.stderr,
        )
        return set()

    # The output ends with a NUL, which yields one trailing empty string.
    return {path for path in result.stdout.split("\0") if path}
1136+
1137+
1138+
def build_file_list_with_os_walk(
    files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """Collect the files to check by walking the filesystem.

    Each entry of *files* is either taken as-is (plain file) or walked
    recursively with ``os.walk`` (directory). Entries rejected by
    ``is_hidden(..., check_hidden)`` or accepted by ``glob_match.match`` are
    left out; the same two filters are applied to every directory, file name
    and joined path met during the walk.

    Returns:
        A list of paths in discovery order.
    """
    # NOTE(review): is_hidden() is defined elsewhere in this module;
    # presumably it flags dot-names unless check_hidden is set -- confirm.
    collected = []
    for entry in files:
        if is_hidden(entry, check_hidden):
            continue

        if not os.path.isdir(entry):
            # Plain file given on the command line.
            if not (glob_match.match(entry) or is_hidden(entry, check_hidden)):
                collected.append(entry)
            continue

        for current, subdirs, names in os.walk(entry):
            if glob_match.match(current):
                # Directory matched by its full path: do not descend further.
                subdirs.clear()
                continue
            if is_hidden(current, check_hidden):
                continue

            for name in names:
                # Filter on the bare file name first ...
                if is_hidden(name, check_hidden) or glob_match.match(name):
                    continue
                # ... then on the path joined with its directory.
                candidate = os.path.join(current, name)
                if not glob_match.match(candidate):
                    collected.append(candidate)

            # Prune in place so os.walk skips matched/hidden subdirectories.
            subdirs[:] = [
                sub
                for sub in subdirs
                if not (glob_match.match(sub) or is_hidden(sub, check_hidden))
            ]
    return collected
1174+
1175+
10941176
def _script_main() -> int:
10951177
"""Wrap to main() for setuptools."""
10961178
try:
@@ -1273,68 +1355,33 @@ def main(*args: str) -> int:
12731355
"try escaping special characters",
12741356
)
12751357

1276-
bad_count = 0
1277-
for filename in sorted(options.files):
1278-
# ignore hidden files
1279-
if is_hidden(filename, options.check_hidden):
1280-
continue
1281-
1282-
if os.path.isdir(filename):
1283-
for root, dirs, files in os.walk(filename):
1284-
if glob_match.match(root): # skip (absolute) directories
1285-
dirs.clear()
1286-
continue
1287-
if is_hidden(root, options.check_hidden): # dir itself hidden
1288-
continue
1289-
for file_ in sorted(files):
1290-
# ignore hidden files in directories
1291-
if is_hidden(file_, options.check_hidden):
1292-
continue
1293-
if glob_match.match(file_): # skip files
1294-
continue
1295-
fname = os.path.join(root, file_)
1296-
if glob_match.match(fname): # skip paths
1297-
continue
1298-
bad_count += parse_file(
1299-
fname,
1300-
colors,
1301-
summary,
1302-
misspellings,
1303-
ignore_words_cased,
1304-
exclude_lines,
1305-
file_opener,
1306-
word_regex,
1307-
ignore_word_regex,
1308-
uri_regex,
1309-
uri_ignore_words,
1310-
context,
1311-
options,
1312-
)
1313-
1314-
# skip (relative) directories
1315-
dirs[:] = [
1316-
dir_
1317-
for dir_ in dirs
1318-
if not glob_match.match(dir_)
1319-
and not is_hidden(dir_, options.check_hidden)
1320-
]
1358+
# Build the list of all files based on the git_only option
1359+
if options.git_only:
1360+
all_files = get_git_tracked_files(
1361+
os.getcwd(), options.files, glob_match, options.check_hidden
1362+
)
1363+
else:
1364+
all_files = build_file_list_with_os_walk(
1365+
options.files, glob_match, options.check_hidden
1366+
)
13211367

1322-
elif not glob_match.match(filename): # skip files
1323-
bad_count += parse_file(
1324-
filename,
1325-
colors,
1326-
summary,
1327-
misspellings,
1328-
ignore_words_cased,
1329-
exclude_lines,
1330-
file_opener,
1331-
word_regex,
1332-
ignore_word_regex,
1333-
uri_regex,
1334-
uri_ignore_words,
1335-
context,
1336-
options,
1337-
)
1368+
bad_count = 0
1369+
for filename in sorted(all_files):
1370+
bad_count += parse_file(
1371+
filename,
1372+
colors,
1373+
summary,
1374+
misspellings,
1375+
ignore_words_cased,
1376+
exclude_lines,
1377+
file_opener,
1378+
word_regex,
1379+
ignore_word_regex,
1380+
uri_regex,
1381+
uri_ignore_words,
1382+
context,
1383+
options,
1384+
)
13381385

13391386
if summary:
13401387
print("\n-------8<-------\nSUMMARY:")

codespell_lib/tests/test_basic.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ def test_exclude_file(
500500
bad_name.write_bytes(
501501
(combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
502502
)
503+
503504
assert cs.main(bad_name) == 18
504505
fname = tmp_path / "tmp.txt"
505506
fname.write_bytes(
@@ -520,6 +521,77 @@ def test_exclude_file(
520521
assert cs.main("-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name) == 1
521522

522523

524+
def run_git(path: Path, *args: Union[Path, str]) -> None:
    """Run ``git -C <path> <args...>``; raises if git exits non-zero."""
    command = ["git", "-C", path, *args]  # noqa: S607
    subprocess.run(  # noqa: S603
        command,
        capture_output=False,
        check=True,
        text=True,
    )
531+
532+
533+
def test_git_only_exclude_file(
    tmp_path: Path, capsys: pytest.CaptureFixture[str], monkeypatch: pytest.MonkeyPatch
) -> None:
    """Test exclude file functionality combined with --git-only."""
    # The docstring must be the first statement; the working directory is
    # switched afterwards so git and codespell both run inside tmp_path.
    monkeypatch.chdir(tmp_path)
    bad_name = tmp_path / "bad.txt"
    # check all possible combinations of lines to ignore and ignores
    combinations = "".join(
        f"{n} abandonned {n}\n"
        f"{n} abandonned {n}\r\n"
        f"{n} abandonned {n} \n"
        f"{n} abandonned {n} \r\n"
        for n in range(1, 5)
    )
    bad_name.write_bytes(
        (combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
    )

    run_git(tmp_path, "init")
    run_git(tmp_path, "add", bad_name)

    assert cs.main(bad_name) == 18
    fname = tmp_path / "tmp.txt"
    fname.write_bytes(
        b"1 abandonned 1\n"
        b"2 abandonned 2\r\n"
        b"3 abandonned 3 \n"
        b"4 abandonned 4 \r\n"
        b"6 abandonned 6\n"
    )

    # fname is deliberately not added to git yet, so --git-only ignores it

    # Should have 23 total errors (bad_name + fname)
    assert cs.main(tmp_path) == 23

    # Before adding to git, should not report on fname, only 18 errors in bad.txt
    assert cs.main("--git-only", tmp_path) == 18
    run_git(tmp_path, "add", fname)
    assert cs.main(tmp_path) == 23
    # After adding to git, should report on fname
    assert cs.main("--git-only", tmp_path) == 23
    # After adding to git, should not report on excluded file
    assert cs.main("--git-only", "-x", fname, tmp_path) == 1
    # comma-separated list of files
    fname_dummy1 = tmp_path / "dummy1.txt"
    fname_dummy1.touch()
    fname_dummy2 = tmp_path / "dummy2.txt"
    fname_dummy2.touch()
    run_git(tmp_path, "add", fname_dummy1, fname_dummy2)
    assert (
        cs.main(
            "--git-only", "-x", fname_dummy1, "-x", fname, "-x", fname_dummy2, bad_name
        )
        == 1
    )
    assert (
        cs.main("--git-only", "-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name)
        == 1
    )
593+
594+
523595
def test_encoding(
524596
tmp_path: Path,
525597
capsys: pytest.CaptureFixture[str],
@@ -637,6 +709,108 @@ def test_check_filename_irregular_file(
637709
assert cs.main("-f", tmp_path) == 1
638710

639711

712+
def test_check_hidden_git(
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test ignoring of hidden files when --git-only is in effect."""
    # Flag combinations exercised throughout, from least to most inclusive.
    git_only = ("--git-only",)
    with_hidden = (*git_only, "--check-hidden")
    with_names = (*with_hidden, "--check-filenames")
    all_modes = (git_only, with_hidden, with_names)

    monkeypatch.chdir(tmp_path)
    run_git(tmp_path, "init")

    # Step 1 -- a single visible file: tmp_path/test.txt
    visible = tmp_path / "test.txt"
    visible.write_text("erorr\n")
    run_git(tmp_path, "add", ".")
    for target in (visible, tmp_path):
        assert cs.main(*git_only, target) == 1

    # Step 2 -- the same file made hidden: tmp_path/.test.txt
    dotfile = tmp_path / ".test.txt"
    visible.rename(dotfile)
    run_git(tmp_path, "add", ".")
    for flags, expected in zip((git_only, with_hidden), (0, 1)):
        for target in (dotfile, tmp_path):
            assert cs.main(*flags, target) == expected

    # Step 3 -- hidden file whose name is itself a typo:
    # tmp_path/.abandonned.txt
    typo_file = tmp_path / ".abandonned.txt"
    dotfile.rename(typo_file)
    run_git(tmp_path, "add", ".")
    for flags, expected in zip(all_modes, (0, 1, 2)):
        for target in (typo_file, tmp_path):
            assert cs.main(*flags, target) == expected

    # Step 4 -- hidden directory. Baseline counts first, then add
    #   tmp_path/.abandonned/.abandonned.txt
    #   tmp_path/.abandonned/subdir/.abandonned.txt
    for flags, expected in zip(all_modes, (0, 1, 2)):
        assert cs.main(*flags, tmp_path) == expected
    hidden_dir = tmp_path / ".abandonned"
    hidden_dir.mkdir()
    copyfile(typo_file, hidden_dir / typo_file.name)
    nested = hidden_dir / "subdir"
    nested.mkdir()
    copyfile(typo_file, nested / typo_file.name)
    run_git(tmp_path, "add", ".")
    for flags, expected in zip(all_modes, (0, 3, 8)):
        assert cs.main(*flags, tmp_path) == expected

    # Same checks through a relative path, when one can be computed.
    try:
        rel = op.relpath(tmp_path)
    except ValueError:
        # Windows: path is on mount 'C:', start on mount 'D:'
        rel = None
    if rel is not None:
        for flags, expected in zip(all_modes, (0, 3, 8)):
            assert cs.main(*flags, rel) == expected

    # Step 5 -- hidden directory below a visible one; adds
    #   tmp_path/subdir/.abandonned/.abandonned.txt
    visible_dir = tmp_path / "subdir"
    visible_dir.mkdir()
    inner_hidden = visible_dir / ".abandonned"
    inner_hidden.mkdir()
    copyfile(typo_file, inner_hidden / typo_file.name)
    run_git(tmp_path, "add", ".")
    for flags, expected in zip(all_modes, (0, 4, 11)):
        assert cs.main(*flags, tmp_path) == expected
812+
813+
640814
def test_check_hidden(
641815
tmp_path: Path,
642816
capsys: pytest.CaptureFixture[str],

0 commit comments

Comments
 (0)