def get_git_tracked_files(
    root: str, files: Iterable[str], glob_match: "GlobMatch", check_hidden: bool
) -> Iterable[str]:
    """Return the git-tracked files selected by *files*.

    Runs ``git ls-files`` with *root* as working directory and returns the
    reported paths as a set (git reports them relative to that directory).

    Args:
        root: Directory in which ``git`` is executed.
        files: Files or directories to look up. Directories are expanded
            with a trailing ``/**`` pathspec. The directory test is done
            against the current working directory of this process, so
            relative entries are only meaningful when *root* is that
            directory.
        glob_match: Object whose ``pattern_list`` holds the skip patterns;
            each one is turned into an ``:(exclude)`` pathspec.
        check_hidden: When false, dot-files and dot-directories are
            excluded as well.

    Returns:
        The set of tracked paths. The set is empty when ``git`` cannot be
        started or exits with an error (for instance outside a work tree);
        a warning is printed to stderr in that case.
    """
    pathspecs = [
        f"{filename}/**" if os.path.isdir(filename) else filename
        for filename in files
    ]

    # NOTE(review): without pathspec "glob" magic, "**/" seems to require a
    # directory separator, so a pattern such as "*.log" may not exclude a
    # file sitting directly in ``root`` -- confirm against gitglossary.
    pathspecs.extend(
        f":(exclude)**/{pattern}" for pattern in glob_match.pattern_list
    )

    if not check_hidden:
        # One pattern for nested hidden entries, one for those at top level.
        pathspecs.append(":(exclude)**/.*")
        pathspecs.append(":(exclude).*")

    git_executable = "git"  # Could be future option

    try:
        # "-z" gives NUL-separated, unquoted names, so unusual file names
        # survive; "--" keeps names that start with "-" from being parsed
        # as options.
        result = subprocess.run(  # noqa: S603
            [git_executable, "ls-files", "-z", "--", *pathspecs],
            cwd=root,
            capture_output=True,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as exc:
        # Best effort: treat every failure as "nothing is tracked", but say
        # so instead of silently checking no files at all.
        stderr = getattr(exc, "stderr", None)
        detail = stderr.decode(errors="replace").strip() if stderr else str(exc)
        print(
            f"WARNING: Unable to list git-tracked files: {detail}",
            file=sys.stderr,
        )
        return set()

    return {os.fsdecode(entry) for entry in result.stdout.split(b"\0") if entry}


def build_file_list_with_os_walk(
    files: Iterable[str], glob_match: "GlobMatch", check_hidden: bool
) -> Iterable[str]:
    """Collect the files to check by walking the file system.

    Args:
        files: Files or directories to start from.
        glob_match: Skip-pattern matcher; anything for which
            ``glob_match.match`` is true is left out.
        check_hidden: Forwarded to ``is_hidden`` to decide whether hidden
            entries are kept.

    Returns:
        A list of file paths, in the order in which they were found.
    """
    all_files = []
    for filename in files:
        # ignore hidden files
        if is_hidden(filename, check_hidden):
            continue

        if not os.path.isdir(filename):
            if not glob_match.match(filename):  # skip files
                all_files.append(filename)
            continue

        for root, dirs, dirfiles in os.walk(filename):
            if glob_match.match(root):  # skip (absolute) directories
                dirs.clear()
                continue
            if is_hidden(root, check_hidden):  # dir itself hidden
                continue
            for file_ in dirfiles:
                # ignore hidden files in directories
                if is_hidden(file_, check_hidden):
                    continue
                if glob_match.match(file_):  # skip files
                    continue
                fname = os.path.join(root, file_)
                if glob_match.match(fname):  # skip paths
                    continue
                all_files.append(fname)

            # skip (relative) directories; editing ``dirs`` in place keeps
            # os.walk from descending into them
            dirs[:] = [
                dir_
                for dir_ in dirs
                if not glob_match.match(dir_) and not is_hidden(dir_, check_hidden)
            ]
    return all_files
def run_git(path: Path, *args: Union[Path, str]) -> None:
    """Run ``git -C <path> <args...>`` and raise if git exits with an error."""
    subprocess.run(  # noqa: S603
        ["git", "-C", path, *args],  # noqa: S607
        check=True,
    )


def test_git_only_exclude_file(
    tmp_path: Path, capsys: pytest.CaptureFixture[str], monkeypatch: pytest.MonkeyPatch
) -> None:
    """Test --git-only together with the exclude file functionality."""
    monkeypatch.chdir(tmp_path)
    bad_name = tmp_path / "bad.txt"
    # check all possible combinations of lines to ignore and ignores
    combinations = "".join(
        f"{n} abandonned {n}\n"
        f"{n} abandonned {n}\r\n"
        f"{n} abandonned {n} \n"
        f"{n} abandonned {n} \r\n"
        for n in range(1, 5)
    )
    bad_name.write_bytes(
        (combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
    )

    run_git(tmp_path, "init")
    run_git(tmp_path, "add", bad_name)

    assert cs.main(bad_name) == 18
    fname = tmp_path / "tmp.txt"
    fname.write_bytes(
        b"1 abandonned 1\n"
        b"2 abandonned 2\r\n"
        b"3 abandonned 3 \n"
        b"4 abandonned 4 \r\n"
        b"6 abandonned 6\n"
    )

    # fname is deliberately not added to git yet, so --git-only must skip it

    # Should have 23 total errors (bad_name + fname)
    assert cs.main(tmp_path) == 23

    # Before adding to git, should not report on fname, only 18 errors in bad.txt
    assert cs.main("--git-only", tmp_path) == 18
    run_git(tmp_path, "add", fname)
    assert cs.main(tmp_path) == 23
    # After adding to git, should report on fname
    assert cs.main("--git-only", tmp_path) == 23
    # After adding to git, should not report on excluded file
    assert cs.main("--git-only", "-x", fname, tmp_path) == 1
    # comma-separated list of files
    fname_dummy1 = tmp_path / "dummy1.txt"
    fname_dummy1.touch()
    fname_dummy2 = tmp_path / "dummy2.txt"
    fname_dummy2.touch()
    run_git(tmp_path, "add", fname_dummy1, fname_dummy2)
    assert (
        cs.main(
            "--git-only", "-x", fname_dummy1, "-x", fname, "-x", fname_dummy2, bad_name
        )
        == 1
    )
    assert (
        cs.main("--git-only", "-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name)
        == 1
    )


def test_check_hidden_git(
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test ignoring of hidden files when only git-tracked files are checked."""
    monkeypatch.chdir(tmp_path)
    run_git(tmp_path, "init")
    # visible file
    #
    # tmp_path
    # └── test.txt
    #
    fname = tmp_path / "test.txt"
    fname.write_text("erorr\n")
    run_git(tmp_path, "add", ".")
    assert cs.main("--git-only", fname) == 1
    assert cs.main("--git-only", tmp_path) == 1

    # hidden file
    #
    # tmp_path
    # └── .test.txt
    #
    hidden_file = tmp_path / ".test.txt"
    fname.rename(hidden_file)
    run_git(tmp_path, "add", ".")
    assert cs.main("--git-only", hidden_file) == 0
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", hidden_file) == 1
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 1

    # hidden file with typo in name
    #
    # tmp_path
    # └── .abandonned.txt
    #
    typo_file = tmp_path / ".abandonned.txt"
    hidden_file.rename(typo_file)
    run_git(tmp_path, "add", ".")
    assert cs.main("--git-only", typo_file) == 0
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", typo_file) == 1
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 1
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", typo_file) == 2
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", tmp_path) == 2

    # hidden directory
    #
    # tmp_path
    # ├── .abandonned
    # │   ├── .abandonned.txt
    # │   └── subdir
    # │       └── .abandonned.txt
    # └── .abandonned.txt
    #
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 1
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", tmp_path) == 2
    hidden = tmp_path / ".abandonned"
    hidden.mkdir()
    copyfile(typo_file, hidden / typo_file.name)
    subdir = hidden / "subdir"
    subdir.mkdir()
    copyfile(typo_file, subdir / typo_file.name)
    run_git(tmp_path, "add", ".")
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 3
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", tmp_path) == 8
    # check again with a relative path
    try:
        rel = op.relpath(tmp_path)
    except ValueError:
        # Windows: path is on mount 'C:', start on mount 'D:'
        pass
    else:
        assert cs.main("--git-only", rel) == 0
        assert cs.main("--git-only", "--check-hidden", rel) == 3
        assert cs.main("--git-only", "--check-hidden", "--check-filenames", rel) == 8

    # hidden subdirectory
    #
    # tmp_path
    # ├── .abandonned
    # │   ├── .abandonned.txt
    # │   └── subdir
    # │       └── .abandonned.txt
    # ├── .abandonned.txt
    # └── subdir
    #     └── .abandonned
    #         └── .abandonned.txt
    subdir = tmp_path / "subdir"
    subdir.mkdir()
    hidden = subdir / ".abandonned"
    hidden.mkdir()
    copyfile(typo_file, hidden / typo_file.name)
    run_git(tmp_path, "add", ".")
    assert cs.main("--git-only", tmp_path) == 0
    assert cs.main("--git-only", "--check-hidden", tmp_path) == 4
    assert cs.main("--git-only", "--check-hidden", "--check-filenames", tmp_path) == 11