diff --git a/files_to_prompt/cli.py b/files_to_prompt/cli.py
index 7eee04f..e2cfddb 100644
--- a/files_to_prompt/cli.py
+++ b/files_to_prompt/cli.py
@@ -1,7 +1,10 @@
 import os
+import pathlib
 import sys
 from fnmatch import fnmatch
 
 import click
 
+from files_to_prompt.utils import allowed_by_gitignore
+
 global_index = 1
@@ -24,25 +27,6 @@
 }
 
 
-def should_ignore(path, gitignore_rules):
-    for rule in gitignore_rules:
-        if fnmatch(os.path.basename(path), rule):
-            return True
-        if os.path.isdir(path) and fnmatch(os.path.basename(path) + "/", rule):
-            return True
-    return False
-
-
-def read_gitignore(path):
-    gitignore_path = os.path.join(path, ".gitignore")
-    if os.path.isfile(gitignore_path):
-        with open(gitignore_path, "r") as f:
-            return [
-                line.strip() for line in f if line.strip() and not line.startswith("#")
-            ]
-    return []
-
-
 def add_line_numbers(content):
     lines = content.splitlines()
 
@@ -104,7 +88,6 @@ def process_path(
     include_hidden,
     ignore_files_only,
     ignore_gitignore,
-    gitignore_rules,
     ignore_patterns,
     writer,
     claude_xml,
@@ -124,17 +107,16 @@ def process_path(
                 dirs[:] = [d for d in dirs if not d.startswith(".")]
                 files = [f for f in files if not f.startswith(".")]
 
+            root_path = pathlib.Path(root)
             if not ignore_gitignore:
-                gitignore_rules.extend(read_gitignore(root))
+                # Anchor at the top-level path so .gitignore files in parent
+                # directories also apply to entries found deeper in the walk.
+                top_path = pathlib.Path(path)
                 dirs[:] = [
-                    d
-                    for d in dirs
-                    if not should_ignore(os.path.join(root, d), gitignore_rules)
+                    d for d in dirs if allowed_by_gitignore(top_path, root_path / d)
                 ]
                 files = [
-                    f
-                    for f in files
-                    if not should_ignore(os.path.join(root, f), gitignore_rules)
+                    f for f in files if allowed_by_gitignore(top_path, root_path / f)
                 ]
 
             if ignore_patterns:
@@ -302,7 +284,6 @@ def cli(
     # Combine paths from arguments and stdin
     paths = [*paths, *stdin_paths]
 
-    gitignore_rules = []
     writer = click.echo
     fp = None
     if output_file:
@@ -311,8 +292,6 @@ def cli(
     for path in paths:
         if not os.path.exists(path):
             raise click.BadArgumentUsage(f"Path does not exist: {path}")
-        if not ignore_gitignore:
-            gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
         if claude_xml and path == paths[0]:
             writer("")
         process_path(
@@ -321,7 +300,6 @@ def cli(
             include_hidden,
             ignore_files_only,
             ignore_gitignore,
-            gitignore_rules,
             ignore_patterns,
             writer,
             claude_xml,
diff --git a/files_to_prompt/utils.py b/files_to_prompt/utils.py
new file mode 100644
index 0000000..ddd7bd9
--- /dev/null
+++ b/files_to_prompt/utils.py
@@ -0,0 +1,98 @@
+import sys
+from pathlib import Path
+from typing import Optional
+
+from pathspec.gitignore import GitIgnoreSpec
+
+
+def _load_gitignore_spec(directory: Path) -> Optional[GitIgnoreSpec]:
+    """
+    Compile the rules of the .gitignore file stored directly in a directory.
+
+    Parameters:
+        directory (Path): The directory whose own .gitignore is loaded.
+
+    Returns:
+        Optional[GitIgnoreSpec]: The compiled rules, or None when the directory
+        has no .gitignore file or the file cannot be read.
+    """
+    gitignore_file = directory / ".gitignore"
+    if not gitignore_file.is_file():
+        return None
+    try:
+        text = gitignore_file.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError) as e:
+        # Best effort: skip the unreadable file, and warn on stderr so the
+        # message never ends up inside the generated prompt on stdout.
+        print(f"Could not read {gitignore_file}: {e}", file=sys.stderr)
+        return None
+    # Blank lines carry no rule, so they are dropped before compiling.
+    lines = [line for line in text.splitlines() if line.strip()]
+    return GitIgnoreSpec.from_lines(lines)
+
+
+def allowed_by_gitignore(root: Path, file_path: Path) -> bool:
+    """
+    Check whether a file or directory should be included (i.e. not ignored)
+    based on all .gitignore files encountered from the root directory down to
+    the directory where the entry resides.
+
+    Rules are evaluated from the root downwards and the last matching rule
+    wins, so a deeper .gitignore can override a shallower one.
+
+    Parameters:
+        root (Path): The root directory under which .gitignore files are searched.
+        file_path (Path): The file or directory to be checked.
+
+    Returns:
+        bool: True if the entry should be included (not ignored); False if it
+        should be ignored.
+
+    Raises:
+        ValueError: If file_path is not located under root.
+    """
+    abs_root = root.resolve()
+    # Resolve only the parent directory. Resolving the entry itself would
+    # follow a symlink and could place it outside the root it is listed under.
+    abs_file = file_path.parent.resolve() / file_path.name
+
+    # Ensure the entry is under the provided root.
+    try:
+        rel_path = abs_file.relative_to(abs_root)
+    except ValueError:
+        raise ValueError(
+            f"File {abs_file!r} is not under the root {abs_root!r}."
+        ) from None
+
+    # The root itself cannot be ignored by rules that live inside it.
+    if not rel_path.parts:
+        return True
+
+    # Build a list of directories from the root to the entry's directory.
+    directories = [abs_root]
+    for part in rel_path.parent.parts:
+        directories.append(directories[-1] / part)
+
+    # Directory-only rules such as "build/" match only paths that end with a
+    # slash. A symlink is treated as a file, even when it points to a directory.
+    is_dir = abs_file.is_dir() and not abs_file.is_symlink()
+
+    # None until a rule matches; afterwards True when the last matching rule
+    # ignores the entry and False when a negation rule re-includes it.
+    ignored = None
+    for directory in directories:
+        spec = _load_gitignore_spec(directory)
+        if spec is None:
+            continue
+
+        # .gitignore patterns are relative to the directory they are in.
+        rel_file = abs_file.relative_to(directory).as_posix()
+        if is_dir:
+            rel_file += "/"
+
+        result = spec.check_file(rel_file)
+        if result.include is not None:
+            ignored = result.include
+
+    # With no matching rule, the entry is included by default.
+    return not ignored
diff --git a/pyproject.toml b/pyproject.toml
index c7bc943..f241495 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,8 @@ classifiers = [
     "License :: OSI Approved :: Apache Software License"
 ]
 dependencies = [
-    "click"
+    "click",
+    "pathspec>=0.12",
 ]
 
 [project.urls]
diff --git a/tests/test_allowed_by_gitignore.py b/tests/test_allowed_by_gitignore.py
new file mode 100644
index 0000000..f4ba46d
--- /dev/null
+++ b/tests/test_allowed_by_gitignore.py
@@ -0,0 +1,61 @@
+from pathlib import Path
+
+from files_to_prompt.utils import allowed_by_gitignore
+
+
+def test_allowed_by_gitignore(tmpdir):
+    # Create a temporary directory structure.
+    base = Path(tmpdir)
+    repo = base / "repo"
+    repo.mkdir()
+
+    # Create a top-level .gitignore in repo that ignores the "build/" directory.
+    (repo / ".gitignore").write_text("build/\n", encoding="utf-8")
+
+    # Create a "build" subdirectory and add an output file which should be ignored.
+    build_dir = repo / "build"
+    build_dir.mkdir()
+    output_file = build_dir / "output.txt"
+    output_file.write_text("dummy build output", encoding="utf-8")
+
+    # Create a "src" subdirectory with its own .gitignore.
+    src_dir = repo / "src"
+    src_dir.mkdir()
+    # In src, ignore "temp.txt"
+    (src_dir / ".gitignore").write_text("temp.txt\n", encoding="utf-8")
+
+    # Create files in "src"
+    main_file = src_dir / "main.py"
+    main_file.write_text("print('Hello')", encoding="utf-8")
+    temp_file = src_dir / "temp.txt"
+    temp_file.write_text("should be ignored", encoding="utf-8")
+    keep_file = src_dir / "keep.txt"
+    keep_file.write_text("keep this file", encoding="utf-8")
+
+    # Create a file at repo root that is not ignored.
+    root_file = repo / "README.md"
+    root_file.write_text("# Repo README", encoding="utf-8")
+
+    # Test cases:
+    # 1. File in "build" should be ignored.
+    assert (
+        allowed_by_gitignore(repo, output_file) is False
+    ), "build/output.txt should be ignored"
+
+    # 2. File in "src" that is ignored per src/.gitignore.
+    assert allowed_by_gitignore(repo, temp_file) is False, "src/temp.txt should be ignored"
+
+    # 3. Files in "src" not mentioned in .gitignore should be included.
+    assert allowed_by_gitignore(repo, main_file) is True, "src/main.py should be included"
+    assert allowed_by_gitignore(repo, keep_file) is True, "src/keep.txt should be included"
+
+    # 4. File at the repo root not mentioned in .gitignore.
+    assert (
+        allowed_by_gitignore(repo, root_file) is True
+    ), "repo/README.md should be included"
+
+    # 5. The "build" directory itself matches the directory-only rule "build/".
+    assert allowed_by_gitignore(repo, build_dir) is False, "build/ should be ignored"
+
+    # 6. Directories that no rule mentions are included.
+    assert allowed_by_gitignore(repo, src_dir) is True, "src/ should be included"
diff --git a/tests/test_files_to_prompt.py b/tests/test_files_to_prompt.py
index 5268995..f33e2cf 100644
--- a/tests/test_files_to_prompt.py
+++ b/tests/test_files_to_prompt.py
@@ -21,7 +21,7 @@ def test_basic_functionality(tmpdir):
         with open("test_dir/file2.txt", "w") as f:
             f.write("Contents of file2")
 
-        result = runner.invoke(cli, ["test_dir"])
+        result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
         assert result.exit_code == 0
         assert "test_dir/file1.txt" in result.output
         assert "Contents of file1" in result.output
@@ -36,7 +36,7 @@ def test_include_hidden(tmpdir):
         with open("test_dir/.hidden.txt", "w") as f:
             f.write("Contents of hidden file")
 
-        result = runner.invoke(cli, ["test_dir"])
+        result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
         assert result.exit_code == 0
         assert "test_dir/.hidden.txt" not in result.output
 
@@ -61,11 +61,9 @@ def test_ignore_gitignore(tmpdir):
         with open("test_dir/nested_include/included2.txt", "w") as f:
             f.write("This nested file should be included")
         with open("test_dir/nested_ignore/.gitignore", "w") as f:
-            f.write("nested_ignore.txt")
+            f.write("*")
         with open("test_dir/nested_ignore/nested_ignore.txt", "w") as f:
             f.write("This nested file should not be included")
-        with open("test_dir/nested_ignore/actually_include.txt", "w") as f:
-            f.write("This nested file should actually be included")
 
         result = runner.invoke(cli, ["test_dir", "-c"])
         assert result.exit_code == 0
@@ -74,7 +72,6 @@ def test_ignore_gitignore(tmpdir):
         assert filenames == {
             "test_dir/included.txt",
             "test_dir/nested_include/included2.txt",
-            "test_dir/nested_ignore/actually_include.txt",
         }
 
         result2 = runner.invoke(cli, ["test_dir", "-c", "--ignore-gitignore"])
@@ -86,7 +83,6 @@ def test_ignore_gitignore(tmpdir):
             "test_dir/ignored.txt",
             "test_dir/nested_include/included2.txt",
             "test_dir/nested_ignore/nested_ignore.txt",
-            "test_dir/nested_ignore/actually_include.txt",
         }
 
 
@@ -243,7 +239,7 @@ def test_binary_file_warning(tmpdir):
         with open("test_dir/text_file.txt", "w") as f:
             f.write("This is a text file")
 
-        result = runner.invoke(cli, ["test_dir"])
+        result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
         assert result.exit_code == 0
 
         stdout = result.stdout
@@ -331,7 +327,7 @@ def test_line_numbers(tmpdir):
         with open("test_dir/multiline.txt", "w") as f:
             f.write(test_content)
 
-        result = runner.invoke(cli, ["test_dir"])
+        result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
         assert result.exit_code == 0
         assert "1 First line" not in result.output
         assert test_content in result.output