Skip to content

New gitignore implementation based on pathspec #45

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 5 additions & 32 deletions files_to_prompt/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import sys
from fnmatch import fnmatch

from files_to_prompt.utils import allowed_by_gitignore
import pathlib
import click

global_index = 1
Expand All @@ -24,25 +25,6 @@
}


def should_ignore(path, gitignore_rules):
    """
    Report whether ``path`` matches any of the given ignore rules.

    Only the final path component is compared against each rule, using
    shell-style ``fnmatch`` matching. If ``path`` is an existing directory,
    its name with a trailing ``/`` appended is also tried, so that
    directory-style rules such as ``build/`` can match.

    Parameters:
        path: Path of the file or directory to test.
        gitignore_rules: Iterable of fnmatch-style pattern strings.

    Returns:
        bool: True if at least one rule matches, otherwise False.
    """
    name = os.path.basename(path)
    # Evaluated lazily, rule by rule: the directory check only runs for a
    # rule whose plain-name match has already failed.
    return any(
        fnmatch(name, pattern)
        or (os.path.isdir(path) and fnmatch(name + "/", pattern))
        for pattern in gitignore_rules
    )


def read_gitignore(path):
    """
    Read the ignore rules from the ``.gitignore`` file in directory ``path``.

    Blank lines and lines beginning with ``#`` are dropped; every remaining
    line is returned with surrounding whitespace stripped.

    Parameters:
        path: Directory expected to contain a ``.gitignore`` file.

    Returns:
        list[str]: The rules in file order, or an empty list when the
        directory has no ``.gitignore`` file.
    """
    gitignore_path = os.path.join(path, ".gitignore")
    if not os.path.isfile(gitignore_path):
        return []
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (which is not UTF-8 on every system).
    with open(gitignore_path, "r", encoding="utf-8") as f:
        return [
            line.strip() for line in f if line.strip() and not line.startswith("#")
        ]


def add_line_numbers(content):
lines = content.splitlines()

Expand Down Expand Up @@ -104,7 +86,6 @@ def process_path(
include_hidden,
ignore_files_only,
ignore_gitignore,
gitignore_rules,
ignore_patterns,
writer,
claude_xml,
Expand All @@ -124,17 +105,13 @@ def process_path(
dirs[:] = [d for d in dirs if not d.startswith(".")]
files = [f for f in files if not f.startswith(".")]

root_path = pathlib.Path(root)
if not ignore_gitignore:
gitignore_rules.extend(read_gitignore(root))
dirs[:] = [
d
for d in dirs
if not should_ignore(os.path.join(root, d), gitignore_rules)
d for d in dirs if allowed_by_gitignore(root_path, root_path / d)
]
files = [
f
for f in files
if not should_ignore(os.path.join(root, f), gitignore_rules)
f for f in files if allowed_by_gitignore(root_path, root_path / f)
]

if ignore_patterns:
Expand Down Expand Up @@ -302,7 +279,6 @@ def cli(
# Combine paths from arguments and stdin
paths = [*paths, *stdin_paths]

gitignore_rules = []
writer = click.echo
fp = None
if output_file:
Expand All @@ -311,8 +287,6 @@ def cli(
for path in paths:
if not os.path.exists(path):
raise click.BadArgumentUsage(f"Path does not exist: {path}")
if not ignore_gitignore:
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
if claude_xml and path == paths[0]:
writer("<documents>")
process_path(
Expand All @@ -321,7 +295,6 @@ def cli(
include_hidden,
ignore_files_only,
ignore_gitignore,
gitignore_rules,
ignore_patterns,
writer,
claude_xml,
Expand Down
75 changes: 75 additions & 0 deletions files_to_prompt/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from pathlib import Path
from pathspec.gitignore import GitIgnoreSpec


def allowed_by_gitignore(root: Path, file_path: Path) -> bool:
    """
    Check whether a path should be included (i.e. not ignored) based on the
    .gitignore files found between ``root`` and the path's own directory.

    Every ``.gitignore`` in the directories from ``root`` down to the directory
    containing ``file_path`` is consulted in that order, so a matching rule in
    a deeper ``.gitignore`` overrides a matching rule from a shallower one.

    Parameters:
        root (Path): The directory at which the search for .gitignore files starts.
        file_path (Path): The file or directory to check; must be under root.

    Returns:
        bool: True if the path should be included (not ignored); False if it
        should be ignored.

    Raises:
        ValueError: If file_path is not located under root.
    """
    import sys  # local import: only needed for the unreadable-file warning

    abs_root = root.resolve()
    abs_file = file_path.resolve()

    # Ensure the path is under the provided root.
    try:
        rel_to_root = abs_file.relative_to(abs_root)
    except ValueError:
        raise ValueError(
            f"File {abs_file!r} is not under the root {abs_root!r}."
        ) from None

    # The root itself has no containing directory inside the search range, so
    # no rule can apply to it.
    if not rel_to_root.parts:
        return True

    # Directory-only rules such as "build/" match only paths that end with a
    # slash, so remember whether a slash has to be appended below.
    is_directory = abs_file.is_dir()

    # Directories from the root down to the one containing the path.
    directories = [abs_root]
    for part in rel_to_root.parts[:-1]:
        directories.append(directories[-1] / part)

    # None: no rule matched yet; True: an ignore rule matched last;
    # False: a negation ("!") rule matched last.
    ignored = None

    for directory in directories:
        gitignore_file = directory / ".gitignore"
        if not gitignore_file.is_file():
            continue
        try:
            lines = [
                line
                for line in gitignore_file.read_text(encoding="utf-8").splitlines()
                if line.strip()
            ]
        except (OSError, UnicodeError) as e:
            # Best effort: an unreadable .gitignore is skipped. The warning
            # goes to stderr so it cannot end up in output written to stdout.
            print(f"Could not read {gitignore_file}: {e}", file=sys.stderr)
            continue

        spec = GitIgnoreSpec.from_lines(lines)

        # .gitignore patterns are relative to the directory they live in.
        rel_file = abs_file.relative_to(directory).as_posix()
        if is_directory:
            rel_file += "/"

        result = spec.check_file(rel_file)
        if result.include is not None:
            ignored = result.include

    # No matching rule (None) and a negation match (False) both mean "include".
    return not ignored
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ classifiers = [
"License :: OSI Approved :: Apache Software License"
]
dependencies = [
"click"
"click",
"pathspec",
]

[project.urls]
Expand Down
54 changes: 54 additions & 0 deletions tests/test_allowed_by_gitignore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from files_to_prompt.utils import allowed_by_gitignore
from pathlib import Path


def test_allowed_by_gitignore(tmpdir):
    """
    Exercise allowed_by_gitignore against a small repository layout.

    Layout created under the temporary directory:
        repo/.gitignore      ignores "build/"
        repo/README.md
        repo/build/output.txt
        repo/src/.gitignore  ignores "temp.txt"
        repo/src/main.py, repo/src/temp.txt, repo/src/keep.txt
    """
    repo = Path(tmpdir) / "repo"
    build_dir = repo / "build"
    src_dir = repo / "src"
    for folder in (repo, build_dir, src_dir):
        folder.mkdir()

    output_file = build_dir / "output.txt"
    main_file = src_dir / "main.py"
    temp_file = src_dir / "temp.txt"
    keep_file = src_dir / "keep.txt"
    root_file = repo / "README.md"

    # Every file of the fixture, mapped to the text written into it.
    fixture = {
        repo / ".gitignore": "build/\n",
        output_file: "dummy build output",
        src_dir / ".gitignore": "temp.txt\n",
        main_file: "print('Hello')",
        temp_file: "should be ignored",
        keep_file: "keep this file",
        root_file: "# Repo README",
    }
    for target, text in fixture.items():
        target.write_text(text, encoding="utf-8")

    # (path, expected result, failure message)
    expectations = [
        # 1. File in "build" is ignored by the top-level .gitignore.
        (output_file, False, "build/output.txt should be ignored"),
        # 2. File in "src" that is ignored by src/.gitignore.
        (temp_file, False, "src/temp.txt should be ignored"),
        # 3. Files in "src" that no rule mentions are included.
        (main_file, True, "src/main.py should be included"),
        (keep_file, True, "src/keep.txt should be included"),
        # 4. File at the repo root that no rule mentions.
        (root_file, True, "repo/README.md should be included"),
    ]
    for target, expected, message in expectations:
        assert allowed_by_gitignore(repo, target) is expected, message
14 changes: 5 additions & 9 deletions tests/test_files_to_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_basic_functionality(tmpdir):
with open("test_dir/file2.txt", "w") as f:
f.write("Contents of file2")

result = runner.invoke(cli, ["test_dir"])
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
assert result.exit_code == 0
assert "test_dir/file1.txt" in result.output
assert "Contents of file1" in result.output
Expand All @@ -36,7 +36,7 @@ def test_include_hidden(tmpdir):
with open("test_dir/.hidden.txt", "w") as f:
f.write("Contents of hidden file")

result = runner.invoke(cli, ["test_dir"])
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
assert result.exit_code == 0
assert "test_dir/.hidden.txt" not in result.output

Expand All @@ -61,11 +61,9 @@ def test_ignore_gitignore(tmpdir):
with open("test_dir/nested_include/included2.txt", "w") as f:
f.write("This nested file should be included")
with open("test_dir/nested_ignore/.gitignore", "w") as f:
f.write("nested_ignore.txt")
f.write("*")
with open("test_dir/nested_ignore/nested_ignore.txt", "w") as f:
f.write("This nested file should not be included")
with open("test_dir/nested_ignore/actually_include.txt", "w") as f:
f.write("This nested file should actually be included")

result = runner.invoke(cli, ["test_dir", "-c"])
assert result.exit_code == 0
Expand All @@ -74,7 +72,6 @@ def test_ignore_gitignore(tmpdir):
assert filenames == {
"test_dir/included.txt",
"test_dir/nested_include/included2.txt",
"test_dir/nested_ignore/actually_include.txt",
}

result2 = runner.invoke(cli, ["test_dir", "-c", "--ignore-gitignore"])
Expand All @@ -86,7 +83,6 @@ def test_ignore_gitignore(tmpdir):
"test_dir/ignored.txt",
"test_dir/nested_include/included2.txt",
"test_dir/nested_ignore/nested_ignore.txt",
"test_dir/nested_ignore/actually_include.txt",
}


Expand Down Expand Up @@ -243,7 +239,7 @@ def test_binary_file_warning(tmpdir):
with open("test_dir/text_file.txt", "w") as f:
f.write("This is a text file")

result = runner.invoke(cli, ["test_dir"])
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
assert result.exit_code == 0

stdout = result.stdout
Expand Down Expand Up @@ -331,7 +327,7 @@ def test_line_numbers(tmpdir):
with open("test_dir/multiline.txt", "w") as f:
f.write(test_content)

result = runner.invoke(cli, ["test_dir"])
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
assert result.exit_code == 0
assert "1 First line" not in result.output
assert test_content in result.output
Expand Down