Skip to content

Commit e853b6d

Browse files
committed
New gitignore implementation based on pathspec
Refs #40
1 parent 6164edf commit e853b6d

File tree

5 files changed

+141
-42
lines changed

5 files changed

+141
-42
lines changed

files_to_prompt/cli.py

Lines changed: 5 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import os
22
import sys
33
from fnmatch import fnmatch
4-
4+
from files_to_prompt.utils import allowed_by_gitignore
5+
import pathlib
56
import click
67

78
global_index = 1
@@ -24,25 +25,6 @@
2425
}
2526

2627

27-
def should_ignore(path, gitignore_rules):
28-
for rule in gitignore_rules:
29-
if fnmatch(os.path.basename(path), rule):
30-
return True
31-
if os.path.isdir(path) and fnmatch(os.path.basename(path) + "/", rule):
32-
return True
33-
return False
34-
35-
36-
def read_gitignore(path):
37-
gitignore_path = os.path.join(path, ".gitignore")
38-
if os.path.isfile(gitignore_path):
39-
with open(gitignore_path, "r") as f:
40-
return [
41-
line.strip() for line in f if line.strip() and not line.startswith("#")
42-
]
43-
return []
44-
45-
4628
def add_line_numbers(content):
4729
lines = content.splitlines()
4830

@@ -104,7 +86,6 @@ def process_path(
10486
include_hidden,
10587
ignore_files_only,
10688
ignore_gitignore,
107-
gitignore_rules,
10889
ignore_patterns,
10990
writer,
11091
claude_xml,
@@ -124,17 +105,13 @@ def process_path(
124105
dirs[:] = [d for d in dirs if not d.startswith(".")]
125106
files = [f for f in files if not f.startswith(".")]
126107

108+
root_path = pathlib.Path(root)
127109
if not ignore_gitignore:
128-
gitignore_rules.extend(read_gitignore(root))
129110
dirs[:] = [
130-
d
131-
for d in dirs
132-
if not should_ignore(os.path.join(root, d), gitignore_rules)
111+
d for d in dirs if allowed_by_gitignore(root_path, root_path / d)
133112
]
134113
files = [
135-
f
136-
for f in files
137-
if not should_ignore(os.path.join(root, f), gitignore_rules)
114+
f for f in files if allowed_by_gitignore(root_path, root_path / f)
138115
]
139116

140117
if ignore_patterns:
@@ -302,7 +279,6 @@ def cli(
302279
# Combine paths from arguments and stdin
303280
paths = [*paths, *stdin_paths]
304281

305-
gitignore_rules = []
306282
writer = click.echo
307283
fp = None
308284
if output_file:
@@ -311,8 +287,6 @@ def cli(
311287
for path in paths:
312288
if not os.path.exists(path):
313289
raise click.BadArgumentUsage(f"Path does not exist: {path}")
314-
if not ignore_gitignore:
315-
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
316290
if claude_xml and path == paths[0]:
317291
writer("<documents>")
318292
process_path(
@@ -321,7 +295,6 @@ def cli(
321295
include_hidden,
322296
ignore_files_only,
323297
ignore_gitignore,
324-
gitignore_rules,
325298
ignore_patterns,
326299
writer,
327300
claude_xml,

files_to_prompt/utils.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from pathlib import Path
2+
from pathspec.gitignore import GitIgnoreSpec
3+
4+
5+
def allowed_by_gitignore(root: Path, file_path: Path) -> bool:
    """
    Decide whether a path should be included according to .gitignore files.

    Every ``.gitignore`` found in the directories from ``root`` down to the
    directory containing ``file_path`` is consulted in that order. The last
    .gitignore whose rules match the path decides the outcome, so a deeper
    file can override a decision made by a shallower one.

    Parameters:
        root (Path): The top-most directory whose .gitignore files are considered.
        file_path (Path): The file or directory to be checked.

    Returns:
        bool: True if the path should be included (not ignored); False if it
        should be ignored.

    Raises:
        ValueError: If ``file_path`` is not located under ``root`` (or is
        ``root`` itself).
    """
    # Local import: only needed to report unreadable .gitignore files.
    import sys

    # Resolve absolute paths so the relative-path arithmetic below is reliable.
    abs_root = root.resolve()
    abs_file = file_path.resolve()

    # Ensure the path is under the provided root.
    try:
        abs_file.relative_to(abs_root)
    except ValueError as err:
        raise ValueError(
            f"File {abs_file!r} is not under the root {abs_root!r}."
        ) from err

    # Build the chain of directories from the root to the path's parent.
    directories = [abs_root]
    for part in abs_file.parent.relative_to(abs_root).parts:
        directories.append(directories[-1] / part)

    # Directory-only patterns such as "build/" only match paths that end with
    # a slash, so directories must be presented to the matcher that way.
    is_directory = abs_file.is_dir()

    # Updated by every .gitignore that has a rule matching the path.
    decision = None

    for directory in directories:
        gitignore_file = directory / ".gitignore"
        if not gitignore_file.is_file():
            continue

        try:
            text = gitignore_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: skip unreadable files, and report on stderr so the
            # message cannot end up mixed into the caller's regular output.
            print(f"Could not read {gitignore_file}: {e}", file=sys.stderr)
            continue

        # Drop blank lines; splitlines() already removed the line endings.
        lines = [line for line in text.splitlines() if line.strip()]

        # Compile a GitIgnoreSpec for the rules in the current directory.
        spec = GitIgnoreSpec.from_lines(lines)

        # .gitignore patterns are relative to the directory they live in.
        rel_file = abs_file.relative_to(directory).as_posix()
        if is_directory:
            rel_file += "/"

        # result.include is None when no rule in this file matched.
        result = spec.check_file(rel_file)
        if result.include is not None:
            decision = result.include

    # If no .gitignore rule matched, the path is included by default.
    if decision is None:
        return True

    # decision is True when a normal ignore rule matched (path is ignored) and
    # False when a negation rule matched (path is re-included).
    return not decision

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ classifiers = [
1010
"License :: OSI Approved :: Apache Software License"
1111
]
1212
dependencies = [
13-
"click"
13+
"click",
14+
"pathspec",
1415
]
1516

1617
[project.urls]

tests/test_allowed_by_gitignore.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from files_to_prompt.utils import allowed_by_gitignore
2+
from pathlib import Path
3+
4+
5+
def test_allowed_by_gitignore(tmpdir):
    """Check allowed_by_gitignore against a root-level and a nested .gitignore."""
    # Lay out a throwaway repository inside the temporary directory.
    repo = Path(tmpdir) / "repo"
    repo.mkdir()

    # Root-level rules: everything under "build/" is ignored.
    (repo / ".gitignore").write_text("build/\n", encoding="utf-8")

    # A build artifact that the root-level rule should hide.
    build_dir = repo / "build"
    build_dir.mkdir()
    output_file = build_dir / "output.txt"
    output_file.write_text("dummy build output", encoding="utf-8")

    # "src" carries its own .gitignore, which hides only "temp.txt".
    src_dir = repo / "src"
    src_dir.mkdir()
    (src_dir / ".gitignore").write_text("temp.txt\n", encoding="utf-8")

    # Populate "src" with one ignored file and two regular ones.
    main_file = src_dir / "main.py"
    temp_file = src_dir / "temp.txt"
    keep_file = src_dir / "keep.txt"
    main_file.write_text("print('Hello')", encoding="utf-8")
    temp_file.write_text("should be ignored", encoding="utf-8")
    keep_file.write_text("keep this file", encoding="utf-8")

    # A file at the repository root that no rule mentions.
    root_file = repo / "README.md"
    root_file.write_text("# Repo README", encoding="utf-8")

    # (path, expected result, failure message), checked in this order.
    expectations = [
        # 1. File in "build" is ignored by the root-level rule.
        (output_file, False, "build/output.txt should be ignored"),
        # 2. File in "src" is ignored by src/.gitignore.
        (temp_file, False, "src/temp.txt should be ignored"),
        # 3. Files in "src" that no rule mentions are included.
        (main_file, True, "src/main.py should be included"),
        (keep_file, True, "src/keep.txt should be included"),
        # 4. File at the repo root that no rule mentions is included.
        (root_file, True, "repo/README.md should be included"),
    ]
    for path, expected, message in expectations:
        assert allowed_by_gitignore(repo, path) is expected, message

tests/test_files_to_prompt.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def test_basic_functionality(tmpdir):
2121
with open("test_dir/file2.txt", "w") as f:
2222
f.write("Contents of file2")
2323

24-
result = runner.invoke(cli, ["test_dir"])
24+
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
2525
assert result.exit_code == 0
2626
assert "test_dir/file1.txt" in result.output
2727
assert "Contents of file1" in result.output
@@ -36,7 +36,7 @@ def test_include_hidden(tmpdir):
3636
with open("test_dir/.hidden.txt", "w") as f:
3737
f.write("Contents of hidden file")
3838

39-
result = runner.invoke(cli, ["test_dir"])
39+
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
4040
assert result.exit_code == 0
4141
assert "test_dir/.hidden.txt" not in result.output
4242

@@ -61,11 +61,9 @@ def test_ignore_gitignore(tmpdir):
6161
with open("test_dir/nested_include/included2.txt", "w") as f:
6262
f.write("This nested file should be included")
6363
with open("test_dir/nested_ignore/.gitignore", "w") as f:
64-
f.write("nested_ignore.txt")
64+
f.write("*")
6565
with open("test_dir/nested_ignore/nested_ignore.txt", "w") as f:
6666
f.write("This nested file should not be included")
67-
with open("test_dir/nested_ignore/actually_include.txt", "w") as f:
68-
f.write("This nested file should actually be included")
6967

7068
result = runner.invoke(cli, ["test_dir", "-c"])
7169
assert result.exit_code == 0
@@ -74,7 +72,6 @@ def test_ignore_gitignore(tmpdir):
7472
assert filenames == {
7573
"test_dir/included.txt",
7674
"test_dir/nested_include/included2.txt",
77-
"test_dir/nested_ignore/actually_include.txt",
7875
}
7976

8077
result2 = runner.invoke(cli, ["test_dir", "-c", "--ignore-gitignore"])
@@ -86,7 +83,6 @@ def test_ignore_gitignore(tmpdir):
8683
"test_dir/ignored.txt",
8784
"test_dir/nested_include/included2.txt",
8885
"test_dir/nested_ignore/nested_ignore.txt",
89-
"test_dir/nested_ignore/actually_include.txt",
9086
}
9187

9288

@@ -243,7 +239,7 @@ def test_binary_file_warning(tmpdir):
243239
with open("test_dir/text_file.txt", "w") as f:
244240
f.write("This is a text file")
245241

246-
result = runner.invoke(cli, ["test_dir"])
242+
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
247243
assert result.exit_code == 0
248244

249245
stdout = result.stdout
@@ -331,7 +327,7 @@ def test_line_numbers(tmpdir):
331327
with open("test_dir/multiline.txt", "w") as f:
332328
f.write(test_content)
333329

334-
result = runner.invoke(cli, ["test_dir"])
330+
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
335331
assert result.exit_code == 0
336332
assert "1 First line" not in result.output
337333
assert test_content in result.output

0 commit comments

Comments
 (0)