Skip to content

Commit eaaecc2

Browse files
chambridge and claude authored
fix(assessors): FileSizeLimitsAssessor now respects .gitignore (#248)
Refactored FileSizeLimitsAssessor to use `git ls-files` instead of `glob()` to enumerate source files. This ensures that files in .gitignore'd directories (like .venv/, node_modules/) are not scanned, which was causing false failures when large vendored files exceeded the line count thresholds. Fixes #245 🤖 Generated with [Claude Code](https://claude.ai/code) Signed-off-by: Chris Hambridge <[email protected]> Co-authored-by: Claude <[email protected]>
1 parent f8deada commit eaaecc2

File tree

2 files changed

+233
-38
lines changed

2 files changed

+233
-38
lines changed

src/agentready/assessors/stub_assessors.py

Lines changed: 57 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
enhanced later with more sophisticated detection and scoring logic.
55
"""
66

7+
from pathlib import Path
8+
79
from ..models.attribute import Attribute
810
from ..models.finding import Citation, Finding, Remediation
911
from ..models.repository import Repository
12+
from ..utils.subprocess_utils import safe_subprocess_run
1013
from .base import BaseAssessor
1114

1215

@@ -539,51 +542,67 @@ def assess(self, repository: Repository) -> Finding:
539542
- 100: All files <500 lines
540543
- 75-99: Some files 500-1000 lines
541544
- 0-74: Files >1000 lines exist
545+
546+
Note: Uses git ls-files to respect .gitignore (fixes issue #245).
542547
"""
543548
# Count files by size
544-
large_files = [] # 500-1000 lines
545-
huge_files = [] # >1000 lines
549+
large_files: list[tuple[Path, int]] = [] # 500-1000 lines
550+
huge_files: list[tuple[Path, int]] = [] # >1000 lines
546551
total_files = 0
547552

548553
# Check common source file extensions
549-
extensions = {
550-
".py",
551-
".js",
552-
".ts",
553-
".jsx",
554-
".tsx",
555-
".go",
556-
".java",
557-
".rb",
558-
".rs",
559-
".cpp",
560-
".c",
561-
".h",
562-
}
554+
extensions = [
555+
"py",
556+
"js",
557+
"ts",
558+
"jsx",
559+
"tsx",
560+
"go",
561+
"java",
562+
"rb",
563+
"rs",
564+
"cpp",
565+
"c",
566+
"h",
567+
]
563568

564-
for ext in extensions:
565-
pattern = f"**/*{ext}"
566-
try:
569+
# Get git-tracked files (respects .gitignore)
570+
# This fixes issue #245 where .venv files were incorrectly scanned
571+
try:
572+
patterns = [f"*.{ext}" for ext in extensions]
573+
result = safe_subprocess_run(
574+
["git", "ls-files"] + patterns,
575+
cwd=repository.path,
576+
capture_output=True,
577+
text=True,
578+
timeout=30,
579+
check=True,
580+
)
581+
tracked_files = [f for f in result.stdout.strip().split("\n") if f]
582+
except Exception:
583+
# Fallback for non-git repos: use glob (less accurate)
584+
tracked_files = []
585+
for ext in extensions:
586+
tracked_files.extend(
587+
str(f.relative_to(repository.path))
588+
for f in repository.path.rglob(f"*.{ext}")
589+
if f.is_file()
590+
)
567591

568-
for file_path in repository.path.glob(pattern):
569-
if file_path.is_file():
570-
try:
571-
with open(file_path, "r", encoding="utf-8") as f:
572-
lines = len(f.readlines())
573-
total_files += 1
574-
575-
if lines > 1000:
576-
huge_files.append(
577-
(file_path.relative_to(repository.path), lines)
578-
)
579-
elif lines > 500:
580-
large_files.append(
581-
(file_path.relative_to(repository.path), lines)
582-
)
583-
except (OSError, UnicodeDecodeError):
584-
# Skip files we can't read
585-
pass
586-
except Exception:
592+
# Count lines in tracked files
593+
for rel_path in tracked_files:
594+
file_path = repository.path / rel_path
595+
try:
596+
with open(file_path, "r", encoding="utf-8") as f:
597+
lines = len(f.readlines())
598+
total_files += 1
599+
600+
if lines > 1000:
601+
huge_files.append((Path(rel_path), lines))
602+
elif lines > 500:
603+
large_files.append((Path(rel_path), lines))
604+
except (OSError, UnicodeDecodeError):
605+
# Skip files we can't read
587606
pass
588607

589608
if total_files == 0:

tests/unit/test_assessors_stub.py

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from agentready.assessors.stub_assessors import (
66
DependencyPinningAssessor,
7+
FileSizeLimitsAssessor,
78
GitignoreAssessor,
89
)
910
from agentready.models.repository import Repository
@@ -513,3 +514,178 @@ def test_no_languages_detected(self, tmp_path):
513514

514515
# Should still give points if file exists with content
515516
assert finding.score > 0
517+
518+
519+
class TestFileSizeLimitsAssessor:
    """Tests for FileSizeLimitsAssessor - Issue #245 fix.

    Each test builds a throwaway git repository under pytest's ``tmp_path``,
    stages the files that should be visible to the assessor, and checks the
    resulting finding's status, score and evidence.
    """

    @staticmethod
    def _git(repo_dir, *args):
        """Run ``git <args>`` inside *repo_dir*.

        ``check=True`` makes a failing git command abort the test right away
        instead of letting it continue against a half-prepared repository.
        """
        subprocess.run(["git", *args], cwd=repo_dir, capture_output=True, check=True)

    @staticmethod
    def _make_repo(path, languages, total_files, total_lines):
        """Return a Repository model rooted at *path* with fixed test metadata."""
        return Repository(
            path=path,
            name="test-repo",
            url=None,
            branch="main",
            commit_hash="abc123",
            languages=languages,
            total_files=total_files,
            total_lines=total_lines,
        )

    def test_respects_gitignore_venv(self, tmp_path):
        """Verify .venv files are NOT counted (fixes issue #245)."""
        self._git(tmp_path, "init")

        # Ignore the virtualenv directory
        (tmp_path / ".gitignore").write_text(".venv/\n")

        # Large file inside the ignored directory (should be IGNORED)
        venv_dir = tmp_path / ".venv"
        venv_dir.mkdir()
        (venv_dir / "large_module.py").write_text("x = 1\n" * 2000)  # 2000 lines

        # Small file in src/ (should be counted)
        src_dir = tmp_path / "src"
        src_dir.mkdir()
        (src_dir / "main.py").write_text("print('hello')\n" * 50)  # 50 lines

        # Stage only the file that is meant to be tracked
        self._git(tmp_path, "add", "src/main.py")

        repo = self._make_repo(
            tmp_path, languages={"Python": 1}, total_files=1, total_lines=50
        )
        finding = FileSizeLimitsAssessor().assess(repo)

        # Should pass because the .venv file is ignored
        assert finding.status == "pass"
        assert finding.score == 100.0
        # Evidence should NOT mention the 2000-line file
        assert "2000" not in str(finding.evidence)

    def test_no_source_files_returns_not_applicable(self, tmp_path):
        """Test not_applicable when no source files exist."""
        self._git(tmp_path, "init")

        # Only a non-source file is present and staged
        (tmp_path / "README.md").write_text("# Test\n")
        self._git(tmp_path, "add", "README.md")

        repo = self._make_repo(
            tmp_path, languages={"Markdown": 1}, total_files=1, total_lines=1
        )
        finding = FileSizeLimitsAssessor().assess(repo)

        assert finding.status == "not_applicable"

    def test_huge_files_detected(self, tmp_path):
        """Test that files >1000 lines are flagged."""
        self._git(tmp_path, "init")

        # One staged file well above the 1000-line threshold
        (tmp_path / "huge_module.py").write_text("x = 1\n" * 1500)  # 1500 lines
        self._git(tmp_path, "add", "huge_module.py")

        repo = self._make_repo(
            tmp_path, languages={"Python": 1}, total_files=1, total_lines=1500
        )
        finding = FileSizeLimitsAssessor().assess(repo)

        assert finding.status == "fail"
        assert finding.score < 70
        evidence = str(finding.evidence)
        assert "1500" in evidence or ">1000" in evidence

    def test_small_files_pass(self, tmp_path):
        """Test that all files <500 lines gives perfect score."""
        self._git(tmp_path, "init")

        # Five staged files of 100 lines each
        for i in range(5):
            file_name = f"module_{i}.py"
            (tmp_path / file_name).write_text("x = 1\n" * 100)
            self._git(tmp_path, "add", file_name)

        repo = self._make_repo(
            tmp_path, languages={"Python": 5}, total_files=5, total_lines=500
        )
        finding = FileSizeLimitsAssessor().assess(repo)

        assert finding.status == "pass"
        assert finding.score == 100.0
        assert "All 5 source files are <500 lines" in str(finding.evidence)

    def test_respects_gitignore_node_modules(self, tmp_path):
        """Verify node_modules files are NOT counted."""
        self._git(tmp_path, "init")

        # Ignore the dependency directory
        (tmp_path / ".gitignore").write_text("node_modules/\n")

        # Large JS file inside the ignored directory (should be IGNORED)
        nm_dir = tmp_path / "node_modules"
        nm_dir.mkdir()
        (nm_dir / "large_lib.js").write_text("var x = 1;\n" * 3000)  # 3000 lines

        # Small JS file in src/ (should be counted)
        src_dir = tmp_path / "src"
        src_dir.mkdir()
        (src_dir / "app.js").write_text("console.log('hi');\n" * 30)  # 30 lines

        self._git(tmp_path, "add", "src/app.js")

        repo = self._make_repo(
            tmp_path, languages={"JavaScript": 1}, total_files=1, total_lines=30
        )
        finding = FileSizeLimitsAssessor().assess(repo)

        assert finding.status == "pass"
        assert "3000" not in str(finding.evidence)

0 commit comments

Comments
 (0)