Skip to content

Commit caa6d5c

Browse files
committed
fix: list files
1 parent 478b4d9 commit caa6d5c

File tree

2 files changed

+257
-5
lines changed

2 files changed

+257
-5
lines changed

moatless/actions/list_files.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
DEFAULT_IGNORED_DIRS = [".git", ".cursor", ".mvn", ".venv"]
1717

1818

19+
def sort_breadth_first(paths):
20+
"""Sort paths breadth-first: by depth (number of slashes), then alphabetically."""
21+
return sorted(paths, key=lambda path: (path.count('/'), path))
22+
23+
1924
class ListFilesArgs(ActionArguments):
2025
"""List files and directories in a specified directory."""
2126

@@ -154,13 +159,15 @@ async def execute(
154159
else:
155160
files_command = "git ls-files --directory | grep -v '/' | sort"
156161

157-
# Command for directories (git doesn't track directories, so we use find)
162+
# Command for directories (git doesn't track directories, so we use find with git check-ignore)
163+
# Use grep to filter out target directory, then check each remaining directory
158164
if list_files_args.recursive:
159-
dirs_command = f"find {target_dir} -xdev -type d | sort | xargs -I{{}} bash -c 'git check-ignore {{}} > /dev/null || echo {{}}'"
165+
# Filter out the target directory upfront, then check git ignore status
166+
dirs_command = f"find {target_dir} -xdev -type d | grep -v '^{target_dir}$' | while read -r dir; do git check-ignore \"$dir/\" >/dev/null || echo \"$dir\"; done | sort"
160167
if ignore_pattern:
161168
dirs_command += ignore_pattern
162169
else:
163-
dirs_command = f"find {target_dir} -xdev -maxdepth 1 -type d | grep -v '^{target_dir}$' | sort | xargs -I{{}} bash -c 'git check-ignore {{}} > /dev/null || echo {{}}'"
170+
dirs_command = f"find {target_dir} -xdev -maxdepth 1 -type d | grep -v '^{target_dir}$' | while read -r dir; do git check-ignore \"$dir/\" >/dev/null || echo \"$dir\"; done | sort"
164171
if ignore_pattern:
165172
dirs_command += ignore_pattern
166173
else:
@@ -314,7 +321,7 @@ async def execute(
314321

315322
# Create a result object
316323
result = {
317-
"directories": sorted(directories),
324+
"directories": sort_breadth_first(directories),
318325
"files": sorted(files),
319326
"total_dirs": len(directories),
320327
"total_files": len(files),
@@ -347,7 +354,10 @@ async def execute(
347354
if not any(f"/{ignored_dir}/" in f"/{f}/" for ignored_dir in self.ignored_dirs)
348355
]
349356

350-
result = {"directories": filtered_dirs, "files": filtered_files}
357+
result = {
358+
"directories": sort_breadth_first(filtered_dirs),
359+
"files": sorted(filtered_files)
360+
}
351361
else:
352362
return Observation.create(
353363
message=f"Error listing directory: {str(e)}",

tests/actions/test_list_files.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,3 +525,245 @@ async def test_recursive_hidden_directories_excluded(list_files_action, file_con
525525
directories = result.properties.get("directories", [])
526526
hidden_dirs = [d for d in directories if ".hidden_subdir" in d]
527527
assert len(hidden_dirs) == 0, "No hidden directories should be in properties for recursive listing"
528+
529+
530+
@pytest.mark.asyncio
531+
async def test_breadth_first_directory_ordering(list_files_action, file_context, temp_repo):
532+
"""Test that directories are listed breadth-first (root level first, then by depth)."""
533+
# Create a complex nested directory structure
534+
nested_dirs = [
535+
"level1_a",
536+
"level1_b",
537+
"level1_a/level2_a",
538+
"level1_a/level2_b",
539+
"level1_b/level2_c",
540+
"level1_a/level2_a/level3_a",
541+
"level1_a/level2_a/level3_b",
542+
"level1_b/level2_c/level3_c"
543+
]
544+
545+
for dir_path in nested_dirs:
546+
os.makedirs(os.path.join(temp_repo, dir_path), exist_ok=True)
547+
# Create a file in each directory to ensure it's tracked
548+
with open(os.path.join(temp_repo, dir_path, "file.txt"), "w") as f:
549+
f.write("content")
550+
551+
# Add to git
552+
try:
553+
subprocess.run(["git", "add", "."], cwd=temp_repo, check=True, capture_output=True)
554+
subprocess.run(["git", "commit", "-m", "Add nested directories"], cwd=temp_repo, check=True, capture_output=True)
555+
except Exception:
556+
pass
557+
558+
# Execute recursive listing
559+
args = ListFilesArgs(directory="", recursive=True, thoughts="Testing breadth-first ordering")
560+
result = await list_files_action.execute(args, file_context)
561+
562+
# Get the directory order from properties
563+
directories = result.properties.get("directories", [])
564+
565+
# Filter to only our test directories
566+
test_dirs = [d for d in directories if d.startswith("level1_")]
567+
568+
# Expected order: level 1 dirs first, then level 2, then level 3
569+
# Within each level, alphabetical order
570+
expected_order = [
571+
"level1_a", # depth 0
572+
"level1_b", # depth 0
573+
"level1_a/level2_a", # depth 1
574+
"level1_a/level2_b", # depth 1
575+
"level1_b/level2_c", # depth 1
576+
"level1_a/level2_a/level3_a", # depth 2
577+
"level1_a/level2_a/level3_b", # depth 2
578+
"level1_b/level2_c/level3_c" # depth 2
579+
]
580+
581+
# Check that our test directories appear in breadth-first order
582+
actual_order = [d for d in directories if d in expected_order]
583+
assert actual_order == expected_order, f"Expected breadth-first order {expected_order}, got {actual_order}"
584+
585+
586+
@pytest.mark.asyncio
587+
async def test_performance_improvement_no_xargs(list_files_action, file_context, temp_repo):
588+
"""Test that the new commands don't use slow xargs approach."""
589+
import time
590+
591+
# Create many directories to test performance
592+
for i in range(50):
593+
os.makedirs(os.path.join(temp_repo, f"perf_test_dir_{i:02d}"), exist_ok=True)
594+
for j in range(5):
595+
subdir = os.path.join(temp_repo, f"perf_test_dir_{i:02d}", f"subdir_{j}")
596+
os.makedirs(subdir, exist_ok=True)
597+
with open(os.path.join(subdir, "file.txt"), "w") as f:
598+
f.write("test content")
599+
600+
# Add to git
601+
try:
602+
subprocess.run(["git", "add", "."], cwd=temp_repo, check=True, capture_output=True)
603+
subprocess.run(["git", "commit", "-m", "Add performance test dirs"], cwd=temp_repo, check=True, capture_output=True)
604+
except Exception:
605+
pass
606+
607+
# Measure time for recursive listing
608+
start_time = time.time()
609+
args = ListFilesArgs(directory="", recursive=True, thoughts="Testing performance")
610+
result = await list_files_action.execute(args, file_context)
611+
end_time = time.time()
612+
613+
execution_time = end_time - start_time
614+
615+
# Should complete much faster than the old 3+ minute xargs approach
616+
# Even with 250+ directories, should complete in under 5 seconds
617+
assert execution_time < 5.0, f"Directory listing took {execution_time:.2f}s, expected < 5.0s"
618+
619+
# Verify we got results
620+
assert len(result.properties.get("directories", [])) > 50, "Should have found many test directories"
621+
622+
623+
@pytest.mark.asyncio
624+
async def test_sort_breadth_first_function():
625+
"""Test the sort_breadth_first utility function directly."""
626+
from moatless.actions.list_files import sort_breadth_first
627+
628+
# Test various directory paths
629+
unsorted_paths = [
630+
"deep/nested/path/dir",
631+
"root_dir",
632+
"another_root",
633+
"level1/level2",
634+
"level1/another_level2",
635+
"different_level1/level2/level3",
636+
"level1/level2/level3"
637+
]
638+
639+
result = sort_breadth_first(unsorted_paths)
640+
641+
# Expected: depth 0 first, then 1, then 2, then 3, alphabetical within each depth
642+
expected = [
643+
"another_root", # depth 0
644+
"root_dir", # depth 0
645+
"level1/another_level2", # depth 1
646+
"level1/level2", # depth 1
647+
"different_level1/level2/level3", # depth 2
648+
"level1/level2/level3", # depth 2
649+
"deep/nested/path/dir" # depth 3
650+
]
651+
652+
assert result == expected, f"Expected {expected}, got {result}"
653+
654+
655+
@pytest.mark.asyncio
656+
async def test_directories_with_spaces_and_special_chars(list_files_action, file_context, temp_repo):
657+
"""Test that directories with spaces and special characters are handled correctly."""
658+
# Create directories with problematic names
659+
problematic_dirs = [
660+
"dir with spaces",
661+
"dir-with-dashes",
662+
"dir_with_underscores",
663+
"dir.with.dots",
664+
"dir(with)parens",
665+
"dir[with]brackets",
666+
"dir{with}braces"
667+
]
668+
669+
for dir_name in problematic_dirs:
670+
dir_path = os.path.join(temp_repo, dir_name)
671+
os.makedirs(dir_path, exist_ok=True)
672+
# Create a file in each directory
673+
with open(os.path.join(dir_path, "test.txt"), "w") as f:
674+
f.write("test content")
675+
676+
# Add to git
677+
try:
678+
subprocess.run(["git", "add", "."], cwd=temp_repo, check=True, capture_output=True)
679+
subprocess.run(["git", "commit", "-m", "Add special directories"], cwd=temp_repo, check=True, capture_output=True)
680+
except Exception:
681+
pass
682+
683+
# Set up environment
684+
env = LocalBashEnvironment(cwd=temp_repo)
685+
list_files_action.workspace.environment = env
686+
687+
# Execute
688+
args = ListFilesArgs(directory="", recursive=False, thoughts="Testing special characters")
689+
result = await list_files_action.execute(args, file_context)
690+
691+
# Should not contain bash error messages
692+
assert "unary operator expected" not in result.message
693+
assert "bash: line" not in result.message
694+
695+
# Should contain the directories
696+
for dir_name in problematic_dirs:
697+
assert dir_name in result.message or dir_name.replace(" ", r"\ ") in result.message
698+
699+
700+
@pytest.mark.asyncio
701+
async def test_empty_and_malformed_directory_names(list_files_action, file_context, temp_repo):
702+
"""Test handling of edge cases that might cause bash parsing issues."""
703+
# Create some normal directories
704+
normal_dirs = ["normal1", "normal2"]
705+
for dir_name in normal_dirs:
706+
os.makedirs(os.path.join(temp_repo, dir_name), exist_ok=True)
707+
with open(os.path.join(temp_repo, dir_name, "file.txt"), "w") as f:
708+
f.write("content")
709+
710+
# Add to git
711+
try:
712+
subprocess.run(["git", "add", "."], cwd=temp_repo, check=True, capture_output=True)
713+
subprocess.run(["git", "commit", "-m", "Add normal directories"], cwd=temp_repo, check=True, capture_output=True)
714+
except Exception:
715+
pass
716+
717+
# Set up environment
718+
env = LocalBashEnvironment(cwd=temp_repo)
719+
list_files_action.workspace.environment = env
720+
721+
# Execute
722+
args = ListFilesArgs(directory="", recursive=True, thoughts="Testing bash robustness")
723+
result = await list_files_action.execute(args, file_context)
724+
725+
# The key test: should not contain bash error messages
726+
assert "unary operator expected" not in result.message
727+
assert "bash: line" not in result.message
728+
assert "command not found" not in result.message
729+
730+
# Should successfully list directories
731+
assert "normal1" in result.message
732+
assert "normal2" in result.message
733+
734+
735+
@pytest.mark.asyncio
736+
async def test_very_deep_directory_structure(list_files_action, file_context, temp_repo):
737+
"""Test with deeply nested directories that might stress bash command parsing."""
738+
# Create a very deep directory structure
739+
deep_path = temp_repo
740+
for i in range(10): # Create 10 levels deep
741+
deep_path = os.path.join(deep_path, f"level_{i}")
742+
os.makedirs(deep_path, exist_ok=True)
743+
744+
# Add a file at the deepest level
745+
with open(os.path.join(deep_path, "deep_file.txt"), "w") as f:
746+
f.write("very deep content")
747+
748+
# Add to git
749+
try:
750+
subprocess.run(["git", "add", "."], cwd=temp_repo, check=True, capture_output=True)
751+
subprocess.run(["git", "commit", "-m", "Add deep structure"], cwd=temp_repo, check=True, capture_output=True)
752+
except Exception:
753+
pass
754+
755+
# Set up environment
756+
env = LocalBashEnvironment(cwd=temp_repo)
757+
list_files_action.workspace.environment = env
758+
759+
# Execute recursive listing
760+
args = ListFilesArgs(directory="", recursive=True, thoughts="Testing deep structure")
761+
result = await list_files_action.execute(args, file_context)
762+
763+
# Should not contain bash errors
764+
assert "unary operator expected" not in result.message
765+
assert "bash: line" not in result.message
766+
767+
# Should contain some of the deep directories
768+
assert "level_0" in result.message
769+
assert "level_1" in result.message

0 commit comments

Comments
 (0)