Skip to content

Commit 2e5bdfc

Browse files
committed
Fix list files
1 parent 9728ef1 commit 2e5bdfc

File tree

1 file changed

+174
-171
lines changed

1 file changed

+174
-171
lines changed

moatless/actions/list_files.py

Lines changed: 174 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from pydantic import ConfigDict, Field
22
import logging
33
from typing import List
4+
import shlex
45

56
from moatless.actions.action import Action
67
from moatless.actions.schema import (
@@ -16,11 +17,6 @@
1617
DEFAULT_IGNORED_DIRS = [".git", ".cursor", ".mvn", ".venv"]
1718

1819

19-
def sort_breadth_first(paths):
    """Return a new list of *paths* in breadth-first order.

    Entries are ordered by the number of ``/`` characters they contain and,
    for equal counts, by plain string comparison.
    """
    ordered = list(paths)
    ordered.sort(key=lambda entry: (entry.count('/'), entry))
    return ordered
22-
23-
2420
class ListFilesArgs(ActionArguments):
2521
"""List files and directories in a specified directory."""
2622

@@ -136,201 +132,82 @@ async def execute(
136132
# If the command fails, assume git is not available
137133
git_available = False
138134

139-
# Create the ignore_pattern for find commands
140-
ignore_pattern = ""
141-
if self.ignored_dirs:
142-
# Create a pattern to exclude specified directories
143-
ignore_dirs = "|".join([f"^{d}$" for d in self.ignored_dirs])
144-
ignore_pattern = f" | grep -v -E '{ignore_dirs}'"
135+
# Legacy variables no longer needed due to clearer builders
136+
# (kept minimal changes to surrounding logic)
145137

146-
# Escape the target directory for use in regex patterns
147-
escaped_target_dir = target_dir.replace(".", r"\.")
148-
149-
# Build the appropriate find command based on recursion setting and git availability
138+
# Build the commands using helpers for clarity
150139
if git_available:
151-
# Use git commands when git is available to respect .gitignore
152-
if list_files_args.recursive:
153-
# For recursive mode with gitignore
154-
if dir_path:
155-
files_command = f"cd {target_dir} && git ls-files | sort"
156-
else:
157-
files_command = "git ls-files | sort"
158-
else:
159-
# For non-recursive mode with gitignore
160-
if dir_path:
161-
files_command = f"cd {target_dir} && git ls-files --directory | grep -v '/' | sort"
162-
else:
163-
files_command = "git ls-files --directory | grep -v '/' | sort"
164-
165-
# Command for directories (git doesn't track directories, so we use find with git check-ignore)
166-
# Use grep to filter out target directory, then check each remaining directory
167-
if list_files_args.recursive:
168-
# Filter out the target directory upfront, then check git ignore status
169-
# Suppress stderr from git check-ignore to avoid fatal messages leaking into output
170-
dirs_command = (
171-
f"find {target_dir} -xdev -type d | "
172-
f"grep -v '^{escaped_target_dir}$' | "
173-
f"while read -r dir; do git check-ignore \"$dir/\" >/dev/null 2>&1 || echo \"$dir\"; done | sort"
174-
)
175-
if ignore_pattern:
176-
dirs_command += ignore_pattern
177-
else:
178-
# Suppress stderr from git check-ignore to avoid fatal messages leaking into output
179-
dirs_command = (
180-
f"find {target_dir} -xdev -maxdepth 1 -type d | "
181-
f"grep -v '^{escaped_target_dir}$' | "
182-
f"while read -r dir; do git check-ignore \"$dir/\" >/dev/null 2>&1 || echo \"$dir\"; done | sort"
183-
)
184-
if ignore_pattern:
185-
dirs_command += ignore_pattern
140+
dirs_command, files_command = build_git_commands(dir_path, target_dir, list_files_args.recursive, self.ignored_dirs)
186141
else:
187-
# Use regular find when git is not available
188-
if list_files_args.recursive:
189-
# Get all files and directories recursively using find
190-
# -xdev: don't cross filesystem boundaries
191-
dirs_command = f"find {target_dir} -xdev -type d | grep -v '^{escaped_target_dir}$' | sort"
192-
if ignore_pattern:
193-
dirs_command += ignore_pattern
194-
files_command = f"find {target_dir} -xdev -type f | sort"
195-
else:
196-
# List only immediate files and directories using find with maxdepth
197-
# -xdev: don't cross filesystem boundaries
198-
dirs_command = f"find {target_dir} -xdev -maxdepth 1 -type d | grep -v '^{escaped_target_dir}$' | sort"
199-
if ignore_pattern:
200-
dirs_command += ignore_pattern
201-
files_command = f"find {target_dir} -xdev -maxdepth 1 -type f | sort"
142+
dirs_command, files_command = build_fs_commands(dir_path, target_dir, list_files_args.recursive, self.ignored_dirs)
202143

203144
try:
204145
# Execute commands to get directories and files
205146
try:
206-
dirs_output = await local_env.execute(dirs_command, patch=patch)
147+
# Execute commands to get directories and files
148+
if dirs_command is not None:
149+
dirs_output = await local_env.execute(dirs_command, patch=patch)
150+
else:
151+
dirs_output = ""
207152
files_output = await local_env.execute(files_command, patch=patch)
208153

209154
except Exception as e:
210155
# Check if it's a "no such file or directory" error
211156
if "No such file or directory" in str(e):
212157
return Observation.create(
213-
message=f"Error listing directory: No such directory '{list_files_args.directory}'",
158+
message=f"Error: Directory {list_files_args.directory} does not exist",
214159
properties={"fail_reason": "directory_not_found"},
215160
)
216161
raise # Re-raise if it's a different error
217162

218-
# Process directory results
219-
directories = []
220-
# Check for common error patterns in command output
221-
no_such_file_error = False
222-
for cmd_output in [dirs_output, files_output]:
223-
if cmd_output and "No such file or directory" in cmd_output:
224-
no_such_file_error = True
225-
break
226-
227-
if no_such_file_error:
163+
# Process directory results (helpers for clarity)
164+
directories: list[str] = []
165+
if (dirs_output and "No such file or directory" in dirs_output) or (
166+
files_output and "No such file or directory" in files_output
167+
):
228168
return Observation.create(
229-
message=f"Error listing directory: No such directory '{list_files_args.directory}'",
169+
message=f"Error: Directory {list_files_args.directory} does not exist",
230170
properties={"fail_reason": "directory_not_found"},
231171
)
232172

233173
for line in dirs_output.strip().split("\n"):
234-
if line.strip():
235-
# Convert path to relative format
236-
if line.startswith("./"):
237-
rel_path = line[2:]
238-
else:
239-
rel_path = line
240-
241-
# Skip if the directory should be ignored (relative path)
242-
if any(
243-
rel_path == ignored_dir or rel_path.startswith(f"{ignored_dir}/")
244-
for ignored_dir in self.ignored_dirs
245-
):
174+
rel_path = normalize_rel(line.strip())
175+
if not rel_path or rel_path == dir_path:
176+
continue
177+
if not list_files_args.recursive:
178+
dir_name = rel_path.replace(f"{dir_path}/", "") if dir_path else rel_path
179+
if not dir_name or should_skip_dir(dir_name, self.ignored_dirs):
180+
continue
181+
directories.append(dir_name)
182+
else:
183+
if should_skip_dir(rel_path, self.ignored_dirs):
246184
continue
185+
directories.append(rel_path)
247186

248-
# For recursive listing, filter out the target directory itself
249-
if rel_path and rel_path != dir_path:
250-
if not list_files_args.recursive:
251-
# For non-recursive, show only the directory name
252-
if dir_path:
253-
# Strip the parent directory part to get just the name
254-
dir_name = rel_path.replace(f"{dir_path}/", "")
255-
if dir_name: # Skip if empty after replacement
256-
# Skip if the directory name should be ignored or starts with '.'
257-
if dir_name.startswith('.') or any(
258-
dir_name == ignored_dir or dir_name.startswith(f"{ignored_dir}/")
259-
for ignored_dir in self.ignored_dirs
260-
):
261-
continue
262-
directories.append(dir_name)
263-
else:
264-
# Skip if the directory name should be ignored or starts with '.'
265-
if rel_path.startswith('.') or any(
266-
rel_path == ignored_dir or rel_path.startswith(f"{ignored_dir}/")
267-
for ignored_dir in self.ignored_dirs
268-
):
269-
continue
270-
directories.append(rel_path)
271-
else:
272-
# For recursive, show full relative paths
273-
# Skip if the directory path should be ignored or any component starts with '.'
274-
path_components = rel_path.split('/')
275-
should_skip = (
276-
rel_path.startswith('.') or
277-
any(component.startswith('.') for component in path_components) or
278-
any(
279-
rel_path == ignored_dir or rel_path.startswith(f"{ignored_dir}/")
280-
for ignored_dir in self.ignored_dirs
281-
)
282-
)
283-
if should_skip:
284-
continue
285-
directories.append(rel_path)
286-
287-
# Process file results
288-
files = []
187+
# Process file results (helpers for clarity)
188+
files: list[str] = []
289189
for line in files_output.strip().split("\n"):
290-
if line.strip():
291-
# Convert path to relative format
292-
if line.startswith("./"):
293-
rel_path = line[2:]
190+
rel_path = normalize_rel(line.strip())
191+
if not rel_path or should_skip_file(rel_path, self.ignored_dirs):
192+
continue
193+
194+
if not list_files_args.recursive:
195+
if dir_path:
196+
remainder = rel_path.replace(f"{dir_path}/", "", 1)
197+
if remainder and "/" not in remainder:
198+
files.append(remainder)
294199
else:
295-
rel_path = line
296-
297-
# Skip if the file is in an ignored directory or hidden directory, or if the file itself starts with a dot
298-
path_components = rel_path.split('/')
299-
file_name = path_components[-1] # Get the actual file name
300-
should_skip_file = (
301-
any(f"/{ignored_dir}/" in f"/{rel_path}/" for ignored_dir in self.ignored_dirs) or
302-
any(component.startswith('.') for component in path_components[:-1]) or # Check dirs
303-
file_name.startswith('.') # Also check if the file name itself starts with a dot
304-
)
305-
if should_skip_file:
306-
continue
307-
308-
if rel_path:
309-
if not list_files_args.recursive:
310-
# For non-recursive, show only the file name
311-
if dir_path:
312-
# Strip the parent directory part to get just the name
313-
file_name = rel_path.replace(f"{dir_path}/", "")
314-
if file_name: # Skip if empty after replacement
315-
files.append(file_name)
316-
else:
317-
files.append(rel_path)
318-
else:
319-
# For recursive, show full relative paths
200+
if "/" not in rel_path:
320201
files.append(rel_path)
202+
else:
203+
if dir_path:
204+
if rel_path.startswith(f"{dir_path}/"):
205+
files.append(rel_path[len(dir_path) + 1 :])
206+
else:
207+
files.append(rel_path)
321208

322209
# Apply max_results limit, prioritizing directories over files
323-
total_results = list_files_args.max_results
324-
if len(directories) + len(files) > total_results:
325-
# Prioritize directories first, then files with remaining slots
326-
if len(directories) >= total_results:
327-
# If we have enough directories to fill max_results, only show directories
328-
directories = directories[:total_results]
329-
files = []
330-
else:
331-
# Show all directories, then files with remaining slots
332-
remaining_slots = total_results - len(directories)
333-
files = files[:remaining_slots]
210+
directories, files, _ = apply_limits(directories, files, list_files_args.max_results)
334211

335212
# Create a result object
336213
result = {
@@ -439,3 +316,129 @@ def get_evaluation_criteria(cls, trajectory_length) -> list[str]:
439316
"Git Integration: Confirm if .gitignore patterns are respected when git is available in the workspace.",
440317
"Directory Filtering: Check if specified directories are properly ignored in the output.",
441318
]
319+
320+
321+
def normalize_rel(path: str) -> str:
    """Return *path* with one leading ``./`` removed, if present.

    Only that single prefix is stripped; every other input (including a
    bare ``.``) is returned unchanged.
    """
    return path[2:] if path.startswith("./") else path
326+
327+
328+
def is_hidden_segment(segments: list[str]) -> bool:
    """Return True when at least one entry of *segments* starts with a dot."""
    for name in segments:
        if name.startswith("."):
            return True
    return False
330+
331+
332+
def should_skip_dir(rel_path: str, ignored_dirs: list[str]) -> bool:
    """Decide whether a directory path should be left out of the listing.

    Args:
        rel_path: Directory path using ``/`` as the separator.
        ignored_dirs: Directory names (or ``/``-separated sub-paths) to
            exclude.

    Returns:
        True when *rel_path* is empty, when an ignored entry appears as a
        whole component (or run of components) anywhere in the path, or when
        any path component starts with a dot. False otherwise.
    """
    if not rel_path:
        return True

    # Wrap both sides in slashes so that only whole components match
    # ("node_modules" must not match "my_node_modules"). Matching at any
    # depth keeps this in line with how files in ignored directories are
    # detected, instead of only recognising a leading prefix.
    wrapped = f"/{rel_path}/"
    if any(f"/{ignored}/" in wrapped for ignored in ignored_dirs):
        return True

    # A dot-prefixed component anywhere in the path marks it as hidden; this
    # also covers the case where the path itself starts with a dot.
    return any(part.startswith(".") for part in rel_path.split("/"))
343+
344+
345+
def should_skip_file(rel_path: str, ignored_dirs: list[str]) -> bool:
    """Decide whether a file path should be left out of the listing.

    Args:
        rel_path: File path using ``/`` as the separator.
        ignored_dirs: Directory names to exclude.

    Returns:
        True when *rel_path* is empty, when the slash-wrapped path contains a
        slash-wrapped ignored entry (so any component equal to an ignored
        name matches, the last one included), when a parent component is
        hidden according to ``is_hidden_segment``, or when the final
        component starts with a dot. False otherwise.
    """
    if not rel_path:
        return True

    # Surround with slashes so only complete components can match.
    wrapped = f"/{rel_path}/"
    for ignored_dir in ignored_dirs:
        if f"/{ignored_dir}/" in wrapped:
            return True

    *parents, filename = rel_path.split("/")
    return is_hidden_segment(parents) or filename.startswith(".")
357+
358+
359+
def apply_limits(
    directories: list[str], files: list[str], max_results: int
) -> tuple[list[str], list[str], dict]:
    """Trim the combined listing to *max_results*, keeping directories first.

    Args:
        directories: Directory entries, already in display order.
        files: File entries, already in display order.
        max_results: Maximum number of entries to keep in total.

    Returns:
        A ``(directories, files, stats)`` tuple. When the combined count fits
        within *max_results* both lists are returned untouched. Otherwise
        directories are kept first and files fill whatever slots remain; if
        the directories alone reach the limit, no files are returned.
        ``stats`` holds the untrimmed counts under ``total_dirs`` and
        ``total_files`` and a ``results_limited`` flag.
    """
    dir_count = len(directories)
    file_count = len(files)
    truncated = dir_count + file_count > max_results

    if truncated:
        if dir_count >= max_results:
            # Directories alone exhaust the budget: drop every file.
            directories, files = directories[:max_results], []
        else:
            files = files[: max_results - dir_count]

    summary = {
        "total_dirs": dir_count,
        "total_files": file_count,
        "results_limited": truncated,
    }
    return directories, files, summary
383+
384+
385+
def _ignored_prune_expr(ignored_dirs: list[str]) -> str:
386+
if not ignored_dirs:
387+
return ""
388+
names = " -o ".join([f"-name {shlex.quote(d)}" for d in ignored_dirs])
389+
# Wrap in grouping for -prune usage
390+
return f"\( {names} \) -prune -o"
391+
392+
393+
def build_fs_commands(dir_path: str, target_dir: str, recursive: bool, ignored_dirs: list[str]) -> tuple[str, str]:
    """Build the plain ``find`` shell commands that list directories and files.

    Args:
        dir_path: Not used by this builder; accepted so the signature matches
            the call site.
        target_dir: Directory to search; shell-quoted before use.
        recursive: When True search the whole tree, otherwise only the
            immediate children of *target_dir*.
        ignored_dirs: Directory names pruned from the directory listing.

    Returns:
        A ``(dirs_cmd, files_cmd)`` tuple of shell command strings, each
        piping its output through ``sort``. The directory command excludes
        *target_dir* itself via ``-mindepth 1``. The file command does not
        apply the prune expression.
    """
    td = shlex.quote(target_dir)
    prune = _ignored_prune_expr(ignored_dirs)

    # Depth limits are global find options: they must come before the first
    # test (the prune expression), otherwise GNU find prints a warning on
    # stderr about a global option following a non-option argument.
    depth_opts = "-mindepth 1" if recursive else "-maxdepth 1 -mindepth 1"

    if prune:
        # With "-prune -o" an explicit -print is needed so that only the
        # right-hand side of the -o produces output.
        dirs_cmd = f"find {td} -xdev {depth_opts} {prune} -type d -print | sort"
    else:
        dirs_cmd = f"find {td} -xdev {depth_opts} -type d | sort"

    if recursive:
        files_cmd = f"find {td} -xdev -type f | sort"
    else:
        files_cmd = f"find {td} -xdev -maxdepth 1 -type f | sort"
    return dirs_cmd, files_cmd
413+
414+
415+
def build_git_commands(dir_path: str, target_dir: str, recursive: bool, ignored_dirs: list[str]) -> tuple[str, str]:
    """Build the shell commands used to list directories and files via git.

    Args:
        dir_path: Directory to restrict the file listing to; when empty the
            file listing is not restricted by a pathspec.
        target_dir: Directory handed to ``find`` for the directory listing;
            shell-quoted before use.
        recursive: When True list directories at every depth, otherwise only
            the immediate children of *target_dir*. The file command is the
            same in both modes.
        ignored_dirs: Directory names pruned from the directory listing.

    Returns:
        A ``(dirs_cmd, files_cmd)`` tuple of shell command strings, each
        piping its output through ``sort``. The file command lists tracked
        and untracked-but-not-excluded files with ``git ls-files``. The
        directory command runs ``find`` and echoes every directory for which
        ``git check-ignore`` exits non-zero.
    """
    # Files via git to respect .gitignore
    if dir_path:
        # Quote the whole pathspec, glob included, so the shell passes it to
        # git verbatim instead of expanding "**" itself.
        pathspec = shlex.quote(f"{dir_path}/**")
        files_cmd = f"git ls-files --cached --others --exclude-standard -- {pathspec} | sort"
    else:
        files_cmd = "git ls-files --cached --others --exclude-standard | sort"

    # Directories using find, then filter through git check-ignore
    td = shlex.quote(target_dir)
    prune = _ignored_prune_expr(ignored_dirs)

    # Depth limits are global find options: they must come before the first
    # test (the prune expression), otherwise GNU find prints a warning on
    # stderr about a global option following a non-option argument.
    depth_opts = "-mindepth 1" if recursive else "-maxdepth 1 -mindepth 1"
    if prune:
        base_find = f"find {td} -xdev {depth_opts} {prune} -type d -print"
    else:
        base_find = f"find {td} -xdev {depth_opts} -type d"

    # Keep the existing behavior: check each dir with git check-ignore and
    # discard its stdout/stderr so nothing but the echoed paths is emitted.
    dirs_cmd = (
        f"{base_find} | while read -r dir; do git check-ignore \"$dir/\" >/dev/null 2>&1 || echo \"$dir\"; done | sort"
    )

    return dirs_cmd, files_cmd
440+
441+
442+
def sort_breadth_first(paths):
    """Return *paths* as a new list ordered shallowest-first.

    Depth is measured as the number of ``/`` characters in a path; entries
    with equal depth are ordered by ordinary string comparison.
    """

    def depth_then_name(path):
        return path.count('/'), path

    return sorted(paths, key=depth_then_name)

0 commit comments

Comments
 (0)