|
1 | 1 | from pydantic import ConfigDict, Field |
2 | 2 | import logging |
3 | 3 | from typing import List |
| 4 | +import shlex |
4 | 5 |
|
5 | 6 | from moatless.actions.action import Action |
6 | 7 | from moatless.actions.schema import ( |
|
16 | 17 | DEFAULT_IGNORED_DIRS = [".git", ".cursor", ".mvn", ".venv"] |
17 | 18 |
|
18 | 19 |
|
19 | | -def sort_breadth_first(paths): |
20 | | - """Sort paths breadth-first: by depth (number of slashes), then alphabetically.""" |
21 | | - return sorted(paths, key=lambda path: (path.count('/'), path)) |
22 | | - |
23 | | - |
24 | 20 | class ListFilesArgs(ActionArguments): |
25 | 21 | """List files and directories in a specified directory.""" |
26 | 22 |
|
@@ -136,201 +132,82 @@ async def execute( |
136 | 132 | # If the command fails, assume git is not available |
137 | 133 | git_available = False |
138 | 134 |
|
139 | | - # Create the ignore_pattern for find commands |
140 | | - ignore_pattern = "" |
141 | | - if self.ignored_dirs: |
142 | | - # Create a pattern to exclude specified directories |
143 | | - ignore_dirs = "|".join([f"^{d}$" for d in self.ignored_dirs]) |
144 | | - ignore_pattern = f" | grep -v -E '{ignore_dirs}'" |
| 135 | + # Legacy variables no longer needed due to clearer builders |
| 136 | + # (kept minimal changes to surrounding logic) |
145 | 137 |
|
146 | | - # Escape the target directory for use in regex patterns |
147 | | - escaped_target_dir = target_dir.replace(".", r"\.") |
148 | | - |
149 | | - # Build the appropriate find command based on recursion setting and git availability |
| 138 | + # Build the commands using helpers for clarity |
150 | 139 | if git_available: |
151 | | - # Use git commands when git is available to respect .gitignore |
152 | | - if list_files_args.recursive: |
153 | | - # For recursive mode with gitignore |
154 | | - if dir_path: |
155 | | - files_command = f"cd {target_dir} && git ls-files | sort" |
156 | | - else: |
157 | | - files_command = "git ls-files | sort" |
158 | | - else: |
159 | | - # For non-recursive mode with gitignore |
160 | | - if dir_path: |
161 | | - files_command = f"cd {target_dir} && git ls-files --directory | grep -v '/' | sort" |
162 | | - else: |
163 | | - files_command = "git ls-files --directory | grep -v '/' | sort" |
164 | | - |
165 | | - # Command for directories (git doesn't track directories, so we use find with git check-ignore) |
166 | | - # Use grep to filter out target directory, then check each remaining directory |
167 | | - if list_files_args.recursive: |
168 | | - # Filter out the target directory upfront, then check git ignore status |
169 | | - # Suppress stderr from git check-ignore to avoid fatal messages leaking into output |
170 | | - dirs_command = ( |
171 | | - f"find {target_dir} -xdev -type d | " |
172 | | - f"grep -v '^{escaped_target_dir}$' | " |
173 | | - f"while read -r dir; do git check-ignore \"$dir/\" >/dev/null 2>&1 || echo \"$dir\"; done | sort" |
174 | | - ) |
175 | | - if ignore_pattern: |
176 | | - dirs_command += ignore_pattern |
177 | | - else: |
178 | | - # Suppress stderr from git check-ignore to avoid fatal messages leaking into output |
179 | | - dirs_command = ( |
180 | | - f"find {target_dir} -xdev -maxdepth 1 -type d | " |
181 | | - f"grep -v '^{escaped_target_dir}$' | " |
182 | | - f"while read -r dir; do git check-ignore \"$dir/\" >/dev/null 2>&1 || echo \"$dir\"; done | sort" |
183 | | - ) |
184 | | - if ignore_pattern: |
185 | | - dirs_command += ignore_pattern |
| 140 | + dirs_command, files_command = build_git_commands(dir_path, target_dir, list_files_args.recursive, self.ignored_dirs) |
186 | 141 | else: |
187 | | - # Use regular find when git is not available |
188 | | - if list_files_args.recursive: |
189 | | - # Get all files and directories recursively using find |
190 | | - # -xdev: don't cross filesystem boundaries |
191 | | - dirs_command = f"find {target_dir} -xdev -type d | grep -v '^{escaped_target_dir}$' | sort" |
192 | | - if ignore_pattern: |
193 | | - dirs_command += ignore_pattern |
194 | | - files_command = f"find {target_dir} -xdev -type f | sort" |
195 | | - else: |
196 | | - # List only immediate files and directories using find with maxdepth |
197 | | - # -xdev: don't cross filesystem boundaries |
198 | | - dirs_command = f"find {target_dir} -xdev -maxdepth 1 -type d | grep -v '^{escaped_target_dir}$' | sort" |
199 | | - if ignore_pattern: |
200 | | - dirs_command += ignore_pattern |
201 | | - files_command = f"find {target_dir} -xdev -maxdepth 1 -type f | sort" |
| 142 | + dirs_command, files_command = build_fs_commands(dir_path, target_dir, list_files_args.recursive, self.ignored_dirs) |
202 | 143 |
|
203 | 144 | try: |
204 | 145 | # Execute commands to get directories and files |
205 | 146 | try: |
206 | | - dirs_output = await local_env.execute(dirs_command, patch=patch) |
| 147 | + # Execute commands to get directories and files |
| 148 | + if dirs_command is not None: |
| 149 | + dirs_output = await local_env.execute(dirs_command, patch=patch) |
| 150 | + else: |
| 151 | + dirs_output = "" |
207 | 152 | files_output = await local_env.execute(files_command, patch=patch) |
208 | 153 |
|
209 | 154 | except Exception as e: |
210 | 155 | # Check if it's a "no such file or directory" error |
211 | 156 | if "No such file or directory" in str(e): |
212 | 157 | return Observation.create( |
213 | | - message=f"Error listing directory: No such directory '{list_files_args.directory}'", |
| 158 | + message=f"Error: Directory {list_files_args.directory} does not exist", |
214 | 159 | properties={"fail_reason": "directory_not_found"}, |
215 | 160 | ) |
216 | 161 | raise # Re-raise if it's a different error |
217 | 162 |
|
218 | | - # Process directory results |
219 | | - directories = [] |
220 | | - # Check for common error patterns in command output |
221 | | - no_such_file_error = False |
222 | | - for cmd_output in [dirs_output, files_output]: |
223 | | - if cmd_output and "No such file or directory" in cmd_output: |
224 | | - no_such_file_error = True |
225 | | - break |
226 | | - |
227 | | - if no_such_file_error: |
| 163 | + # Process directory results (helpers for clarity) |
| 164 | + directories: list[str] = [] |
| 165 | + if (dirs_output and "No such file or directory" in dirs_output) or ( |
| 166 | + files_output and "No such file or directory" in files_output |
| 167 | + ): |
228 | 168 | return Observation.create( |
229 | | - message=f"Error listing directory: No such directory '{list_files_args.directory}'", |
| 169 | + message=f"Error: Directory {list_files_args.directory} does not exist", |
230 | 170 | properties={"fail_reason": "directory_not_found"}, |
231 | 171 | ) |
232 | 172 |
|
233 | 173 | for line in dirs_output.strip().split("\n"): |
234 | | - if line.strip(): |
235 | | - # Convert path to relative format |
236 | | - if line.startswith("./"): |
237 | | - rel_path = line[2:] |
238 | | - else: |
239 | | - rel_path = line |
240 | | - |
241 | | - # Skip if the directory should be ignored (relative path) |
242 | | - if any( |
243 | | - rel_path == ignored_dir or rel_path.startswith(f"{ignored_dir}/") |
244 | | - for ignored_dir in self.ignored_dirs |
245 | | - ): |
| 174 | + rel_path = normalize_rel(line.strip()) |
| 175 | + if not rel_path or rel_path == dir_path: |
| 176 | + continue |
| 177 | + if not list_files_args.recursive: |
| 178 | + dir_name = rel_path.replace(f"{dir_path}/", "") if dir_path else rel_path |
| 179 | + if not dir_name or should_skip_dir(dir_name, self.ignored_dirs): |
| 180 | + continue |
| 181 | + directories.append(dir_name) |
| 182 | + else: |
| 183 | + if should_skip_dir(rel_path, self.ignored_dirs): |
246 | 184 | continue |
| 185 | + directories.append(rel_path) |
247 | 186 |
|
248 | | - # For recursive listing, filter out the target directory itself |
249 | | - if rel_path and rel_path != dir_path: |
250 | | - if not list_files_args.recursive: |
251 | | - # For non-recursive, show only the directory name |
252 | | - if dir_path: |
253 | | - # Strip the parent directory part to get just the name |
254 | | - dir_name = rel_path.replace(f"{dir_path}/", "") |
255 | | - if dir_name: # Skip if empty after replacement |
256 | | - # Skip if the directory name should be ignored or starts with '.' |
257 | | - if dir_name.startswith('.') or any( |
258 | | - dir_name == ignored_dir or dir_name.startswith(f"{ignored_dir}/") |
259 | | - for ignored_dir in self.ignored_dirs |
260 | | - ): |
261 | | - continue |
262 | | - directories.append(dir_name) |
263 | | - else: |
264 | | - # Skip if the directory name should be ignored or starts with '.' |
265 | | - if rel_path.startswith('.') or any( |
266 | | - rel_path == ignored_dir or rel_path.startswith(f"{ignored_dir}/") |
267 | | - for ignored_dir in self.ignored_dirs |
268 | | - ): |
269 | | - continue |
270 | | - directories.append(rel_path) |
271 | | - else: |
272 | | - # For recursive, show full relative paths |
273 | | - # Skip if the directory path should be ignored or any component starts with '.' |
274 | | - path_components = rel_path.split('/') |
275 | | - should_skip = ( |
276 | | - rel_path.startswith('.') or |
277 | | - any(component.startswith('.') for component in path_components) or |
278 | | - any( |
279 | | - rel_path == ignored_dir or rel_path.startswith(f"{ignored_dir}/") |
280 | | - for ignored_dir in self.ignored_dirs |
281 | | - ) |
282 | | - ) |
283 | | - if should_skip: |
284 | | - continue |
285 | | - directories.append(rel_path) |
286 | | - |
287 | | - # Process file results |
288 | | - files = [] |
| 187 | + # Process file results (helpers for clarity) |
| 188 | + files: list[str] = [] |
289 | 189 | for line in files_output.strip().split("\n"): |
290 | | - if line.strip(): |
291 | | - # Convert path to relative format |
292 | | - if line.startswith("./"): |
293 | | - rel_path = line[2:] |
| 190 | + rel_path = normalize_rel(line.strip()) |
| 191 | + if not rel_path or should_skip_file(rel_path, self.ignored_dirs): |
| 192 | + continue |
| 193 | + |
| 194 | + if not list_files_args.recursive: |
| 195 | + if dir_path: |
| 196 | + remainder = rel_path.replace(f"{dir_path}/", "", 1) |
| 197 | + if remainder and "/" not in remainder: |
| 198 | + files.append(remainder) |
294 | 199 | else: |
295 | | - rel_path = line |
296 | | - |
297 | | - # Skip if the file is in an ignored directory or hidden directory, or if the file itself starts with a dot |
298 | | - path_components = rel_path.split('/') |
299 | | - file_name = path_components[-1] # Get the actual file name |
300 | | - should_skip_file = ( |
301 | | - any(f"/{ignored_dir}/" in f"/{rel_path}/" for ignored_dir in self.ignored_dirs) or |
302 | | - any(component.startswith('.') for component in path_components[:-1]) or # Check dirs |
303 | | - file_name.startswith('.') # Also check if the file name itself starts with a dot |
304 | | - ) |
305 | | - if should_skip_file: |
306 | | - continue |
307 | | - |
308 | | - if rel_path: |
309 | | - if not list_files_args.recursive: |
310 | | - # For non-recursive, show only the file name |
311 | | - if dir_path: |
312 | | - # Strip the parent directory part to get just the name |
313 | | - file_name = rel_path.replace(f"{dir_path}/", "") |
314 | | - if file_name: # Skip if empty after replacement |
315 | | - files.append(file_name) |
316 | | - else: |
317 | | - files.append(rel_path) |
318 | | - else: |
319 | | - # For recursive, show full relative paths |
| 200 | + if "/" not in rel_path: |
320 | 201 | files.append(rel_path) |
| 202 | + else: |
| 203 | + if dir_path: |
| 204 | + if rel_path.startswith(f"{dir_path}/"): |
| 205 | + files.append(rel_path[len(dir_path) + 1 :]) |
| 206 | + else: |
| 207 | + files.append(rel_path) |
321 | 208 |
|
322 | 209 | # Apply max_results limit, prioritizing directories over files |
323 | | - total_results = list_files_args.max_results |
324 | | - if len(directories) + len(files) > total_results: |
325 | | - # Prioritize directories first, then files with remaining slots |
326 | | - if len(directories) >= total_results: |
327 | | - # If we have enough directories to fill max_results, only show directories |
328 | | - directories = directories[:total_results] |
329 | | - files = [] |
330 | | - else: |
331 | | - # Show all directories, then files with remaining slots |
332 | | - remaining_slots = total_results - len(directories) |
333 | | - files = files[:remaining_slots] |
| 210 | + directories, files, _ = apply_limits(directories, files, list_files_args.max_results) |
334 | 211 |
|
335 | 212 | # Create a result object |
336 | 213 | result = { |
@@ -439,3 +316,129 @@ def get_evaluation_criteria(cls, trajectory_length) -> list[str]: |
439 | 316 | "Git Integration: Confirm if .gitignore patterns are respected when git is available in the workspace.", |
440 | 317 | "Directory Filtering: Check if specified directories are properly ignored in the output.", |
441 | 318 | ] |
| 319 | + |
| 320 | + |
def normalize_rel(path: str) -> str:
    """Return *path* without a single leading ``./``.

    Paths that do not start with ``./`` are returned unchanged, e.g.
    ``'./a/b'`` becomes ``'a/b'`` while ``'a/b'`` stays ``'a/b'``.
    """
    return path.removeprefix("./")
| 326 | + |
| 327 | + |
def is_hidden_segment(segments: list[str]) -> bool:
    """Report whether at least one path segment begins with a dot.

    Args:
        segments: Individual path components (for example the result of
            splitting a relative path on ``/``).

    Returns:
        ``True`` as soon as a segment starting with ``.`` is found,
        ``False`` when there is none (including for an empty list).
    """
    for name in segments:
        if name[:1] == ".":
            return True
    return False
| 330 | + |
| 331 | + |
def should_skip_dir(rel_path: str, ignored_dirs: list[str]) -> bool:
    """Decide whether a directory must be left out of the listing.

    A directory is skipped when its path is empty, when an ignored
    directory name occurs as a whole component anywhere in the path, or
    when any path component starts with a dot (hidden directory).

    Ignored names are matched at any depth so that this check agrees with
    ``should_skip_file`` and with the ``find -name ... -prune`` expression
    used to build the listing commands; previously only a leading match
    was detected, so ``src/node_modules`` slipped through while its files
    were filtered out.

    Args:
        rel_path: Directory path relative to the listing root, using ``/``
            as separator.
        ignored_dirs: Directory names (or sub-paths) to exclude.

    Returns:
        ``True`` if the directory should be omitted, ``False`` otherwise.
    """
    if not rel_path:
        return True

    # Surround both sides with slashes so that only whole components match
    # ("build" must not match "rebuild" or "build_tools").
    bracketed = f"/{rel_path}/"
    if any(f"/{ignored}/" in bracketed for ignored in ignored_dirs):
        return True

    # A leading dot on any component (including the first) marks the
    # directory as hidden.
    return any(part.startswith(".") for part in rel_path.split("/"))
| 343 | + |
| 344 | + |
def should_skip_file(rel_path: str, ignored_dirs: list[str]) -> bool:
    """Decide whether a file must be left out of the listing.

    A file is skipped when its path is empty, when an ignored directory
    name occurs as a whole component anywhere in the path, when the file
    name itself starts with a dot, or when any parent directory does.

    Args:
        rel_path: File path relative to the listing root, using ``/`` as
            separator.
        ignored_dirs: Directory names (or sub-paths) to exclude.

    Returns:
        ``True`` if the file should be omitted, ``False`` otherwise.
    """
    if not rel_path:
        return True

    *parent_dirs, file_name = rel_path.split("/")

    # Slash-wrapping both sides restricts matches to whole path components.
    wrapped = f"/{rel_path}/"
    for ignored_dir in ignored_dirs:
        if f"/{ignored_dir}/" in wrapped:
            return True

    if file_name.startswith("."):
        return True
    return any(parent.startswith(".") for parent in parent_dirs)
| 357 | + |
| 358 | + |
def apply_limits(directories: list[str], files: list[str], max_results: int) -> tuple[list[str], list[str], dict]:
    """Trim the listing to ``max_results`` entries, keeping directories first.

    Args:
        directories: Directory entries, already filtered.
        files: File entries, already filtered.
        max_results: Maximum combined number of entries to keep.

    Returns:
        A ``(directories, files, stats)`` tuple. ``stats`` holds the
        untrimmed counts under ``total_dirs`` / ``total_files`` and a
        ``results_limited`` flag telling whether anything was cut.
    """
    dir_count = len(directories)
    file_count = len(files)
    over_budget = dir_count + file_count > max_results

    stats = {
        "total_dirs": dir_count,
        "total_files": file_count,
        "results_limited": over_budget,
    }

    if not over_budget:
        return directories, files, stats

    if dir_count >= max_results:
        # Directories alone exhaust the budget: no room left for files.
        return directories[:max_results], [], stats

    # All directories fit; files share whatever budget is left.
    return directories, files[: max_results - dir_count], stats
| 383 | + |
| 384 | + |
def _ignored_prune_expr(ignored_dirs: list[str]) -> str:
    """Return a ``find`` expression that prunes the given directory names.

    The result has the form ``\\( -name A -o -name B \\) -prune -o`` and is
    meant to be placed in front of the tests that select entries to print.
    An empty string is returned when there is nothing to ignore.
    """
    if not ignored_dirs:
        return ""
    names = " -o ".join(f"-name {shlex.quote(d)}" for d in ignored_dirs)
    # Raw f-string: the backslashes must reach the shell literally so that
    # the parentheses are passed to find instead of being parsed by the
    # shell. A plain "\(" is an invalid escape sequence in Python.
    return rf"\( {names} \) -prune -o"


def _find_dirs_command(target_dir: str, recursive: bool, ignored_dirs: list[str]) -> str:
    """Build the ``find`` invocation (without sorting) that lists sub-directories.

    ``-mindepth 1`` keeps the target directory itself out of the output;
    ``-maxdepth 1`` is added for non-recursive listings. Both are placed
    before the prune expression because find treats them as global
    options and GNU find warns when they follow a test.
    """
    depth = "-mindepth 1" if recursive else "-maxdepth 1 -mindepth 1"
    base = f"find {shlex.quote(target_dir)} -xdev {depth}"
    prune = _ignored_prune_expr(ignored_dirs)
    if prune:
        # With "-prune -o" in play the default action no longer applies to
        # the right-hand side, so -print has to be spelled out.
        return f"{base} {prune} -type d -print"
    return f"{base} -type d"


def build_fs_commands(dir_path: str, target_dir: str, recursive: bool, ignored_dirs: list[str]) -> tuple[str, str]:
    """Build shell commands that list directories and files with plain ``find``.

    Args:
        dir_path: Not used here; accepted so the signature matches
            ``build_git_commands``.
        target_dir: Directory to search; shell-quoted before use.
        recursive: When false, only immediate children are listed.
        ignored_dirs: Directory names pruned from the directory listing.
            The file command does not prune them.

    Returns:
        ``(dirs_command, files_command)``, each piped through ``sort``.
    """
    td = shlex.quote(target_dir)
    dirs_cmd = f"{_find_dirs_command(target_dir, recursive, ignored_dirs)} | sort"
    depth = "" if recursive else " -maxdepth 1"
    files_cmd = f"find {td} -xdev{depth} -type f | sort"
    return dirs_cmd, files_cmd


def build_git_commands(dir_path: str, target_dir: str, recursive: bool, ignored_dirs: list[str]) -> tuple[str, str]:
    """Build shell commands that list directories and files while honouring git ignore rules.

    Files come from ``git ls-files`` (tracked plus untracked, minus the
    standard excludes). Directories come from ``find`` and are then
    filtered one by one through ``git check-ignore``.

    Args:
        dir_path: Directory to restrict the file listing to; an empty
            value lists everything git reports. It is passed to git as a
            quoted pathspec, so the shell never expands it.
        target_dir: Directory handed to ``find`` for the directory listing.
        recursive: When false, only immediate sub-directories are listed.
            The file command is the same in both modes.
        ignored_dirs: Directory names pruned from the directory listing.

    Returns:
        ``(dirs_command, files_command)``, each piped through ``sort``.
    """
    files_cmd = "git ls-files --cached --others --exclude-standard"
    if dir_path:
        # A directory pathspec already covers everything beneath it. The
        # former unquoted "<dir>/**" was expanded by the shell, which made
        # the result depend on shell globbing options.
        files_cmd = f"{files_cmd} -- {shlex.quote(dir_path)}"
    files_cmd = f"{files_cmd} | sort"

    base_find = _find_dirs_command(target_dir, recursive, ignored_dirs)

    # "IFS=" keeps leading/trailing whitespace of directory names intact.
    # stderr of git check-ignore is discarded so its messages cannot leak
    # into the listing.
    dirs_cmd = (
        f"{base_find} | while IFS= read -r dir; do "
        'git check-ignore "$dir/" >/dev/null 2>&1 || echo "$dir"; done | sort'
    )

    return dirs_cmd, files_cmd
| 440 | + |
| 441 | + |
def sort_breadth_first(paths):
    """Return *paths* as a new list ordered by depth, then alphabetically.

    Depth is the number of ``/`` characters in a path, so shallower
    entries come before deeper ones; ties are broken by plain string
    comparison.
    """

    def depth_then_name(path):
        return path.count("/"), path

    ordered = list(paths)
    ordered.sort(key=depth_then_name)
    return ordered
0 commit comments