|
1 | 1 | import os |
2 | 2 | import yaml |
3 | | -import fnmatch |
4 | 3 | from pocketflow import Node, BatchNode |
5 | 4 | from utils.crawl_github_files import crawl_github_files |
6 | | -from utils.call_llm import call_llm # Assuming you have this utility |
7 | | - |
def crawl_local_files(directory, include_patterns=None, exclude_patterns=None, max_file_size=None, use_relative_paths=True):
    """
    Crawl files in a local directory with similar interface as crawl_github_files.

    Every file found under ``directory`` is filtered by the include/exclude
    patterns and the size limit, then read as UTF-8 text. Files that cannot
    be inspected or decoded are skipped with a printed warning instead of
    aborting the crawl.

    Args:
        directory (str): Path to local directory
        include_patterns (set): fnmatch-style patterns to include
            (e.g. {"*.py", "*.js"}). Empty/None means include everything.
        exclude_patterns (set): fnmatch-style patterns to exclude
            (e.g. {"tests/*"}). Empty/None means exclude nothing.
        max_file_size (int): Maximum file size in bytes. Empty/None/0 means
            no limit.
        use_relative_paths (bool): Whether the returned keys are relative to
            ``directory`` (True) or are the joined walk paths (False).

    Returns:
        dict: {"files": {filepath: content}}

    Raises:
        ValueError: If ``directory`` is not an existing directory.
    """
    if not os.path.isdir(directory):
        raise ValueError(f"Directory does not exist: {directory}")

    files_dict = {}

    for root, _, files in os.walk(directory):
        for filename in files:
            filepath = os.path.join(root, filename)
            relpath = os.path.relpath(filepath, directory)
            key = relpath if use_relative_paths else filepath

            # Patterns such as "tests/*" are written relative to `directory`,
            # so the relative path is always tested. When full paths are
            # returned, the full path is tested as well so callers that pass
            # full-path patterns keep working.
            candidates = (relpath,) if use_relative_paths else (relpath, filepath)

            if include_patterns and not _matches_any(candidates, include_patterns):
                continue
            if exclude_patterns and _matches_any(candidates, exclude_patterns):
                continue

            try:
                # getsize lives inside the try: a broken symlink or a file
                # removed during the walk raises OSError and must not abort
                # the whole crawl.
                if max_file_size and os.path.getsize(filepath) > max_file_size:
                    continue
                with open(filepath, 'r', encoding='utf-8') as f:
                    files_dict[key] = f.read()
            except (OSError, UnicodeError) as e:
                print(f"Warning: Could not read file {filepath}: {e}")

    return {"files": files_dict}


def _matches_any(candidates, patterns):
    """Return True if any candidate path matches any fnmatch-style pattern."""
    return any(
        fnmatch.fnmatch(candidate, pattern)
        for pattern in patterns
        for candidate in candidates
    )
| 5 | +from utils.call_llm import call_llm |
| 6 | +from utils.crawl_local_files import crawl_local_files |
70 | 7 |
|
71 | 8 | # Helper to create context from files, respecting limits (basic example) |
72 | 9 | def create_llm_context(files_data): |
|
0 commit comments