11# %%
22
3- import os
4- import json
53import argparse
4+ import json
65from pathlib import Path
7- from typing import Dict , List
6+ from typing import Dict
7+
88
def collect_python_files(directory: str) -> Dict[str, str]:
    """
    Collect paths and contents of all Python files in the specified directory.

    Args:
        directory: Directory path to scan (searched recursively)

    Returns
    -------
    Dictionary mapping "<base_dir>/<relative_path>" keys to file contents
    """
    python_files = {}
    base_dir = Path(directory).name

    # Use Path object for directory traversal
    for file_path in Path(directory).rglob("*.py"):
        # Skip files under a __pycache__ directory. Checking path
        # components (not a substring of the whole path) avoids false
        # positives on names that merely contain "__pycache__".
        if "__pycache__" in file_path.parts:
            continue

        try:
            # Read file content
            content = file_path.read_text(encoding="utf-8")
        except Exception as e:
            # Best-effort collection: report unreadable files and keep going.
            print(f"Error reading file {file_path}: {e!s}")
            continue

        # Convert to relative path and include base directory
        relative_path = str(file_path.relative_to(directory))
        full_path = f"{base_dir}/{relative_path}"
        python_files[full_path] = content

    return python_files
4243
def group_by_directory(
    files: Dict[str, str], max_files_per_group: int = 20
) -> Dict[str, Dict[str, str]]:
    """
    Group files by their directory structure.

    Files from the same directory are never split across groups, so a
    single directory with more than ``max_files_per_group`` files yields
    a group larger than the nominal limit.

    Args:
        files: Dictionary of file paths and contents
        max_files_per_group: Maximum number of files per group

    Returns
    -------
    Dictionary mapping "group_<n>" names to {file_path: content} dicts
    """
    # First group by directory.
    # NOTE(review): this bucketing loop was hidden in the diff hunk gap;
    # reconstructed as "key by the file's parent directory" — confirm
    # against the original.
    dir_groups: Dict[str, Dict[str, str]] = {}
    for file_path, content in files.items():
        dir_name = str(Path(file_path).parent)
        dir_groups.setdefault(dir_name, {})[file_path] = content

    # Pack whole directories into groups of at most max_files_per_group.
    final_groups = {}
    current_group: Dict[str, str] = {}
    current_group_size = 0
    group_index = 1

    for dir_files in dir_groups.values():
        # If adding this directory's files would exceed the limit,
        # flush the current group first — but never emit an empty group.
        if (
            current_group_size + len(dir_files) > max_files_per_group
            and current_group_size > 0
        ):
            final_groups[f"group_{group_index}"] = current_group
            current_group = {}
            current_group_size = 0
            group_index += 1

        # Add files to current group
        current_group.update(dir_files)
        current_group_size += len(dir_files)

    # Don't forget to save the last group
    if current_group:
        final_groups[f"group_{group_index}"] = current_group

    return final_groups
8694
95+
def save_grouped_json(groups: Dict[str, Dict[str, str]], output_base: str) -> None:
    """
    Save each group to a separate JSON file.

    Output files are named "<stem>_<group_name>.json" and written next to
    ``output_base``. The parent directory is created if it does not exist.

    Args:
        groups: Grouped files dictionary
        output_base: Base name for output files
    """
    output_base = Path(output_base)
    base_name = output_base.stem
    parent_dir = output_base.parent
    # The default base lives under build/, which may not exist on a fresh
    # checkout — without this, open(..., "w") raises FileNotFoundError.
    parent_dir.mkdir(parents=True, exist_ok=True)

    for group_name, group_files in groups.items():
        output_file = parent_dir / f"{base_name}_{group_name}.json"
        wrapper = {
            "project_files": {
                "description": f"Python source files collection - {group_name}",
                "base_directory": Path().absolute().name,
                "files": group_files,
            }
        }

        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(wrapper, f, ensure_ascii=False, indent=2)
        print(f"Saved {len(group_files)} files to {output_file}")
112121
122+
def main():
    """Parse CLI options, collect Python files, group them, and write JSON."""
    parser = argparse.ArgumentParser(
        description="Collect Python files content into JSON for context"
    )
    parser.add_argument(
        "-d",
        "--directory",
        default="../../../dpti/",
        # Help text now reflects the actual default (it claimed "current
        # directory" while the default was ../../../dpti/).
        help="Directory to scan (default: ../../../dpti/)",
    )
    parser.add_argument(
        "-o",
        "--output",
        default="build/python_files_context.json",
        help="Output JSON file base name (default: build/python_files_context.json)",
    )
    parser.add_argument(
        "-n",
        "--num-files",
        type=int,
        default=20,
        help="Maximum number of files per group (default: 20)",
    )

    args = parser.parse_args()

    # Collect Python files
    python_files = collect_python_files(args.directory)

    # Group files
    groups = group_by_directory(python_files, args.num_files)

    # Save grouped files
    save_grouped_json(groups, args.output)

    print(f"\nTotal files collected: {len(python_files)}")
    print(f"Split into {len(groups)} groups")
147160
161+
if __name__ == "__main__":
    # Script entry point: run the collect/group/save pipeline.
    main()