@@ -27,15 +27,6 @@ def extract_paths_from_markdown(markdown_file: str, sdk_name: str) -> dict:
2727
2828 return paths
2929
def get_python_files(directory: str) -> List[str]:
    """Return the paths of all ``.py`` files found under *directory*.

    The tree is traversed with ``os.walk``, so sub-directories are searched
    recursively and results come back in ``os.walk`` order.

    Args:
        directory: Root directory to search.

    Returns:
        One path per matching file, each built by joining the containing
        directory (as reported by ``os.walk``) with the file name. The list
        is empty when nothing matches.
    """
    return [
        os.path.join(root, file_name)
        for root, _dirs, files in os.walk(directory)
        for file_name in files
        if file_name.endswith('.py')
    ]
38-
3930def extract_subtrees (tree : Tree ) -> List [Node ]:
4031 """Extract terminal subtrees from AST"""
4132 terminal = [
@@ -70,29 +61,23 @@ def extract_subtree(subtree_root):
7061 queue .extend (children )
7162 return all_subtrees
7263
def process_python_file(file_path: str, level: str):
    """Split one Python file into chunks and write each chunk to disk.

    The file is parsed with tree-sitter, ``extract_subtrees`` picks the
    subtrees to keep, and the source text of every subtree is written to
    ``./extracted_snippets/<level>/<stem>_chunk_<i>_<node type>.py``.

    Args:
        file_path: Path of the Python source file to split.
        level: Name of the sub-directory under ``./extracted_snippets``
            that receives the chunk files (created if missing).

    Any exception raised while reading, parsing or writing is caught and
    reported with ``print``; nothing is re-raised.
    """
    try:
        PY_LANGUAGE = Language(tree_sitter_python.language())
        parser = Parser()
        parser.language = PY_LANGUAGE

        code = Path(file_path).read_text()
        # Keep the encoded form: the parser consumes bytes and the node
        # offsets below index into these bytes, not into the str.
        source_bytes = bytes(code, "utf8")
        tree = parser.parse(source_bytes)
        subtrees = extract_subtrees(tree)

        os.makedirs(f"./extracted_snippets/{level}", exist_ok=True)
        stem = Path(file_path).stem

        for i, subtree in enumerate(subtrees):
            # start_byte/end_byte are byte offsets; slicing the str with
            # them cuts the wrong text once the file has non-ASCII chars.
            chunk_text = source_bytes[subtree.start_byte:subtree.end_byte].decode("utf8")
            output_file = f"./extracted_snippets/{level}/{stem}_chunk_{i}_{subtree.type}.py"

            with open(output_file, 'w') as f:
                f.write(chunk_text)
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
9883
0 commit comments