Skip to content

Commit 86702ea

Browse files
author
Tejas Ganesh Naik
committed
clean up premium-processor
1 parent 12fed71 commit 86702ea

File tree

1 file changed

+16
-31
lines changed

1 file changed

+16
-31
lines changed

premium-processor.py

Lines changed: 16 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,6 @@ def extract_paths_from_markdown(markdown_file: str, sdk_name: str) -> dict:
2727

2828
return paths
2929

30-
def get_python_files(directory: str) -> List[str]:
31-
"""Get all Python files in directory recursively"""
32-
python_files = []
33-
for root, dirs, files in os.walk(directory):
34-
for file in files:
35-
if file.endswith('.py'):
36-
python_files.append(os.path.join(root, file))
37-
return python_files
38-
3930
def extract_subtrees(tree: Tree) -> List[Node]:
4031
"""Extract terminal subtrees from AST"""
4132
terminal = [
@@ -70,29 +61,23 @@ def extract_subtree(subtree_root):
7061
queue.extend(children)
7162
return all_subtrees
7263

73-
def process_python_file(file_path: str, s3_client, bucket_name: str, level: str):
74-
"""Process a single Python file and upload chunks to S3"""
75-
try:
76-
PY_LANGUAGE = Language(tree_sitter_python.language())
77-
parser = Parser()
78-
parser.language = PY_LANGUAGE
79-
80-
code = Path(file_path).read_text()
81-
tree = parser.parse(bytes(code, "utf8"))
82-
83-
subtrees = extract_subtrees(tree)
64+
def process_python_file(file_path: str, level: str):
65+
PY_LANGUAGE = Language(tree_sitter_python.language())
66+
parser = Parser()
67+
parser.language = PY_LANGUAGE
68+
69+
code = Path(file_path).read_text()
70+
tree = parser.parse(bytes(code, "utf8"))
71+
subtrees = extract_subtrees(tree)
72+
73+
os.makedirs(f"./extracted_snippets/{level}", exist_ok=True)
74+
75+
for i, subtree in enumerate(subtrees):
76+
chunk_text = code[subtree.start_byte:subtree.end_byte]
77+
output_file = f"./extracted_snippets/{level}/{Path(file_path).stem}_chunk_{i}_{subtree.type}.py"
8478

85-
for i, subtree in enumerate(subtrees):
86-
chunk_text = code[subtree.start_byte:subtree.end_byte]
87-
file_key = f"{level}/{Path(file_path).stem}_chunk_{i}_{subtree.type}.py"
88-
89-
s3_client.put_object(
90-
Bucket=bucket_name,
91-
Key=file_key,
92-
Body=chunk_text,
93-
ContentType='text/plain'
94-
)
95-
79+
with open(output_file, 'w') as f:
80+
f.write(chunk_text)
9681
except Exception as e:
9782
print(f"Error processing {file_path}: {e}")
9883

0 commit comments

Comments
 (0)