11import os
2+ import re
23import yaml
34from pocketflow import Node , BatchNode
45from utils .crawl_github_files import crawl_github_files
56from utils .call_llm import call_llm
67from utils .crawl_local_files import crawl_local_files
8+ from utils .fix_yaml import add_indentation
9+
710
811# Helper to get content for specific file indices
912def get_content_for_indices (files_data , indices ):
@@ -79,6 +82,7 @@ def prep(self, shared):
7982 files_data = shared ["files" ]
8083 project_name = shared ["project_name" ] # Get project name
8184 language = shared .get ("language" , "english" ) # Get language
85+ use_cache = shared .get ("use_cache" , True ) # Get use_cache flag, default to True
8286
8387 # Helper to create context from files, respecting limits (basic example)
8488 def create_llm_context (files_data ):
@@ -94,10 +98,10 @@ def create_llm_context(files_data):
9498 context , file_info = create_llm_context (files_data )
9599 # Format file info for the prompt (comment is just a hint for LLM)
96100 file_listing_for_prompt = "\n " .join ([f"- { idx } # { path } " for idx , path in file_info ])
97- return context , file_listing_for_prompt , len (files_data ), project_name , language # Return language
101+ return context , file_listing_for_prompt , len (files_data ), project_name , language , use_cache # Return use_cache
98102
99103 def exec (self , prep_res ):
100- context , file_listing_for_prompt , file_count , project_name , language = prep_res # Unpack project name and language
104+ context , file_listing_for_prompt , file_count , project_name , language , use_cache = prep_res # Unpack use_cache
101105 print (f"Identifying abstractions using LLM..." )
102106
103107 # Add language instruction and hints only if not English
@@ -117,7 +121,7 @@ def exec(self, prep_res):
117121{ context }
118122
119123{ language_instruction } Analyze the codebase context.
120- Identify the top 5-10 core most important abstractions to help those new to the codebase.
124+ Identify the top 5-20 core most important abstractions to help those new to the codebase.
121125
122126For each abstraction, provide:
1231271. A concise `name`{ name_lang_hint } .
@@ -144,12 +148,14 @@ def exec(self, prep_res):
144148 Another core concept, similar to a blueprint for objects.{ desc_lang_hint }
145149 file_indices:
146150 - 5 # path/to/another.js
147- # ... up to 10 abstractions
151+ # ... up to 20 abstractions
148152```"""
149- response = call_llm (prompt )
153+ response = call_llm (prompt , use_cache = use_cache ) # Pass use_cache parameter
150154
151155 # --- Validation ---
152156 yaml_str = response .strip ().split ("```yaml" )[1 ].split ("```" )[0 ].strip ()
157+ # Add whitespace to fix LLM-generated YAML indentation errors (except for "-")
158+ yaml_str = add_indentation (yaml_str )
153159 abstractions = yaml .safe_load (yaml_str )
154160
155161 if not isinstance (abstractions , list ):
@@ -203,6 +209,7 @@ def prep(self, shared):
203209 files_data = shared ["files" ]
204210 project_name = shared ["project_name" ] # Get project name
205211 language = shared .get ("language" , "english" ) # Get language
212+ use_cache = shared .get ("use_cache" , True ) # Get use_cache flag, default to True
206213
207214 # Create context with abstraction names, indices, descriptions, and relevant file snippets
208215 context = "Identified Abstractions:\n "
@@ -230,10 +237,10 @@ def prep(self, shared):
230237 )
231238 context += file_context_str
232239
233- return context , "\n " .join (abstraction_info_for_prompt ), project_name , language # Return language
240+ return context , "\n " .join (abstraction_info_for_prompt ), project_name , language , use_cache # Return use_cache
234241
235242 def exec (self , prep_res ):
236- context , abstraction_listing , project_name , language = prep_res # Unpack project name and language
243+ context , abstraction_listing , project_name , language , use_cache = prep_res # Unpack use_cache
237244 print (f"Analyzing relationships using LLM..." )
238245
239246 # Add language instruction and hints only if not English
@@ -339,6 +346,7 @@ def prep(self, shared):
339346 relationships = shared ["relationships" ] # Summary/label might be translated
340347 project_name = shared ["project_name" ] # Get project name
341348 language = shared .get ("language" , "english" ) # Get language
349+ use_cache = shared .get ("use_cache" , True ) # Get use_cache flag, default to True
342350
343351 # Prepare context for the LLM
344352 abstraction_info_for_prompt = []
@@ -363,10 +371,10 @@ def prep(self, shared):
363371 if language .lower () != "english" :
364372 list_lang_note = f" (Names might be in { language .capitalize ()} )"
365373
366- return abstraction_listing , context , len (abstractions ), project_name , list_lang_note
374+ return abstraction_listing , context , len (abstractions ), project_name , list_lang_note , use_cache # Return use_cache
367375
368376 def exec (self , prep_res ):
369- abstraction_listing , context , num_abstractions , project_name , list_lang_note = prep_res
377+ abstraction_listing , context , num_abstractions , project_name , list_lang_note , use_cache = prep_res # Unpack use_cache
370378 print ("Determining chapter order using LLM..." )
371379 # No language variation needed here in prompt instructions, just ordering based on structure
372380 # The input names might be translated, hence the note.
@@ -437,10 +445,12 @@ def post(self, shared, prep_res, exec_res):
437445class WriteChapters (BatchNode ):
438446 def prep (self , shared ):
439447 chapter_order = shared ["chapter_order" ] # List of indices
440- abstractions = shared ["abstractions" ] # List of dicts, name/desc potentially translated
441- files_data = shared ["files" ]
442- language = shared .get ("language" , "english" ) # Get language
443-
448+ abstractions = shared ["abstractions" ] # List of {"name": str, "description": str, "files": [int]}
449+ files_data = shared ["files" ] # List of (path, content) tuples
450+ project_name = shared ["project_name" ]
451+ language = shared .get ("language" , "english" )
452+ use_cache = shared .get ("use_cache" , True ) # Get use_cache flag, default to True
453+
444454 # Get already written chapters to provide context
445455 # We store them temporarily during the batch run, not in shared memory yet
446456 # The 'previous_chapters_summary' will be built progressively in the exec context
0 commit comments