Skip to content

Commit 98fa9fc

Browse files
committed
fix: improve file filtering, add new utility
- Improved the speed of file filtering in `crawl_local_files.py` with folder-level exclusion - Added `fix_yaml.py` utility for YAML indentation fixes - Updated `nodes.py` to support up to 20 core abstractions - Added an option to disable the LLM response cache.
1 parent 4da9374 commit 98fa9fc

File tree

8 files changed

+240
-82
lines changed

8 files changed

+240
-82
lines changed

.env.sample

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
GEMINI_PROJECT_ID=<GEMINI_PROJECT_ID>
2-
GITHUB_TOKEN=<GITHUB_TOKEN>
2+
GEMINI_API_KEY=<GEMINI_API_KEY>
3+
GITHUB_TOKEN=<GITHUB_TOKEN>
4+
OPENROUTER_API_KEY = <OPENROUTER_API_KEY>
5+
OPENROUTER_MODEL = <OPENROUTER_MODEL>

.gitignore

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,11 @@ coverage/
9999
llm_cache.json
100100

101101
# Output files
102-
output/
102+
output/
103+
104+
# uv manage
105+
pyproject.toml
106+
uv.lock
107+
108+
docs/*.pdf
109+
docs/design-cn.md

main.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
}
1515

1616
DEFAULT_EXCLUDE_PATTERNS = {
17+
"assets/*", "data/*", "examples/*", "images/*", "public/*", "static/*", "temp/*",
18+
"docs/*",
1719
"venv/*", ".venv/*", "*test*", "tests/*", "docs/*", "examples/*", "v1/*",
18-
"dist/*", "build/*", "experimental/*", "deprecated/*",
20+
"dist/*", "build/*", "experimental/*", "deprecated/*", "misc/*",
1921
"legacy/*", ".git/*", ".github/*", ".next/*", ".vscode/*", "obj/*", "bin/*", "node_modules/*", "*.log"
2022
}
2123

@@ -36,6 +38,8 @@ def main():
3638
parser.add_argument("-s", "--max-size", type=int, default=100000, help="Maximum file size in bytes (default: 100000, about 100KB).")
3739
# Add language parameter for multi-language support
3840
parser.add_argument("--language", default="english", help="Language for the generated tutorial (default: english)")
41+
# Add use_cache parameter to control LLM caching
42+
parser.add_argument("--no-cache", action="store_true", help="Disable LLM response caching (default: caching enabled)")
3943

4044
args = parser.parse_args()
4145

@@ -61,6 +65,9 @@ def main():
6165

6266
# Add language for multi-language support
6367
"language": args.language,
68+
69+
# Add use_cache flag (inverse of no-cache flag)
70+
"use_cache": not args.no_cache,
6471

6572
# Outputs will be populated by the nodes
6673
"files": [],
@@ -73,6 +80,7 @@ def main():
7380

7481
# Display starting message with repository/directory and language
7582
print(f"Starting tutorial generation for: {args.repo or args.dir} in {args.language.capitalize()} language")
83+
print(f"LLM caching: {'Disabled' if args.no_cache else 'Enabled'}")
7684

7785
# Create the flow instance
7886
tutorial_flow = create_tutorial_flow()

nodes.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
import os
2+
import re
23
import yaml
34
from pocketflow import Node, BatchNode
45
from utils.crawl_github_files import crawl_github_files
56
from utils.call_llm import call_llm
67
from utils.crawl_local_files import crawl_local_files
8+
from utils.fix_yaml import add_indentation
9+
710

811
# Helper to get content for specific file indices
912
def get_content_for_indices(files_data, indices):
@@ -79,6 +82,7 @@ def prep(self, shared):
7982
files_data = shared["files"]
8083
project_name = shared["project_name"] # Get project name
8184
language = shared.get("language", "english") # Get language
85+
use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True
8286

8387
# Helper to create context from files, respecting limits (basic example)
8488
def create_llm_context(files_data):
@@ -94,10 +98,10 @@ def create_llm_context(files_data):
9498
context, file_info = create_llm_context(files_data)
9599
# Format file info for the prompt (comment is just a hint for LLM)
96100
file_listing_for_prompt = "\n".join([f"- {idx} # {path}" for idx, path in file_info])
97-
return context, file_listing_for_prompt, len(files_data), project_name, language # Return language
101+
return context, file_listing_for_prompt, len(files_data), project_name, language, use_cache # Return use_cache
98102

99103
def exec(self, prep_res):
100-
context, file_listing_for_prompt, file_count, project_name, language = prep_res # Unpack project name and language
104+
context, file_listing_for_prompt, file_count, project_name, language, use_cache = prep_res # Unpack use_cache
101105
print(f"Identifying abstractions using LLM...")
102106

103107
# Add language instruction and hints only if not English
@@ -117,7 +121,7 @@ def exec(self, prep_res):
117121
{context}
118122
119123
{language_instruction}Analyze the codebase context.
120-
Identify the top 5-10 core most important abstractions to help those new to the codebase.
124+
Identify the top 5-20 core most important abstractions to help those new to the codebase.
121125
122126
For each abstraction, provide:
123127
1. A concise `name`{name_lang_hint}.
@@ -144,12 +148,14 @@ def exec(self, prep_res):
144148
Another core concept, similar to a blueprint for objects.{desc_lang_hint}
145149
file_indices:
146150
- 5 # path/to/another.js
147-
# ... up to 10 abstractions
151+
# ... up to 20 abstractions
148152
```"""
149-
response = call_llm(prompt)
153+
response = call_llm(prompt, use_cache=use_cache) # Pass use_cache parameter
150154

151155
# --- Validation ---
152156
yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
157+
# add whitespace to fix llm generation error(except -)
158+
yaml_str = add_indentation(yaml_str)
153159
abstractions = yaml.safe_load(yaml_str)
154160

155161
if not isinstance(abstractions, list):
@@ -203,6 +209,7 @@ def prep(self, shared):
203209
files_data = shared["files"]
204210
project_name = shared["project_name"] # Get project name
205211
language = shared.get("language", "english") # Get language
212+
use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True
206213

207214
# Create context with abstraction names, indices, descriptions, and relevant file snippets
208215
context = "Identified Abstractions:\n"
@@ -230,10 +237,10 @@ def prep(self, shared):
230237
)
231238
context += file_context_str
232239

233-
return context, "\n".join(abstraction_info_for_prompt), project_name, language # Return language
240+
return context, "\n".join(abstraction_info_for_prompt), project_name, language, use_cache # Return use_cache
234241

235242
def exec(self, prep_res):
236-
context, abstraction_listing, project_name, language = prep_res # Unpack project name and language
243+
context, abstraction_listing, project_name, language, use_cache = prep_res # Unpack use_cache
237244
print(f"Analyzing relationships using LLM...")
238245

239246
# Add language instruction and hints only if not English
@@ -339,6 +346,7 @@ def prep(self, shared):
339346
relationships = shared["relationships"] # Summary/label might be translated
340347
project_name = shared["project_name"] # Get project name
341348
language = shared.get("language", "english") # Get language
349+
use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True
342350

343351
# Prepare context for the LLM
344352
abstraction_info_for_prompt = []
@@ -363,10 +371,10 @@ def prep(self, shared):
363371
if language.lower() != "english":
364372
list_lang_note = f" (Names might be in {language.capitalize()})"
365373

366-
return abstraction_listing, context, len(abstractions), project_name, list_lang_note
374+
return abstraction_listing, context, len(abstractions), project_name, list_lang_note, use_cache # Return use_cache
367375

368376
def exec(self, prep_res):
369-
abstraction_listing, context, num_abstractions, project_name, list_lang_note = prep_res
377+
abstraction_listing, context, num_abstractions, project_name, list_lang_note, use_cache = prep_res # Unpack use_cache
370378
print("Determining chapter order using LLM...")
371379
# No language variation needed here in prompt instructions, just ordering based on structure
372380
# The input names might be translated, hence the note.
@@ -437,10 +445,12 @@ def post(self, shared, prep_res, exec_res):
437445
class WriteChapters(BatchNode):
438446
def prep(self, shared):
439447
chapter_order = shared["chapter_order"] # List of indices
440-
abstractions = shared["abstractions"] # List of dicts, name/desc potentially translated
441-
files_data = shared["files"]
442-
language = shared.get("language", "english") # Get language
443-
448+
abstractions = shared["abstractions"] # List of {"name": str, "description": str, "files": [int]}
449+
files_data = shared["files"] # List of (path, content) tuples
450+
project_name = shared["project_name"]
451+
language = shared.get("language", "english")
452+
use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True
453+
444454
# Get already written chapters to provide context
445455
# We store them temporarily during the batch run, not in shared memory yet
446456
# The 'previous_chapters_summary' will be built progressively in the exec context

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ requests>=2.28.0
44
gitpython>=3.1.0
55
google-cloud-aiplatform>=1.25.0
66
google-genai>=1.9.0
7-
python-dotenv>=1.0.0
7+
python-dotenv>=1.0.0

0 commit comments

Comments
 (0)