LightChen233 · znnnn1234 · Dec 11, 2025 · Dec 11, 2025 · Dec 16, 2025
diff --git a/README.md b/README.md
@@ -122,7 +122,7 @@ You also need to download the [DocLayout-YOLO](https://huggingface.co/juliozhao/
 After generation, use the evaluation script to assess the quality of the posts in your output directory.
 
 ```bash
-chmod +x scripts/run_eval.sh
-./scripts/run_eval.sh
+chmod +x script/run_eval.sh
+./script/run_eval.sh
 ```
 
@@ -131,8 +131,8 @@ chmod +x scripts/run_eval.sh
 Finally, run the calculation script to aggregate the raw evaluation data into a formatted results table.
 
 ```bash
-chmod +x scripts/calc_results.sh
-./scripts/calc_results.sh
+chmod +x script/calc_results.sh
+./script/calc_results.sh
 ```
 
 ## 🕹️ 6. PRAgent Generation
@@ -170,7 +170,7 @@ If you have run ``download_and_reconstruct.py``, you can use the ``papers`` fold
 
 Next, configure and run the generation script.
 ```bash
-chmod +x scripts/run_pragent.sh
+chmod +x script/run_generation.sh
 ./script/run_generation.sh
 ```
 

diff --git a/app.py b/app.py
@@ -19,6 +19,8 @@
 import mimetypes
 import re
 
+YOLO_MODEL_PATH = "DocLayout-YOLO-DocStructBench/doclayout_yolo_docstructbench_imgsz1024.pt"
+
 FORMAT_PROMPT_TEMPLATE = '''
 You are an expert in structuring social media content. Your task is to convert a post written in Markdown format into a structured JSON format. The JSON structure depends on the target platform.
 
@@ -66,6 +68,40 @@
 }}
 '''
 
+TWITTER_INSTRUCTIONS_CHINESE = '''
+将内容转换为表示Twitter线程的JSON数组。数组中的每个元素都是一条推文对象。
+- 每条推文对象必须有一个"text"键。文本应该是纯文本，不包含任何Markdown格式（如`*`、`#`、`[]()` 等）
+- 如果推文关联有图片，添加"image_index"键，值为提供的Asset list中对应的从零开始的索引。例如，如果使用了第一张图`![...](img_0.png)`，其索引为0。
+- 确保逻辑流畅连贯。如有必要，将文本分成多条推文。
+
+**Asset list（仅供参考）：**
+{asset_list}
+
+**JSON输出格式：**
+[
+  {{ "text": "第一条推文的文本", "image_index": 0 }},
+  {{ "text": "第二条推文的文本" }},
+  {{ "text": "第三条推文的文本", "image_index": 1 }}
+]
+'''
+
+XIAOHONGSHU_INSTRUCTIONS_CHINESE = '''
+将内容转换为小红书帖子的单个JSON对象。
+- JSON对象必须有"title"键。从Markdown中提取主标题（通常是第一个H1/H2标题）。标题应该是纯文本。
+- JSON对象必须有"body"键，包含主要文本内容和表情符号。正文应该是纯文本，不包含任何Markdown格式（如`*`、`#`、`[]()` 等）
+- JSON对象必须有"image_indices"键，值为一个数组，包含帖子中使用的所有图片索引，按出现顺序排列。
+
+**Asset list（仅供参考）：**
+{asset_list}
+
+**JSON输出格式：**
+{{
+  "title": "你的吸引人的标题",
+  "body": "帖子的完整正文内容...",
+  "image_indices": [0, 1, 2, 3]
+}}
+'''
+
 def image_to_base64(path: str) -> str:
 
     try:
@@ -97,12 +133,13 @@ async def format_post_for_display(
     assets: Optional[List[Dict]],
     platform: str,
     client,
-    model: str
+    model: str,
+    language: str = 'en'
 ) -> Optional[Dict]:
     if platform == 'twitter':
-        instructions = TWITTER_INSTRUCTIONS
+        instructions = TWITTER_INSTRUCTIONS_CHINESE if language == 'zh' else TWITTER_INSTRUCTIONS
     elif platform == 'xiaohongshu':
-        instructions = XIAOHONGSHU_INSTRUCTIONS
+        instructions = XIAOHONGSHU_INSTRUCTIONS_CHINESE if language == 'zh' else XIAOHONGSHU_INSTRUCTIONS
     else:
         return None
 
@@ -230,7 +267,7 @@ async def process_pdf(
         progress(0.3, desc="Step 2/5: Extracting figures from PDF...")
         extraction_work_dir = work_dir / "figure_extraction"
         extraction_work_dir.mkdir()
-        paired_dir = await run_figure_extraction(str(pdf_path), str(extraction_work_dir), progress=progress)
+        paired_dir = await run_figure_extraction(str(pdf_path), str(extraction_work_dir), model_path=YOLO_MODEL_PATH, progress=progress)
         if not paired_dir or not any(Path(paired_dir).iterdir()):
             raise gr.Error("Failed to extract any figures from the PDF.")
 
@@ -279,7 +316,7 @@ async def process_pdf(
         progress(0.9, desc="Step 5/5: Formatting for rich display...")
         async with setup_client(text_api_key, base_url) as client:
             structured_data = await format_post_for_display(
-                final_post_md, assets_info, platform, client, text_model
+                final_post_md, assets_info, platform, client, text_model, language
             )
         if not structured_data:
             raise gr.Error("Failed to format post for display.")

diff --git a/pragent/backend/agents.py b/pragent/backend/agents.py
@@ -18,7 +18,7 @@ def _prepare_extra_body(model_name: str, disable_qwen_thinking: bool) -> Optiona
 async def setup_client(api_key: str, base_url: str) -> AsyncIterator[AsyncOpenAI]:
     """Use an asynchronous context manager to create and properly destroy the API client."""
     client = None
-    if not api_key or "sk-" not in api_key:
+    if not api_key :
         tqdm.write("[!] Error: API Key is invalid or not set.")
         yield None
         return

diff --git a/pragent/backend/figure_table_pipeline.py b/pragent/backend/figure_table_pipeline.py
@@ -7,7 +7,11 @@
 from pragent.backend.yolo import extract_and_save_layout_components
 from tqdm.asyncio import tqdm
 
-def run_figure_extraction(pdf_path: str, base_work_dir: str, model_path: str) -> str:
+import asyncio
+from typing import Optional, Callable
+
+async def run_figure_extraction(pdf_path: str, base_work_dir: str, model_path: Optional[str] = None, progress: Optional[Callable] = None, conf_threshold: float = 0.8) -> str:
+#def run_figure_extraction(pdf_path: str, base_work_dir: str, model_path: str) -> str:
     """
     A complete workflow for extracting and pairing charts from a PDF.
     This is the main function called by app.py.
@@ -16,7 +20,9 @@ def run_figure_extraction(pdf_path: str, base_work_dir: str, model_path: str) ->
         pdf_path (str): The path to the PDF uploaded by the user.
         base_work_dir (str): The temporary working directory for this session.
         model_path (str): The path to the YOLO model for document layout analysis.
-
+        progress (Optional[Callable]): A progress callback function.
+        conf_threshold (float): The confidence threshold for YOLO model detection (default: 0.8).
+
     Returns:
         str: The directory path of the final pairing result, or None if it fails.
     """