Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ You also need to download the [DocLayout-YOLO](https://huggingface.co/juliozhao/
After generation, use the evaluation script to assess the quality of the posts in your output directory.

```bash
chmod +x scripts/run_eval.sh
chmod +x script/run_eval.sh
./script/run_eval.sh
```

Expand All @@ -131,8 +131,8 @@ chmod +x scripts/run_eval.sh
Finally, run the calculation script to aggregate the raw evaluation data into a formatted results table.

```bash
chmod +x scripts/calc_results.sh
./scripts/calc_results.sh
chmod +x script/calc_results.sh
./script/calc_results.sh
```

## 🕹️ 6. PRAgent Generation
Expand Down Expand Up @@ -170,7 +170,7 @@ If you have run ``download_and_reconstruct.py``, you can use the ``papers`` fold

Next, configure and run the generation script.
```bash
chmod +x scripts/run_pragent.sh
chmod +x script/run_generation.sh
./script/run_generation.sh
```

Expand Down
47 changes: 42 additions & 5 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import mimetypes
import re

# Path to the DocLayout-YOLO weights file; process_pdf passes it to
# run_figure_extraction() as `model_path`.
# NOTE(review): this is a relative path, so it presumably resolves against
# the directory the app is launched from -- confirm.
YOLO_MODEL_PATH = "DocLayout-YOLO-DocStructBench/doclayout_yolo_docstructbench_imgsz1024.pt"

FORMAT_PROMPT_TEMPLATE = '''
You are an expert in structuring social media content. Your task is to convert a post written in Markdown format into a structured JSON format. The JSON structure depends on the target platform.

Expand Down Expand Up @@ -66,6 +68,40 @@
}}
'''

# Chinese-language instructions for converting a Markdown post into a JSON
# array of tweet objects ("text" plus an optional "image_index").
# format_post_for_display() selects this template instead of
# TWITTER_INSTRUCTIONS when platform == 'twitter' and language == 'zh'.
# `{asset_list}` is a placeholder and literal JSON braces are doubled
# (`{{` / `}}`), so the text is presumably rendered with str.format() --
# confirm at the call site before adding any single braces.
TWITTER_INSTRUCTIONS_CHINESE = '''
将内容转换为表示Twitter线程的JSON数组。数组中的每个元素都是一条推文对象。
- 每条推文对象必须有一个"text"键。文本应该是纯文本,不包含任何Markdown格式(如`*`、`#`、`[]()` 等)
- 如果推文关联有图片,添加"image_index"键,值为提供的Asset list中对应的从零开始的索引。例如,如果使用了第一张图`![...](img_0.png)`,其索引为0。
- 确保逻辑流畅连贯。如有必要,将文本分成多条推文。

**Asset list(仅供参考):**
{asset_list}

**JSON输出格式:**
[
{{ "text": "第一条推文的文本", "image_index": 0 }},
{{ "text": "第二条推文的文本" }},
{{ "text": "第三条推文的文本", "image_index": 1 }}
]
'''

# Chinese-language instructions for converting a Markdown post into a single
# JSON object for Xiaohongshu with "title", "body" and "image_indices" keys.
# format_post_for_display() selects this template instead of
# XIAOHONGSHU_INSTRUCTIONS when platform == 'xiaohongshu' and
# language == 'zh'.
# `{asset_list}` is a placeholder and literal JSON braces are doubled
# (`{{` / `}}`), so the text is presumably rendered with str.format() --
# confirm at the call site before adding any single braces.
XIAOHONGSHU_INSTRUCTIONS_CHINESE = '''
将内容转换为小红书帖子的单个JSON对象。
- JSON对象必须有"title"键。从Markdown中提取主标题(通常是第一个H1/H2标题)。标题应该是纯文本。
- JSON对象必须有"body"键,包含主要文本内容和表情符号。正文应该是纯文本,不包含任何Markdown格式(如`*`、`#`、`[]()` 等)
- JSON对象必须有"image_indices"键,值为一个数组,包含帖子中使用的所有图片索引,按出现顺序排列。

**Asset list(仅供参考):**
{asset_list}

**JSON输出格式:**
{{
"title": "你的吸引人的标题",
"body": "帖子的完整正文内容...",
"image_indices": [0, 1, 2, 3]
}}
'''

def image_to_base64(path: str) -> str:

try:
Expand Down Expand Up @@ -97,12 +133,13 @@ async def format_post_for_display(
assets: Optional[List[Dict]],
platform: str,
client,
model: str
model: str,
language: str = 'en'
) -> Optional[Dict]:
if platform == 'twitter':
instructions = TWITTER_INSTRUCTIONS
instructions = TWITTER_INSTRUCTIONS_CHINESE if language == 'zh' else TWITTER_INSTRUCTIONS
elif platform == 'xiaohongshu':
instructions = XIAOHONGSHU_INSTRUCTIONS
instructions = XIAOHONGSHU_INSTRUCTIONS_CHINESE if language == 'zh' else XIAOHONGSHU_INSTRUCTIONS
else:
return None

Expand Down Expand Up @@ -230,7 +267,7 @@ async def process_pdf(
progress(0.3, desc="Step 2/5: Extracting figures from PDF...")
extraction_work_dir = work_dir / "figure_extraction"
extraction_work_dir.mkdir()
paired_dir = await run_figure_extraction(str(pdf_path), str(extraction_work_dir), progress=progress)
paired_dir = await run_figure_extraction(str(pdf_path), str(extraction_work_dir), model_path=YOLO_MODEL_PATH, progress=progress)
if not paired_dir or not any(Path(paired_dir).iterdir()):
raise gr.Error("Failed to extract any figures from the PDF.")

Expand Down Expand Up @@ -279,7 +316,7 @@ async def process_pdf(
progress(0.9, desc="Step 5/5: Formatting for rich display...")
async with setup_client(text_api_key, base_url) as client:
structured_data = await format_post_for_display(
final_post_md, assets_info, platform, client, text_model
final_post_md, assets_info, platform, client, text_model, language
)
if not structured_data:
raise gr.Error("Failed to format post for display.")
Expand Down
2 changes: 1 addition & 1 deletion pragent/backend/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def _prepare_extra_body(model_name: str, disable_qwen_thinking: bool) -> Optiona
async def setup_client(api_key: str, base_url: str) -> AsyncIterator[AsyncOpenAI]:
"""Use an asynchronous context manager to create and properly destroy the API client."""
client = None
if not api_key or "sk-" not in api_key:
if not api_key :
tqdm.write("[!] Error: API Key is invalid or not set.")
yield None
return
Expand Down
10 changes: 8 additions & 2 deletions pragent/backend/figure_table_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
from pragent.backend.yolo import extract_and_save_layout_components
from tqdm.asyncio import tqdm

def run_figure_extraction(pdf_path: str, base_work_dir: str, model_path: str) -> str:
import asyncio
from typing import Optional, Callable

async def run_figure_extraction(pdf_path: str, base_work_dir: str, model_path: Optional[str] = None, progress: Optional[Callable] = None, conf_threshold: float = 0.8) -> str:
#def run_figure_extraction(pdf_path: str, base_work_dir: str, model_path: str) -> str:
"""
A complete workflow for extracting and pairing charts from a PDF.
This is the main function called by app.py.
Expand All @@ -16,7 +20,9 @@ def run_figure_extraction(pdf_path: str, base_work_dir: str, model_path: str) ->
pdf_path (str): The path to the PDF uploaded by the user.
base_work_dir (str): The temporary working directory for this session.
model_path (str): The path to the YOLO model for document layout analysis.

progress (Optional[Callable]): A progress callback function.
conf_threshold (float): The confidence threshold for YOLO model detection (default: 0.8).

Returns:
str: The directory path of the final pairing result, or None if it fails.
"""
Expand Down