docs: 将 PaddleX 统一更新为 PP-StructureV3

xerrors · xerrors · commit 6d9f3add6ce7 · 2025-12-30T19:40:09.000+08:00
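更新文档、代码注释/文档字符串、日志与错误消息以及前端显示文案，将 PaddleX 统一替换为 PP-StructureV3 以反映实际使用的技术栈（`paddlex_ocr` 服务标识、`PaddleXDocumentParser` 类名、`PADDLEX_URI` 环境变量等标识符保持不变）；同时调整路线图的版本标题，并为“生成中”提示文字添加扫光动画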
diff --git a/docs/latest/advanced/document-processing.md b/docs/latest/advanced/document-processing.md
@@ -5,7 +5,7 @@
 - **RapidOCR**: CPU 友好，无需 GPU，适合基础文字识别
 - **MinerU**: 本地化高精度 VLM 解析，适合复杂 PDF 和表格文档
 - **MinerU Official**: 官方云服务 API，无需本地部署，开箱即用
-- **PaddleX**: 结构化解析，适合表格、票据等特殊格式
+- **PP-StructureV3**: 结构化解析，适合表格、票据等特殊格式
-- **DeepSeek OCR**: 基于 SiliconFlow API 的 DeepSeek OCR OCR 服务
+- **DeepSeek OCR**: 基于 SiliconFlow API 的 DeepSeek OCR 服务
 
 ## 支持的文件类型
@@ -83,17 +83,17 @@ MINERU_API_KEY="your-api-key-here"
 
 然后使用 `docker compose up api -d` 重启后端服务。
 
-### 4. 结构化解析 (PaddleX)
+### 4. 结构化解析 (PP-StructureV3)
 
 ```bash
-# 需要 GPU，启动 PaddleX 服务
+# 需要 GPU，启动 PP-StructureV3 服务
 docker compose up -d paddlex
 
 # 启动主服务
 docker compose up -d api
 ```
 
-### 5. 智能云端 OCR (DeepSeek OCR)
+### 5. DeepSeek OCR (SiliconFlow)
 
 DeepSeek OCR 基于 SiliconFlow API，提供智能文档理解和 Markdown 格式输出。
 
@@ -121,7 +121,7 @@ docker compose restart api
 | **RapidOCR** | 基础文字识别 | CPU | 速度快，资源占用低 |
 | **MinerU** | 复杂 PDF、表格、公式 | GPU | 精度高，版面分析好 |
 | **MinerU Official** | 复杂文档解析（云服务） | 无特殊要求 | 官方云服务，开箱即用，有 API 配额 |
-| **PaddleX** | 表格、票据、结构化文档 | GPU | 专业版面解析 |
+| **PP-StructureV3** | 表格、票据、结构化文档 | GPU | 专业版面解析 |
 | **DeepSeek OCR** | 智能文档理解和 Markdown 输出 | 无特殊要求 | 云端服务 |
 
 ## 参数说明
@@ -134,12 +134,12 @@ docker compose restart api
 - `onnx_rapid_ocr`: RapidOCR 处理
 - `mineru_ocr`: MinerU HTTP API 处理
 - `mineru_official`: MinerU 官方云服务 API 处理
-- `paddlex_ocr`: PaddleX 处理
+- `paddlex_ocr`: PP-StructureV3 处理
 - `deepseek_ocr`: DeepSeek OCR（SiliconFlow API）处理
 
 ### 注意事项
 - **图片文件必须启用 OCR**，否则无法提取内容
-- MinerU 和 PaddleX 需要 GPU 支持
+- MinerU 和 PP-StructureV3 需要 GPU 支持
 - MinerU Official 需要设置 `MINERU_API_KEY` 环境变量
 - DeepSeek OCR 需要设置 `SILICONFLOW_API_KEY` 环境变量
 - RapidOCR 适合 CPU 环境和基础识别需求
diff --git a/docs/latest/advanced/misc.md b/docs/latest/advanced/misc.md
@@ -43,7 +43,7 @@
 | **9000/9001** | MinIO | milvus-minio | 对象存储 |
 | **19530/9091** | Milvus | milvus | 向量数据库 |
 | **30000** | MinerU | mineru | PDF 解析（可选）|
-| **8080** | PaddleX | paddlex-ocr | OCR 服务（可选）|
+| **8080** | PP-StructureV3 | paddlex-ocr | OCR 服务（可选）|
 | **8081** | vLLM | - | 本地推理（可选）|
 
 ::: tip 端口访问
diff --git a/docs/latest/changelog/faq.md b/docs/latest/changelog/faq.md
@@ -64,7 +64,7 @@
 
 ### OCR 模型或服务不可用？
   - RapidOCR 本地模型：确保 `MODEL_DIR/SWHL/RapidOCR` 下存在 `PP-OCRv4` 模型
-  - MinerU/PaddleX：检查健康检查接口与 GPU/CUDA 版本
+  - MinerU/PP-StructureV3：检查健康检查接口与 GPU/CUDA 版本
 
 ### 登录失败被锁定？
   - 多次失败会临时锁定账户，请根据提示等待后重试
diff --git a/docs/latest/changelog/roadmap.md b/docs/latest/changelog/roadmap.md
@@ -2,7 +2,7 @@
 
 路线图可能会经常变更，如果有强烈的建议，可以在 [issue](https://github.com/xerrors/Yuxi-Know/issues) 中提。
 
-## v0.4
+## v0.5
 
 ### 看板
 
@@ -28,6 +28,8 @@
 - 工具传递给模型的时候，使用英文，但部分模型不支持中文函数名（如gpt-4o-mini）
 - 首页加载的问题
 
+## v0.4
+
 ### 新增
 - 新增对于上传附件的智能体中间件，详见[文档](https://xerrors.github.io/Yuxi-Know/latest/advanced/agents-config.html#%E6%96%87%E4%BB%B6%E4%B8%8A%E4%BC%A0%E4%B8%AD%E9%97%B4%E4%BB%B6)
 - 新增多模态模型支持（当前仅支持图片），详见[文档](https://xerrors.github.io/Yuxi-Know/latest/advanced/agents-config.html#%E5%A4%9A%E6%A8%A1%E6%80%81%E5%9B%BE%E7%89%87%E6%94%AF%E6%8C%81)
diff --git a/src/plugins/document_processor_factory.py b/src/plugins/document_processor_factory.py
@@ -40,7 +40,7 @@ def get_processor(cls, processor_type: str, **kwargs) -> BaseDocumentProcessor:
                 - "onnx_rapid_ocr": RapidOCR 本地 OCR
                 - "mineru_ocr": MinerU HTTP API 文档解析
                 - "mineru_official": MinerU 官方云服务 API 文档解析
-                - "paddlex_ocr": PaddleX 版面解析
+                - "paddlex_ocr": PP-StructureV3 版面解析
                 - "deepseek_ocr": DeepSeek-OCR SiliconFlow API
             **kwargs: 处理器初始化参数
 
diff --git a/src/plugins/paddlex_parser.py b/src/plugins/paddlex_parser.py
@@ -1,7 +1,7 @@
 """
-PaddleX 文档解析器
+PP-StructureV3 文档解析器
 
-使用 PaddleX PP-StructureV3 进行文档版面解析和内容提取
+使用 PP-StructureV3 进行文档版面解析和内容提取
 """
 
 import base64
@@ -17,7 +17,7 @@
 
 
 class PaddleXDocumentParser(BaseDocumentProcessor):
-    """PaddleX 文档解析器 - 使用 PP-StructureV3 进行版面解析"""
+    """PP-StructureV3 文档解析器 - 进行文档版面解析和内容提取"""
 
     def __init__(self, server_url: str | None = None):
         self.server_url = server_url or os.getenv("PADDLEX_URI") or "http://localhost:8080"
@@ -28,7 +28,7 @@ def get_service_name(self) -> str:
         return "paddlex_ocr"
 
     def get_supported_extensions(self) -> list[str]:
-        """PaddleX 支持 PDF 和多种图像格式"""
+        """PP-StructureV3 支持 PDF 和多种图像格式"""
         return [".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]
 
     def _encode_file_to_base64(self, file_path: str) -> str:
@@ -64,7 +64,7 @@ def _call_layout_api(
         use_seal_recognition: bool = False,
         **kwargs,
     ) -> dict[str, Any]:
-        """调用PaddleX版面解析API"""
+        """调用 PP-StructureV3 版面解析 API"""
         # 处理文件输入
         processed_file_input = self._process_file_input(file_input)
         payload = {"file": processed_file_input}
@@ -92,7 +92,7 @@ def _call_layout_api(
         if response.status_code == 200:
             return response.json()
         else:
-            error_msg = f"PaddleX API请求失败: {response.status_code}"
+            error_msg = f"PP-StructureV3 API请求失败: {response.status_code}"
             try:
                 error_result = response.json()
                 raise DocumentParserException(f"{error_msg}: {error_result}", self.get_service_name(), "api_error")
@@ -157,45 +157,45 @@ def _parse_api_result(self, api_result: dict[str, Any], file_path: str) -> dict[
         return parsed_result
 
     def check_health(self) -> dict:
-        """检查 PaddleX 服务健康状态"""
+        """检查 PP-StructureV3 服务健康状态"""
         try:
             response = requests.get(f"{self.base_url}/health", timeout=5)
 
             if response.status_code == 200:
                 return {
                     "status": "healthy",
-                    "message": "PaddleX 服务运行正常",
+                    "message": "PP-StructureV3 服务运行正常",
                     "details": {"server_url": self.server_url},
                 }
             else:
                 return {
                     "status": "unhealthy",
-                    "message": f"PaddleX 服务响应异常: {response.status_code}",
+                    "message": f"PP-StructureV3 服务响应异常: {response.status_code}",
                     "details": {"server_url": self.server_url},
                 }
 
         except requests.exceptions.ConnectionError:
             return {
                 "status": "unavailable",
-                "message": "PaddleX 服务无法连接,请检查服务是否启动",
+                "message": "PP-StructureV3 服务无法连接,请检查服务是否启动",
                 "details": {"server_url": self.server_url},
             }
         except requests.exceptions.Timeout:
             return {
                 "status": "timeout",
-                "message": "PaddleX 服务连接超时",
+                "message": "PP-StructureV3 服务连接超时",
                 "details": {"server_url": self.server_url},
             }
         except Exception as e:
             return {
                 "status": "error",
-                "message": f"PaddleX 健康检查失败: {str(e)}",
+                "message": f"PP-StructureV3 健康检查失败: {str(e)}",
                 "details": {"server_url": self.server_url, "error": str(e)},
             }
 
     def process_file(self, file_path: str, params: dict | None = None) -> str:
         """
-        使用 PaddleX 处理文档
+        使用 PP-StructureV3 处理文档
 
         Args:
             file_path: 文件路径
@@ -220,7 +220,7 @@ def process_file(self, file_path: str, params: dict | None = None) -> str:
         health = self.check_health()
         if health["status"] != "healthy":
             raise DocumentParserException(
-                f"PaddleX 服务不可用: {health['message']}", self.get_service_name(), health["status"]
+                f"PP-StructureV3 服务不可用: {health['message']}", self.get_service_name(), health["status"]
             )
 
         try:
@@ -230,7 +230,7 @@ def process_file(self, file_path: str, params: dict | None = None) -> str:
             # 判断文件类型
             file_type = 0 if file_ext == ".pdf" else 1
 
-            logger.info(f"PaddleX 开始处理: {os.path.basename(file_path)}")
+            logger.info(f"PP-StructureV3 开始处理: {os.path.basename(file_path)}")
 
             # 调用API
             api_result = self._call_layout_api(
@@ -244,15 +244,15 @@ def process_file(self, file_path: str, params: dict | None = None) -> str:
             # 检查API调用是否成功
             if api_result.get("errorCode") != 0:
                 raise DocumentParserException(
-                    f"PaddleX API错误: {api_result.get('errorMsg', '未知错误')}", self.get_service_name(), "api_error"
+                    f"PP-StructureV3 API错误: {api_result.get('errorMsg', '未知错误')}", self.get_service_name(), "api_error"
                 )
 
             # 解析结果
             result = self._parse_api_result(api_result, file_path)
             text = result.get("full_text", "")
 
             processing_time = time.time() - start_time
-            logger.info(f"PaddleX 处理成功: {os.path.basename(file_path)} - {len(text)} 字符 ({processing_time:.2f}s)")
+            logger.info(f"PP-StructureV3 处理成功: {os.path.basename(file_path)} - {len(text)} 字符 ({processing_time:.2f}s)")
 
             # 记录统计信息
             summary = result.get("summary", {})
@@ -265,6 +265,6 @@ def process_file(self, file_path: str, params: dict | None = None) -> str:
             raise
         except Exception as e:
             processing_time = time.time() - start_time
-            error_msg = f"PaddleX 处理失败: {str(e)}"
+            error_msg = f"PP-StructureV3 处理失败: {str(e)}"
             logger.error(f"{error_msg} ({processing_time:.2f}s)")
             raise DocumentParserException(error_msg, self.get_service_name(), "processing_failed")
diff --git a/web/src/components/AgentChatComponent.vue b/web/src/components/AgentChatComponent.vue
@@ -1292,10 +1292,34 @@ watch(conversations, () => {
 
   .generating-text {
     margin-left: 12px;
-    color: var(--gray-700);
     font-size: 14px;
     font-weight: 500;
     letter-spacing: 0.025em;
+    /* 恢复灰色调：深灰 -> 亮灰(高光) -> 深灰 */
+    background: linear-gradient(
+      90deg,
+      var(--gray-700) 0%,
+      var(--gray-700) 40%,
+      var(--gray-300) 45%,
+      var(--gray-200) 50%,
+      var(--gray-300) 55%,
+      var(--gray-700) 60%,
+      var(--gray-700) 100%
+    );
+    background-size: 200% auto;
+    -webkit-background-clip: text;
+    background-clip: text;
+    color: transparent;
+    animation: waveFlash 2s linear infinite;
+  }
+}
+
+@keyframes waveFlash {
+  0% {
+    background-position: 200% center;
+  }
+  100% {
+    background-position: -200% center;
   }
 }
 
diff --git a/web/src/components/FileUploadModal.vue b/web/src/components/FileUploadModal.vue
@@ -421,7 +421,7 @@ const enableOcrOptions = computed(() => [
   {
     value: 'paddlex_ocr',
     label: getPaddleXLabel(),
-    title: 'PaddleX OCR',
+    title: 'PP-StructureV3',
     disabled: ocrHealthStatus.value?.paddlex_ocr?.status === 'unavailable' || ocrHealthStatus.value?.paddlex_ocr?.status === 'error'
   },
   {
@@ -488,7 +488,7 @@ const getOcrLabel = (serviceKey, displayName) => {
 const getRapidOcrLabel = () => getOcrLabel('onnx_rapid_ocr', 'RapidOCR (ONNX)');
 const getMinerULabel = () => getOcrLabel('mineru_ocr', 'MinerU OCR');
 const getMinerUOfficialLabel = () => getOcrLabel('mineru_official', 'MinerU Official API');
-const getPaddleXLabel = () => getOcrLabel('paddlex_ocr', 'PaddleX OCR');
+const getPaddleXLabel = () => getOcrLabel('paddlex_ocr', 'PP-StructureV3');
 const getDeepSeekOcrLabel = () => getOcrLabel('deepseek_ocr', 'DeepSeek OCR');
 
 // 验证OCR服务可用性
@@ -707,7 +707,7 @@ const chunkData = async () => {
 
     if (hasImageFiles && chunkParams.value.enable_ocr === 'disable') {
       message.error({
-        content: '检测到图片文件,必须启用 OCR 才能提取文本内容。请在上方选择 OCR 方式 (RapidOCR/MinerU/MinerU Official/PaddleX) 或移除图片文件。',
+        content: '检测到图片文件,必须启用 OCR 才能提取文本内容。请在上方选择 OCR 方式 (RapidOCR/MinerU/MinerU Official/PP-StructureV3) 或移除图片文件。',
         duration: 5,
       });
       return;