langgenius
diff --git a/tools/paddleocr/README.md
Lines changed: 1 addition & 3 deletions b/tools/paddleocr/README.md
Lines changed: 1 addition & 3 deletions
diff --git a/tools/paddleocr/_assets/get_api_url.png
-477 KB b/tools/paddleocr/_assets/get_api_url.png
-477 KB
diff --git a/tools/paddleocr/main.py
Lines changed: 1 addition & 1 deletion b/tools/paddleocr/main.py
Lines changed: 1 addition & 1 deletion
diff --git a/tools/paddleocr/manifest.yaml
Lines changed: 1 addition & 1 deletion b/tools/paddleocr/manifest.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/tools/paddleocr/provider/paddleocr.py
Lines changed: 18 additions & 10 deletions b/tools/paddleocr/provider/paddleocr.py
Lines changed: 18 additions & 10 deletions
diff --git a/tools/paddleocr/provider/paddleocr.yaml
Lines changed: 3 additions & 0 deletions b/tools/paddleocr/provider/paddleocr.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/tools/paddleocr/tools/document_parsing.py
Lines changed: 36 additions & 28 deletions b/tools/paddleocr/tools/document_parsing.py
Lines changed: 36 additions & 28 deletions
diff --git a/tools/paddleocr/tools/document_parsing.yaml
Lines changed: 76 additions & 7 deletions b/tools/paddleocr/tools/document_parsing.yaml
Lines changed: 76 additions & 7 deletions
@@ -14,9 +14,7 @@ Open the Plugin Marketplace, search for the PaddleOCR plugin, and install it to
 
 You can get your AI Studio access token from [this page](https://aistudio.baidu.com/index/accessToken).
 
-For each tool provided by the plugin, there is a corresponding API URL. It is required to provide at least one API URL in order to use the PaddleOCR plugin. To obtain the API URL, visit the [PaddleOCR official website](https://aistudio.baidu.com/paddleocr/task), click the **API** button in the upper-left corner, choose the example code for the tool you want to use (e.g., *Text Recognition (PP-OCRv5)*), and copy the `API_URL`. You do not need to provide URLs for all tools—only for those you intend to use.
-
-![get_api_url](./_assets/get_api_url.png)
+Each tool provided by the plugin has its own API URL. You must provide at least one API URL to use the PaddleOCR plugin. To obtain an API URL, visit the [PaddleOCR official website](https://aistudio.baidu.com/paddleocr), click the **API** button, choose the example code for the tool you want to use (e.g., *PP-OCRv5*), and copy the `API_URL`. You do not need to provide URLs for all tools—only for those you intend to use.
 
 ### 3. Use the plugin
 
 
@@ -1,4 +1,4 @@
-from dify_plugin import Plugin, DifyPluginEnv
+from dify_plugin import DifyPluginEnv, Plugin
 
 plugin = Plugin(DifyPluginEnv(MAX_REQUEST_TIMEOUT=120))
 
 
@@ -1,4 +1,4 @@
-version: 0.1.4
+version: 0.2.0
 type: plugin
 author: langgenius
 name: paddleocr
 
@@ -3,40 +3,48 @@
 from dify_plugin import ToolProvider
 from dify_plugin.errors.tool import ToolProviderCredentialValidationError
 
-from tools.text_recognition import TextRecognitionTool
 from tools.document_parsing import DocumentParsingTool
 from tools.document_parsing_vl import DocumentParsingVlTool
+from tools.text_recognition import TextRecognitionTool
 
 
 class PaddleocrProvider(ToolProvider):
     def _validate_credentials(self, credentials: dict[str, Any]) -> None:
         if "aistudio_access_token" not in credentials:
-            raise ToolProviderCredentialValidationError("AI Studio access token must be provided")
+            raise ToolProviderCredentialValidationError(
+                "AI Studio access token must be provided"
+            )
 
         api_url_keys = (
             "text_recognition_api_url",
             "document_parsing_api_url",
             "document_parsing_vl_api_url",
         )
         tool_classes = (
-            TextRecognitionTool, 
-            DocumentParsingTool, 
+            TextRecognitionTool,
+            DocumentParsingTool,
             DocumentParsingVlTool,
         )
         test_file = "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_002.png"
 
         if not any(key in credentials for key in api_url_keys):
-            raise ToolProviderCredentialValidationError("You should provide at least one API URL")
-        
+            raise ToolProviderCredentialValidationError(
+                "You should provide at least one API URL"
+            )
+
         for api_url_key, tool_cls in zip(api_url_keys, tool_classes):
             if api_url_key in credentials:
                 try:
                     self._test_tool_validation(tool_cls, credentials, test_file)
                 except Exception as e:
-                    raise ToolProviderCredentialValidationError(f"Invalid credentials for {tool_cls.__name__}") from e
-    
-    def _test_tool_validation(self, tool_cls, credentials: dict[str, Any], test_file: str) -> None:
+                    raise ToolProviderCredentialValidationError(
+                        f"Invalid credentials for {tool_cls.__name__}"
+                    ) from e
+
+    def _test_tool_validation(
+        self, tool_cls, credentials: dict[str, Any], test_file: str
+    ) -> None:
         tool = tool_cls.from_credentials(credentials)
-        
+
         for _ in tool.invoke(tool_parameters={"file": test_file}):
             break
@@ -9,6 +9,9 @@ identity:
     zh_Hans: "PaddleOCR 插件提供 PaddleOCR 的多项能力，包括文字识别、文档解析等"
   icon: "icon.png"
 
+tags:
+    - productivity
+
 tools:
   - tools/text_recognition.yaml
   - tools/document_parsing.yaml
 
@@ -1,13 +1,15 @@
 from collections.abc import Generator
 from typing import Any
 
-import requests
 from dify_plugin import Tool
 from dify_plugin.entities.tool import ToolInvokeMessage
 
-from tools.utils import remove_img_from_markdown
-
-REQUEST_TIMEOUT = (10, 600)
+from tools.utils import (
+    convert_file_type,
+    get_markdown_from_result,
+    make_paddleocr_api_request,
+    process_images_from_result,
+)
 
 
 class DocumentParsingTool(Tool):
@@ -40,6 +42,7 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
             "useFormulaRecognition",
             "useChartRecognition",
             "useRegionDetection",
+            "formatBlockContent",
             "layoutThreshold",
             "layoutNms",
             "layoutUnclipRatio",
@@ -62,34 +65,39 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
             "useOcrResultsWithTableCells",
             "useE2eWiredTableRecModel",
             "useE2eWirelessTableRecModel",
+            "markdownIgnoreLabels",
+            "prettifyMarkdown",
+            "showFormulaNumber",
             "visualize",
         ]:
             if optional_param_name in tool_parameters:
                 params[optional_param_name] = tool_parameters[optional_param_name]
 
-        try:
-            resp = requests.post(
-                api_url,
-                headers={"Client-Platform": "dify", "Authorization": f"token {access_token}"},
-                json=params,
-                timeout=REQUEST_TIMEOUT,
-            )
-            resp.raise_for_status()
-            result = resp.json()
-        except requests.exceptions.JSONDecodeError as e:
-            raise RuntimeError(
-                f"Failed to decode JSON response from PaddleOCR API: {resp.text}"
-            ) from e
-        except requests.exceptions.Timeout as e:
-            raise RuntimeError("PaddleOCR API request timed out") from e
-        except requests.exceptions.RequestException as e:
-            raise RuntimeError(f"PaddleOCR API request failed: {e}") from e
+        # Convert fileType parameter
+        if "fileType" in params:
+            params["fileType"] = convert_file_type(params["fileType"])
+
+        # Convert markdownIgnoreLabels from comma-separated string to list
+        if "markdownIgnoreLabels" in params and isinstance(
+            params["markdownIgnoreLabels"], str
+        ):
+            params["markdownIgnoreLabels"] = [
+                label.strip()
+                for label in params["markdownIgnoreLabels"].split(",")
+                if label.strip()
+            ]
+
+        result = make_paddleocr_api_request(api_url, params, access_token)
+
+        images, image_path_map, failed_images, blob_messages = (
+            process_images_from_result(result, self)
+        )
+
+        markdown = get_markdown_from_result(result, image_path_map, failed_images)
+
+        for blob_data, blob_meta in blob_messages:
+            yield self.create_blob_message(blob_data, meta=blob_meta)
 
-        markdown_text_list = []
-        for item in result.get("result", {}).get("layoutParsingResults", []):
-            markdown_text = item.get("markdown", {}).get("text")
-            if markdown_text is not None:
-                markdown_text = remove_img_from_markdown(markdown_text)
-                markdown_text_list.append(markdown_text)
-        yield self.create_text_message("\n\n".join(markdown_text_list))
+        yield self.create_variable_message("images", images)
+        yield self.create_text_message(markdown)
         yield self.create_json_message(result)
@@ -22,15 +22,29 @@ parameters:
     llm_description: The URL of an image or PDF file, or the Base64-encoded result of the content of such a file.
     form: llm
   - name: fileType
-    type: number
+    type: select
     required: false
+    default: auto
+    options:
+      - label:
+          en_US: Auto Detect
+          zh_Hans: 自动检测
+        value: auto
+      - label:
+          en_US: PDF
+          zh_Hans: PDF
+        value: pdf
+      - label:
+          en_US: Image
+          zh_Hans: 图片
+        value: image
     label:
       en_US: File Type
       zh_Hans: 文件类型
     human_description:
-      en_US: File type. 0 indicates a PDF file, and 1 indicates an image file. If not specified, the file type will be inferred from the URL.
-      zh_Hans: 文件类型。0 表示 PDF 文件，1 表示图像文件。若不设置，则将根据 URL 推断文件类型。
-    llm_description: File type. 0 indicates a PDF file, and 1 indicates an image file. If not specified, the file type will be inferred from the URL.
+      en_US: File type. "Auto Detect" infers the type from the URL, "PDF" is for PDF files, and "Image" is for image files.
+      zh_Hans: 文件类型。"自动检测" 将根据 URL 推断类型，"PDF" 表示 PDF 文件，"图片" 表示图像文件。
+    llm_description: File type. "auto" infers the type from the URL, "pdf" is for PDF files, and "image" is for image files.
     form: llm
   - name: useDocOrientationClassify
     type: boolean
@@ -112,9 +126,9 @@ parameters:
       en_US: Whether to Enable Chart Recognition
       zh_Hans: 是否启用图表识别
     human_description:
-      en_US: Whether to enable the chart recognition function.
-      zh_Hans: 是否启用图表识别功能。
-    llm_description: Whether to enable the chart recognition function.
+      en_US: Whether to enable the chart recognition function (to recognize line charts, bar charts, etc.).
+      zh_Hans: 是否启用图表识别功能（识别折线图、柱状图等）。
+    llm_description: Whether to enable the chart recognition function (to recognize line charts, bar charts, etc.).
     form: llm
   - name: useRegionDetection
     type: boolean
@@ -128,6 +142,18 @@ parameters:
       zh_Hans: 是否启用区域检测功能。
     llm_description: Whether to enable the region detection function.
     form: llm
+  - name: formatBlockContent
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Whether to Format Block Content
+      zh_Hans: 是否格式化块内容
+    human_description:
+      en_US: Whether to convert the block content into Markdown format.
+      zh_Hans: 是否将块内容转换为 Markdown 格式。
+    llm_description: Whether to convert the block content into Markdown format.
+    form: llm
   - name: layoutNms
     type: boolean
     required: false
@@ -370,6 +396,41 @@ parameters:
       zh_Hans: 是否启用无线表端到端表格识别模式。启用时，不使用单元格检测模型，只使用表格结构识别模型。
     llm_description: Whether to enable end-to-end wireless table recognition mode. When enabled, the cell detection model will not be used, and only the table structure recognition model will be used.
     form: llm
+  - name: markdownIgnoreLabels
+    type: string
+    required: false
+    label:
+      en_US: Labels to Ignore in Markdown Output
+      zh_Hans: Markdown 输出中忽略的标签
+    human_description:
+      en_US: 'Comma-separated list of labels to ignore when generating Markdown output. For example: "header,footer,page_number".'
+      zh_Hans: '生成 Markdown 输出时要忽略的标签列表，使用逗号分隔。例如："header,footer,page_number"。'
+    llm_description: 'Comma-separated list of labels to ignore when generating Markdown output. For example: "header,footer,page_number".'
+    form: llm
+  - name: prettifyMarkdown
+    type: boolean
+    required: false
+    default: true
+    label:
+      en_US: Whether to Prettify the Output Markdown Text
+      zh_Hans: 是否美化输出的 Markdown 文本
+    human_description:
+      en_US: Whether to prettify the output Markdown text.
+      zh_Hans: 是否美化输出的 Markdown 文本。
+    llm_description: Whether to prettify the output Markdown text.
+    form: llm
+  - name: showFormulaNumber
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Whether to Include Formula Numbers in the Output Markdown Text
+      zh_Hans: 是否在输出的 Markdown 文本中包含公式编号
+    human_description:
+      en_US: Whether to include formula numbers in the output Markdown text.
+      zh_Hans: 是否在输出的 Markdown 文本中包含公式编号。
+    llm_description: Whether to include formula numbers in the output Markdown text.
+    form: llm
   - name: visualize
     type: boolean
     required: false
@@ -382,6 +443,14 @@ parameters:
       zh_Hans: 是否返回可视化结果。
     llm_description: Whether or not to return visualization results.
     form: llm
+output_schema:
+    type: object
+    properties:
+      images:
+        type: array
+        items:
+          type: object
+        description: The images extracted from the file.
 extra:
   python:
     source: tools/document_parsing.py
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-from dify_plugin import Plugin, DifyPluginEnv`
	`1`	`+from dify_plugin import DifyPluginEnv, Plugin`
`2`	`2`
`3`	`3`	`plugin = Plugin(DifyPluginEnv(MAX_REQUEST_TIMEOUT=120))`
`4`	`4`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-version: 0.1.4`
	`1`	`+version: 0.2.0`
`2`	`2`	`type: plugin`
`3`	`3`	`author: langgenius`
`4`	`4`	`name: paddleocr`