Skip to content

Commit fafbb6b

Browse files
authored
[Feat] Upgrade PaddleOCR plugin to 0.2.0 (#2427)
1 parent 8fb3a11 commit fafbb6b

File tree

13 files changed

+724
-129
lines changed

13 files changed

+724
-129
lines changed

tools/paddleocr/README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,7 @@ Open the Plugin Marketplace, search for the PaddleOCR plugin, and install it to
1414

1515
You can get your AI Studio access token from [this page](https://aistudio.baidu.com/index/accessToken).
1616

17-
For each tool provided by the plugin, there is a corresponding API URL. It is required to provide at least one API URL in order to use the PaddleOCR plugin. To obtain the API URL, visit the [PaddleOCR official website](https://aistudio.baidu.com/paddleocr/task), click the **API** button in the upper-left corner, choose the example code for the tool you want to use (e.g., *Text Recognition (PP-OCRv5)*), and copy the `API_URL`. You do not need to provide URLs for all tools—only for those you intend to use.
18-
19-
![get_api_url](./_assets/get_api_url.png)
17+
For each tool provided by the plugin, there is a corresponding API URL. You must provide at least one API URL to use the PaddleOCR plugin. To obtain an API URL, visit the [PaddleOCR official website](https://aistudio.baidu.com/paddleocr), click the **API** button, choose the example code for the tool you want to use (e.g., *PP-OCRv5*), and copy the `API_URL`. You do not need to provide URLs for all tools—only for those you intend to use.
2018

2119
### 3. Use the plugin
2220

-477 KB
Binary file not shown.

tools/paddleocr/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from dify_plugin import Plugin, DifyPluginEnv
1+
from dify_plugin import DifyPluginEnv, Plugin
22

33
plugin = Plugin(DifyPluginEnv(MAX_REQUEST_TIMEOUT=120))
44

tools/paddleocr/manifest.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
version: 0.1.4
1+
version: 0.2.0
22
type: plugin
33
author: langgenius
44
name: paddleocr

tools/paddleocr/provider/paddleocr.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,40 +3,48 @@
33
from dify_plugin import ToolProvider
44
from dify_plugin.errors.tool import ToolProviderCredentialValidationError
55

6-
from tools.text_recognition import TextRecognitionTool
76
from tools.document_parsing import DocumentParsingTool
87
from tools.document_parsing_vl import DocumentParsingVlTool
8+
from tools.text_recognition import TextRecognitionTool
99

1010

1111
class PaddleocrProvider(ToolProvider):
1212
def _validate_credentials(self, credentials: dict[str, Any]) -> None:
1313
if "aistudio_access_token" not in credentials:
14-
raise ToolProviderCredentialValidationError("AI Studio access token must be provided")
14+
raise ToolProviderCredentialValidationError(
15+
"AI Studio access token must be provided"
16+
)
1517

1618
api_url_keys = (
1719
"text_recognition_api_url",
1820
"document_parsing_api_url",
1921
"document_parsing_vl_api_url",
2022
)
2123
tool_classes = (
22-
TextRecognitionTool,
23-
DocumentParsingTool,
24+
TextRecognitionTool,
25+
DocumentParsingTool,
2426
DocumentParsingVlTool,
2527
)
2628
test_file = "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_002.png"
2729

2830
if not any(key in credentials for key in api_url_keys):
29-
raise ToolProviderCredentialValidationError("You should provide at least one API URL")
30-
31+
raise ToolProviderCredentialValidationError(
32+
"You should provide at least one API URL"
33+
)
34+
3135
for api_url_key, tool_cls in zip(api_url_keys, tool_classes):
3236
if api_url_key in credentials:
3337
try:
3438
self._test_tool_validation(tool_cls, credentials, test_file)
3539
except Exception as e:
36-
raise ToolProviderCredentialValidationError(f"Invalid credentials for {tool_cls.__name__}") from e
37-
38-
def _test_tool_validation(self, tool_cls, credentials: dict[str, Any], test_file: str) -> None:
40+
raise ToolProviderCredentialValidationError(
41+
f"Invalid credentials for {tool_cls.__name__}"
42+
) from e
43+
44+
def _test_tool_validation(
45+
self, tool_cls, credentials: dict[str, Any], test_file: str
46+
) -> None:
3947
tool = tool_cls.from_credentials(credentials)
40-
48+
4149
for _ in tool.invoke(tool_parameters={"file": test_file}):
4250
break

tools/paddleocr/provider/paddleocr.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ identity:
99
zh_Hans: "PaddleOCR 插件提供 PaddleOCR 的多项能力,包括文字识别、文档解析等"
1010
icon: "icon.png"
1111

12+
tags:
13+
- productivity
14+
1215
tools:
1316
- tools/text_recognition.yaml
1417
- tools/document_parsing.yaml

tools/paddleocr/tools/document_parsing.py

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
from collections.abc import Generator
22
from typing import Any
33

4-
import requests
54
from dify_plugin import Tool
65
from dify_plugin.entities.tool import ToolInvokeMessage
76

8-
from tools.utils import remove_img_from_markdown
9-
10-
REQUEST_TIMEOUT = (10, 600)
7+
from tools.utils import (
8+
convert_file_type,
9+
get_markdown_from_result,
10+
make_paddleocr_api_request,
11+
process_images_from_result,
12+
)
1113

1214

1315
class DocumentParsingTool(Tool):
@@ -40,6 +42,7 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
4042
"useFormulaRecognition",
4143
"useChartRecognition",
4244
"useRegionDetection",
45+
"formatBlockContent",
4346
"layoutThreshold",
4447
"layoutNms",
4548
"layoutUnclipRatio",
@@ -62,34 +65,39 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
6265
"useOcrResultsWithTableCells",
6366
"useE2eWiredTableRecModel",
6467
"useE2eWirelessTableRecModel",
68+
"markdownIgnoreLabels",
69+
"prettifyMarkdown",
70+
"showFormulaNumber",
6571
"visualize",
6672
]:
6773
if optional_param_name in tool_parameters:
6874
params[optional_param_name] = tool_parameters[optional_param_name]
6975

70-
try:
71-
resp = requests.post(
72-
api_url,
73-
headers={"Client-Platform": "dify", "Authorization": f"token {access_token}"},
74-
json=params,
75-
timeout=REQUEST_TIMEOUT,
76-
)
77-
resp.raise_for_status()
78-
result = resp.json()
79-
except requests.exceptions.JSONDecodeError as e:
80-
raise RuntimeError(
81-
f"Failed to decode JSON response from PaddleOCR API: {resp.text}"
82-
) from e
83-
except requests.exceptions.Timeout as e:
84-
raise RuntimeError("PaddleOCR API request timed out") from e
85-
except requests.exceptions.RequestException as e:
86-
raise RuntimeError(f"PaddleOCR API request failed: {e}") from e
76+
# Convert fileType parameter
77+
if "fileType" in params:
78+
params["fileType"] = convert_file_type(params["fileType"])
79+
80+
# Convert markdownIgnoreLabels from comma-separated string to list
81+
if "markdownIgnoreLabels" in params and isinstance(
82+
params["markdownIgnoreLabels"], str
83+
):
84+
params["markdownIgnoreLabels"] = [
85+
label.strip()
86+
for label in params["markdownIgnoreLabels"].split(",")
87+
if label.strip()
88+
]
89+
90+
result = make_paddleocr_api_request(api_url, params, access_token)
91+
92+
images, image_path_map, failed_images, blob_messages = (
93+
process_images_from_result(result, self)
94+
)
95+
96+
markdown = get_markdown_from_result(result, image_path_map, failed_images)
97+
98+
for blob_data, blob_meta in blob_messages:
99+
yield self.create_blob_message(blob_data, meta=blob_meta)
87100

88-
markdown_text_list = []
89-
for item in result.get("result", {}).get("layoutParsingResults", []):
90-
markdown_text = item.get("markdown", {}).get("text")
91-
if markdown_text is not None:
92-
markdown_text = remove_img_from_markdown(markdown_text)
93-
markdown_text_list.append(markdown_text)
94-
yield self.create_text_message("\n\n".join(markdown_text_list))
101+
yield self.create_variable_message("images", images)
102+
yield self.create_text_message(markdown)
95103
yield self.create_json_message(result)

tools/paddleocr/tools/document_parsing.yaml

Lines changed: 76 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,29 @@ parameters:
2222
llm_description: The URL of an image or PDF file, or the Base64-encoded result of the content of such a file.
2323
form: llm
2424
- name: fileType
25-
type: number
25+
type: select
2626
required: false
27+
default: auto
28+
options:
29+
- label:
30+
en_US: Auto Detect
31+
zh_Hans: 自动检测
32+
value: auto
33+
- label:
34+
en_US: PDF
35+
zh_Hans: PDF
36+
value: pdf
37+
- label:
38+
en_US: Image
39+
zh_Hans: 图片
40+
value: image
2741
label:
2842
en_US: File Type
2943
zh_Hans: 文件类型
3044
human_description:
31-
en_US: File type. 0 indicates a PDF file, and 1 indicates an image file. If not specified, the file type will be inferred from the URL.
32-
zh_Hans: 文件类型。0 表示 PDF 文件,1 表示图像文件。若不设置,则将根据 URL 推断文件类型
33-
llm_description: File type. 0 indicates a PDF file, and 1 indicates an image file. If not specified, the file type will be inferred from the URL.
45+
en_US: File type. "Auto Detect" infers the type from the URL; "PDF" is for PDF files; "Image" is for image files.
46+
zh_Hans: 文件类型。"自动检测" 将根据 URL 推断类型,"PDF" 表示 PDF 文件,"图片" 表示图像文件。
47+
llm_description: File type. "auto" infers the type from the URL; "pdf" is for PDF files; "image" is for image files.
3448
form: llm
3549
- name: useDocOrientationClassify
3650
type: boolean
@@ -112,9 +126,9 @@ parameters:
112126
en_US: Whether to Enable Chart Recognition
113127
zh_Hans: 是否启用图表识别
114128
human_description:
115-
en_US: Whether to enable the chart recognition function.
116-
zh_Hans: 是否启用图表识别功能。
117-
llm_description: Whether to enable the chart recognition function.
129+
en_US: Whether to enable the chart recognition function (to recognize line charts, bar charts, etc.).
130+
zh_Hans: 是否启用图表识别功能(识别折线图、柱状图等)
131+
llm_description: Whether to enable the chart recognition function (to recognize line charts, bar charts, etc.).
118132
form: llm
119133
- name: useRegionDetection
120134
type: boolean
@@ -128,6 +142,18 @@ parameters:
128142
zh_Hans: 是否启用区域检测功能。
129143
llm_description: Whether to enable the region detection function.
130144
form: llm
145+
- name: formatBlockContent
146+
type: boolean
147+
required: false
148+
default: false
149+
label:
150+
en_US: Whether to Format Block Content
151+
zh_Hans: 是否格式化块内容
152+
human_description:
153+
en_US: Whether to convert the block content into Markdown format.
154+
zh_Hans: 是否将块内容转换为 Markdown 格式。
155+
llm_description: Whether to convert the block content into Markdown format.
156+
form: llm
131157
- name: layoutNms
132158
type: boolean
133159
required: false
@@ -370,6 +396,41 @@ parameters:
370396
zh_Hans: 是否启用无线表端到端表格识别模式。启用时,不使用单元格检测模型,只使用表格结构识别模型。
371397
llm_description: Whether to enable end-to-end wireless table recognition mode. When enabled, the cell detection model will not be used, and only the table structure recognition model will be used.
372398
form: llm
399+
- name: markdownIgnoreLabels
400+
type: string
401+
required: false
402+
label:
403+
en_US: Labels to Ignore in Markdown Output
404+
zh_Hans: Markdown 输出中忽略的标签
405+
human_description:
406+
en_US: 'Comma-separated list of labels to ignore when generating Markdown output. For example: "header,footer,page_number".'
407+
zh_Hans: '生成 Markdown 输出时要忽略的标签列表,使用逗号分隔。例如:"header,footer,page_number"。'
408+
llm_description: 'Comma-separated list of labels to ignore when generating Markdown output. For example: "header,footer,page_number".'
409+
form: llm
410+
- name: prettifyMarkdown
411+
type: boolean
412+
required: false
413+
default: true
414+
label:
415+
en_US: Whether to Prettify the Output Markdown Text
416+
zh_Hans: 是否美化输出的 Markdown 文本
417+
human_description:
418+
en_US: Whether to prettify the output Markdown text.
419+
zh_Hans: 是否美化输出的 Markdown 文本。
420+
llm_description: Whether to prettify the output Markdown text.
421+
form: llm
422+
- name: showFormulaNumber
423+
type: boolean
424+
required: false
425+
default: false
426+
label:
427+
en_US: Whether to Include Formula Numbers in the Output Markdown Text
428+
zh_Hans: 是否在输出的 Markdown 文本中包含公式编号
429+
human_description:
430+
en_US: Whether to include formula numbers in the output Markdown text.
431+
zh_Hans: 是否在输出的 Markdown 文本中包含公式编号。
432+
llm_description: Whether to include formula numbers in the output Markdown text.
433+
form: llm
373434
- name: visualize
374435
type: boolean
375436
required: false
@@ -382,6 +443,14 @@ parameters:
382443
zh_Hans: 是否返回可视化结果。
383444
llm_description: Whether or not to return visualization results.
384445
form: llm
446+
output_schema:
447+
type: object
448+
properties:
449+
images:
450+
type: array
451+
items:
452+
type: object
453+
description: The images extracted from the file.
385454
extra:
386455
python:
387456
source: tools/document_parsing.py

0 commit comments

Comments
 (0)