Add timeout and strip image html tags (#2373)

Bobholamovic · web-flow · commit b3ae207b01ec · 2026-01-07T13:56:43.000+08:00
diff --git a/tools/paddleocr/manifest.yaml b/tools/paddleocr/manifest.yaml
@@ -1,4 +1,4 @@
-version: 0.1.2
+version: 0.1.3
 type: plugin
 author: langgenius
 name: paddleocr
diff --git a/tools/paddleocr/tools/document_parsing.py b/tools/paddleocr/tools/document_parsing.py
@@ -5,6 +5,10 @@
 from dify_plugin import Tool
 from dify_plugin.entities.tool import ToolInvokeMessage
 
+from tools.utils import remove_img_from_markdown
+
+REQUEST_TIMEOUT = (10, 600)
+
 
 class DocumentParsingTool(Tool):
     def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
@@ -68,20 +72,24 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
                 api_url,
                 headers={"Authorization": f"token {access_token}"},
                 json=params,
+                timeout=REQUEST_TIMEOUT,
             )
             resp.raise_for_status()
             result = resp.json()
         except requests.exceptions.JSONDecodeError as e:
             raise RuntimeError(
                 f"Failed to decode JSON response from PaddleOCR API: {resp.text}"
             ) from e
+        except requests.exceptions.Timeout as e:
+            raise RuntimeError("PaddleOCR API request timed out") from e
         except requests.exceptions.RequestException as e:
             raise RuntimeError(f"PaddleOCR API request failed: {e}") from e
 
         markdown_text_list = []
         for item in result.get("result", {}).get("layoutParsingResults", []):
             markdown_text = item.get("markdown", {}).get("text")
             if markdown_text is not None:
+                markdown_text = remove_img_from_markdown(markdown_text)
                 markdown_text_list.append(markdown_text)
         yield self.create_text_message("\n\n".join(markdown_text_list))
         yield self.create_json_message(result)
diff --git a/tools/paddleocr/tools/document_parsing_vl.py b/tools/paddleocr/tools/document_parsing_vl.py
@@ -5,6 +5,10 @@
 from dify_plugin import Tool
 from dify_plugin.entities.tool import ToolInvokeMessage
 
+from tools.utils import remove_img_from_markdown
+
+REQUEST_TIMEOUT = (10, 600)
+
 
 class DocumentParsingVlTool(Tool):
     def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
@@ -54,22 +58,26 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
         try:
             resp = requests.post(
                 api_url,
-                headers={"Authorization": f"Bearer {access_token}"},
+                headers={"Authorization": f"token {access_token}"},
                 json=params,
+                timeout=REQUEST_TIMEOUT,
             )
             resp.raise_for_status()
             result = resp.json()
         except requests.exceptions.JSONDecodeError as e:
             raise RuntimeError(
                 f"Failed to decode JSON response from PaddleOCR API: {resp.text}"
             ) from e
+        except requests.exceptions.Timeout as e:
+            raise RuntimeError("PaddleOCR API request timed out") from e
         except requests.exceptions.RequestException as e:
             raise RuntimeError(f"PaddleOCR API request failed: {e}") from e
 
         markdown_text_list = []
         for item in result.get("result", {}).get("layoutParsingResults", []):
             markdown_text = item.get("markdown", {}).get("text")
             if markdown_text is not None:
+                markdown_text = remove_img_from_markdown(markdown_text)
                 markdown_text_list.append(markdown_text)
         yield self.create_text_message("\n\n".join(markdown_text_list))
         yield self.create_json_message(result)
diff --git a/tools/paddleocr/tools/text_recognition.py b/tools/paddleocr/tools/text_recognition.py
@@ -5,6 +5,8 @@
 from dify_plugin import Tool
 from dify_plugin.entities.tool import ToolInvokeMessage
 
+REQUEST_TIMEOUT = (10, 600)
+
 
 class TextRecognitionTool(Tool):
     def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
@@ -48,13 +50,16 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
                 api_url,
                 headers={"Authorization": f"token {access_token}"},
                 json=params,
+                timeout=REQUEST_TIMEOUT,
             )
             resp.raise_for_status()
             result = resp.json()
         except requests.exceptions.JSONDecodeError as e:
             raise RuntimeError(
                 f"Failed to decode JSON response from PaddleOCR API: {resp.text}"
             ) from e
+        except requests.exceptions.Timeout as e:
+            raise RuntimeError("PaddleOCR API request timed out") from e
         except requests.exceptions.RequestException as e:
             raise RuntimeError(f"PaddleOCR API request failed: {e}") from e
 
diff --git a/tools/paddleocr/tools/utils.py b/tools/paddleocr/tools/utils.py
@@ -0,0 +1,35 @@
+"""Helpers for post-processing Markdown returned by the PaddleOCR API."""
+
+import re
+
+# Matches an HTML image tag, optionally together with a <div> wrapper whose
+# only content is that image. The wrapped alternative is listed first so the
+# whole wrapper is removed rather than leaving an empty <div></div> behind.
+# Both self-closing (<img ... />) and unclosed (<img ...>) tags are matched.
+# NOTE: attribute values that contain a literal ">" are not supported.
+MARKDOWN_IMAGE_PATTERN = re.compile(
+    r"""
+    <div\b[^>]*>\s*
+    <img\b[^>]*>\s*
+    </div>
+    |
+    <img\b[^>]*>
+    """,
+    re.IGNORECASE | re.VERBOSE,
+)
+
+
+def remove_img_from_markdown(markdown: str) -> str:
+    """Return *markdown* with HTML ``<img>`` tags removed.
+
+    An image wrapped in a ``<div>`` that contains nothing else is removed
+    together with its wrapper. Markdown-syntax images (``![alt](url)``) are
+    left untouched.
+
+    Args:
+        markdown: Markdown text that may embed HTML image tags.
+
+    Returns:
+        The text with every matched image tag replaced by an empty string.
+    """
+    return MARKDOWN_IMAGE_PATTERN.sub("", markdown)

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,4 @@` |
| `1` |  | `-version: 0.1.2` |
|  | `1` | `+version: 0.1.3` |
| `2` | `2` | `type: plugin` |
| `3` | `3` | `author: langgenius` |
| `4` | `4` | `name: paddleocr` |