diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 209027caa4..e939baadcd 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -2,7 +2,7 @@ name: Develop Docs on: push: branches: #设置更新哪个分支会更新站点 - - release/3.3 + - release/3.4 permissions: contents: write jobs: @@ -27,5 +27,5 @@ jobs: - run: pip install mike mkdocs-material jieba mkdocs-git-revision-date-localized-plugin mkdocs-git-committers-plugin-2 mkdocs-git-authors-plugin mkdocs-static-i18n mkdocs-minify-plugin - run: | git fetch origin gh-pages --depth=1 - mike deploy --push --update-aliases 3.3 latest + mike deploy --push --update-aliases 3.4 latest mike set-default --push latest diff --git a/.github/workflows/xpu_ci.yml b/.github/workflows/xpu_ci.yml index cac51fafe9..ed08004964 100644 --- a/.github/workflows/xpu_ci.yml +++ b/.github/workflows/xpu_ci.yml @@ -27,7 +27,7 @@ jobs: - name: Code Checkout env: - docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.3.0 + docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 run: | REPO="https://github.com/${{ github.repository }}.git" FULL_REPO="${{ github.repository }}" @@ -58,7 +58,7 @@ jobs: - name: Run CI unittest env: - docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.3.0 + docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 run: | runner_name="${{ runner.name }}" PARENT_DIR=$(dirname "$WORKSPACE") @@ -71,5 +71,6 @@ jobs: ${docker_image} /bin/bash -c " git config --global --add safe.directory /workspace/PaddleX cd PaddleX + export PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=true bash tests/run_xpu_ci.sh " diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f480361043..ab2e0f7a07 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -72,3 +72,4 @@ repos: files: ^paddlex/.*\.py$ 
additional_dependencies: - stdlib-list==0.10.0 + - setuptools diff --git a/.precommit/check_imports.py b/.precommit/check_imports.py index 7f9ec4a349..4de0856d20 100644 --- a/.precommit/check_imports.py +++ b/.precommit/check_imports.py @@ -56,6 +56,7 @@ "langchain_community": "langchain-community", "langchain_core": "langchain-core", "langchain_openai": "langchain-openai", + "langchain_text_splitters": "langchain-text-splitters", "lxml": "lxml", "matplotlib": "matplotlib", "modelscope": "modelscope", diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile index 84aa4206fd..f761cc3368 100644 --- a/deploy/genai_vllm_server_docker/Dockerfile +++ b/deploy/genai_vllm_server_docker/Dockerfile @@ -4,21 +4,24 @@ RUN apt-get update \ && apt-get install -y libgl1 \ && rm -rf /var/lib/apt/lists/* -ENV PIP_NO_CACHE_DIR=0 ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 -RUN python -m pip install torch==2.8.0 +RUN --mount=type=cache,target=/root/.cache/pip \ + python -m pip install torch==2.8.0 ARG PADDLEX_VERSION=">=3.3.6,<3.4" -RUN python -m pip install "paddlex${PADDLEX_VERSION}" +RUN --mount=type=cache,target=/root/.cache/pip \ + python -m pip install "paddlex${PADDLEX_VERSION}" ARG BUILD_FOR_SM120=false -RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \ +RUN --mount=type=cache,target=/root/.cache/pip \ + if [ "${BUILD_FOR_SM120}" = 'true' ]; then \ python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.11/flash_attn-2.8.3%2Bcu128torch2.8-cp310-cp310-linux_x86_64.whl; \ else \ python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ fi \ + && python -m pip install transformers==4.57.6 \ && paddlex --install genai-vllm-server EXPOSE 8080 diff --git a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py 
b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py index 7a99bf9829..1b094662e8 100644 --- a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py +++ b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py @@ -48,6 +48,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -59,6 +60,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -105,12 +108,16 @@ def run_batch(self, inputs, log_ids, batch_id): ret = executor.map(self._preprocess, inputs_g, log_ids_g) ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], [] + ind_input_id_lst, ind_log_id_lst, ind_input_lst = [], [], [] for i, item in enumerate(ret): if isinstance(item, tuple): assert len(item) == 3, len(item) ind_img_lsts.append(item[0]) ind_data_info_lst.append(item[1]) ind_visualize_enabled_lst.append(item[2]) + ind_input_id_lst.append(input_ids_g[i]) + ind_log_id_lst.append(log_ids_g[i]) + ind_input_lst.append(inputs_g[i]) else: input_id = input_ids_g[i] result_or_output_dic[input_id] = item @@ -146,19 +153,19 @@ def run_batch(self, inputs, log_ids, batch_id): ind_preds.append(preds[start_idx : start_idx + len(item)]) start_idx += len(item) - for i, result in zip( - input_ids_g, + for input_id, result in zip( + ind_input_id_lst, executor.map( self._postprocess, ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst, ind_preds, - log_ids_g, - inputs_g, + ind_log_id_lst, + 
ind_input_lst, ), ): - result_or_output_dic[i] = result + result_or_output_dic[input_id] = result assert len(result_or_output_dic) == len( inputs @@ -270,6 +277,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/OCR/version.txt b/deploy/hps/sdk/pipelines/OCR/version.txt index 3a4036fb45..53a75d6735 100644 --- a/deploy/hps/sdk/pipelines/OCR/version.txt +++ b/deploy/hps/sdk/pipelines/OCR/version.txt @@ -1 +1 @@ -0.2.5 +0.2.6 diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py index 7b2568a7a1..40f7684f15 100644 --- a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py +++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -142,6 +145,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], 
return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt index d15723fbe8..1c09c74e22 100644 --- a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt +++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt @@ -1 +1 @@ -0.3.2 +0.3.3 diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py index de0a16bdee..c563ca0823 100644 --- a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py +++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -143,6 +146,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt index 
2b7c5ae018..17b2ccd9bf 100644 --- a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt +++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt @@ -1 +1 @@ -0.4.2 +0.4.3 diff --git a/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py b/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py index 361ce50332..7243078c10 100644 --- a/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py +++ b/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py @@ -30,12 +30,10 @@ class TritonPythonModel(BaseTritonPythonModel): def initialize(self, args): super().initialize(args) - - self.pipeline.inintial_visual_predictor(self.pipeline.config) - self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -47,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -151,6 +151,7 @@ def run(self, input, log_id): filename_template=f"markdown_{i}/{{key}}", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) md_flags = md_data["page_continuation_flags"] @@ -165,6 +166,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], 
max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt b/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt index d917d3e26a..b1e80bb248 100644 --- a/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt +++ b/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt @@ -1 +1 @@ -0.1.2 +0.1.3 diff --git a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py index f7e8d9b56b..5183d7c62f 100644 --- a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py +++ b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py @@ -47,6 +47,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -58,6 +59,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -104,12 +107,16 @@ def run_batch(self, inputs, log_ids, batch_id): ret = executor.map(self._preprocess, inputs_g, log_ids_g) ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], [] + ind_input_ids_lst, ind_log_ids_lst, ind_inputs_lst = [], [], [] for i, item in enumerate(ret): if isinstance(item, tuple): assert len(item) == 3, len(item) ind_img_lsts.append(item[0]) ind_data_info_lst.append(item[1]) ind_visualize_enabled_lst.append(item[2]) + ind_input_ids_lst.append(input_ids_g[i]) + 
ind_log_ids_lst.append(log_ids_g[i]) + ind_inputs_lst.append(inputs_g[i]) else: input_id = input_ids_g[i] result_or_output_dic[input_id] = item @@ -179,19 +186,19 @@ def run_batch(self, inputs, log_ids, batch_id): ind_preds.append(preds[start_idx : start_idx + len(item)]) start_idx += len(item) - for i, result in zip( - input_ids_g, + for input_id, result in zip( + ind_input_ids_lst, executor.map( self._postprocess, ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst, ind_preds, - log_ids_g, - inputs_g, + ind_log_ids_lst, + ind_inputs_lst, ), ): - result_or_output_dic[i] = result + result_or_output_dic[input_id] = result assert len(result_or_output_dic) == len( inputs @@ -323,6 +330,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu filename_template=f"markdown_{i}/{{key}}", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) md_flags = md_data["page_continuation_flags"] @@ -337,6 +345,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt b/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt index c2c0004f0e..449d7e73a9 100644 --- a/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt +++ b/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt @@ -1 +1 @@ -0.3.5 +0.3.6 diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/server/pipeline_config.yaml b/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/server/pipeline_config.yaml new file mode 100644 index 0000000000..37c85317f0 --- /dev/null +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/server/pipeline_config.yaml @@ -0,0 
+1,85 @@ + +pipeline_name: PaddleOCR-VL-1.5 + +batch_size: 64 + +use_queues: True + +use_doc_preprocessor: False +use_layout_detection: True +use_chart_recognition: False +use_seal_recognition: False +format_block_content: False +merge_layout_blocks: True +markdown_ignore_labels: + - number + - footnote + - header + - header_image + - footer + - footer_image + - aside_text + +SubModules: + LayoutDetection: + module_name: layout_detection + model_name: PP-DocLayoutV3 + model_dir: null + batch_size: 8 + threshold: 0.3 + layout_nms: True + layout_unclip_ratio: [1.0, 1.0] + layout_merge_bboxes_mode: + 0: "union" # abstract + 1: "union" # algorithm + 2: "union" # aside_text + 3: "large" # chart + 4: "union" # content + 5: "large" # display_formula + 6: "large" # doc_title + 7: "union" # figure_title + 8: "union" # footer + 9: "union" # footer + 10: "union" # footnote + 11: "union" # formula_number + 12: "union" # header + 13: "union" # header + 14: "union" # image + 15: "large" # inline_formula + 16: "union" # number + 17: "large" # paragraph_title + 18: "union" # reference + 19: "union" # reference_content + 20: "union" # seal + 21: "union" # table + 22: "union" # text + 23: "union" # text + 24: "union" # vision_footnote + VLRecognition: + module_name: vl_recognition + model_name: PaddleOCR-VL-1.5-0.9B + model_dir: null + batch_size: -1 + genai_config: + backend: native + +SubPipelines: + DocPreprocessor: + pipeline_name: doc_preprocessor + batch_size: 8 + use_doc_orientation_classify: True + use_doc_unwarping: True + SubModules: + DocOrientationClassify: + module_name: doc_text_orientation + model_name: PP-LCNet_x1_0_doc_ori + model_dir: null + batch_size: 8 + DocUnwarping: + module_name: image_unwarping + model_name: UVDoc + model_dir: null + +Serving: + extra: + max_num_input_imgs: null diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/version.txt b/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/version.txt new file mode 100644 index 0000000000..6e8bf73aa5 --- 
/dev/null +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/version.txt @@ -0,0 +1 @@ +0.1.0 diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py index f0b0a64ac3..6192a312d0 100644 --- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py @@ -47,6 +47,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -58,6 +59,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -104,12 +107,16 @@ def run_batch(self, inputs, log_ids, batch_id): ret = executor.map(self._preprocess, inputs_g, log_ids_g) ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], [] + ind_input_id_lst, ind_log_id_lst, ind_input_lst = [], [], [] for i, item in enumerate(ret): if isinstance(item, tuple): assert len(item) == 3, len(item) ind_img_lsts.append(item[0]) ind_data_info_lst.append(item[1]) ind_visualize_enabled_lst.append(item[2]) + ind_input_id_lst.append(input_ids_g[i]) + ind_log_id_lst.append(log_ids_g[i]) + ind_input_lst.append(inputs_g[i]) else: input_id = input_ids_g[i] result_or_output_dic[input_id] = item @@ -157,19 +164,19 @@ def run_batch(self, inputs, log_ids, batch_id): ind_preds.append(preds[start_idx : start_idx + len(item)]) start_idx += len(item) - for 
i, result in zip( - input_ids_g, + for input_id, result in zip( + ind_input_id_lst, executor.map( self._postprocess, ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst, ind_preds, - log_ids_g, - inputs_g, + ind_log_id_lst, + ind_input_lst, ), ): - result_or_output_dic[i] = result + result_or_output_dic[input_id] = result assert len(result_or_output_dic) == len( inputs @@ -301,6 +308,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu filename_template=f"markdown_{i}/{{key}}", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) if visualize_enabled: @@ -314,6 +322,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml index ebf5804d29..900892f522 100644 --- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml @@ -103,3 +103,7 @@ SubPipelines: module_name: image_unwarping model_name: UVDoc model_dir: null + +Serving: + extra: + max_num_input_imgs: null diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt index 0ea3a944b3..9e11b32fca 100644 --- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt @@ -1 +1 @@ -0.2.0 +0.3.1 diff --git a/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py 
b/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py index 629dd34e1a..390ac1a1d1 100644 --- a/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py +++ b/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -131,6 +134,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt b/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt index 0c62199f16..ee1372d33a 100644 --- a/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt +++ b/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt @@ -1 +1 @@ -0.2.1 +0.2.2 diff --git a/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py b/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py index 7af06c405a..0ccc35d1de 100644 --- a/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py +++ 
b/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -128,6 +131,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/formula_recognition/version.txt b/deploy/hps/sdk/pipelines/formula_recognition/version.txt index 0c62199f16..ee1372d33a 100644 --- a/deploy/hps/sdk/pipelines/formula_recognition/version.txt +++ b/deploy/hps/sdk/pipelines/formula_recognition/version.txt @@ -1 +1 @@ -0.2.1 +0.2.2 diff --git a/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py index b4ba08c961..e96a2f6f55 100644 --- a/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py +++ b/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 
self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -143,6 +146,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/layout_parsing/version.txt b/deploy/hps/sdk/pipelines/layout_parsing/version.txt index 9e11b32fca..d15723fbe8 100644 --- a/deploy/hps/sdk/pipelines/layout_parsing/version.txt +++ b/deploy/hps/sdk/pipelines/layout_parsing/version.txt @@ -1 +1 @@ -0.3.1 +0.3.2 diff --git a/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py b/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py index 4885f6a68c..ee41cd35b3 100644 --- a/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py +++ b/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if 
"url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -134,6 +137,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/seal_recognition/version.txt b/deploy/hps/sdk/pipelines/seal_recognition/version.txt index ee1372d33a..7179039691 100644 --- a/deploy/hps/sdk/pipelines/seal_recognition/version.txt +++ b/deploy/hps/sdk/pipelines/seal_recognition/version.txt @@ -1 +1 @@ -0.2.2 +0.2.3 diff --git a/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py b/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py index c1624046bb..baaafe4d4e 100644 --- a/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py +++ b/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -132,6 +135,7 @@ def 
run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/table_recognition/version.txt b/deploy/hps/sdk/pipelines/table_recognition/version.txt index 267577d47e..2b7c5ae018 100644 --- a/deploy/hps/sdk/pipelines/table_recognition/version.txt +++ b/deploy/hps/sdk/pipelines/table_recognition/version.txt @@ -1 +1 @@ -0.4.1 +0.4.2 diff --git a/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py b/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py index 508981080b..552bf3b5ef 100644 --- a/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py +++ b/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -137,6 +140,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: 
diff --git a/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt b/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt index 267577d47e..2b7c5ae018 100644 --- a/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt +++ b/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt @@ -1 +1 @@ -0.4.1 +0.4.2 diff --git a/deploy/hps/sdk/scripts/assemble.py b/deploy/hps/sdk/scripts/assemble.py index 0354108334..85e40f0339 100755 --- a/deploy/hps/sdk/scripts/assemble.py +++ b/deploy/hps/sdk/scripts/assemble.py @@ -15,6 +15,7 @@ # limitations under the License. import argparse +import ast import pathlib import shutil import subprocess @@ -30,6 +31,25 @@ COMMON_DIR = BASE_DIR / "common" CLIENT_LIB_PATH = BASE_DIR / "paddlex-hps-client" OUTPUT_DIR = BASE_DIR / "output" +NAME_MAPPINGS_PATH = BASE_DIR / "_name_mappings.py" + + +def _load_pipeline_app_router(): + """Parse PIPELINE_APP_ROUTER from the mounted name_mappings.py file.""" + if not NAME_MAPPINGS_PATH.exists(): + return {} + source = NAME_MAPPINGS_PATH.read_text() + # NOTE: We use `ast` to extract the dict value without importing the module, + # because name_mappings.py may have dependencies that are not available in + # the build environment. `ast.parse` + `ast.literal_eval` safely evaluates + # the dict literal from the source code. 
+ tree = ast.parse(source) + for node in ast.iter_child_nodes(tree): + if isinstance(node, ast.Assign): + for target in node.targets: + if isinstance(target, ast.Name) and target.id == "PIPELINE_APP_ROUTER": + return ast.literal_eval(node.value) + return {} if __name__ == "__main__": @@ -53,6 +73,8 @@ ) sys.exit(2) + pipeline_app_router = _load_pipeline_app_router() + if args.all: pipeline_names = [p.name for p in PIPELINES_DIR.iterdir()] else: @@ -90,7 +112,20 @@ print("=" * 30) print(f"Pipeline: {pipeline_name}") pipeline_dir = PIPELINES_DIR / pipeline_name - if not pipeline_dir.exists(): + + mapped_pipeline_dir = None + if pipeline_name in pipeline_app_router: + source_name = pipeline_app_router[pipeline_name] + source_dir = PIPELINES_DIR / source_name + if not source_dir.exists(): + sys.exit( + f"Source pipeline directory {source_dir} not found" + f" for mapped pipeline {pipeline_name}" + ) + mapped_pipeline_dir = pipeline_dir + pipeline_dir = source_dir + print(f"Using source pipeline: {source_name}") + elif not pipeline_dir.exists(): sys.exit(f"{pipeline_dir} not found") tgt_name = TARGET_NAME_PATTERN.format(pipeline_name=pipeline_name) @@ -120,7 +155,10 @@ shutil.copy(pipeline_dir / "version.txt", tgt_dir / "version.txt") - arch_path = tgt_dir.with_suffix(ARCHIVE_SUFFIX) + if mapped_pipeline_dir is not None: + shutil.copytree(mapped_pipeline_dir, tgt_dir, dirs_exist_ok=True) + + arch_path = OUTPUT_DIR / (tgt_name + ARCHIVE_SUFFIX) print(f"Creating archive: {arch_path}") with tarfile.open(arch_path, "w:gz") as tar: tar.add(tgt_dir, arcname=tgt_dir.name) diff --git a/deploy/hps/sdk/scripts/assemble.sh b/deploy/hps/sdk/scripts/assemble.sh index 9926d45779..a679c37237 100755 --- a/deploy/hps/sdk/scripts/assemble.sh +++ b/deploy/hps/sdk/scripts/assemble.sh @@ -1,10 +1,15 @@ #!/usr/bin/env bash +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SDK_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +REPO_ROOT="$(cd "${SDK_DIR}/../../.." 
&& pwd)" + docker run \ -it \ -e OUID="$(id -u)" \ -e OGID="$(id -g)" \ - -v "$(pwd)":/workspace \ + -v "${SDK_DIR}":/workspace \ + -v "${REPO_ROOT}/paddlex/inference/serving/infra/name_mappings.py":/workspace/_name_mappings.py:ro \ -w /workspace \ --rm \ python:3.10 \ diff --git a/deploy/hps/server_env/cpu_version.txt b/deploy/hps/server_env/cpu_version.txt deleted file mode 100644 index e4737652ca..0000000000 --- a/deploy/hps/server_env/cpu_version.txt +++ /dev/null @@ -1 +0,0 @@ -0.3.13 diff --git a/deploy/hps/server_env/gpu_version.txt b/deploy/hps/server_env/gpu_version.txt deleted file mode 100644 index 0b69c00c5f..0000000000 --- a/deploy/hps/server_env/gpu_version.txt +++ /dev/null @@ -1 +0,0 @@ -0.3.14 diff --git a/deploy/hps/server_env/paddlex-hps-server/pyproject.toml b/deploy/hps/server_env/paddlex-hps-server/pyproject.toml index 5d85392841..f6dcad64a4 100644 --- a/deploy/hps/server_env/paddlex-hps-server/pyproject.toml +++ b/deploy/hps/server_env/paddlex-hps-server/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "paddlex-hps-server" -version = "0.4.0" +version = "0.5.0" # `paddlex` is not included here dependencies = [ "colorlog >= 6.9", diff --git a/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py b/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py index 14f699c3f1..6250706821 100644 --- a/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py +++ b/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py @@ -46,6 +46,7 @@ def postprocess_image( *, file_storage: Optional[Storage] = None, return_url: bool = False, + url_expires_in: int = -1, max_img_size: Optional[Tuple[int, int]] = None, ) -> str: if return_url: @@ -71,7 +72,7 @@ def postprocess_image( file_storage.set(key, img_bytes) if return_url: assert isinstance(file_storage, SupportsGetURL) - return file_storage.get_url(key) + return 
file_storage.get_url(key, expires_in=url_expires_in) return utils.base64_encode(img_bytes) @@ -81,6 +82,7 @@ def postprocess_images( filename_template: str = "{key}.jpg", file_storage: Optional[Storage] = None, return_urls: bool = False, + url_expires_in: int = -1, max_img_size: Optional[Tuple[int, int]] = None, ) -> Dict[str, str]: output_images: Dict[str, str] = {} @@ -95,6 +97,7 @@ def postprocess_images( filename=filename_template.format(key=key), file_storage=file_storage, return_url=return_urls, + url_expires_in=url_expires_in, max_img_size=max_img_size, ) return output_images diff --git a/deploy/hps/server_env/requirements/cpu.txt b/deploy/hps/server_env/requirements/cpu.txt index c24a8fc5af..a3acfe3055 100644 --- a/deploy/hps/server_env/requirements/cpu.txt +++ b/deploy/hps/server_env/requirements/cpu.txt @@ -161,7 +161,9 @@ langchain-core==0.2.43 langchain-openai==0.1.25 # via paddlex (../../../setup.py) langchain-text-splitters==0.2.4 - # via langchain + # via + # langchain + # paddlex (../../../setup.py) langsmith==0.1.147 # via # langchain @@ -345,6 +347,7 @@ scikit-learn==1.6.1 # via paddlex (../../../setup.py) scipy==1.15.2 # via + # paddlex (../../../setup.py) # scikit-image # scikit-learn sentencepiece==0.2.1 diff --git a/deploy/hps/server_env/requirements/gpu.txt b/deploy/hps/server_env/requirements/gpu.txt index caa9a8fbc0..e43d89b38f 100644 --- a/deploy/hps/server_env/requirements/gpu.txt +++ b/deploy/hps/server_env/requirements/gpu.txt @@ -161,7 +161,9 @@ langchain-core==0.2.43 langchain-openai==0.1.25 # via paddlex (../../../setup.py) langchain-text-splitters==0.2.4 - # via langchain + # via + # langchain + # paddlex (../../../setup.py) langsmith==0.1.147 # via # langchain @@ -345,6 +347,7 @@ scikit-learn==1.6.1 # via paddlex (../../../setup.py) scipy==1.15.2 # via + # paddlex (../../../setup.py) # scikit-image # scikit-learn sentencepiece==0.2.1 diff --git a/deploy/hps/server_env/scripts/remove_images.sh 
b/deploy/hps/server_env/scripts/remove_images.sh index 2926504e3d..5218df41b9 100755 --- a/deploy/hps/server_env/scripts/remove_images.sh +++ b/deploy/hps/server_env/scripts/remove_images.sh @@ -3,9 +3,8 @@ paddlex_version="$(cat ../../../paddlex/.version)" for device_type in 'gpu' 'cpu'; do - version="$(cat "${device_type}_version.txt")" docker rmi \ "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:paddlex${paddlex_version%.*}-${device_type}" \ - "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${version}-${device_type}" \ + "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:$(git rev-parse --short HEAD)-${device_type}" \ "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:latest-${device_type}" done diff --git a/deploy/hps/server_env/scripts/tag_and_push_images.sh b/deploy/hps/server_env/scripts/tag_and_push_images.sh index fc334a13d5..5dfbad5f15 100755 --- a/deploy/hps/server_env/scripts/tag_and_push_images.sh +++ b/deploy/hps/server_env/scripts/tag_and_push_images.sh @@ -3,9 +3,8 @@ paddlex_version="$(cat ../../../paddlex/.version)" for device_type in 'gpu' 'cpu'; do - version="$(cat "${device_type}_version.txt")" docker push "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:latest-${device_type}" - for tag in "${version}-paddlex${paddlex_version}-${device_type}" "paddlex${paddlex_version%.*}-${device_type}"; do + for tag in "$(git rev-parse --short HEAD)-${device_type}" "paddlex${paddlex_version%.*}-${device_type}"; do docker tag "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:latest-${device_type}" "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${tag}" docker push "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${tag}" done diff --git a/docs/other_devices_support/multi_devices_use_guide.en.md b/docs/other_devices_support/multi_devices_use_guide.en.md index a37706cb59..8bfd62b27e 100644 --- a/docs/other_devices_support/multi_devices_use_guide.en.md +++ b/docs/other_devices_support/multi_devices_use_guide.en.md @@ -14,7 +14,9 @@ Ascend NPU: [Ascend NPU 
PaddlePaddle Installation Guide](./paddlepaddle_install_ Cambricon MLU: [Cambricon MLU PaddlePaddle Installation Guide](./paddlepaddle_install_MLU.en.md) -Kunlun XPU: [Kunlun XPU PaddlePaddle Installation Guide](./paddlepaddle_install_XPU.en.md) +Kunlunxin 2: [Kunlunxin 2 PaddlePaddle Installation Guide](./paddlepaddle_install_XPU.en.md) + +Kunlunxin P800: [Kunlunxin P800 PaddlePaddle Installation Guide](./paddlepaddle_install_P800.en.md) Hygon DCU: [Hygon DCU PaddlePaddle Installation Guide](./paddlepaddle_install_DCU.en.md) diff --git a/docs/other_devices_support/multi_devices_use_guide.md b/docs/other_devices_support/multi_devices_use_guide.md index 238d761f10..6f249a2532 100644 --- a/docs/other_devices_support/multi_devices_use_guide.md +++ b/docs/other_devices_support/multi_devices_use_guide.md @@ -12,7 +12,9 @@ comments: true 昇腾 NPU:[昇腾 NPU 飞桨安装教程](./paddlepaddle_install_NPU.md) -昆仑 XPU:[昆仑 XPU 飞桨安装教程](./paddlepaddle_install_XPU.md) +昆仑芯 XPU2:[昆仑芯二代 AI 芯片飞桨安装教程](./paddlepaddle_install_XPU.md) + +昆仑芯 P800: [昆仑芯 P800 飞桨安装教程](./paddlepaddle_install_P800.md) 寒武纪 MLU:[寒武纪 MLU 飞桨安装教程](./paddlepaddle_install_MLU.md) diff --git a/docs/other_devices_support/paddlepaddle_install_P800.en.md b/docs/other_devices_support/paddlepaddle_install_P800.en.md new file mode 100644 index 0000000000..a9c118525f --- /dev/null +++ b/docs/other_devices_support/paddlepaddle_install_P800.en.md @@ -0,0 +1,44 @@ +--- +comments: true +--- + +# Kunlunxin P800 PaddlePaddle Installation Tutorial + +Currently, PaddleX supports Kunlunxin P800. Considering environmental differences, we recommend using the Kunlunxin P800 development image officially released by PaddlePaddle, which is pre-installed with the Kunlunxin basic runtime environment library (XRE). + +## 1. Docker Environment Preparation +Pull the image. This image is only for the development environment and does not include a pre-compiled PaddlePaddle installation package.
+ +```bash +docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 +``` +Refer to the following command to start the container: + +```bash +docker run -it --name paddle-xpu-dev -v $(pwd):/work \ + -v /usr/local/bin/xpu-smi:/usr/local/bin/xpu-smi \ + -w=/work --shm-size=128G --network=host --privileged \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 /bin/bash +``` + +## 2. Install Paddle Package +Currently, Python3.10 wheel installation packages are provided. If you have a need for other Python versions, you can refer to the [PaddlePaddle official documentation](https://www.paddlepaddle.org.cn/en/install/quick) to compile and install them yourself. + +Install the Python3.10 wheel installation package: + +```bash +python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/ # For X86 architecture +``` + +Verify the installation package. After installation, run the following command: + +```bash +python -c "import paddle; paddle.utils.run_check()" +``` + +The expected output is: + +``` +PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now. 
+``` diff --git a/docs/other_devices_support/paddlepaddle_install_P800.md b/docs/other_devices_support/paddlepaddle_install_P800.md new file mode 100644 index 0000000000..ddcbdfad5c --- /dev/null +++ b/docs/other_devices_support/paddlepaddle_install_P800.md @@ -0,0 +1,44 @@ +--- +comments: true +--- + +# 昆仑芯 P800 飞桨安装教程 + +当前 PaddleX 支持昆仑芯 P800 等芯片。考虑到环境差异性,我们推荐使用飞桨官方发布的昆仑芯 P800 开发镜像,该镜像预装有昆仑基础运行环境库(XRE)。 + +## 1、docker环境准备 +拉取镜像,此镜像仅为开发环境,镜像中不包含预编译的飞桨安装包 + +``` +# 拉取镜像 +docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 +``` +参考如下命令启动容器 + +``` +# 参考如下命令,启动容器 +docker run -it --name paddle-xpu-dev -v $(pwd):/work \ + -v /usr/local/bin/xpu-smi:/usr/local/bin/xpu-smi \ + -w=/work --shm-size=128G --network=host --privileged \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 /bin/bash +``` +## 2、安装paddle包 +当前提供 Python3.10 的 wheel 安装包。如有其他 Python 版本需求,可以参考[飞桨官方文档](https://www.paddlepaddle.org.cn/install/quick)自行编译安装。 + +安装 Python3.10 的 wheel 安装包 + +``` +# 下载并安装 wheel 包 +python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/ +``` +验证安装包 安装完成之后,运行如下命令 + +``` +python -c "import paddle; paddle.utils.run_check()" +``` +预期得到如下输出结果 + +``` +PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now.
+``` diff --git a/docs/pipeline_deploy/serving.en.md b/docs/pipeline_deploy/serving.en.md index 975cc995db..3cda0afb8b 100644 --- a/docs/pipeline_deploy/serving.en.md +++ b/docs/pipeline_deploy/serving.en.md @@ -258,6 +258,10 @@ Find the high-stability serving SDK corresponding to the pipeline in the table b PaddleOCR-VL paddlex_hps_PaddleOCR-VL_sdk.tar.gz + +PaddleOCR-VL-1.5 +paddlex_hps_PaddleOCR-VL-1.5_sdk.tar.gz + diff --git a/docs/pipeline_deploy/serving.md b/docs/pipeline_deploy/serving.md index a8e956e1cb..5749103ff5 100644 --- a/docs/pipeline_deploy/serving.md +++ b/docs/pipeline_deploy/serving.md @@ -258,6 +258,10 @@ paddlex --serve --pipeline image_classification --use_hpip PaddleOCR-VL paddlex_hps_PaddleOCR-VL_sdk.tar.gz + +PaddleOCR-VL-1.5 +paddlex_hps_PaddleOCR-VL-1.5_sdk.tar.gz + diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md index ae97468e26..5871215719 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md @@ -1590,9 +1590,7 @@ The following is the API reference for basic Serving and examples of multilingua file string -The URL of an image file or PDF file accessible by the server, or the Base64-encoded result of the content of the aforementioned file types. By default, for PDF files with more than 10 pages, only the first 10 pages will be processed.
To remove the page limit, add the following configuration to the pipeline configuration file:
Serving:
-  extra:
-    max_num_input_imgs: null
+The URL of an image file or PDF file accessible by the server, or the Base64-encoded result of the content of the aforementioned file types. Yes diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md index bcac347f89..e312cf733d 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md @@ -6,7 +6,7 @@ comments: true PaddleOCR-VL is a SOTA and resource-efficient model tailored for document parsing. Its core component is PaddleOCR-VL-0.9B, a compact yet powerful vision-language model (VLM) that integrates a NaViT-style dynamic resolution visual encoder with the ERNIE-4.5-0.3B language model to enable accurate element recognition. This innovative model efficiently supports 109 languages and excels in recognizing complex elements (e.g., text, tables, formulas, and charts), while maintaining minimal resource consumption. Through comprehensive evaluations on widely used public benchmarks and in-house benchmarks, PaddleOCR-VL achieves SOTA performance in both page-level document parsing and element-level recognition. It significantly outperforms existing solutions, exhibits strong competitiveness against top-tier VLMs, and delivers fast inference speeds. These strengths make it highly suitable for practical deployment in real-world scenarios. -On January 29, 2026, we released PaddleOCR-VL-1.5. PaddleOCR-VL-1.5 not only significantly improved the accuracy on the OmniDocBench v1.5 evaluation set to 94.5%, but also innovatively supports irregular-shaped bounding box localization. As a result, PaddleOCR-VL-1.5 demonstrates outstanding performance in real-world scenarios such as Skew, Warping, Screen Photography, Illumination, and Scanning. In addition, the model has added new capabilities for seal (stamp) recognition and text detection and recognition, with key metrics continuing to lead the industry. 
+**On January 29, 2026, we released PaddleOCR-VL-1.5. PaddleOCR-VL-1.5 not only significantly improved the accuracy on the OmniDocBench v1.5 evaluation set to 94.5%, but also innovatively supports irregular-shaped bounding box localization. As a result, PaddleOCR-VL-1.5 demonstrates outstanding performance in real-world scenarios such as Skew, Warping, Screen Photography, Illumination, and Scanning. In addition, the model has added new capabilities for seal (stamp) recognition and text detection and recognition, with key metrics continuing to lead the industry.** @@ -1364,9 +1364,7 @@ Below are the API references for basic service-based deployment and examples of file string -The URL of an image file or PDF file accessible to the server, or the Base64-encoded result of the content of the aforementioned file types. By default, for PDF files with more than 10 pages, only the first 10 pages will be processed.
To remove the page limit, add the following configuration to the production line configuration file:
 Serving:
-  extra:
-    max_num_input_imgs: null
+The URL of an image file or PDF file accessible to the server, or the Base64-encoded result of the content of the aforementioned file types. Yes diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index 1213104491..960a0bbe51 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -6,7 +6,7 @@ comments: true PaddleOCR-VL 是一款先进、高效的文档解析模型,专为文档中的元素识别设计。其核心组件为 PaddleOCR-VL-0.9B,这是一种紧凑而强大的视觉语言模型(VLM),它由 NaViT 风格的动态分辨率视觉编码器与 ERNIE-4.5-0.3B 语言模型组成,能够实现精准的元素识别。该模型支持 109 种语言,并在识别复杂元素(如文本、表格、公式和图表)方面表现出色,同时保持极低的资源消耗。通过在广泛使用的公开基准与内部基准上的全面评测,PaddleOCR-VL 在页级级文档解析与元素级识别均达到 SOTA 表现。它显著优于现有的基于Pipeline方案和文档解析多模态方案以及先进的通用多模态大模型,并具备更快的推理速度。这些优势使其非常适合在真实场景中落地部署。 -2026年1月29日,我们发布了PaddleOCR-VL-1.5。PaddleOCR-VL-1.5不仅以94.5%精度大幅刷新了评测集OmniDocBench v1.5,更创新性地支持了异形框定位,使得PaddleOCR-VL-1.5 在扫描、倾斜、弯折、屏幕拍摄及复杂光照等真实场景中均表现优异。此外,模型还新增了印章识别与文本检测识别能力,关键指标持续领跑。 +**2026年1月29日,我们发布了PaddleOCR-VL-1.5。PaddleOCR-VL-1.5不仅以94.5%精度大幅刷新了评测集OmniDocBench v1.5,更创新性地支持了异形框定位,使得PaddleOCR-VL-1.5 在扫描、倾斜、弯折、屏幕拍摄及复杂光照等真实场景中均表现优异。此外,模型还新增了印章识别与文本检测识别能力,关键指标持续领跑。** @@ -1551,7 +1551,7 @@ INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) mergeTables boolean -请参阅PaddleOCR-VL对象中 restructure_pages 方法的 merge_table 参数相关说明。仅当restructurePagestrue时生效。 +请参阅PaddleOCR-VL对象中 restructure_pages 方法的 merge_tables 参数相关说明。仅当restructurePagestrue时生效。 否 diff --git a/mkdocs.yml b/mkdocs.yml index 28ef9ea9d4..d49b7c2d44 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -195,7 +195,8 @@ plugins: 海光 DCU 飞桨安装教程: HYGON DCU PaddlePaddle Installation Guide 寒武纪 MLU 飞桨安装教程: Cambricon MLU PaddlePaddle Installation Guide 昇腾 NPU 飞桨安装教程: Ascend NPU PaddlePaddle Installation Guide - 昆仑 XPU 飞桨安装教程: Kunlun XPU PaddlePaddle Installation Guide + 昆仑芯 Kunlunxin 2 飞桨安装教程: Kunlunxin 2 PaddlePaddle Installation Guide + 昆仑芯 P800 飞桨安装教程: Kunlunxin P800 PaddlePaddle Installation Guide 燧原 GCU 
飞桨安装教程: Enflame GCU PaddlePaddle Installation Guide 数据标注教程: Data Annotation Tutorials 计算机视觉: Computer Vision @@ -437,7 +438,8 @@ nav: - 海光 DCU 飞桨安装教程: other_devices_support/paddlepaddle_install_DCU.md - 寒武纪 MLU 飞桨安装教程: other_devices_support/paddlepaddle_install_MLU.md - 昇腾 NPU 飞桨安装教程: other_devices_support/paddlepaddle_install_NPU.md - - 昆仑 XPU 飞桨安装教程: other_devices_support/paddlepaddle_install_XPU.md + - 昆仑芯 Kunlunxin 2 飞桨安装教程: other_devices_support/paddlepaddle_install_XPU.md + - 昆仑芯 P800 飞桨安装教程: other_devices_support/paddlepaddle_install_P800.md - 燧原 GCU 飞桨安装教程: other_devices_support/paddlepaddle_install_GCU.md - 数据标注教程: - 计算机视觉: diff --git a/paddlex/__init__.py b/paddlex/__init__.py index 27f40e73ec..ad89c47164 100644 --- a/paddlex/__init__.py +++ b/paddlex/__init__.py @@ -15,6 +15,10 @@ import os import sys +from .utils.langchain_shim import apply_langchain_shim + +apply_langchain_shim() + _SPECIAL_MODS = ["paddle", "paddle_custom_device", "ultra_infer"] _loaded_special_mods = [] for mod in _SPECIAL_MODS: diff --git a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml index 17aca18f1d..e49cef34e3 100644 --- a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml +++ b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml @@ -59,7 +59,7 @@ SubModules: module_name: vl_recognition model_name: PaddleOCR-VL-1.5-0.9B model_dir: null - batch_size: 4096 + batch_size: -1 genai_config: backend: native @@ -79,3 +79,7 @@ SubPipelines: module_name: image_unwarping model_name: UVDoc model_dir: null + +Serving: + extra: + max_num_input_imgs: null diff --git a/paddlex/configs/pipelines/PaddleOCR-VL.yaml b/paddlex/configs/pipelines/PaddleOCR-VL.yaml index fdb52c7ede..900892f522 100644 --- a/paddlex/configs/pipelines/PaddleOCR-VL.yaml +++ b/paddlex/configs/pipelines/PaddleOCR-VL.yaml @@ -83,7 +83,7 @@ SubModules: module_name: vl_recognition model_name: PaddleOCR-VL-0.9B model_dir: null - batch_size: 4096 + batch_size: -1 
genai_config: backend: native @@ -103,3 +103,7 @@ SubPipelines: module_name: image_unwarping model_name: UVDoc model_dir: null + +Serving: + extra: + max_num_input_imgs: null diff --git a/paddlex/inference/common/batch_sampler/image_batch_sampler.py b/paddlex/inference/common/batch_sampler/image_batch_sampler.py index dd78354fe7..c519765f69 100644 --- a/paddlex/inference/common/batch_sampler/image_batch_sampler.py +++ b/paddlex/inference/common/batch_sampler/image_batch_sampler.py @@ -20,6 +20,7 @@ from ....utils import logging from ....utils.cache import CACHE_DIR from ....utils.download import download +from ....utils.flags import PDF_RENDER_SCALE from ...utils.io import PDFReader from .base_batch_sampler import BaseBatchSampler, Batch @@ -48,7 +49,7 @@ class ImageBatchSampler(BaseBatchSampler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.pdf_reader = PDFReader() + self.pdf_reader = PDFReader(zoom=PDF_RENDER_SCALE) # XXX: auto download for url def _download_from_url(self, in_path): diff --git a/paddlex/inference/genai/configs/paddleocr_vl_09b.py b/paddlex/inference/genai/configs/paddleocr_vl_09b.py index 5451bf6e25..9ecfde414a 100644 --- a/paddlex/inference/genai/configs/paddleocr_vl_09b.py +++ b/paddlex/inference/genai/configs/paddleocr_vl_09b.py @@ -45,13 +45,26 @@ def get_config(backend): cfg["max-concurrency"] = 2048 return cfg elif backend == "vllm": - return { - "trust-remote-code": True, - "gpu-memory-utilization": 0.5, - "max-model-len": 16384, - "max-num-batched-tokens": 131072, - "api-server-count": 4, - } + require_deps("torch") + + import torch + + if torch.xpu.is_available(): + return { + "trust-remote-code": True, + "max-num-batched-tokens": 16384, + "no-enable-prefix-caching": True, + "mm-processor-cache-gb": 0, + "enforce-eager": True, + } + else: + return { + "trust-remote-code": True, + "gpu-memory-utilization": 0.5, + "max-model-len": 16384, + "max-num-batched-tokens": 131072, + "api-server-count": 4, + } elif 
backend == "sglang": return { "trust-remote-code": True, diff --git a/paddlex/inference/models/__init__.py b/paddlex/inference/models/__init__.py index 154cef49e8..a1e0e13495 100644 --- a/paddlex/inference/models/__init__.py +++ b/paddlex/inference/models/__init__.py @@ -71,7 +71,7 @@ def create_predictor( if model_dir is None: model_dir = official_models[model_name] else: - assert Path(model_dir).exists(), f"{model_dir} is not exists!" + assert Path(model_dir).exists(), f"{model_dir} does not exist!" model_dir = Path(model_dir) config = BasePredictor.load_config(model_dir) assert ( diff --git a/paddlex/inference/models/common/genai.py b/paddlex/inference/models/common/genai.py index 9a6d2edbcf..d5a10d764f 100644 --- a/paddlex/inference/models/common/genai.py +++ b/paddlex/inference/models/common/genai.py @@ -29,12 +29,18 @@ "vllm-server", "sglang-server", "mlx-vlm-server", + "llama-cpp-server", ] class GenAIConfig(BaseModel): backend: Literal[ - "native", "fastdeploy-server", "vllm-server", "sglang-server", "mlx-vlm-server" + "native", + "fastdeploy-server", + "vllm-server", + "sglang-server", + "mlx-vlm-server", + "llama-cpp-server", ] = "native" server_url: Optional[str] = None max_concurrency: int = 200 diff --git a/paddlex/inference/models/common/static_infer.py b/paddlex/inference/models/common/static_infer.py index 4e0556c829..3b4de0518e 100644 --- a/paddlex/inference/models/common/static_infer.py +++ b/paddlex/inference/models/common/static_infer.py @@ -401,7 +401,13 @@ def _create( if hasattr(config, "enable_new_executor"): config.enable_new_executor() config.set_optimization_level(3) - config.delete_pass("matmul_add_act_fuse_pass") + # TODO(changdazhou): use a black list instead + if self._model_name == "PP-DocLayoutV3": + config.delete_pass("matmul_add_act_fuse_pass") + # ROCm does not support fused_conv2d_add_act kernel, delete the fuse passes + if paddle.is_compiled_with_rocm(): + config.delete_pass("conv2d_add_act_fuse_pass") + 
config.delete_pass("conv2d_add_fuse_pass") elif self._option.device_type == "npu": config.enable_custom_device("npu", self._option.device_id) if hasattr(config, "enable_new_ir"): @@ -480,7 +486,9 @@ def _create( if hasattr(config, "enable_new_executor"): config.enable_new_executor() config.set_optimization_level(3) - + if paddle.is_compiled_with_rocm(): + config.delete_pass("conv2d_add_act_fuse_pass") + config.delete_pass("conv2d_add_fuse_pass") config.enable_memory_optim() for del_p in self._option.delete_pass: config.delete_pass(del_p) @@ -488,6 +496,10 @@ def _create( # Disable paddle inference logging if not DEBUG: config.disable_glog_info() + # ROCm does not support fused_conv2d_add_act kernel, delete the fuse passes + if paddle.is_compiled_with_rocm(): + config.delete_pass("conv2d_add_act_fuse_pass") + config.delete_pass("conv2d_add_fuse_pass") predictor = paddle.inference.create_predictor(config) diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py index 93b61b6cc4..ab5a9cd87e 100644 --- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py @@ -65,7 +65,9 @@ class PaddleOCRVLForConditionalGeneration(Ernie4_5PretrainedModel): _tied_weights_keys = ["lm_head.weight"] config_class = PaddleOCRVLConfig _no_split_modules = ["Ernie4_5DecoderLayer", "SiglipEncoderLayer"] - + # Keep visual encoder in fp32 for ROCm stability (MIOpen bf16 conv has bugs) + # This also improves precision for vision processing + _keep_in_fp32_modules = ["visual", "mlp_AR"] base_model_prefix = "" def __init__(self, config): diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py index 9cd9341736..3f983a8d3a 100644 --- a/paddlex/inference/models/doc_vlm/predictor.py +++ b/paddlex/inference/models/doc_vlm/predictor.py @@ -167,10 +167,10 @@ 
def _build(self, **kwargs): return model, processor def _determine_batch_size(self): - if self._model_name == "PaddleOCR-VL-0.9B": + if self._model_name in ("PaddleOCR-VL-0.9B", "PaddleOCR-VL-1.5-0.9B"): batch_size = 1 if not self._use_local_model: - batch_size = 4096 + batch_size = 8192 logging.debug( f"The batch size of {self._model_name} is determined to be {batch_size}." ) @@ -415,118 +415,134 @@ def _genai_client_process( max_pixels, ): futures = [] - for item in data: - image = item["image"] - if isinstance(image, str): - if image.startswith("http://") or image.startswith("https://"): - image_url = image - else: + if self._genai_client.backend == "llama-cpp-server": + image_format = "PNG" + else: + image_format = "JPEG" + try: + for item in data: + image = item["image"] + if isinstance(image, str): + if image.startswith("http://") or image.startswith("https://"): + image_url = image + else: + from PIL import Image + + with Image.open(image) as img: + img = img.convert("RGB") + with io.BytesIO() as buf: + img.save(buf, format=image_format) + image_url = ( + f"data:image/{image_format.lower()};base64," + + base64.b64encode(buf.getvalue()).decode("ascii") + ) + elif isinstance(image, np.ndarray): + import cv2 from PIL import Image - with Image.open(image) as img: - img = img.convert("RGB") - with io.BytesIO() as buf: - img.save(buf, format="JPEG") - image_url = "data:image/jpeg;base64," + base64.b64encode( - buf.getvalue() - ).decode("ascii") - elif isinstance(image, np.ndarray): - import cv2 - from PIL import Image - - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - img = Image.fromarray(image) - with io.BytesIO() as buf: - img.save(buf, format="JPEG") - image_url = "data:image/jpeg;base64," + base64.b64encode( - buf.getvalue() - ).decode("ascii") - else: - raise TypeError(f"Not supported image type: {type(image)}") - - if self._genai_client.backend == "fastdeploy-server": - kwargs = { - "temperature": 1 if temperature is None else temperature, - "top_p": 0 if 
top_p is None else top_p, - } - else: - kwargs = { - "temperature": 0 if temperature is None else temperature, - } - if top_p is not None: - kwargs["top_p"] = top_p - - if self._genai_client.backend == "mlx-vlm-server": - max_tokens_name = "max_tokens" - else: - max_tokens_name = "max_completion_tokens" - - if max_new_tokens is not None: - kwargs[max_tokens_name] = max_new_tokens - elif self.model_name in self.model_group["PaddleOCR-VL"]: - kwargs[max_tokens_name] = 8192 - - kwargs["extra_body"] = {} - if skip_special_tokens is not None: - if self._genai_client.backend in ( - "fastdeploy-server", - "vllm-server", - "sglang-server", - "mlx-vlm-server", - ): - kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + img = Image.fromarray(image) + with io.BytesIO() as buf: + img.save(buf, format=image_format) + image_url = ( + f"data:image/{image_format.lower()};base64," + + base64.b64encode(buf.getvalue()).decode("ascii") + ) else: - raise ValueError("Not supported") + raise TypeError(f"Not supported image type: {type(image)}") - if repetition_penalty is not None: - kwargs["extra_body"]["repetition_penalty"] = repetition_penalty - - if min_pixels is not None: - if self._genai_client.backend == "vllm-server": - kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[ - "extra_body" - ].get("mm_processor_kwargs", {}) - kwargs["extra_body"]["mm_processor_kwargs"][ - "min_pixels" - ] = min_pixels + if self._genai_client.backend == "fastdeploy-server": + kwargs = { + "temperature": 1 if temperature is None else temperature, + "top_p": 0 if top_p is None else top_p, + } else: - warnings.warn( - f"{repr(self._genai_client.backend)} does not support `min_pixels`." 
- ) + kwargs = { + "temperature": 0 if temperature is None else temperature, + } + if top_p is not None: + kwargs["top_p"] = top_p - if max_pixels is not None: - if self._genai_client.backend == "vllm-server": - kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[ - "extra_body" - ].get("mm_processor_kwargs", {}) - kwargs["extra_body"]["mm_processor_kwargs"][ - "max_pixels" - ] = max_pixels + if self._genai_client.backend in ["mlx-vlm-server", "llama-cpp-server"]: + max_tokens_name = "max_tokens" else: - warnings.warn( - f"{repr(self._genai_client.backend)} does not support `max_pixels`." - ) + max_tokens_name = "max_completion_tokens" + + if max_new_tokens is not None: + kwargs[max_tokens_name] = max_new_tokens + elif self.model_name in self.model_group["PaddleOCR-VL"]: + kwargs[max_tokens_name] = 8192 + + kwargs["extra_body"] = {} + if skip_special_tokens is not None: + if self._genai_client.backend in ( + "fastdeploy-server", + "vllm-server", + "sglang-server", + "mlx-vlm-server", + "llama-cpp-server", + ): + kwargs["extra_body"][ + "skip_special_tokens" + ] = skip_special_tokens + else: + raise ValueError("Not supported") + + if repetition_penalty is not None: + kwargs["extra_body"]["repetition_penalty"] = repetition_penalty - future = self._genai_client.create_chat_completion( - [ - { - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": image_url}}, - {"type": "text", "text": item["query"]}, - ], - } - ], - return_future=True, - timeout=600, - **kwargs, - ) + if min_pixels is not None: + if self._genai_client.backend == "vllm-server": + kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[ + "extra_body" + ].get("mm_processor_kwargs", {}) + kwargs["extra_body"]["mm_processor_kwargs"][ + "min_pixels" + ] = min_pixels + else: + warnings.warn( + f"{repr(self._genai_client.backend)} does not support `min_pixels`." 
+ ) + + if max_pixels is not None: + if self._genai_client.backend == "vllm-server": + kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[ + "extra_body" + ].get("mm_processor_kwargs", {}) + kwargs["extra_body"]["mm_processor_kwargs"][ + "max_pixels" + ] = max_pixels + else: + warnings.warn( + f"{repr(self._genai_client.backend)} does not support `max_pixels`." + ) + + future = self._genai_client.create_chat_completion( + [ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": image_url}}, + {"type": "text", "text": item["query"]}, + ], + } + ], + return_future=True, + timeout=600, + **kwargs, + ) - futures.append(future) + futures.append(future) - results = [] - for future in futures: - result = future.result() - results.append(result.choices[0].message.content) + results = [] + for future in futures: + result = future.result() + results.append(result.choices[0].message.content) - return results + return results + except Exception: + # Cancel all pending futures to avoid wasting resources + for future in futures: + if not future.done(): + future.cancel() + raise diff --git a/paddlex/inference/models/layout_analysis/processors.py b/paddlex/inference/models/layout_analysis/processors.py index d1672a2000..b7ebaf0992 100644 --- a/paddlex/inference/models/layout_analysis/processors.py +++ b/paddlex/inference/models/layout_analysis/processors.py @@ -595,10 +595,15 @@ def filter_boxes( continue box_area_i = calculate_bbox_area(boxes[i]["coordinate"]) box_area_j = calculate_bbox_area(boxes[j]["coordinate"]) - if ( - boxes[i]["label"] == "image" or boxes[j]["label"] == "image" - ) and boxes[i]["label"] != boxes[j]["label"]: - continue + labels = {boxes[i]["label"], boxes[j]["label"]} + if labels & {"image", "table", "seal", "chart"} and len(labels) > 1: + if "table" not in labels or labels <= { + "table", + "image", + "seal", + "chart", + }: + continue if box_area_i >= box_area_j: dropped_indexes.add(j) else: diff --git 
a/paddlex/inference/models/object_detection/modeling/rt_detr.py b/paddlex/inference/models/object_detection/modeling/rt_detr.py index 9f15589214..c8b3963b6b 100644 --- a/paddlex/inference/models/object_detection/modeling/rt_detr.py +++ b/paddlex/inference/models/object_detection/modeling/rt_detr.py @@ -181,52 +181,108 @@ def __call__(self, head_out, im_shape, scale_factor, pad_shape): class RTDETRConfig(PretrainedConfig): def __init__( self, - backbone, - HybridEncoder, - RTDETRTransformer, - DINOHead, - DETRPostProcess, + initializer_range=0.01, + initializer_bias_prior_prob=None, + layer_norm_eps=1e-5, + batch_norm_eps=1e-5, + # backbone + backbone_config=None, + freeze_backbone_batch_norms=True, + # encoder HybridEncoder + encoder_hidden_dim=256, + encoder_in_channels=[512, 1024, 2048], + feat_strides=[8, 16, 32], + encoder_layers=1, + encoder_ffn_dim=1024, + encoder_attention_heads=8, + dropout=0.0, + activation_dropout=0.0, + encode_proj_layers=[2], + positional_encoding_temperature=10000, + encoder_activation_function="gelu", + activation_function="silu", + eval_size=None, + normalize_before=False, + hidden_expansion=1.0, + # decoder RTDetrTransformer + d_model=256, + num_queries=300, + decoder_in_channels=[256, 256, 256], + decoder_ffn_dim=1024, + num_feature_levels=3, + decoder_n_points=4, + decoder_layers=6, + decoder_attention_heads=8, + decoder_activation_function="relu", + attention_dropout=0.0, + num_denoising=100, + label_noise_ratio=0.5, + box_noise_scale=1.0, + learn_initial_query=False, + anchor_image_size=None, + disable_custom_kernels=True, + with_box_refine=True, + is_encoder_decoder=True, + # Loss + matcher_alpha=0.25, + matcher_gamma=2.0, + matcher_class_cost=2.0, + matcher_bbox_cost=5.0, + matcher_giou_cost=2.0, + use_focal_loss=True, + auxiliary_loss=True, + focal_loss_alpha=0.75, + focal_loss_gamma=2.0, + weight_loss_vfl=1.0, + weight_loss_bbox=5.0, + weight_loss_giou=2.0, + eos_coefficient=1e-4, + **kwargs, ): - if backbone["name"] == 
"PPHGNetV2": - self.arch = backbone["arch"] - self.return_idx = backbone["return_idx"] - self.freeze_stem_only = backbone["freeze_stem_only"] - self.freeze_at = backbone["freeze_at"] - self.freeze_norm = backbone["freeze_norm"] - self.lr_mult_list = backbone["lr_mult_list"] - else: + if backbone_config["model_type"] != "hgnet_v2": raise RuntimeError( - f"There is no dynamic graph implementation for backbone {backbone['name']}." + f"There is no dynamic graph implementation for backbone {repr(backbone_config["model_type"])}." ) - self.hidden_dim = HybridEncoder["hidden_dim"] - self.use_encoder_idx = HybridEncoder["use_encoder_idx"] - self.num_encoder_layers = HybridEncoder["num_encoder_layers"] - self.el_d_model = HybridEncoder["encoder_layer"]["d_model"] - self.el_nhead = HybridEncoder["encoder_layer"]["nhead"] - self.el_dim_feedforward = HybridEncoder["encoder_layer"]["dim_feedforward"] - self.el_dropout = HybridEncoder["encoder_layer"]["dropout"] - self.el_activation = HybridEncoder["encoder_layer"]["activation"] - self.expansion = HybridEncoder["expansion"] - self.tf_num_queries = RTDETRTransformer["num_queries"] - self.tf_position_embed_type = RTDETRTransformer["position_embed_type"] - self.tf_feat_strides = RTDETRTransformer["feat_strides"] - self.tf_num_levels = RTDETRTransformer["num_levels"] - self.tf_nhead = RTDETRTransformer["nhead"] - self.tf_num_decoder_layers = RTDETRTransformer["num_decoder_layers"] - self.tf_backbone_feat_channels = RTDETRTransformer["backbone_feat_channels"] - self.tf_dim_feedforward = RTDETRTransformer["dim_feedforward"] - self.tf_dropout = RTDETRTransformer["dropout"] - self.tf_activation = RTDETRTransformer["activation"] - self.tf_num_denoising = RTDETRTransformer["num_denoising"] - self.tf_label_noise_ratio = RTDETRTransformer["label_noise_ratio"] - self.tf_box_noise_scale = RTDETRTransformer["box_noise_scale"] - self.tf_learnt_init_query = RTDETRTransformer["learnt_init_query"] - self.loss_coeff = DINOHead["loss"]["loss_coeff"] 
- self.aux_loss = DINOHead["loss"]["aux_loss"] - self.use_vfl = DINOHead["loss"]["use_vfl"] - self.matcher_coeff = DINOHead["loss"]["matcher"]["matcher_coeff"] - self.num_top_queries = DETRPostProcess["num_top_queries"] - self.use_focal_loss = DETRPostProcess["use_focal_loss"] + self.arch = backbone_config["arch"] + self.freeze_stem_only = backbone_config["freeze_stem_only"] + self.freeze_at = backbone_config["freeze_at"] + self.freeze_norm = backbone_config["freeze_norm"] + self.lr_mult_list = backbone_config["lr_mult_list"] + self.return_idx = backbone_config["return_idx"] + self.hidden_dim = encoder_hidden_dim + self.use_encoder_idx = encode_proj_layers + self.num_encoder_layers = encoder_layers + self.el_d_model = d_model + self.el_nhead = encoder_attention_heads + self.el_dim_feedforward = encoder_ffn_dim + self.el_dropout = dropout + self.el_activation = encoder_activation_function + self.expansion = hidden_expansion + self.tf_num_queries = num_queries + self.tf_feat_strides = feat_strides + self.tf_num_levels = num_feature_levels + self.tf_nhead = decoder_attention_heads + self.tf_num_decoder_layers = decoder_layers + self.tf_backbone_feat_channels = decoder_in_channels + self.tf_dim_feedforward = decoder_ffn_dim + self.tf_dropout = attention_dropout + self.tf_activation = decoder_activation_function + self.tf_num_denoising = num_denoising + self.tf_label_noise_ratio = label_noise_ratio + self.tf_box_noise_scale = box_noise_scale + self.tf_learnt_init_query = learn_initial_query + self.loss_coeff = { + "class": weight_loss_vfl, + "bbox": weight_loss_bbox, + "giou": weight_loss_giou + } + self.aux_loss = auxiliary_loss + self.matcher_coeff = { + "class": matcher_class_cost, + "bbox": matcher_bbox_cost, + "giou": matcher_giou_cost + } + self.use_focal_loss = use_focal_loss self.tensor_parallel_degree = 1 @@ -260,7 +316,6 @@ def __init__(self, config: RTDETRConfig): ) self.transformer = RTDETRTransformer( num_queries=self.config.tf_num_queries, - 
position_embed_type=self.config.tf_position_embed_type, feat_strides=self.config.tf_feat_strides, backbone_feat_channels=self.config.tf_backbone_feat_channels, num_levels=self.config.tf_num_levels, @@ -278,14 +333,13 @@ def __init__(self, config: RTDETRConfig): loss=DINOLoss( loss_coeff=self.config.loss_coeff, aux_loss=self.config.aux_loss, - use_vfl=self.config.use_vfl, matcher=HungarianMatcher( matcher_coeff=self.config.matcher_coeff, ), ) ) self.post_process = DETRPostProcess( - num_top_queries=self.config.num_top_queries, + num_top_queries=self.config.tf_num_queries, use_focal_loss=self.config.use_focal_loss, ) diff --git a/paddlex/inference/models/table_structure_recognition/modeling/slanext.py b/paddlex/inference/models/table_structure_recognition/modeling/slanext.py index 884339f0fa..956a7d36c6 100644 --- a/paddlex/inference/models/table_structure_recognition/modeling/slanext.py +++ b/paddlex/inference/models/table_structure_recognition/modeling/slanext.py @@ -25,23 +25,26 @@ class SLANeXtConfig(PretrainedConfig): def __init__( self, - backbone, - SLAHead, + out_channels, + hidden_size, + max_text_length, + loc_reg_num, + image_size, + encoder_embed_dim, + encoder_depth, + encoder_num_heads, + encoder_global_attn_indexes, + **kwargs, ): - if backbone["name"] == "Vary_VIT_B": - self.image_size = backbone["image_size"] - self.encoder_embed_dim = backbone["encoder_embed_dim"] - self.encoder_depth = backbone["encoder_depth"] - self.encoder_num_heads = backbone["encoder_num_heads"] - self.encoder_global_attn_indexes = backbone["encoder_global_attn_indexes"] - else: - raise RuntimeError( - f"There is no dynamic graph implementation for backbone {backbone['name']}." 
- ) - self.out_channels = SLAHead["out_channels"] - self.hidden_size = SLAHead["hidden_size"] - self.max_text_length = SLAHead["max_text_length"] - self.loc_reg_num = SLAHead["loc_reg_num"] + self.out_channels = out_channels + self.hidden_size = hidden_size + self.max_text_length = max_text_length + self.loc_reg_num = loc_reg_num + self.image_size = image_size + self.encoder_embed_dim = encoder_embed_dim + self.encoder_depth = encoder_depth + self.encoder_num_heads = encoder_num_heads + self.encoder_global_attn_indexes = encoder_global_attn_indexes self.tensor_parallel_degree = 1 @@ -76,11 +79,25 @@ def forward(self, x): return [x["loc_preds"], x["structure_probs"]] def get_transpose_weight_keys(self): - transpose_keys = ["mlp.lin2", "attn.qkv", "mlp.lin1"] + transpose_keys = [ + "mlp.lin2", + "attn.qkv", + "mlp.lin1", + "structure_attention_cell.score", + "attn.proj", + "i2h", + "h2h", + "structure_generator.0", + "structure_generator.1", + "loc_generator.0", + "loc_generator.1", + ] need_to_transpose = [] all_weight_keys = [] for name, param in self.backbone.named_parameters(): all_weight_keys.append("backbone." + name) + for name, param in self.head.named_parameters(): + all_weight_keys.append("head." + name) for i in range(len(all_weight_keys)): for j in range(len(transpose_keys)): if (transpose_keys[j] in all_weight_keys[i]) and ( diff --git a/paddlex/inference/models/text_detection/processors.py b/paddlex/inference/models/text_detection/processors.py index c43e7df6b4..23a38fdd65 100644 --- a/paddlex/inference/models/text_detection/processors.py +++ b/paddlex/inference/models/text_detection/processors.py @@ -253,6 +253,9 @@ def __call__(self, imgs): """apply""" def _norm(img): + # Check if the image is in 4-channel RGBA format. If so, convert it to RGB format. 
+ if img.shape[2] == 4: + img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB) if self.order == "chw": img = np.transpose(img, (2, 0, 1)) diff --git a/paddlex/inference/models/text_recognition/modeling/pp_ocrv5_rec.py b/paddlex/inference/models/text_recognition/modeling/pp_ocrv5_rec.py index 0c90123e6b..50ac8d1196 100644 --- a/paddlex/inference/models/text_recognition/modeling/pp_ocrv5_rec.py +++ b/paddlex/inference/models/text_recognition/modeling/pp_ocrv5_rec.py @@ -27,34 +27,49 @@ class PPOCRV5RecConfig(PretrainedConfig): def __init__( self, - backbone, - MultiHead, + model_type, + scale: float = 0.95, + conv_kxk_num: int = 4, + lr_mult_list: list = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + lab_lr: float = 0.1, + net_config: dict | None = None, + text_rec: bool = True, + stem_channels: list = [3, 32, 48], + det: bool = False, + use_lab: bool = False, + use_last_conv: bool = True, + class_expand: int = 2048, + dropout_prob: float = 0.0, + class_num: int = 1000, + lr_mult_list: list = [1.0, 1.0, 1.0, 1.0, 1.0], + out_indices: list | None = None, + stage_config: dict | None = None, + head_list: list | None = None, + decode_list: dict | None = None, + **kwargs, ): - self.backbone_name = backbone["name"] - if self.backbone_name == "PPLCNetV3": - self.net_config = backbone["net_config"] - self.scale = backbone["scale"] - self.conv_kxk_num = backbone["conv_kxk_num"] - self.lr_mult_list = backbone["lr_mult_list"] - self.lab_lr = backbone["lab_lr"] - elif self.backbone_name == "PPHGNetV2": - self.text_rec = backbone["text_rec"] - self.stem_channels = backbone["stem_channels"] - self.stage_config = backbone["stage_config"] - self.det = backbone["det"] - self.use_lab = backbone["use_lab"] - self.use_last_conv = backbone["use_last_conv"] - self.class_expand = backbone["class_expand"] - self.dropout_prob = backbone["dropout_prob"] - self.class_num = backbone["class_num"] - self.lr_mult_list = backbone["lr_mult_list"] - self.out_indices = backbone["out_indices"] - else: - raise RuntimeError( 
- f"There is no dynamic graph implementation for backbone {backbone['name']}." - ) - self.head_list = MultiHead["head_list"] - self.decode_list = MultiHead["decode_list"] + self.model_type = model_type + if self.model_type == "pp_ocrv5_mobile_rec": + self.net_config = net_config + self.scale = scale + self.conv_kxk_num =conv_kxk_num + self.lr_mult_list = lr_mult_list + self.lab_lr = lab_lr + elif self.model_type == "pp_ocrv5_server_rec": + self.text_rec = text_rec + self.stem_channels = stem_channels + self.stage_config = stage_config + self.det = det + self.use_lab = use_lab + self.use_last_conv = use_last_conv + self.class_expand = class_expand + self.dropout_prob = dropout_prob + self.class_num = class_num + self.lr_mult_list = lr_mult_list + self.out_indices = out_indices + + self.head_list = head_list + self.decode_list = decode_list self.tensor_parallel_degree = 1 @@ -64,7 +79,7 @@ class PPOCRV5Rec(PretrainedModel): def __init__(self, config: PPOCRV5RecConfig): super().__init__(config) - if self.config.backbone_name == "PPLCNetV3": + if self.config.model_type == "pp_ocrv5_mobile_rec": self.backbone = PPLCNetV3( scale=self.config.scale, net_config=self.config.net_config, @@ -72,7 +87,7 @@ def __init__(self, config: PPOCRV5RecConfig): lr_mult_list=self.config.lr_mult_list, lab_lr=self.config.lab_lr, ) - elif self.config.backbone_name == "PPHGNetV2": + elif self.config.model_type == "pp_ocrv5_server_rec": self.backbone = PPHGNetV2( stage_config=self.config.stage_config, stem_channels=self.config.stem_channels, @@ -102,7 +117,14 @@ def forward(self, x): return [x.cpu().numpy()] def get_transpose_weight_keys(self): - transpose_keys = ["fc", "out_proj", "attn.qkv"] + transpose_keys = [ + "fc", + "out_proj", + "attn.qkv", + "mixer.qkv", + "cross_attn.kv", + "mixer.proj" + ] need_to_transpose = [] all_weight_keys = [] for name, param in self.head.named_parameters(): diff --git a/paddlex/inference/pipelines/components/retriever/base.py 
b/paddlex/inference/pipelines/components/retriever/base.py index a348836836..8aa42ef7b5 100644 --- a/paddlex/inference/pipelines/components/retriever/base.py +++ b/paddlex/inference/pipelines/components/retriever/base.py @@ -22,14 +22,14 @@ from .....utils.subclass_register import AutoRegisterABCMetaClass if is_dep_available("langchain"): - from langchain.docstore.document import Document - from langchain.text_splitter import RecursiveCharacterTextSplitter + from langchain_core.documents import Document + from langchain_text_splitters import RecursiveCharacterTextSplitter if is_dep_available("langchain-community"): from langchain_community import vectorstores from langchain_community.vectorstores import FAISS -@class_requires_deps("langchain", "langchain-community") +@class_requires_deps("langchain", "langchain-text-splitters", "langchain-community") class BaseRetriever(ABC, metaclass=AutoRegisterABCMetaClass): """Base Retriever""" diff --git a/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py b/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py index 198013bfdf..bdf08a7878 100644 --- a/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +++ b/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py @@ -92,7 +92,7 @@ def __init__( self.img_reader = ReadImage(format="BGR") def close(self): - if getattr(self, "chart_recognition_model"): + if getattr(self, "chart_recognition_model", None): self.chart_recognition_model.close() def inintial_predictor(self, config: dict) -> None: @@ -213,13 +213,14 @@ def inintial_predictor(self, config: dict) -> None: ) # TODO(gaotingquan): init the model at any time - chart_recognition_config = config.get("SubModules", {}).get( - "ChartRecognition", - {"model_config_error": "config error for block_region_detection_model!"}, - ) - self.chart_recognition_model = self.create_model( - chart_recognition_config, - ) + if self.use_chart_recognition: + chart_recognition_config = config.get("SubModules", {}).get( + 
"ChartRecognition", + {"model_config_error": "config error for chart_recognition_model!"}, + ) + self.chart_recognition_model = self.create_model( + chart_recognition_config, + ) self.markdown_ignore_labels = config.get( "markdown_ignore_labels", [ diff --git a/paddlex/inference/pipelines/layout_parsing/utils.py b/paddlex/inference/pipelines/layout_parsing/utils.py index 7c83d00a19..1fce7e368e 100644 --- a/paddlex/inference/pipelines/layout_parsing/utils.py +++ b/paddlex/inference/pipelines/layout_parsing/utils.py @@ -234,8 +234,8 @@ def calculate_overlap_ratio( Returns: float: The overlap ratio value between the two bounding boxes """ - bbox1 = np.array(bbox1) - bbox2 = np.array(bbox2) + bbox1 = np.array(bbox1, dtype=np.float64) + bbox2 = np.array(bbox2, dtype=np.float64) x_min_inter = np.maximum(bbox1[0], bbox2[0]) y_min_inter = np.maximum(bbox1[1], bbox2[1]) @@ -245,7 +245,8 @@ def calculate_overlap_ratio( inter_width = np.maximum(0, x_max_inter - x_min_inter) inter_height = np.maximum(0, y_max_inter - y_min_inter) - inter_area = inter_width * inter_height + inter_area = np.multiply(inter_width, inter_height, dtype=np.float64) + bbox1_area = calculate_bbox_area(bbox1) bbox2_area = calculate_bbox_area(bbox2) diff --git a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py index ee1994d83d..5a48667437 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py +++ b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py @@ -272,7 +272,7 @@ def get_layout_parsing_results( id2pixel_key_map = {} image_path_to_obj_map = {} vis_image_labels = IMAGE_LABELS + ["seal"] - image_labels = [] if use_ocr_for_image_block else IMAGE_LABELS + image_labels = [] if use_ocr_for_image_block else IMAGE_LABELS.copy() if not use_chart_recognition: image_labels += ["chart"] vis_image_labels += ["chart"] diff --git a/paddlex/inference/pipelines/paddleocr_vl/result.py b/paddlex/inference/pipelines/paddleocr_vl/result.py index 
963f9db79f..dc7dc92c98 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/result.py +++ b/paddlex/inference/pipelines/paddleocr_vl/result.py @@ -268,7 +268,7 @@ def __init__(self, data) -> None: "markdown_ignore_labels", [] ) self.skip_order_labels = [ - label for label in SKIP_ORDER_LABELS + markdown_ignore_labels + label for label in SKIP_ORDER_LABELS.copy() + markdown_ignore_labels ] def _to_img(self) -> dict[str, np.ndarray]: @@ -468,7 +468,7 @@ def _to_json(self, *args, **kwargs) -> dict[str, str]: original_image_width=original_image_width, show_ocr_content=True, ), - remove_symbol=use_seal_recognition, + remove_symbol=not use_seal_recognition, ) if self["model_settings"].get("use_chart_recognition", False): diff --git a/paddlex/inference/pipelines/paddleocr_vl/uilts.py b/paddlex/inference/pipelines/paddleocr_vl/uilts.py index 8a4009a473..c753ec9d32 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/uilts.py +++ b/paddlex/inference/pipelines/paddleocr_vl/uilts.py @@ -123,13 +123,15 @@ def filter_overlap_boxes( continue box_area_i = calculate_bbox_area(boxes[i]["coordinate"]) box_area_j = calculate_bbox_area(boxes[j]["coordinate"]) - if {boxes[i]["label"], boxes[j]["label"]} & { - "image", - "table", - "seal", - "chart", - } and boxes[i]["label"] != boxes[j]["label"]: - continue + labels = {boxes[i]["label"], boxes[j]["label"]} + if labels & {"image", "table", "seal", "chart"} and len(labels) > 1: + if "table" not in labels or labels <= { + "table", + "image", + "seal", + "chart", + }: + continue if box_area_i >= box_area_j: dropped_indexes.add(j) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py index 82b8bf580a..87347fb034 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py @@ -50,6 +50,7 @@ def postprocess_image( *, 
file_storage: Optional[Storage] = None, return_url: bool = False, + url_expires_in: int = -1, max_img_size: Optional[Tuple[int, int]] = None, ) -> str: if return_url: @@ -75,7 +76,7 @@ def postprocess_image( file_storage.set(key, img_bytes) if return_url: assert isinstance(file_storage, SupportsGetURL) - return file_storage.get_url(key) + return file_storage.get_url(key, expires_in=url_expires_in) return serving_utils.base64_encode(img_bytes) @@ -85,6 +86,7 @@ def postprocess_images( filename_template: str = "{key}.jpg", file_storage: Optional[Storage] = None, return_urls: bool = False, + url_expires_in: int = -1, max_img_size: Optional[Tuple[int, int]] = None, ) -> Dict[str, str]: output_images: Dict[str, str] = {} @@ -99,6 +101,7 @@ def postprocess_images( filename=filename_template.format(key=key), file_storage=file_storage, return_url=return_urls, + url_expires_in=url_expires_in, max_img_size=max_img_size, ) return output_images diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py index 1d46eadb20..73473800d2 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py @@ -31,6 +31,7 @@ DEFAULT_MAX_NUM_INPUT_IMGS: Final[int] = 10 DEFAULT_MAX_OUTPUT_IMG_SIZE: Final[Tuple[int, int]] = (2000, 2000) +DEFAULT_URL_EXPIRES_IN: Final[int] = -1 def update_app_context(app_context: AppContext) -> None: @@ -49,6 +50,9 @@ def update_app_context(app_context: AppContext) -> None: raise TypeError( f"`{type(file_storage).__name__}` does not support getting URLs." 
) + app_context.extra["url_expires_in"] = extra_cfg.get( + "url_expires_in", DEFAULT_URL_EXPIRES_IN + ) app_context.extra["max_num_input_imgs"] = extra_cfg.get( "max_num_input_imgs", DEFAULT_MAX_NUM_INPUT_IMGS ) diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py index e9e837a50f..2bd38780cb 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py @@ -80,6 +80,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py index 786c426d91..8cc3343641 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py @@ -75,6 +75,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py index a778632cff..e35633e969 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py @@ -91,7 +91,7 @@ async def _infer( log_id, 
filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], - return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py index 4020a4f48b..72e3382ed1 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py @@ -81,6 +81,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py index 1eaa219d86..f2f0a46450 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py @@ -111,6 +111,7 @@ async def _infer( filename_template=f"markdown_{i}/{{key}}", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) if visualize_enabled: @@ -125,6 +126,7 @@ async def _infer( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py index 8e74699f14..4e449bca0e 100644 --- 
a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py @@ -92,6 +92,7 @@ async def _analyze_images( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py index ced0e6bb88..071a6a2f3d 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py @@ -93,6 +93,7 @@ async def _analyze_images( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py index f4a6c88a13..232dae19aa 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py @@ -100,6 +100,7 @@ async def _analyze_images( filename_template=f"markdown_{i}/{{key}}", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) md_flags = md_data["page_continuation_flags"] @@ -115,6 +116,7 @@ async def _analyze_images( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) 
else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py index 79e1e17ee6..b68ab3369a 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py @@ -103,6 +103,7 @@ async def _infer( filename_template=f"markdown_{i}/{{key}}", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) md_flags = md_data["page_continuation_flags"] @@ -118,6 +119,7 @@ async def _infer( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py index 012f94abb8..1c85b56d13 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py @@ -81,6 +81,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py index eecd45be99..9ca46ade4c 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py @@ -79,6 +79,7 @@ async 
def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py index 288a415fca..8adef8afd7 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py @@ -84,6 +84,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/infra/storage.py b/paddlex/inference/serving/infra/storage.py index 7a0cbbf929..2afe637945 100644 --- a/paddlex/inference/serving/infra/storage.py +++ b/paddlex/inference/serving/infra/storage.py @@ -26,12 +26,14 @@ "InMemoryStorageConfig", "FileSystemStorageConfig", "BOSConfig", + "OSSConfig", "FileStorageConfig", "SupportsGetURL", "Storage", "InMemoryStorage", "FileSystemStorage", "BOS", + "OSS", "create_storage", ] @@ -57,15 +59,25 @@ class BOSConfig(BaseModel): type: Literal["bos"] = "bos" +class OSSConfig(BaseModel): + endpoint: str + access_key_id: SecretStr + access_key_secret: SecretStr + bucket_name: str + key_prefix: Optional[str] = None + + type: Literal["oss"] = "oss" + + FileStorageConfig = Annotated[ - Union[InMemoryStorageConfig, FileSystemStorageConfig, BOSConfig], + Union[InMemoryStorageConfig, FileSystemStorageConfig, BOSConfig, OSSConfig], Discriminator("type"), ] @runtime_checkable class SupportsGetURL(Protocol): - def get_url(self, 
key: str) -> str: ... + def get_url(self, key: str, expires_in: int = -1) -> str: ... class Storage(metaclass=abc.ABCMeta): @@ -156,10 +168,12 @@ def delete(self, key: str) -> None: key = self._get_full_key(key) self._client.delete_object(bucket_name=self._bucket_name, key=key) - def get_url(self, key: str) -> str: + def get_url(self, key: str, expires_in: int = -1) -> str: key = self._get_full_key(key) return self._client.generate_pre_signed_url( - self._bucket_name, key, expiration_in_seconds=-1 + self._bucket_name, + key, + expiration_in_seconds=expires_in, ).decode("ascii") def _get_full_key(self, key: str) -> str: @@ -168,6 +182,46 @@ def _get_full_key(self, key: str) -> str: return key +@class_requires_deps("oss2") +class OSS(Storage): + def __init__(self, config: OSSConfig) -> None: + import oss2 + + super().__init__() + + # Build the OSS auth object from the configured credentials. + auth = oss2.Auth( + config.access_key_id.get_secret_value(), + config.access_key_secret.get_secret_value(), + ) + + # Create the bucket client shared by all storage operations. + self._bucket = oss2.Bucket(auth, config.endpoint, config.bucket_name) + self._key_prefix = config.key_prefix + + def get(self, key: str) -> bytes: + key = self._get_full_key(key) + return self._bucket.get_object(key).read() + + def set(self, key: str, value: bytes) -> None: + key = self._get_full_key(key) + self._bucket.put_object(key, value) + + def delete(self, key: str) -> None: + key = self._get_full_key(key) + self._bucket.delete_object(key) + + def get_url(self, key: str, expires_in: int = -1) -> str: + # OSS pre-signed URLs need a positive expiry; fall back to 3600 s (1 h). + key = self._get_full_key(key) + return self._bucket.sign_url('GET', key, expires_in if expires_in > 0 else 3600) + + def _get_full_key(self, key: str) -> str: + if self._key_prefix: + return f"{self._key_prefix}/{key}" + return key + + def create_storage(dic: Dict[str, Any], /) -> Storage: config = TypeAdapter(FileStorageConfig).validate_python(dic) if config.type == "memory": @@ -176,5 +230,7 @@ def create_storage(dic: Dict[str, Any], /) -> Storage: return FileSystemStorage(config) elif config.type == "bos": return
BOS(config) + elif config.type == "oss": + return OSS(config) else: assert_never(config) diff --git a/paddlex/inference/serving/infra/utils.py b/paddlex/inference/serving/infra/utils.py index b6b0211f98..7c67567957 100644 --- a/paddlex/inference/serving/infra/utils.py +++ b/paddlex/inference/serving/infra/utils.py @@ -30,6 +30,7 @@ from typing_extensions import Literal, ParamSpec, TypeAlias, assert_never from ....utils.deps import function_requires_deps, is_dep_available +from ....utils.flags import PDF_RENDER_SCALE from ...utils.pdfium_lock import pdfium_lock from .models import ImageInfo, PDFInfo, PDFPageInfo @@ -191,8 +192,7 @@ def read_pdf( if max_num_imgs is not None and len(images) >= max_num_imgs: page.close() break - # TODO: Do not always use zoom=2.0 - zoom = 2.0 + zoom = PDF_RENDER_SCALE deg = 0 image = page.render(scale=zoom, rotation=deg).to_numpy() images.append(image) diff --git a/paddlex/repo_apis/PaddleDetection_api/object_det/config.py b/paddlex/repo_apis/PaddleDetection_api/object_det/config.py index 8285bda439..3e37dbead4 100644 --- a/paddlex/repo_apis/PaddleDetection_api/object_det/config.py +++ b/paddlex/repo_apis/PaddleDetection_api/object_det/config.py @@ -297,6 +297,9 @@ def update_device(self, device_type: str): elif device_type.lower() == "metax_gpu": self["use_metax_gpu"] = True self["use_gpu"] = False + elif device_type.lower() == "iluvatar_gpu": + self["use_iluvatar_gpu"] = True + self["use_gpu"] = False else: assert device_type.lower() == "cpu" self["use_gpu"] = False diff --git a/paddlex/utils/custom_device_list.py b/paddlex/utils/custom_device_list.py index ec4a85495c..d48ea12b71 100755 --- a/paddlex/utils/custom_device_list.py +++ b/paddlex/utils/custom_device_list.py @@ -418,6 +418,7 @@ "PP-OCRv4_mobile_rec", "PP-OCRv4_server_rec", "PP-DocLayoutV2", + "PP-DocLayoutV3", "PP-ShiTuV2_rec", "PP-ShiTuV2_det", "PP-OCRv5_mobile_det", diff --git a/paddlex/utils/flags.py b/paddlex/utils/flags.py index 1fcf547335..726d3603fd 100644 --- 
a/paddlex/utils/flags.py +++ b/paddlex/utils/flags.py @@ -70,7 +70,9 @@ def get_flag_from_env_var(name, default, format_func=str): "PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", False ) -HUGGING_FACE_ENDPOINT = os.environ.get("PADDLE_PDX_HUGGING_FACE_ENDPOINT", "https://huggingface.co") +HUGGING_FACE_ENDPOINT = os.environ.get( + "PADDLE_PDX_HUGGING_FACE_ENDPOINT", "https://huggingface.co" +) # Inference Benchmark INFER_BENCHMARK = get_flag_from_env_var("PADDLE_PDX_INFER_BENCHMARK", False) @@ -87,3 +89,5 @@ def get_flag_from_env_var(name, default, format_func=str): INFER_BENCHMARK_USE_CACHE_FOR_READ = get_flag_from_env_var( "PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ", False ) + +PDF_RENDER_SCALE = get_flag_from_env_var("PADDLE_PDX_PDF_RENDER_SCALE", 2.0, float) diff --git a/paddlex/utils/langchain_shim.py b/paddlex/utils/langchain_shim.py new file mode 100644 index 0000000000..f3ada62fbc --- /dev/null +++ b/paddlex/utils/langchain_shim.py @@ -0,0 +1,79 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import types + + +def apply_langchain_shim(): + """ + A compatibility shim for LangChain to handle breaking changes in newer versions. + Specifically addresses the removal of 'langchain.docstore' and relocation of + 'RecursiveCharacterTextSplitter'. 
+ """ + # Check if langchain is installed + try: + import langchain + except ImportError: + return + + # Ensure langchain is treated as a package if it's a dummy module + if not hasattr(langchain, "__path__"): + langchain.__path__ = [] + + # Helper to create shim modules + def create_shim(name, parent, attr): + if not hasattr(parent, attr): + mod = types.ModuleType(name) + if not hasattr(mod, "__path__"): + mod.__path__ = [] + sys.modules[name] = mod + setattr(parent, attr, mod) + return getattr(parent, attr) + + # Shim for docstore and document + docstore = create_shim("langchain.docstore", langchain, "docstore") + document = create_shim("langchain.docstore.document", docstore, "document") + + if not hasattr(document, "Document"): + try: + from langchain_core.documents import Document as RealDocument + document.Document = RealDocument + except ImportError: + + class MockDocument: + + def __init__(self, page_content, metadata=None): + self.page_content = page_content + self.metadata = metadata or {} + + document.Document = MockDocument + + # Shim for text_splitter + text_splitter = create_shim("langchain.text_splitter", langchain, "text_splitter") + if not hasattr(text_splitter, "RecursiveCharacterTextSplitter"): + try: + from langchain_text_splitters import ( + RecursiveCharacterTextSplitter as RealSplitter, + ) + + text_splitter.RecursiveCharacterTextSplitter = RealSplitter + except ImportError: + + class MockSplitter: + + def __init__(self, *args, **kwargs): + pass + + text_splitter.RecursiveCharacterTextSplitter = MockSplitter diff --git a/setup.py b/setup.py index c2401ff4c1..6d942d5ce8 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ "langchain-community": ">= 0.2, < 1.0", "langchain-core": "", "langchain-openai": ">= 0.1, < 1.0", + "langchain-text-splitters": ">= 0.2, < 1.0", "lxml": "", "matplotlib": "", "modelscope": ">=1.28.0", @@ -114,7 +115,6 @@ # Currently `pypdfium2` is required by the image batch sampler "pypdfium2", "scikit-image", - "scipy", 
], "multimodal": [ "einops", @@ -134,6 +134,7 @@ "imagesize", "langchain", "langchain-community", + "langchain-text-splitters", "langchain-core", "langchain-openai", "lxml", @@ -229,7 +230,7 @@ "genai-vllm-server": [ "einops", "torch == 2.8.0", - "transformers", + "transformers < 5.0.0", "uvloop", "vllm == 0.10.2", ],