Skip to content

Commit 89d37a2

Browse files
committed
Merge branch 'develop' into release/3.3
2 parents 802629c + d5fd5ae commit 89d37a2

File tree

46 files changed

+1704
-159
lines changed

Some content is hidden

Large commits have some of their content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+1704
-159
lines changed

.precommit/check_imports.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"aiohttp": "aiohttp",
3535
"baidubce": "bce-python-sdk",
3636
"bs4": "beautifulsoup4",
37+
"docx": "python-docx",
3738
"chardet": "chardet",
3839
"chinese_calendar": "chinese-calendar",
3940
"colorlog": "colorlog",

deploy/genai_vllm_server_docker/Dockerfile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,14 @@ ENV PIP_NO_CACHE_DIR=0
88
ENV PYTHONUNBUFFERED=1
99
ENV PYTHONDONTWRITEBYTECODE=1
1010

11-
RUN python -m pip install 'paddlex>=3.3.5,<3.4'
11+
ARG PADDLEX_VERSION=">=3.3.6,<3.4"
12+
RUN python -m pip install "paddlex${PADDLEX_VERSION}"
1213

13-
RUN python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl \
14+
ARG BUILD_FOR_SM120=false
15+
RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \
16+
python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl \
17+
else \
18+
python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl \
1419
&& paddlex --install genai-vllm-server
1520

1621
EXPOSE 8080
Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,37 @@
11
#!/usr/bin/env bash
22

3+
paddlex_version='>=3.3.6,<3.4'
4+
build_for_sm120='false'
5+
tag_suffix='latest'
6+
7+
while [[ $# -gt 0 ]]; do
8+
case $1 in
9+
--pdx-version)
10+
paddlex_version="==$2"
11+
shift
12+
shift
13+
;;
14+
--sm120)
15+
build_for_sm120='true'
16+
shift
17+
;;
18+
--tag-suffix)
19+
tag_suffix="$2"
20+
shift
21+
shift
22+
;;
23+
*)
24+
echo "Unknown option: $1"
25+
exit 1
26+
;;
27+
esac
28+
done
29+
330
docker build \
4-
-t "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server:${1:latest}" \
31+
-t "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server:${tag_suffix}" \
32+
--build-arg PADDLEX_VERSION="${paddlex_version}" \
33+
--build-arg BUILD_FOR_SM120="${build_for_sm120}" \
534
--build-arg http_proxy="${http_proxy}" \
635
--build-arg https_proxy="${https_proxy}" \
736
--build-arg no_proxy="${no_proxy}" \
8-
--build-arg PIP_INDEX_URL="${PIP_INDEX_URL}" \
937
.

deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py

Lines changed: 140 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
16+
from concurrent.futures import ThreadPoolExecutor
17+
from operator import itemgetter
1518
from typing import Any, Dict, Final, List, Tuple
1619

1720
from paddlex_hps_server import (
@@ -27,6 +30,17 @@
2730
_DEFAULT_MAX_OUTPUT_IMG_SIZE: Final[Tuple[int, int]] = (2000, 2000)
2831

2932

33+
class _SequentialExecutor(object):
34+
def map(self, fn, *iterables):
35+
return map(fn, *iterables)
36+
37+
def __enter__(self):
38+
return self
39+
40+
def __exit__(self, exc_type, exc_value, traceback):
41+
pass
42+
43+
3044
class TritonPythonModel(BaseTritonPythonModel):
3145
def initialize(self, args):
3246
super().initialize(args)
@@ -68,6 +82,129 @@ def get_result_model_type(self):
6882
return schemas.ocr.InferResult
6983

7084
def run(self, input, log_id):
85+
return self.run_batch([input], [log_id], log_id)
86+
87+
def run_batch(self, inputs, log_ids, batch_id):
88+
result_or_output_dic = {}
89+
90+
input_groups = self._group_inputs(inputs)
91+
92+
max_group_size = max(map(len, input_groups))
93+
if max_group_size > 1:
94+
executor = ThreadPoolExecutor(max_workers=max_group_size)
95+
else:
96+
executor = _SequentialExecutor()
97+
98+
with executor:
99+
for input_group in input_groups:
100+
input_ids_g = list(map(itemgetter(0), input_group))
101+
inputs_g = list(map(itemgetter(1), input_group))
102+
103+
log_ids_g = [log_ids[i] for i in input_ids_g]
104+
105+
ret = executor.map(self._preprocess, inputs_g, log_ids_g)
106+
ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], []
107+
for i, item in enumerate(ret):
108+
if isinstance(item, tuple):
109+
assert len(item) == 3, len(item)
110+
ind_img_lsts.append(item[0])
111+
ind_data_info_lst.append(item[1])
112+
ind_visualize_enabled_lst.append(item[2])
113+
else:
114+
input_id = input_ids_g[i]
115+
result_or_output_dic[input_id] = item
116+
117+
if len(ind_img_lsts):
118+
images = [img for item in ind_img_lsts for img in item]
119+
preds = list(
120+
self.pipeline(
121+
images,
122+
use_doc_orientation_classify=inputs_g[
123+
0
124+
].useDocOrientationClassify,
125+
use_doc_unwarping=inputs_g[0].useDocUnwarping,
126+
use_textline_orientation=inputs_g[0].useTextlineOrientation,
127+
text_det_limit_side_len=inputs_g[0].textDetLimitSideLen,
128+
text_det_limit_type=inputs_g[0].textDetLimitType,
129+
text_det_thresh=inputs_g[0].textDetThresh,
130+
text_det_box_thresh=inputs_g[0].textDetBoxThresh,
131+
text_det_unclip_ratio=inputs_g[0].textDetUnclipRatio,
132+
text_rec_score_thresh=inputs_g[0].textRecScoreThresh,
133+
return_word_box=inputs_g[0].returnWordBox,
134+
)
135+
)
136+
137+
if len(preds) != len(images):
138+
raise RuntimeError(
139+
f"The number of predictions ({len(preds)}) is not the same as the number of input images ({len(images)})."
140+
)
141+
142+
start_idx = 0
143+
ind_preds = []
144+
for item in ind_img_lsts:
145+
ind_preds.append(preds[start_idx : start_idx + len(item)])
146+
start_idx += len(item)
147+
148+
for i, result in zip(
149+
input_ids_g,
150+
executor.map(
151+
self._postprocess,
152+
ind_img_lsts,
153+
ind_data_info_lst,
154+
ind_visualize_enabled_lst,
155+
ind_preds,
156+
log_ids_g,
157+
inputs_g,
158+
),
159+
):
160+
result_or_output_dic[i] = result
161+
162+
assert len(result_or_output_dic) == len(
163+
inputs
164+
), f"Expected {len(inputs)} results or outputs, but got {len(result_or_output_dic)}"
165+
166+
return [result_or_output_dic[i] for i in range(len(inputs))]
167+
168+
def _group_inputs(self, inputs):
169+
def _to_hashable(obj):
170+
if isinstance(obj, list):
171+
return tuple(obj)
172+
elif isinstance(obj, dict):
173+
return tuple(sorted(obj.items()))
174+
else:
175+
return obj
176+
177+
def _hash(input):
178+
return hash(
179+
tuple(
180+
map(
181+
_to_hashable,
182+
(
183+
input.useDocOrientationClassify,
184+
input.useDocUnwarping,
185+
input.useTextlineOrientation,
186+
input.textDetLimitSideLen,
187+
input.textDetLimitType,
188+
input.textDetThresh,
189+
input.textDetBoxThresh,
190+
input.textDetUnclipRatio,
191+
input.textRecScoreThresh,
192+
input.returnWordBox,
193+
),
194+
)
195+
)
196+
)
197+
198+
groups = {}
199+
for i, inp in enumerate(inputs):
200+
group_key = _hash(inp)
201+
if group_key not in groups:
202+
groups[group_key] = []
203+
groups[group_key].append((i, inp))
204+
205+
return list(groups.values())
206+
207+
def _preprocess(self, input, log_id):
71208
if input.fileType is None:
72209
if utils.is_url(input.file):
73210
maybe_file_type = utils.infer_file_type(input.file)
@@ -101,24 +238,11 @@ def run(self, input, log_id):
101238
max_num_imgs=self.context["max_num_input_imgs"],
102239
)
103240

104-
result = list(
105-
self.pipeline(
106-
images,
107-
use_doc_orientation_classify=input.useDocOrientationClassify,
108-
use_doc_unwarping=input.useDocUnwarping,
109-
use_textline_orientation=input.useTextlineOrientation,
110-
text_det_limit_side_len=input.textDetLimitSideLen,
111-
text_det_limit_type=input.textDetLimitType,
112-
text_det_thresh=input.textDetThresh,
113-
text_det_box_thresh=input.textDetBoxThresh,
114-
text_det_unclip_ratio=input.textDetUnclipRatio,
115-
text_rec_score_thresh=input.textRecScoreThresh,
116-
return_word_box=input.returnWordBox,
117-
)
118-
)
241+
return images, data_info, visualize_enabled
119242

243+
def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, input):
120244
ocr_results: List[Dict[str, Any]] = []
121-
for i, (img, item) in enumerate(zip(images, result)):
245+
for i, (img, item) in enumerate(zip(images, preds)):
122246
pruned_res = app_common.prune_result(item.json["res"])
123247
if visualize_enabled:
124248
output_imgs = item.img
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
backend: "python"
2+
max_batch_size: 8
3+
input [
4+
{
5+
name: "input"
6+
data_type: TYPE_STRING
7+
dims: [ -1 ]
8+
}
9+
]
10+
output [
11+
{
12+
name: "output"
13+
data_type: TYPE_STRING
14+
dims: [ -1 ]
15+
}
16+
]
17+
instance_group [
18+
{
19+
count: 1
20+
kind: KIND_CPU
21+
}
22+
]
23+
dynamic_batching { }
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
backend: "python"
2+
max_batch_size: 8
3+
input [
4+
{
5+
name: "input"
6+
data_type: TYPE_STRING
7+
dims: [ -1 ]
8+
}
9+
]
10+
output [
11+
{
12+
name: "output"
13+
data_type: TYPE_STRING
14+
dims: [ -1 ]
15+
}
16+
]
17+
instance_group [
18+
{
19+
count: 1
20+
kind: KIND_GPU
21+
gpus: [ 0 ]
22+
}
23+
]
24+
dynamic_batching { }

0 commit comments

Comments (0)