Skip to content

Commit 6ae34b1

Browse files
authored
[Feat] Support setting paddlex version in genai dockerfile (#4683)
* Support setting paddlex version in dockerfile
* Support building docker image for SM120
* Set VLM batch size to 4096
1 parent ba5983d commit 6ae34b1

File tree

5 files changed

+40
-7
lines changed

5 files changed

+40
-7
lines changed

deploy/genai_vllm_server_docker/Dockerfile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,14 @@ ENV PIP_NO_CACHE_DIR=0
88
ENV PYTHONUNBUFFERED=1
99
ENV PYTHONDONTWRITEBYTECODE=1
1010

11-
RUN python -m pip install 'paddlex>=3.3.5,<3.4'
11+
ARG PADDLEX_VERSION=">=3.3.6,<3.4"
12+
RUN python -m pip install "paddlex${PADDLEX_VERSION}"
1213

13-
RUN python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl \
14+
ARG BUILD_FOR_SM120=false
15+
RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \
16+
python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
17+
else \
18+
python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; fi \
1419
&& paddlex --install genai-vllm-server
1520

1621
EXPOSE 8080
Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,37 @@
11
#!/usr/bin/env bash
22

3+
paddlex_version='>=3.3.6,<3.4'
4+
build_for_sm120='false'
5+
tag_suffix='latest'
6+
7+
while [[ $# -gt 0 ]]; do
8+
case $1 in
9+
--pdx-version)
10+
paddlex_version="==${2:?--pdx-version requires a value}"
11+
shift
12+
shift
13+
;;
14+
--sm120)
15+
build_for_sm120='true'
16+
shift
17+
;;
18+
--tag-suffix)
19+
tag_suffix="${2:?--tag-suffix requires a value}"
20+
shift
21+
shift
22+
;;
23+
*)
24+
echo "Unknown option: $1" >&2
25+
exit 1
26+
;;
27+
esac
28+
done
29+
330
docker build \
4-
-t "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server:${1:latest}" \
31+
-t "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server:${tag_suffix}" \
32+
--build-arg PADDLEX_VERSION="${paddlex_version}" \
33+
--build-arg BUILD_FOR_SM120="${build_for_sm120}" \
534
--build-arg http_proxy="${http_proxy}" \
635
--build-arg https_proxy="${https_proxy}" \
736
--build-arg no_proxy="${no_proxy}" \
8-
--build-arg PIP_INDEX_URL="${PIP_INDEX_URL}" \
937
.

deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ SubModules:
7474
module_name: vl_recognition
7575
model_name: PaddleOCR-VL-0.9B
7676
model_dir: null
77-
batch_size: 2048
77+
batch_size: 4096
7878
genai_config:
7979
backend: native
8080

deploy/hps/sdk/versions.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"PP-ChatOCRv3-doc": "0.3.1",
2121
"PP-ChatOCRv4-doc": "0.4.1",
2222
"PP-DocTranslation": "0.1.1",
23-
"PaddleOCR-VL": "0.1.0",
23+
"PaddleOCR-VL": "0.1.1",
2424
"PP-ShiTuV2": "0.1.0",
2525
"rotated_object_detection": "0.1.0",
2626
"seal_recognition": "0.2.1",

paddlex/configs/pipelines/PaddleOCR-VL.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ SubModules:
7474
module_name: vl_recognition
7575
model_name: PaddleOCR-VL-0.9B
7676
model_dir: null
77-
batch_size: 2048
77+
batch_size: 4096
7878
genai_config:
7979
backend: native
8080

0 commit comments

Comments
 (0)