Skip to content

Commit 4045033

Browse files
committed
vllm: build upd + add gemma3 fix patch
1 parent 3c86748 commit 4045033

File tree

7 files changed

+92
-22
lines changed

7 files changed

+92
-22
lines changed

.env-local.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ export COMFYUI_IMAGE="registry.arkprojects.space/apps/comfyui-gfx906"
77
export COMFYUI_TORCH_IMAGE="registry.arkprojects.space/apps/pytorch-gfx906"
88

99
# vllm
10+
export VLLM_MAX_JOBS="60"
1011
export VLLM_IMAGE="registry.arkprojects.space/apps/vllm-gfx906"

env.sh

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,15 @@ if [ "$BASE_UBUNTU_REGISTRY" == "" ]; then
3838
BASE_UBUNTU_REGISTRY=docker.io/library
3939
fi
4040

41-
source $(dirname ${BASH_SOURCE[0]})/rocm/env.sh
42-
source $(dirname ${BASH_SOURCE[0]})/llama.cpp/env.sh
43-
source $(dirname ${BASH_SOURCE[0]})/comfyui/env.sh
44-
source $(dirname ${BASH_SOURCE[0]})/vllm/env.sh
45-
source $(dirname ${BASH_SOURCE[0]})/pytorch/env.sh
41+
if [ "$1" != "" ]; then
42+
for PROJ in "$@"; do
43+
source $(dirname ${BASH_SOURCE[0]})/${PROJ}/env.sh
44+
done
45+
else
46+
echo "Warn: pass required projs over args"
47+
source $(dirname ${BASH_SOURCE[0]})/rocm/env.sh
48+
source $(dirname ${BASH_SOURCE[0]})/llama.cpp/env.sh
49+
source $(dirname ${BASH_SOURCE[0]})/comfyui/env.sh
50+
source $(dirname ${BASH_SOURCE[0]})/vllm/env.sh
51+
source $(dirname ${BASH_SOURCE[0]})/pytorch/env.sh
52+
fi

vllm-v2/build-and-push.vllm.sh

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
set -e
33

44
cd $(dirname $0)
5-
source ../env.sh
5+
source ../env.sh "pytorch" "vllm-v2"
66

77
IMAGE_TAGS=(
88
"${VLLM_IMAGE}:${VLLM_PRESET_NAME}-${REPO_GIT_REF}"
@@ -22,20 +22,21 @@ done
2222
mkdir -p ./logs
2323
docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \
2424
--build-arg BASE_PYTORCH_IMAGE=${TORCH_IMAGE}:${VLLM_PYTORCH_VERSION}-rocm-${VLLM_ROCM_VERSION} \
25+
--build-arg MAX_JOBS="${VLLM_MAX_JOBS}" \
2526
\
26-
--build-arg VLLM_REPO=$VLLM_REPO \
27-
--build-arg VLLM_BRANCH=$VLLM_BRANCH \
28-
--build-arg VLLM_COMMIT=$VLLM_COMMIT \
29-
--build-arg VLLM_PATCH=$VLLM_PATCH \
27+
--build-arg VLLM_REPO=${VLLM_REPO} \
28+
--build-arg VLLM_BRANCH=${VLLM_BRANCH} \
29+
--build-arg VLLM_COMMIT=${VLLM_COMMIT} \
30+
--build-arg VLLM_PATCH=${VLLM_PATCH} \
3031
\
31-
--build-arg FA_REPO=$VLLM_FA_REPO \
32-
--build-arg FA_BRANCH=$VLLM_FA_BRANCH \
33-
--build-arg FA_COMMIT=$VLLM_FA_COMMIT \
34-
--build-arg FA_PATCH=$VLLM_FA_PATCH \
32+
--build-arg FA_REPO=${VLLM_FA_REPO} \
33+
--build-arg FA_BRANCH=${VLLM_FA_BRANCH} \
34+
--build-arg FA_COMMIT=${VLLM_FA_COMMIT} \
35+
--build-arg FA_PATCH=${VLLM_FA_PATCH} \
3536
\
36-
--build-arg TRITON_REPO=$VLLM_TRITON_REPO \
37-
--build-arg TRITON_BRANCH=$VLLM_TRITON_BRANCH \
38-
--build-arg TRITON_COMMIT=$VLLM_TRITON_COMMIT \
39-
--build-arg TRITON_PATCH=$VLLM_TRITON_PATCH \
37+
--build-arg TRITON_REPO=${VLLM_TRITON_REPO} \
38+
--build-arg TRITON_BRANCH=${VLLM_TRITON_BRANCH} \
39+
--build-arg TRITON_COMMIT=${VLLM_TRITON_COMMIT} \
40+
--build-arg TRITON_PATCH=${VLLM_TRITON_PATCH} \
4041
\
4142
--progress=plain --target final -f ./vllm.Dockerfile ./build-context 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
diff --git a/vllm/config/model.py b/vllm/config/model.py
2+
index bd35e491d..82200172e 100644
3+
--- a/vllm/config/model.py
4+
+++ b/vllm/config/model.py
5+
@@ -1786,8 +1786,6 @@ def str_dtype_to_torch_dtype(type: str):
6+
# model_type -> reason
7+
_FLOAT16_NOT_SUPPORTED_MODELS = {
8+
"gemma2": "Numerical instability. Please use bfloat16 or float32 instead.",
9+
- "gemma3": "Numerical instability. Please use bfloat16 or float32 instead.",
10+
- "gemma3_text": "Numerical instability. Please use bfloat16 or float32 instead.",
11+
"plamo2": "Numerical instability. Please use bfloat16 or float32 instead.",
12+
"glm4": "Numerical instability. Please use bfloat16 or float32 instead.",
13+
}
14+
diff --git a/vllm/model_executor/models/gemma3.py b/vllm/model_executor/models/gemma3.py
15+
index b2352a3c9..02f458fbf 100644
16+
--- a/vllm/model_executor/models/gemma3.py
17+
+++ b/vllm/model_executor/models/gemma3.py
18+
@@ -279,6 +279,9 @@ class Gemma3DecoderLayer(nn.Module):
19+
residual: torch.Tensor | None,
20+
**kwargs,
21+
) -> tuple[torch.Tensor, torch.Tensor]:
22+
+ # https://github.com/huggingface/transformers/pull/36832
23+
+ if hidden_states.dtype == torch.float16:
24+
+ hidden_states = hidden_states.clamp_(-65504, 65504)
25+
if residual is None:
26+
residual = hidden_states
27+
hidden_states = self.input_layernorm(hidden_states)
28+
@@ -290,12 +293,16 @@ class Gemma3DecoderLayer(nn.Module):
29+
**kwargs,
30+
)
31+
hidden_states = self.post_attention_layernorm(hidden_states)
32+
+ if hidden_states.dtype == torch.float16:
33+
+ hidden_states = hidden_states.clamp_(-65504, 65504)
34+
35+
hidden_states, residual = self.pre_feedforward_layernorm(
36+
hidden_states, residual
37+
)
38+
hidden_states = self.mlp(hidden_states)
39+
hidden_states = self.post_feedforward_layernorm(hidden_states)
40+
+ if hidden_states.dtype == torch.float16:
41+
+ hidden_states = hidden_states.clamp_(-65504, 65504)
42+
return hidden_states, residual
43+
44+

vllm-v2/preset.f854fc5-rocm-6.3.3-aiinfos.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export VLLM_PYTORCH_VERSION="v2.10.0"
66
export VLLM_REPO="https://github.com/ai-infos/vllm-gfx906-mobydick.git"
77
export VLLM_BRANCH="gfx906/v0.17.1rc0.x"
88
export VLLM_COMMIT="f854fc5"
9+
export VLLM_PATCH="ai-infos_vllm-gfx906-mobydick/f854fc5.patch"
910

1011
export VLLM_TRITON_REPO="https://github.com/ai-infos/triton-gfx906.git"
1112
export VLLM_TRITON_BRANCH="v3.5.1+gfx906"

vllm-v2/preset.f854fc5-rocm-7.2.0-aiinfos.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export VLLM_PYTORCH_VERSION="v2.10.0"
66
export VLLM_REPO="https://github.com/ai-infos/vllm-gfx906-mobydick.git"
77
export VLLM_BRANCH="gfx906/v0.17.1rc0.x"
88
export VLLM_COMMIT="f854fc5"
9+
export VLLM_PATCH="ai-infos_vllm-gfx906-mobydick/f854fc5.patch"
910

1011
export VLLM_TRITON_REPO="https://github.com/ai-infos/triton-gfx906.git"
1112
export VLLM_TRITON_BRANCH="v3.5.1+gfx906"

vllm-v2/vllm.Dockerfile

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
# Build seq: rocm_base => build_base => build_triton => build_fa => build_vllm => final
2+
13
ARG BASE_PYTORCH_IMAGE="docker.io/mixa3607/pytorch-gfx906:v2.10.0-rocm-6.3.3"
4+
ARG MAX_JOBS=""
25

36
ARG VLLM_REPO="https://github.com/ai-infos/vllm-gfx906-mobydick.git"
47
ARG VLLM_BRANCH="main"
@@ -33,7 +36,7 @@ RUN pip3 install \
3336
'packaging>=24.2' \
3437
'jinja2>=3.1.6' \
3538
'timm>=1.0.17' \
36-
'/opt/share/amd_smi'
39+
'/opt/rocm/share/amd_smi'
3740
RUN apt install curl wget jq aria2 -y
3841

3942
############# Build base #############
@@ -48,6 +51,8 @@ RUN pip3 install \
4851

4952
############# Build triton #############
5053
FROM build_base AS build_triton
54+
RUN --mount=type=bind,from=build_base,src=/tmp,target=/force-sequential-build echo ''
55+
5156
ARG TRITON_REPO
5257
ARG TRITON_BRANCH
5358
ARG TRITON_COMMIT
@@ -60,11 +65,15 @@ RUN if [ "$TRITON_COMMIT" != "" ]; then git checkout "$TRITON_COMMIT"; fi
6065
COPY ./patch/${TRITON_PATCH} ./${TRITON_PATCH}
6166
RUN git apply ./${TRITON_PATCH} --allow-empty
6267
# Build
63-
RUN python3 setup.py bdist_wheel --dist-dir=/dist
68+
ARG MAX_JOBS
69+
RUN MAX_JOBS=${MAX_JOBS:-$(nproc)} \
70+
python3 setup.py bdist_wheel --dist-dir=/dist
6471
RUN ls /dist
6572

6673
############# Build FA #############
6774
FROM build_base AS build_fa
75+
RUN --mount=type=bind,from=build_triton,src=/tmp,target=/force-sequential-build echo ''
76+
6877
ARG FA_REPO
6978
ARG FA_BRANCH
7079
ARG FA_COMMIT
@@ -77,11 +86,15 @@ RUN if [ "$FA_COMMIT" != "" ]; then git checkout "$FA_COMMIT"; fi
7786
COPY ./patch/${FA_PATCH} ./${FA_PATCH}
7887
RUN git apply ./${FA_PATCH} --allow-empty
7988
# Build
80-
RUN python3 setup.py bdist_wheel --dist-dir=/dist
89+
ARG MAX_JOBS
90+
RUN MAX_JOBS=${MAX_JOBS:-$(nproc)} \
91+
python3 setup.py bdist_wheel --dist-dir=/dist
8192
RUN ls /dist
8293

8394
############# Build vllm #############
8495
FROM build_base AS build_vllm
96+
RUN --mount=type=bind,from=build_fa,src=/tmp,target=/force-sequential-build echo ''
97+
8598
ARG VLLM_REPO
8699
ARG VLLM_BRANCH
87100
ARG VLLM_COMMIT
@@ -95,7 +108,9 @@ COPY ./patch/${VLLM_PATCH} ./${VLLM_PATCH}
95108
RUN git apply ./${VLLM_PATCH} --allow-empty
96109
# Build
97110
RUN pip install -r requirements/rocm.txt
98-
RUN python3 setup.py bdist_wheel --dist-dir=/dist
111+
ARG MAX_JOBS
112+
RUN MAX_JOBS=${MAX_JOBS:-$(nproc)} \
113+
python3 setup.py bdist_wheel --dist-dir=/dist
99114
RUN ls /dist
100115

101116
############# Install all #############

0 commit comments

Comments
 (0)