File tree Expand file tree Collapse file tree 3 files changed +10
-16
lines changed
usecases/ai/microservices/text-generation/vllm Expand file tree Collapse file tree 3 files changed +10
-16
lines changed Original file line number Diff line number Diff line change 44FROM debian:12-slim
55ARG DEBIAN_FRONTEND=noninteractive
66ARG VLLM_VERSION=v0.6.6
7+ ARG OPENVINO_VERSION=2024.6.0
78SHELL ["/bin/bash" , "-o" , "pipefail" , "-c" ]
89RUN apt-get update \
910 && apt-get upgrade -y \
@@ -28,7 +29,7 @@ RUN apt-get update \
2829 && adduser --system --ingroup intel --uid 1000 --home /home/intel intel \
2930 && echo "intel ALL=(ALL:ALL) NOPASSWD:ALL" > /etc/sudoers.d/intel \
3031 && rm -rf /var/lib/apt/lists/* \
31- && mkdir -p /usr/src \
32+ && mkdir -p /usr/src/app/data \
3233 && chown -R intel:intel /usr/src
3334
3435USER intel
@@ -46,8 +47,8 @@ RUN git checkout ${VLLM_VERSION} \
4647 optimum-intel[openvino,nncf]==1.21.0 \
4748 optimum==1.23.3 \
4849 transformers==4.46.3 \
49- openvino==2025.0.0 \
50- openvino_genai==2025.0.0
50+ openvino==${OPENVINO_VERSION} \
51+ openvino_genai==${OPENVINO_VERSION}
5152
5253WORKDIR /usr/src/app
5354RUN opt_in_out --opt_out
Original file line number Diff line number Diff line change @@ -38,10 +38,9 @@ docker run -it --rm \
3838 -e MAX_MODEL_LEN=2048 \
3939 -e MAX_NUM_SEQS=1 \
4040 -e VLLM_OPENVINO_DEVICE=CPU \
41- -e VLLM_OPENVINO_KVCACHE_SPACE=8 \
41+ -e VLLM_OPENVINO_KVCACHE_SPACE=4 \
4242 -e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 \
43- -e VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON \
44- -v ./data:/usr/src/app/data \
43+ -v ov-vllm:/usr/src/app/data \
4544 ov-vllm
4645```
4746
@@ -59,10 +58,9 @@ docker run -it --rm \
5958 -e MAX_NUM_SEQS=1 \
6059 -e GPU_MEMORY_UTILIZATION=0.9 \
6160 -e VLLM_OPENVINO_DEVICE=GPU \
62- -e VLLM_OPENVINO_KVCACHE_SPACE=8 \
61+ -e VLLM_OPENVINO_KVCACHE_SPACE=4 \
6362 -e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 \
64- -e VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON \
65- -v ./data:/usr/src/app/data \
63+ -v ov-vllm:/usr/src/app/data \
6664 ov-vllm
6765```
6866
@@ -111,9 +109,9 @@ curl "http://localhost:8000/v1/chat/completions" \
111109```
112110
113111### 2. How can I change the default model after it has been run once?
114- 1 . Delete the existing model located in ` ./data/ov_model ` .
114+ 1. Delete the `ov-vllm` Docker volume that holds the previously exported model.
115113``` bash
116- rm -rf ./data/ov_model
114+ docker volume rm ov-vllm
117115```
118116 2. Rerun the `docker run` command to load and quantize the new model.
119117
Original file line number Diff line number Diff line change @@ -14,13 +14,11 @@ export MAX_NUM_SEQS=${MAX_NUM_SEQS:-1}
1414export VLLM_OPENVINO_DEVICE=${VLLM_OPENVINO_DEVICE:- CPU}
1515export VLLM_OPENVINO_KVCACHE_SPACE=${VLLM_OPENVINO_KVCACHE_SPACE:- 8}
1616export VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=${VLLM_OPENVINO_CPU_KV_CACHE_PRECISION:- u8}
17- export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=${VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS:- ON}
1817
1918echo -e " Using the following configuration:"
2019echo -e " - VLLM_OPENVINO_DEVICE: ${VLLM_OPENVINO_DEVICE} "
2120echo -e " - VLLM_OPENVINO_KVCACHE_SPACE: ${VLLM_OPENVINO_KVCACHE_SPACE} "
2221echo -e " - VLLM_OPENVINO_CPU_KV_CACHE_PRECISION: ${VLLM_OPENVINO_CPU_KV_CACHE_PRECISION} "
23- echo -e " - VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS: ${VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS} "
2422echo -e " - DEFAULT_MODEL_ID: ${DEFAULT_MODEL_ID} "
2523echo -e " - MODEL_PATH: ${MODEL_PATH} "
2624echo -e " - MODEL_PRECISION: ${MODEL_PRECISION} "
4543if [ ! -f " $MODEL_PATH /openvino_model.xml" ]; then
4644 echo -e " Model file does not exist: $MODEL_PATH /openvino_model.xml. Please export the model first and save to $MODEL_PATH "
4745 exit 1
48- else
49- echo -e " Model file available. Setting VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS to OFF ..."
50- unset VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
5146fi
5247
5348echo -e " Starting OpenVINO VLLM service ..."
You can’t perform that action at this time.
0 commit comments