
Commit 9c38664

Merge pull request #145 from intel/update-branch
feat: update Dockerfile and README for OpenVINO versioning; adjust cache space and volume handling (#392)
2 parents 2d617f4 + 6ae84a7 commit 9c38664

File tree

3 files changed: +10, -16 lines


usecases/ai/microservices/text-generation/vllm/Dockerfile

Lines changed: 4 additions & 3 deletions

@@ -4,6 +4,7 @@
 FROM debian:12-slim
 ARG DEBIAN_FRONTEND=noninteractive
 ARG VLLM_VERSION=v0.6.6
+ARG OPENVINO_VERSION=2024.6.0
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 RUN apt-get update \
     && apt-get upgrade -y \
@@ -28,7 +29,7 @@ RUN apt-get update \
     && adduser --system --ingroup intel --uid 1000 --home /home/intel intel \
     && echo "intel ALL=(ALL:ALL) NOPASSWD:ALL" > /etc/sudoers.d/intel \
     && rm -rf /var/lib/apt/lists/* \
-    && mkdir -p /usr/src \
+    && mkdir -p /usr/src/app/data \
     && chown -R intel:intel /usr/src

 USER intel
@@ -46,8 +47,8 @@ RUN git checkout ${VLLM_VERSION} \
     optimum-intel[openvino,nncf]==1.21.0 \
     optimum==1.23.3 \
     transformers==4.46.3 \
-    openvino==2025.0.0 \
-    openvino_genai==2025.0.0
+    openvino==${OPENVINO_VERSION} \
+    openvino_genai==${OPENVINO_VERSION}

 WORKDIR /usr/src/app
 RUN opt_in_out --opt_out
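
The new OPENVINO_VERSION build argument makes it possible to rebuild against a different OpenVINO release without editing the Dockerfile. A minimal sketch, assuming the image is built from this directory and tagged `ov-vllm` as in the README; the override values shown simply repeat the pinned defaults and are illustrative only:

```bash
# Build with the defaults baked into the Dockerfile
docker build -t ov-vllm .

# Or pass explicit versions via build args (values shown match the current pins)
docker build \
  --build-arg OPENVINO_VERSION=2024.6.0 \
  --build-arg VLLM_VERSION=v0.6.6 \
  -t ov-vllm .
```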

usecases/ai/microservices/text-generation/vllm/README.md

Lines changed: 6 additions & 8 deletions

@@ -38,10 +38,9 @@ docker run -it --rm \
     -e MAX_MODEL_LEN=2048 \
     -e MAX_NUM_SEQS=1 \
     -e VLLM_OPENVINO_DEVICE=CPU \
-    -e VLLM_OPENVINO_KVCACHE_SPACE=8 \
+    -e VLLM_OPENVINO_KVCACHE_SPACE=4 \
     -e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 \
-    -e VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON \
-    -v ./data:/usr/src/app/data \
+    -v ov-vllm:/usr/src/app/data \
     ov-vllm
 ```

@@ -59,10 +58,9 @@ docker run -it --rm \
     -e MAX_NUM_SEQS=1 \
     -e GPU_MEMORY_UTILIZATION=0.9 \
     -e VLLM_OPENVINO_DEVICE=GPU \
-    -e VLLM_OPENVINO_KVCACHE_SPACE=8 \
+    -e VLLM_OPENVINO_KVCACHE_SPACE=4 \
     -e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 \
-    -e VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON \
-    -v ./data:/usr/src/app/data \
+    -v ov-vllm:/usr/src/app/data \
     ov-vllm
 ```

@@ -111,9 +109,9 @@ curl "http://localhost:8000/v1/chat/completions" \
 ```

 ### 2. How can I change the default model after it has been run once?
-1. Delete the existing model located in `./data/ov_model`.
+1. Delete the volume for the container.
    ```bash
-   rm -rf ./data/ov_model
+   docker volume rm ov-vllm
    ```
 2. Rerun the `docker run` command to load and quantize the new model.
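
With the `./data` bind mount replaced by the `ov-vllm` named volume, the exported model now lives in Docker-managed storage instead of the working directory. A short sketch using standard Docker volume commands, assuming the volume name `ov-vllm` from the `-v` flag above:

```bash
# Docker creates the named volume on first run; this shows where it lives on the host
docker volume inspect ov-vllm

# To switch models (per the updated FAQ), remove the volume and rerun `docker run`
docker volume rm ov-vllm
```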

usecases/ai/microservices/text-generation/vllm/entrypoint.sh

Lines changed: 0 additions & 5 deletions

@@ -14,13 +14,11 @@ export MAX_NUM_SEQS=${MAX_NUM_SEQS:-1}
 export VLLM_OPENVINO_DEVICE=${VLLM_OPENVINO_DEVICE:-CPU}
 export VLLM_OPENVINO_KVCACHE_SPACE=${VLLM_OPENVINO_KVCACHE_SPACE:-8}
 export VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=${VLLM_OPENVINO_CPU_KV_CACHE_PRECISION:-u8}
-export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=${VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS:-ON}

 echo -e "Using the following configuration:"
 echo -e "- VLLM_OPENVINO_DEVICE: ${VLLM_OPENVINO_DEVICE}"
 echo -e "- VLLM_OPENVINO_KVCACHE_SPACE: ${VLLM_OPENVINO_KVCACHE_SPACE}"
 echo -e "- VLLM_OPENVINO_CPU_KV_CACHE_PRECISION: ${VLLM_OPENVINO_CPU_KV_CACHE_PRECISION}"
-echo -e "- VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS: ${VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS}"
 echo -e "- DEFAULT_MODEL_ID: ${DEFAULT_MODEL_ID}"
 echo -e "- MODEL_PATH: ${MODEL_PATH}"
 echo -e "- MODEL_PRECISION: ${MODEL_PRECISION}"
@@ -45,9 +43,6 @@ fi
 if [ ! -f "$MODEL_PATH/openvino_model.xml" ]; then
     echo -e "Model file does not exist: $MODEL_PATH/openvino_model.xml. Please export the model first and save to $MODEL_PATH"
     exit 1
-else
-    echo -e "Model file available. Setting VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS to OFF ..."
-    unset VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
 fi

 echo -e "Starting OpenVINO VLLM service ..."
