Skip to content

Commit 02d38e9

Browse files
committed
refactor: remove VLLM_OPENVINO_CPU_KV_CACHE_PRECISION environment variable from README and entrypoint script (#396)
1 parent 9c38664 commit 02d38e9

File tree

2 files changed

+0
-4
lines changed

2 files changed

+0
-4
lines changed

usecases/ai/microservices/text-generation/vllm/README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ docker run -it --rm \
39 39   -e MAX_NUM_SEQS=1 \
40 40   -e VLLM_OPENVINO_DEVICE=CPU \
41 41   -e VLLM_OPENVINO_KVCACHE_SPACE=4 \
42    - -e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 \
43 42   -v ov-vllm:/usr/src/app/data \
44 43   ov-vllm
45 44   ```
@@ -59,7 +58,6 @@ docker run -it --rm \
59 58   -e GPU_MEMORY_UTILIZATION=0.9 \
60 59   -e VLLM_OPENVINO_DEVICE=GPU \
61 60   -e VLLM_OPENVINO_KVCACHE_SPACE=4 \
62    - -e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 \
63 61   -v ov-vllm:/usr/src/app/data \
64 62   ov-vllm
65 63   ```

usecases/ai/microservices/text-generation/vllm/entrypoint.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,10 @@ export GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.9}
13 13   export MAX_NUM_SEQS=${MAX_NUM_SEQS:-1}
14 14   export VLLM_OPENVINO_DEVICE=${VLLM_OPENVINO_DEVICE:-CPU}
15 15   export VLLM_OPENVINO_KVCACHE_SPACE=${VLLM_OPENVINO_KVCACHE_SPACE:-8}
16    - export VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=${VLLM_OPENVINO_CPU_KV_CACHE_PRECISION:-u8}
17 16
18 17   echo -e "Using the following configuration:"
19 18   echo -e "- VLLM_OPENVINO_DEVICE: ${VLLM_OPENVINO_DEVICE}"
20 19   echo -e "- VLLM_OPENVINO_KVCACHE_SPACE: ${VLLM_OPENVINO_KVCACHE_SPACE}"
21    - echo -e "- VLLM_OPENVINO_CPU_KV_CACHE_PRECISION: ${VLLM_OPENVINO_CPU_KV_CACHE_PRECISION}"
22 20   echo -e "- DEFAULT_MODEL_ID: ${DEFAULT_MODEL_ID}"
23 21   echo -e "- MODEL_PATH: ${MODEL_PATH}"
24 22   echo -e "- MODEL_PRECISION: ${MODEL_PRECISION}"

0 commit comments

Comments
 (0)