File tree Expand file tree Collapse file tree 2 files changed +0
-4
lines changed
usecases/ai/microservices/text-generation/vllm Expand file tree Collapse file tree 2 files changed +0
-4
lines changed Original file line number Diff line number Diff line change @@ -39,7 +39,6 @@ docker run -it --rm \
3939 -e MAX_NUM_SEQS=1 \
4040 -e VLLM_OPENVINO_DEVICE=CPU \
4141 -e VLLM_OPENVINO_KVCACHE_SPACE=4 \
42- -e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 \
4342 -v ov-vllm:/usr/src/app/data \
4443 ov-vllm
4544```
@@ -59,7 +58,6 @@ docker run -it --rm \
5958 -e GPU_MEMORY_UTILIZATION=0.9 \
6059 -e VLLM_OPENVINO_DEVICE=GPU \
6160 -e VLLM_OPENVINO_KVCACHE_SPACE=4 \
62- -e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 \
6361 -v ov-vllm:/usr/src/app/data \
6462 ov-vllm
6563```
Original file line number Diff line number Diff line change @@ -13,12 +13,10 @@ export GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.9}
1313export MAX_NUM_SEQS=${MAX_NUM_SEQS:- 1}
1414export VLLM_OPENVINO_DEVICE=${VLLM_OPENVINO_DEVICE:- CPU}
1515export VLLM_OPENVINO_KVCACHE_SPACE=${VLLM_OPENVINO_KVCACHE_SPACE:- 8}
16- export VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=${VLLM_OPENVINO_CPU_KV_CACHE_PRECISION:- u8}
1716
1817echo -e " Using the following configuration:"
1918echo -e " - VLLM_OPENVINO_DEVICE: ${VLLM_OPENVINO_DEVICE} "
2019echo -e " - VLLM_OPENVINO_KVCACHE_SPACE: ${VLLM_OPENVINO_KVCACHE_SPACE} "
21- echo -e " - VLLM_OPENVINO_CPU_KV_CACHE_PRECISION: ${VLLM_OPENVINO_CPU_KV_CACHE_PRECISION} "
2220echo -e " - DEFAULT_MODEL_ID: ${DEFAULT_MODEL_ID} "
2321echo -e " - MODEL_PATH: ${MODEL_PATH} "
2422echo -e " - MODEL_PRECISION: ${MODEL_PRECISION} "
You can’t perform that action at this time.
0 commit comments