Skip to content

Commit e1745a5

Browse files
authored
gemma hotfix for accuracy with V1, update vllm image and start args (#419)
1 parent fb64e58 commit e1745a5

File tree

1 file changed: +2 additions, -4 deletions

gemma/gemma-3-27b-it/config.yaml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
 base_image:
-  image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:f5d3acd47466f094beb36f7a5d05520466713f93
+  image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:8a4a2efc6fc32cdc30e4e35ba3f8c64dcd0aa1d0
 build_commands:
 - pip install git+https://github.com/huggingface/transformers@071a161d3e38f56dbda2743b979f0afeed2cd4f1
 model_metadata:
@@ -30,9 +30,7 @@ model_metadata:
   tags:
   - openai-compatible
 docker_server:
-  start_command: sh -c "VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token) vllm
-    serve google/gemma-3-27b-it --served-model-name gemma --max-num-seqs 8 --max-model-len
-    16384 --limit_mm_per_prompt 'image=1' --gpu-memory-utilization 0.95"
+  start_command: "sh -c \"VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve google/gemma-3-27b-it --served-model-name gemma --max-num-seqs 8 --max-model-len 16384 --limit_mm_per_prompt 'image=1' --hf-overrides '{\\\"do_pan_and_scan\\\": true}' --gpu-memory-utilization 0.95\""
   readiness_endpoint: /health
   liveness_endpoint: /health
   predict_endpoint: /v1/chat/completions

0 commit comments

Comments (0)