File tree Expand file tree Collapse file tree 1 file changed +2 -4
lines changed Original file line number Diff line number Diff line change 1
1
base_image :
2
- image : public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:f5d3acd47466f094beb36f7a5d05520466713f93
2
+ image : public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:8a4a2efc6fc32cdc30e4e35ba3f8c64dcd0aa1d0
3
3
build_commands :
4
4
- pip install git+https://github.com/huggingface/transformers@071a161d3e38f56dbda2743b979f0afeed2cd4f1
5
5
model_metadata :
@@ -30,9 +30,7 @@ model_metadata:
30
30
tags :
31
31
- openai-compatible
32
32
docker_server :
33
- start_command : sh -c "VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token) vllm
34
- serve google/gemma-3-27b-it --served-model-name gemma --max-num-seqs 8 --max-model-len
35
- 16384 --limit_mm_per_prompt 'image=1' --gpu-memory-utilization 0.95"
33
+ start_command : "sh -c \"VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve google/gemma-3-27b-it --served-model-name gemma --max-num-seqs 8 --max-model-len 16384 --limit_mm_per_prompt 'image=1' --hf-overrides '{\\\"do_pan_and_scan\\\": true}' --gpu-memory-utilization 0.95\""
36
34
readiness_endpoint : /health
37
35
liveness_endpoint : /health
38
36
predict_endpoint : /v1/chat/completions
You can’t perform that action at this time.
0 commit comments