File tree Expand file tree Collapse file tree 1 file changed +6
-2
lines changed Expand file tree Collapse file tree 1 file changed +6
-2
lines changed Original file line number Diff line number Diff line change 1
1
base_image:
2
- image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:d3286757f63d1baeccb34cb7dd272cfdc87e0952
2
+ image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:f5d3acd47466f094beb36f7a5d05520466713f93
3
3
build_commands:
4
- - pip install git+https://github.com/huggingface/transformers@994cad2790af71d87c1cdd459a8484dada2c7115
4
+ - pip install git+https://github.com/huggingface/transformers@071a161d3e38f56dbda2743b979f0afeed2cd4f1
5
5
model_metadata:
6
6
repo_id: google/gemma-3-27b-it
7
7
example_model_input: {
@@ -27,6 +27,8 @@ model_metadata:
27
27
"max_tokens": 512,
28
28
"temperature": 0.5
29
29
}
30
+ tags:
31
+ - openai-compatible
30
32
docker_server:
31
33
start_command: sh -c "VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token) vllm
32
34
serve google/gemma-3-27b-it --served-model-name gemma --max-num-seqs 8 --max-model-len
@@ -45,6 +47,8 @@ requirements:
45
47
resources:
46
48
accelerator: H100
47
49
use_gpu: true
50
+ secrets:
51
+ hf_access_token: null
48
52
runtime:
49
53
health_checks:
50
54
restart_check_delay_seconds: 300 # Waits 5 minutes after deployment before starting health checks
You can’t perform that action at this time.
0 commit comments