File tree Expand file tree Collapse file tree 1 file changed +54
-0
lines changed Expand file tree Collapse file tree 1 file changed +54
-0
lines changed Original file line number Diff line number Diff line change
1
# Container image used as the base for this deployment, pinned to a specific
# image tag.
base_image:
  image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:d3286757f63d1baeccb34cb7dd272cfdc87e0952
# Extra build-time commands. Installs transformers from GitHub, pinned to an
# exact commit.
build_commands:
  - pip install git+https://github.com/huggingface/transformers@994cad2790af71d87c1cdd459a8484dada2c7115
# Model metadata, including a sample chat-completions request body.
model_metadata:
  repo_id: google/gemma-3-27b-it
  # Example request: one user message with a text part and an image URL part.
  example_model_input:
    # Same name the start command passes to --served-model-name.
    model: gemma
    messages:
      - role: user
        content:
          - type: text
            text: "Describe this image in one sentence."
          - type: image_url
            image_url:
              url: "https://picsum.photos/id/237/200/300"
    stream: true
    max_tokens: 512
    temperature: 0.5
# Server process settings: start command, health/predict endpoints, and port.
docker_server:
  # Folded scalar (>-): the line breaks below become single spaces, so the
  # command is one logical line. HF_TOKEN is read from the file
  # /secrets/hf_access_token when the shell starts.
  start_command: >-
    sh -c "VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token)
    vllm serve google/gemma-3-27b-it
    --served-model-name gemma
    --max-num-seqs 8
    --max-model-len 16384
    --limit_mm_per_prompt 'image=1'
    --gpu-memory-utilization 0.95"
  readiness_endpoint: /health
  liveness_endpoint: /health
  predict_endpoint: /v1/chat/completions
  server_port: 8000
# Environment variables for the deployment.
environment_variables:
  VLLM_LOGGING_LEVEL: INFO
  # NOTE(review): indentation was lost in the copy this was recovered from, and
  # the start command reads /secrets/hf_access_token. Confirm this key belongs
  # under environment_variables rather than under a secrets mapping.
  hf_access_token: null
# Python packages listed as requirements (no versions are pinned here).
requirements:
  - huggingface_hub
  - hf_transfer
  - datasets
# Hardware requested for the deployment.
resources:
  accelerator: H100
  use_gpu: true
# Runtime behaviour: health-check timing and request concurrency.
runtime:
  health_checks:
    # Waits 5 minutes after deployment before starting health checks.
    restart_check_delay_seconds: 300
    # Triggers a restart if health checks fail for 5 minutes.
    restart_threshold_seconds: 300
    # Stops traffic if health checks fail for 2 minutes.
    stop_traffic_threshold_seconds: 120
  # Same value as --max-num-seqs in the server start command.
  predict_concurrency: 8
# Display name for the model.
# NOTE(review): repo_id is google/gemma-3-27b-it; confirm whether the name
# should mention the Gemma 3 generation.
model_name: Gemma 27B Instruct
You can’t perform that action at this time.
0 commit comments