Skip to content

Commit 185b893

Browse files
authored
added gemma 3 (#416)
1 parent efe19de commit 185b893

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed

gemma/gemma-3-27b-it/config.yaml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
# Truss deployment config for google/gemma-3-27b-it served via vLLM.
# Base image pins a specific vLLM CI build; transformers is pinned to the
# commit that added Gemma 3 support.
base_image:
  image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:d3286757f63d1baeccb34cb7dd272cfdc87e0952
build_commands:
- pip install git+https://github.com/huggingface/transformers@994cad2790af71d87c1cdd459a8484dada2c7115
model_metadata:
  repo_id: google/gemma-3-27b-it
  # Sample OpenAI-style chat request (text + image) for the /v1/chat/completions endpoint.
  example_model_input:
    model: gemma
    messages:
    - role: user
      content:
      - type: text
        text: Describe this image in one sentence.
      - type: image_url
        image_url:
          url: https://picsum.photos/id/237/200/300
    stream: true
    max_tokens: 512
    temperature: 0.5
docker_server:
  # Folded scalar: lines join with single spaces into one shell command.
  # HF token is read from the mounted secret at container start.
  start_command: >-
    sh -c "VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token) vllm
    serve google/gemma-3-27b-it --served-model-name gemma --max-num-seqs 8
    --max-model-len 16384 --limit_mm_per_prompt 'image=1'
    --gpu-memory-utilization 0.95"
  readiness_endpoint: /health
  liveness_endpoint: /health
  predict_endpoint: /v1/chat/completions
  server_port: 8000
environment_variables:
  VLLM_LOGGING_LEVEL: INFO
  # Placeholder; the actual token must be supplied as a secret at deploy time.
  hf_access_token: null
requirements:
- huggingface_hub
- hf_transfer
- datasets
resources:
  accelerator: H100
  use_gpu: true
runtime:
  health_checks:
    restart_check_delay_seconds: 300  # Waits 5 minutes after deployment before starting health checks
    restart_threshold_seconds: 300  # Triggers a restart if health checks fail for 5 minutes
    stop_traffic_threshold_seconds: 120  # Stops traffic if health checks fail for 2 minutes
  predict_concurrency: 8
model_name: Gemma 27B Instruct

0 commit comments

Comments (0)