Skip to content

Commit 6c9ec36

Browse files
Update truss for gemma-3-27b-it (#530)
Note: this model's base image expects to be run as root. To prevent the model from failing due to HF rate limits etc.: * Add a model cache. * Add and use truss-transfer.
1 parent e8827dd commit 6c9ec36

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

gemma/gemma-3-27b-it/config.yaml

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,14 +30,19 @@ model_metadata:
3030
tags:
3131
- openai-compatible
3232
docker_server:
33-
start_command: "sh -c \"VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve google/gemma-3-27b-it --served-model-name gemma --max-num-seqs 8 --max-model-len 16384 --limit_mm_per_prompt 'image=1' --hf-overrides '{\\\"do_pan_and_scan\\\": true}' --gpu-memory-utilization 0.95\""
33+
start_command: "sh -c \"truss-transfer-cli && VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve /app/model_cache/gemma --served-model-name gemma --max-num-seqs 8 --max-model-len 16384 --limit_mm_per_prompt 'image=1' --hf-overrides '{\\\"do_pan_and_scan\\\": true}' --gpu-memory-utilization 0.95\""
3434
readiness_endpoint: /health
3535
liveness_endpoint: /health
3636
predict_endpoint: /v1/chat/completions
3737
server_port: 8000
3838
environment_variables:
3939
VLLM_LOGGING_LEVEL: INFO
4040
hf_access_token: null
41+
model_cache:
42+
- repo_id: google/gemma-3-27b-it
43+
revision: 005ad3404e59d6023443cb575daa05336842228a
44+
use_volume: true
45+
volume_folder: gemma
4146
requirements:
4247
- huggingface_hub
4348
- hf_transfer

0 commit comments

Comments (0)