---
# Deployment config for mistral/mistral-small-3.1
# (mistralai/Mistral-Small-3.1-24B-Instruct-2503 served through vLLM's
# OpenAI-compatible chat-completions endpoint).
#
# Reference invocation (two GPUs, up to 10 images per prompt):
#   vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 \
#     --tokenizer_mode mistral --config_format mistral --load_format mistral \
#     --tool-call-parser mistral --enable-auto-tool-choice \
#     --limit_mm_per_prompt 'image=10' --tensor-parallel-size 2
#
# The command actually deployed (docker_server.start_command below) differs
# from the reference: it targets a single GPU (--tensor-parallel-size 1,
# matching resources.accelerator) and allows one image per prompt
# (--limit_mm_per_prompt 'image=1').

base_image:
  # Image is pinned to a specific post-merge build by commit SHA.
  image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:c0efdd655b4ce9188f93b0030dcdebcf43858914

build_commands:
  # transformers is installed from a pinned commit rather than a release.
  - pip install git+https://github.com/huggingface/transformers@cbfb8d7b27b4724f60c4085842f5150dbd3b41f3

model_metadata:
  repo_id: mistralai/Mistral-Small-3.1-24B-Instruct-2503
  # Sample request body for predict_endpoint. `model` must equal the value
  # passed to --served-model-name in docker_server.start_command, and the
  # request carries exactly one image, within the 'image=1' limit.
  example_model_input:
    model: mistral
    messages:
      - role: user
        content:
          - type: text
            text: Describe this image in one sentence.
          - type: image_url
            image_url:
              url: https://picsum.photos/id/237/200/300
    stream: true
    max_tokens: 512
    temperature: 0.5
  tags:
    - openai-compatible

docker_server:
  # Folded scalar: the lines below are joined with single spaces into one
  # shell command. Keep every continuation line at the same indentation;
  # a more-indented line would introduce a literal newline.
  # HF_TOKEN is read from the mounted secret file at container start.
  start_command: >-
    sh -c "VLLM_USE_V1=1 HF_TOKEN=$(cat /secrets/hf_access_token)
    vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503
    --tokenizer_mode mistral
    --config_format mistral
    --load_format mistral
    --tool-call-parser mistral
    --enable-auto-tool-choice
    --served-model-name mistral
    --max-num-seqs 8
    --max-model-len 16384
    --limit_mm_per_prompt 'image=1'
    --tensor-parallel-size 1
    --gpu-memory-utilization 0.95"
  readiness_endpoint: /health
  liveness_endpoint: /health
  predict_endpoint: /v1/chat/completions
  server_port: 8000

environment_variables:
  VLLM_LOGGING_LEVEL: INFO
  # NOTE(review): the same name is also declared under `secrets`, and the
  # start command reads the token from /secrets/hf_access_token rather than
  # from this variable. Confirm whether this entry is still needed.
  hf_access_token: null

requirements:
  - huggingface_hub
  - hf_transfer
  - datasets

resources:
  # Single accelerator; keep in sync with --tensor-parallel-size above.
  accelerator: H100:1
  use_gpu: true

secrets:
  # Placeholder only; the real token is supplied at deploy time.
  hf_access_token: null

runtime:
  health_checks:
    # Waits 5 minutes after deployment before starting health checks.
    restart_check_delay_seconds: 300
    # Triggers a restart if health checks fail for 5 minutes.
    restart_threshold_seconds: 300
    # Stops traffic if health checks fail for 2 minutes.
    stop_traffic_threshold_seconds: 120
  # Same value as --max-num-seqs in the start command.
  predict_concurrency: 8

model_name: Mistral Small 3.1