# Diff summary: 1 file changed, +36 -0 lines changed.
---
# Model catalog entry for Qwen/Qwen3-32B-FP8, served with the vLLM engine.
#
# NOTE(review): this document was recovered from a flattened diff view in which
# all indentation had been lost. The nesting below is reconstructed from key
# order and naming; confirm it against the ModelCatalog schema before use.
apiVersion: v1
kind: ModelCatalog
metadata:
  name: qwen3-32b-fp8
  display_name: Qwen/Qwen3-32B-FP8
  labels:
    # URLs are written without the leading space that appeared after the
    # opening quote in the extracted text (presumably an extraction artifact).
    icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/Qwen.png'
    hf_repo_url: 'https://huggingface.co/Qwen/Qwen3-32B-FP8'
spec:
  model:
    # NOTE(review): assumed to be an empty string; the extracted text showed
    # a single space between the quotes. Confirm the intended value.
    registry: ''
    name: Qwen/Qwen3-32B-FP8
    file: model-00001-of-00007.safetensors
    version: latest
    task: text-generation
  engine:
    engine: vllm
    version: v1
  # Explicit empty mapping (not null): no resource settings are given here.
  resources: {}
  replicas:
    num: 1
  deployment_options:
    scheduler:
      type: consistent_hash
      virtual_nodes: 150
      load_factor: 1.25
  variables:
    # Carries the same value as the scheduler type above.
    RAY_SCHEDULER_TYPE: consistent_hash
    # NOTE(review): placement of engine_args under variables is inferred from
    # key order; verify against the consumer's schema.
    engine_args:
      tensor_parallel_size: 1
      max_model_len: 32768
      enforce_eager: true
      gpu_memory_utilization: 0.95
      enable_chunked_prefill: true
      served_model_name: Qwen/Qwen3-32B-FP8
      tool_call_parser: hermes
You can’t perform that action at this time.
0 commit comments