File tree Expand file tree Collapse file tree 2 files changed +74
-0
lines changed
Expand file tree Collapse file tree 2 files changed +74
-0
---
# ModelCatalog entry: DeepSeek-R1-Distill-Qwen-14B served via vLLM.
# NOTE(review): this document was recovered from a diff scrape that stripped
# all indentation — the nesting below is reconstructed; verify it against the
# ModelCatalog schema before merging.
apiVersion: v1
kind: ModelCatalog
metadata:
  name: deepseek-r1-distill-qwen-14b
  display_name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
  labels:
    icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/deepseek-ai.png'
    hf_repo_url: 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B'
spec:
  model:
    # Empty registry: pull straight from the default source (the HF repo above).
    registry: ''
    name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
    # First shard of the safetensors checkpoint; DeepSeek repos pad the shard
    # total to six digits ("000004") — TODO confirm against the HF repo listing.
    file: model-00001-of-000004.safetensors
    version: latest
    task: text-generation
  engine:
    engine: vllm
    version: v1
    resources: {}
  replicas:
    num: 1
  deployment_options:
    scheduler:
      type: consistent_hash
      virtual_nodes: 150
      load_factor: 1.25
    variables:
      RAY_SCHEDULER_TYPE: consistent_hash
  engine_args:
    tensor_parallel_size: 1
    max_model_len: 32768
    # enforce_eager disables CUDA graph capture — trades some throughput for
    # lower startup memory.
    enforce_eager: true
    gpu_memory_utilization: 0.95
    enable_chunked_prefill: true
    served_model_name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
    tool_call_parser: hermes
    # deepseek_r1 parser splits <think>…</think> reasoning from final answers.
    reasoning_parser: deepseek_r1
    enable_reasoning: true
---
# ModelCatalog entry: Qwen2.5-VL-3B-Instruct served via vLLM.
# NOTE(review): this document was recovered from a diff scrape that stripped
# all indentation — the nesting below is reconstructed; verify it against the
# ModelCatalog schema before merging.
apiVersion: v1
kind: ModelCatalog
metadata:
  name: qwen2-5-vl-3b-instruct
  display_name: Qwen/Qwen2.5-VL-3B-Instruct
  labels:
    icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/Qwen.png'
    hf_repo_url: 'https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct'
spec:
  model:
    # Empty registry: pull straight from the default source (the HF repo above).
    registry: ''
    name: Qwen/Qwen2.5-VL-3B-Instruct
    # First of two safetensors shards (standard transformers 5-digit padding).
    file: model-00001-of-00002.safetensors
    version: latest
    # NOTE(review): this is a vision-language model; catalog pins it as
    # text-generation — confirm that is the intended task label.
    task: text-generation
  engine:
    engine: vllm
    version: v1
    resources: {}
  replicas:
    num: 1
  deployment_options:
    scheduler:
      type: consistent_hash
      virtual_nodes: 150
      load_factor: 1.25
    variables:
      RAY_SCHEDULER_TYPE: consistent_hash
  engine_args:
    tensor_parallel_size: 1
    max_model_len: 32768
    # enforce_eager disables CUDA graph capture — trades some throughput for
    # lower startup memory.
    enforce_eager: true
    gpu_memory_utilization: 0.95
    enable_chunked_prefill: true
    served_model_name: Qwen/Qwen2.5-VL-3B-Instruct
    tool_call_parser: hermes
You can’t perform that action at this time.
0 commit comments