Skip to content

Commit c5dbc67

Browse files
committed
update data
1 parent cd6b117 commit c5dbc67

File tree

2 files changed

+74
-0
lines changed

2 files changed

+74
-0
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
# ModelCatalog entry (apiVersion v1) named deepseek-r1-distill-qwen-14b.
# It points at the model deepseek-ai/DeepSeek-R1-Distill-Qwen-14B and selects
# the vllm engine with a single replica.
# NOTE(review): key nesting (indentation) is not visible in this view; the
# parent/child relationships of the keys below should be confirmed against
# the committed file.
1+
apiVersion: v1
2+
kind: ModelCatalog
3+
metadata:
4+
name: deepseek-r1-distill-qwen-14b
5+
display_name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
6+
labels:
7+
icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/deepseek-ai.png'
8+
hf_repo_url: 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B'
9+
spec:
10+
model:
11+
# Registry is an explicit empty string here.
# NOTE(review): presumably this selects a default model source - TODO confirm.
registry: ''
12+
name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
13+
# NOTE(review): the shard index is written with five digits ("00001") but the
# shard total with six ("000004"). Confirm against the repository file listing
# before normalizing; the value must match the published file name exactly.
file: model-00001-of-000004.safetensors
14+
version: latest
15+
task: text-generation
16+
engine:
17+
engine: vllm
18+
version: v1
19+
# Empty mapping: no resource settings are declared in this entry.
resources: {}
20+
replicas:
21+
num: 1
22+
deployment_options:
23+
scheduler:
24+
type: consistent_hash
25+
virtual_nodes: 150
26+
load_factor: 1.25
27+
variables:
28+
# Carries the same value as the scheduler type above.
# NOTE(review): presumably the two must be kept in sync - verify with the consumer.
RAY_SCHEDULER_TYPE: consistent_hash
29+
engine_args:
30+
tensor_parallel_size: 1
31+
max_model_len: 32768
32+
enforce_eager: true
33+
gpu_memory_utilization: 0.95
34+
enable_chunked_prefill: true
35+
# Same string as the model name above.
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
36+
# NOTE(review): vLLM documents the tool-call parser together with an
# auto-tool-choice switch; no such key appears in this entry - confirm the
# platform supplies it if tool calling is expected to work.
tool_call_parser: hermes
37+
reasoning_parser: deepseek_r1
38+
# NOTE(review): newer vLLM releases reportedly deprecate a separate
# enable-reasoning switch in favour of the reasoning parser alone - confirm
# against the deployed engine version.
enable_reasoning: true
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
# ModelCatalog entry (apiVersion v1) named qwen2-5-vl-3b-instruct.
# It points at the model Qwen/Qwen2.5-VL-3B-Instruct and selects the vllm
# engine with a single replica.
# NOTE(review): key nesting (indentation) is not visible in this view; the
# parent/child relationships of the keys below should be confirmed against
# the committed file.
1+
apiVersion: v1
2+
kind: ModelCatalog
3+
metadata:
4+
name: qwen2-5-vl-3b-instruct
5+
display_name: Qwen/Qwen2.5-VL-3B-Instruct
6+
labels:
7+
icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/Qwen.png'
8+
hf_repo_url: 'https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct'
9+
spec:
10+
model:
11+
# Registry is an explicit empty string here.
# NOTE(review): presumably this selects a default model source - TODO confirm.
registry: ''
12+
name: Qwen/Qwen2.5-VL-3B-Instruct
13+
file: model-00001-of-00002.safetensors
14+
version: latest
15+
# NOTE(review): the model name contains "VL"; confirm that text-generation is
# the intended task value for this entry.
task: text-generation
16+
engine:
17+
engine: vllm
18+
version: v1
19+
# Empty mapping: no resource settings are declared in this entry.
resources: {}
20+
replicas:
21+
num: 1
22+
deployment_options:
23+
scheduler:
24+
type: consistent_hash
25+
virtual_nodes: 150
26+
load_factor: 1.25
27+
variables:
28+
# Carries the same value as the scheduler type above.
# NOTE(review): presumably the two must be kept in sync - verify with the consumer.
RAY_SCHEDULER_TYPE: consistent_hash
29+
engine_args:
30+
tensor_parallel_size: 1
31+
max_model_len: 32768
32+
enforce_eager: true
33+
gpu_memory_utilization: 0.95
34+
enable_chunked_prefill: true
35+
# Same string as the model name above.
served_model_name: Qwen/Qwen2.5-VL-3B-Instruct
36+
# NOTE(review): vLLM documents the tool-call parser together with an
# auto-tool-choice switch; no such key appears in this entry - confirm the
# platform supplies it if tool calling is expected to work.
tool_call_parser: hermes

0 commit comments

Comments
 (0)