File tree Expand file tree Collapse file tree 2 files changed +74
-0
lines changed
Expand file tree Collapse file tree 2 files changed +74
-0
---
# ModelCatalog entry: DeepSeek-R1-Distill-Qwen-14B served via vLLM.
# NOTE(review): this document was recovered from a diff scrape that stripped
# all indentation — the nesting below is reconstructed; verify it against the
# ModelCatalog schema before merging.
apiVersion: v1
kind: ModelCatalog
metadata:
  name: deepseek-r1-distill-qwen-14b
  display_name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
  labels:
    icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/deepseek-ai.png'
    hf_repo_url: 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B'
spec:
  model:
    # Empty registry: pull straight from the default source (the HF repo above).
    registry: ''
    name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
    # First shard of the safetensors checkpoint; DeepSeek repos pad the shard
    # total to six digits ("000004") — TODO confirm against the HF repo listing.
    file: model-00001-of-000004.safetensors
    version: latest
    task: text-generation
  engine:
    engine: vllm
    version: v1
    resources: {}
  replicas:
    num: 1
  deployment_options:
    scheduler:
      type: consistent_hash
      virtual_nodes: 150
      load_factor: 1.25
    variables:
      RAY_SCHEDULER_TYPE: consistent_hash
  engine_args:
    tensor_parallel_size: 1
    max_model_len: 32768
    # enforce_eager disables CUDA graph capture — trades some throughput for
    # lower startup memory.
    enforce_eager: true
    gpu_memory_utilization: 0.95
    enable_chunked_prefill: true
    served_model_name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
    tool_call_parser: hermes
    # deepseek_r1 parser splits <think>…</think> reasoning from final answers.
    reasoning_parser: deepseek_r1
    enable_reasoning: true
---
# ModelCatalog entry: Qwen2.5-VL-3B-Instruct served via vLLM.
# NOTE(review): this document was recovered from a diff scrape that stripped
# all indentation — the nesting below is reconstructed; verify it against the
# ModelCatalog schema before merging.
apiVersion: v1
kind: ModelCatalog
metadata:
  name: qwen2-5-vl-3b-instruct
  display_name: Qwen/Qwen2.5-VL-3B-Instruct
  labels:
    icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/Qwen.png'
    hf_repo_url: 'https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct'
spec:
  model:
    # Empty registry: pull straight from the default source (the HF repo above).
    registry: ''
    name: Qwen/Qwen2.5-VL-3B-Instruct
    # First of two safetensors shards (standard transformers 5-digit padding).
    file: model-00001-of-00002.safetensors
    version: latest
    # NOTE(review): this is a vision-language model; catalog pins it as
    # text-generation — confirm that is the intended task label.
    task: text-generation
  engine:
    engine: vllm
    version: v1
    resources: {}
  replicas:
    num: 1
  deployment_options:
    scheduler:
      type: consistent_hash
      virtual_nodes: 150
      load_factor: 1.25
    variables:
      RAY_SCHEDULER_TYPE: consistent_hash
  engine_args:
    tensor_parallel_size: 1
    max_model_len: 32768
    # enforce_eager disables CUDA graph capture — trades some throughput for
    # lower startup memory.
    enforce_eager: true
    gpu_memory_utilization: 0.95
    enable_chunked_prefill: true
    served_model_name: Qwen/Qwen2.5-VL-3B-Instruct
    tool_call_parser: hermes
You can’t perform that action at this time.
0 commit comments