Skip to content

Commit d8e4ca3

Browse files
committed
update qwen3-32b
1 parent 3685e6a commit d8e4ca3

File tree

1 file changed

+36
-0
lines changed

1 file changed

+36
-0
lines changed

catalog/qwen3-32b-fp8.yaml

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
# Catalog entry (kind ModelCatalog) for Qwen/Qwen3-32B-FP8 on the vllm engine.
#
# NOTE(review): the extracted view of this file had lost all indentation and
# carried diff gutter markers ("1+", "2+", ...) between lines. The nesting
# below is reconstructed from key order; every key and value is unchanged.
# Confirm the structure against the ModelCatalog schema, in particular
# whether `engine_args` belongs under `variables` (as written here) or
# directly under `spec`.
apiVersion: v1
kind: ModelCatalog
metadata:
  name: qwen3-32b-fp8
  display_name: Qwen/Qwen3-32B-FP8
  labels:
    icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/Qwen.png'
    hf_repo_url: 'https://huggingface.co/Qwen/Qwen3-32B-FP8'
spec:
  model:
    # Explicit empty string, not null.
    registry: ''
    name: Qwen/Qwen3-32B-FP8
    # Filename indicates shard 1 of 7; presumably the consumer resolves the
    # remaining shards from the same repo -- TODO confirm.
    file: model-00001-of-00007.safetensors
    version: latest
    task: text-generation
  engine:
    engine: vllm
    version: v1
  # No explicit resource requests.
  resources: {}
  replicas:
    num: 1
  deployment_options:
    scheduler:
      type: consistent_hash
      virtual_nodes: 150
      load_factor: 1.25
  variables:
    # Same value as deployment_options.scheduler.type above; keep the two in
    # sync. NOTE(review): confirm which of them the consumer actually reads.
    RAY_SCHEDULER_TYPE: consistent_hash
    # Presumably forwarded to the vllm engine as its engine arguments --
    # TODO confirm.
    engine_args:
      tensor_parallel_size: 1
      max_model_len: 32768
      enforce_eager: true
      gpu_memory_utilization: 0.95
      enable_chunked_prefill: true
      served_model_name: Qwen/Qwen3-32B-FP8
      # NOTE(review): in vLLM a tool-call parser normally only takes effect
      # together with enable_auto_tool_choice; verify whether the platform
      # sets that implicitly or whether it must be added here.
      tool_call_parser: hermes

0 commit comments

Comments (0)